import numpy as np
# 2x3 integer array built from a nested list (two rows, three columns).
x = np.array([[1, 2, 3],[4, 5, 6]])
# shape is (number of rows, number of columns)
x.shape
(2, 3)
x[:, 0] # every row, column 0: the first column as a 1-D array
array([1, 4])
x[0,:] # row 0, every column: the first row as a 1-D array
array([1, 2, 3])
x[:,1] # every row, column 1: the second column as a 1-D array
array([2, 5])
np.zeros((3, 2), dtype=int, order='C') # 3x2 integer zeros stored in row-major (C-style) memory order
array([[0, 0], [0, 0], [0, 0]])
np.zeros((3, 2), dtype=int, order='F') # same values, stored in column-major (Fortran-style) memory order; only the layout differs
array([[0, 0], [0, 0], [0, 0]])
np.eye(3, 3, dtype = int, k = 1) # k selects which diagonal holds the ones: 0 (default) is the main diagonal, k > 0 is above it, k < 0 is below it
array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
# Rebind x to a 2x3 array of random integers from 0 to 9 (the bound 10 is exclusive).
# No seed is set, so the values differ from run to run.
x = np.random.randint(10, size=(2, 3))
x
array([[3, 5, 0], [7, 1, 2]])
# 4x4 array of floats sampled uniformly from [0, 1).
X = np.random.random((4, 4))
X.T # transpose: rows and columns swapped
array([[0.96594058, 0.5299669 , 0.39409202, 0.25514001], [0.97798272, 0.94243758, 0.33068925, 0.06422544], [0.89037585, 0.97994436, 0.33506156, 0.62903456], [0.01301274, 0.0088598 , 0.02122486, 0.06714273]])
# Lay the 16 elements of X out as a 16x1 column, reading X row by row.
Y = X.reshape(16, 1)
Y
array([[0.96594058], [0.97798272], [0.89037585], [0.01301274], [0.5299669 ], [0.94243758], [0.97994436], [0.0088598 ], [0.39409202], [0.33068925], [0.33506156], [0.02122486], [0.25514001], [0.06422544], [0.62903456], [0.06714273]])
x.reshape(3, 2) # regroup the six elements of the 2x3 array, read row by row, into 3 rows of 2; the result is not assigned, so x itself is unchanged
array([[3, 5], [0, 7], [1, 2]])
np.arange(1) # integers from 0 up to (but not including) 1, i.e. just [0]
array([0])
# Two 3x2 float arrays: A is all zeros, B is all ones.
A, B = np.zeros((3, 2)), np.ones((3, 2))
# The arithmetic operators act element-wise on arrays of the same shape.
# B/A divides by zero: NumPy does not raise, it emits a RuntimeWarning and
# fills the result with inf.
print(f"{A+B},\n {A-B},\n {A*B},\n {B/A},\n {A/B}")
[[1. 1.] [1. 1.] [1. 1.]], [[-1. -1.] [-1. -1.] [-1. -1.]], [[0. 0.] [0. 0.] [0. 0.]], [[inf inf] [inf inf] [inf inf]], [[0. 0.] [0. 0.] [0. 0.]]
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in true_divide """Entry point for launching an IPython kernel.
print(A+np.pi) # broadcasting: the scalar is added to every element of A
[[3.14159265 3.14159265] [3.14159265 3.14159265] [3.14159265 3.14159265]]
# Standard orthonormal basis of \mathbb{R}^3, each vector stored as a 1x3 row:
# offsetting the diagonal of a 1x3 "identity" by k puts the single 1 in column k.
e1, e2, e3 = (np.eye(1, 3, k=offset) for offset in range(3))
# For 2-D inputs np.dot is matrix multiplication, so (1x3)(3x1) gives a 1x1 result.
np.dot(e1, e2.T)
array([[0.]])
np.inner(e1, e2) # inner product taken over the last axis of each argument, so the row vectors need no transpose
array([[0.]])
np.outer(e1, e2) # outer product of the flattened inputs: entry (i, j) is e1[i] * e2[j], giving a 3x3 matrix
array([[0., 1., 0.], [0., 0., 0.], [0., 0., 0.]])
np.arange(len(e1)) # len() counts the first axis only; e1 has shape (1, 3), so this is arange(1)
array([0])
B.sum(axis=0) # collapse the rows: one total per column
array([3., 3.])
sum(B) # the built-in sum adds the rows of B together, which gives the same result as B.sum(axis=0)
array([3., 3.])
B.sum(axis=1) # collapse the columns: one total per row
array([2., 2., 2.])
import matplotlib.pyplot as plt
# Sample sin(x) on an evenly spaced grid over [0, 2*pi].
x = np.linspace(0, 2*np.pi)
y = np.sin(x)
# Wide figure with a grey background; the plt.plot call below draws into it.
fig = plt.figure(figsize=(16, 4), facecolor='grey',)
fig.suptitle("Sin(x)") # figure-level title: it is shared by all subplots when there are several
plt.plot(x, y)
plt.show()
plt.close()
# Same data drawn with "+" markers. No figure is created explicitly here, so
# pyplot opens a new default-sized one because the previous figure was closed.
plt.plot(x, y,"+")
plt.show()
plt.close()
import seaborn as sns #works with matplotlib
sns.set() # apply seaborn's default theme to the matplotlib figures drawn from here on
sns.lineplot(x=x, y=y) # the same sine curve, drawn through seaborn
plt.show()
plt.close()
import pandas as pd
# Load the California housing training set; the path is relative to the working directory.
data1 = pd.read_csv("sample_data/california_housing_train.csv")
# A negative n means "all rows except the last |n|", so this displays every row
# but the final one rather than a short preview.
data1.head(-1)
| | longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value |
---|---|---|---|---|---|---|---|---|---|
0 | -114.31 | 34.19 | 15.0 | 5612.0 | 1283.0 | 1015.0 | 472.0 | 1.4936 | 66900.0 |
1 | -114.47 | 34.40 | 19.0 | 7650.0 | 1901.0 | 1129.0 | 463.0 | 1.8200 | 80100.0 |
2 | -114.56 | 33.69 | 17.0 | 720.0 | 174.0 | 333.0 | 117.0 | 1.6509 | 85700.0 |
3 | -114.57 | 33.64 | 14.0 | 1501.0 | 337.0 | 515.0 | 226.0 | 3.1917 | 73400.0 |
4 | -114.57 | 33.57 | 20.0 | 1454.0 | 326.0 | 624.0 | 262.0 | 1.9250 | 65500.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
16994 | -124.25 | 40.28 | 32.0 | 1430.0 | 419.0 | 434.0 | 187.0 | 1.9417 | 76100.0 |
16995 | -124.26 | 40.58 | 52.0 | 2217.0 | 394.0 | 907.0 | 369.0 | 2.3571 | 111400.0 |
16996 | -124.27 | 40.69 | 36.0 | 2349.0 | 528.0 | 1194.0 | 465.0 | 2.5179 | 79000.0 |
16997 | -124.30 | 41.84 | 17.0 | 2677.0 | 531.0 | 1244.0 | 456.0 | 3.0313 | 103600.0 |
16998 | -124.30 | 41.80 | 19.0 | 2672.0 | 552.0 | 1298.0 | 478.0 | 1.9797 | 85800.0 |
16999 rows × 9 columns
data1.info() # prints the index range, column names, non-null counts, dtypes and memory usage
<class 'pandas.core.frame.DataFrame'> RangeIndex: 17000 entries, 0 to 16999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 longitude 17000 non-null float64 1 latitude 17000 non-null float64 2 housing_median_age 17000 non-null float64 3 total_rooms 17000 non-null float64 4 total_bedrooms 17000 non-null float64 5 population 17000 non-null float64 6 households 17000 non-null float64 7 median_income 17000 non-null float64 8 median_house_value 17000 non-null float64 dtypes: float64(9) memory usage: 1.2 MB
data1.describe() # per-column summary statistics: count, mean, std, min, quartiles, max
| | longitude | latitude | housing_median_age | total_rooms | total_bedrooms | population | households | median_income | median_house_value |
---|---|---|---|---|---|---|---|---|---|
count | 17000.000000 | 17000.000000 | 17000.000000 | 17000.000000 | 17000.000000 | 17000.000000 | 17000.000000 | 17000.000000 | 17000.000000 |
mean | -119.562108 | 35.625225 | 28.589353 | 2643.664412 | 539.410824 | 1429.573941 | 501.221941 | 3.883578 | 207300.912353 |
std | 2.005166 | 2.137340 | 12.586937 | 2179.947071 | 421.499452 | 1147.852959 | 384.520841 | 1.908157 | 115983.764387 |
min | -124.350000 | 32.540000 | 1.000000 | 2.000000 | 1.000000 | 3.000000 | 1.000000 | 0.499900 | 14999.000000 |
25% | -121.790000 | 33.930000 | 18.000000 | 1462.000000 | 297.000000 | 790.000000 | 282.000000 | 2.566375 | 119400.000000 |
50% | -118.490000 | 34.250000 | 29.000000 | 2127.000000 | 434.000000 | 1167.000000 | 409.000000 | 3.544600 | 180400.000000 |
75% | -118.000000 | 37.720000 | 37.000000 | 3151.250000 | 648.250000 | 1721.000000 | 605.250000 | 4.767000 | 265000.000000 |
max | -114.310000 | 41.950000 | 52.000000 | 37937.000000 | 6445.000000 | 35682.000000 | 6082.000000 | 15.000100 | 500001.000000 |
# Map-style view: each row is plotted at its longitude/latitude, with both the
# colour and the marker size encoding median_house_value.
fig1 = plt.figure(figsize=(14, 10))
scatter_plot = sns.scatterplot(x='longitude',y='latitude',hue='median_house_value', data=data1,size='median_house_value')
plt.show()
plt.close()
# Histogram of median house values with a kernel density estimate overlaid.
# histplot is axes-level, so it draws into fig2 and honours its figsize.
# The figure-level sns.displot used here before always opens a figure of its
# own, which left fig2 empty (rendered as "<Figure size 1008x720 with 0 Axes>").
fig2=plt.figure(figsize=(14, 10))
dist_plot = sns.histplot(data1['median_house_value'],kde=True)
plt.show()
plt.close()
<Figure size 1008x720 with 0 Axes>
# total_rooms against median_house_value, coloured by population.
# NOTE(review): the result is bound to `box_plot`, but the call draws a scatter plot.
fig3 = plt.figure(figsize=(14, 10))
box_plot = sns.scatterplot(y='total_rooms',x='median_house_value',hue='population',data=data1)
plt.show()
plt.close()