# Difference makes the DIFFERENCE
import numpy as np
# Draw 100,000 samples from the uniform distribution on [0, 1).
arr = np.random.rand(100000)

# Range of the sample.
arr.min()
arr.max()

# Centre and spread.
arr.mean()
arr.std()
np.median(arr)
arr.var()

# The 50th percentile is the median, so these two expressions agree.
np.percentile(arr, 50)
np.median(arr)

# Single percentiles, and several percentiles requested in one call.
np.percentile(arr, 75)
np.percentile(arr, [10, 30, 60])
np.percentile(arr, [25, 75])
np.percentile(arr, 10)
np.percentile(arr, 90)
np.percentile(arr, 30)

# Interquartile range (75th minus 25th percentile) from two separate calls.
iqr = np.subtract(np.percentile(arr, 75), np.percentile(arr, 25))
print(iqr)

# The same IQR from one call that returns both quartiles as [lower, upper].
quartiles = np.percentile(arr, [25, 75])
print(quartiles)
iqr = quartiles[-1] - quartiles[0]
print(iqr)
%%time
# Time the IQR of `arr` computed with two separate np.percentile calls.
# NOTE: `%%time` is an IPython/Jupyter cell magic; it is not valid plain Python.
iqr = np.percentile(arr, 75) - np.percentile(arr, 25)
%%time
# Time the same IQR taken from a single np.percentile call. The percentiles
# are requested as [75, 25], so index 0 is the upper quartile and index 1 is
# the lower one.
quartiles = np.percentile(arr, [75, 25])
iqr = quartiles[0] - quartiles[1]
# A much larger sample (100,000,000 values) for repeating the comparison.
arr2 = np.random.rand(100000000)
%%time
# Two-call version on the large sample.
iqr = np.percentile(arr2, 75) - np.percentile(arr2, 25)
print(iqr)
%%time
# Single-call version on the large sample.
quartiles = np.percentile(arr2, [75, 25])
iqr = quartiles[0] - quartiles[1]
print(iqr)
# Standardise the sample (z-scores): one value per array element, giving that
# element's distance from the mean measured in standard deviations.
(arr - arr.mean()) / arr.std()

# Histograms: default binning, 5 equal-width bins, and explicit bin edges.
np.histogram(arr)
np.histogram(arr, bins=5)
np.histogram(arr, bins=[0, 0.25, 0.27, 1])

# Label every element with the index of the quarter-wide bin it falls into.
bins = [0, 0.25, 0.5, 0.75, 1]
np.digitize(arr, bins)
# Ten random integers from 10 to 19 (the upper bound 20 is exclusive).
arr3 = np.random.randint(low=10, high=20, size=10)
arr3

# The edges describe the intervals 10-14, 14-18 and 18-20. np.digitize gives
# the 1-based index of the interval each value falls in,
# e.g. 13 -> 1, 15 -> 2, 17 -> 2, 19 -> 3.
bins = [10, 14, 18, 20]
np.digitize(arr3, bins)
# Three made-up measurements for ten people, drawn as random integers
# (upper bounds are exclusive).
height = np.random.randint(low=100, high=180, size=10)
weight = np.random.randint(low=40, high=150, size=10)
age = np.random.randint(low=10, high=80, size=10)
height
weight
age

# Extremes of the individual arrays.
weight.min()
weight.max()
height.min()
height.max()

# Joining the arrays end to end gives one flat array of 30 values, so a
# reduction over it no longer distinguishes the three measurements.
arr_concat = np.concatenate([weight, height, age])
print(arr_concat)
arr_concat.min()
np.concatenate([weight, height, age]).shape

# Stacking them as rows instead gives a 3 x 10 array
# (row 0 = height, row 1 = weight, row 2 = age).
np.vstack([height, weight, age])
np.vstack([height, weight, age]).shape
arr4 = np.vstack([height, weight, age])
arr4

# axis=1 reduces along each row (one result per measurement);
# axis=0 reduces down each column (one result per person).
arr4.min(axis=1)
arr4.max(axis=0)
arr4.min(axis=1)
import numpy as np
# Centre a large uniform sample by subtracting its own mean from every value.
base_mean_data = np.random.rand(10000000)
base_mean = base_mean_data - base_mean_data.mean()
print(base_mean)

# The mean of the centred data is zero up to floating-point rounding.
print(base_mean.mean())
import matplotlib.pyplot as plt
# Running (cumulative) mean of a sample: first with an explicit loop, then
# vectorised with a cumulative sum. Note that this rebinds `arr`.
arr = np.random.randint(1, 100, 100)
arr[:10]
arr[0]

# np.mean accepts a bare scalar as well as a sequence
# (33 and 26 are just sample values).
np.mean(33)
np.mean([33, 26])

print(arr[:10])
print(arr[0:0])  # empty slice: stop equals start
print(arr[0:1])  # one-element slice
print(arr[:10])

# Mean of the first i elements for i = 1..49, printed next to the element
# that was added last. The loop body has to be indented to be valid Python.
for i in range(1, 50):
    arr1 = arr[0:i]
    print(i, arr[i - 1], np.mean(arr1))

# The same running mean for every prefix at once: cumulative sum divided by
# the number of elements seen so far (1, 2, ..., arr.size).
np.cumsum(arr)
np.cumsum(arr) / np.arange(1, arr.size + 1)
means = np.cumsum(arr) / np.arange(1, arr.size + 1)
means[:10]
# Effect of outliers: compare mean and median before and after adding two
# extreme values.
ex_arr = np.random.randint(low=1, high=100, size=100)
ex_arr.mean()
ex_arr[:10]
np.median(ex_arr)

# Append the outliers 4000 and 2000, then recompute both statistics: the mean
# jumps, while the median stays within the original value range.
ex_arr = np.append(ex_arr, (4000, 2000))
ex_arr[-10:]
ex_arr.mean()
np.median(ex_arr)
# How summary statistics respond to the affine map y = 2.5 * x + 10.02.
sca_arr = np.random.randint(1, 100, 100)
np.mean(sca_arr)
np.median(sca_arr)
sca_arr1 = 2.5 * sca_arr + 10.02

# Mean and median follow the same map: stat(y) == 2.5 * stat(x) + 10.02.
# Each print shows the statistic of the transformed data next to the value
# predicted from the untransformed data; the two numbers should agree.
print(np.mean(sca_arr1),
      2.5 * np.mean(sca_arr) + 10.02)
print(np.mean(2.5 * sca_arr + 10.02),
      2.5 * np.mean(sca_arr) + 10.02)
print(np.median(sca_arr1), 2.5 * np.median(sca_arr) + 10.02)

# Measures of spread ignore the constant shift. Variance scales with the
# square of the factor and standard deviation with the factor itself, so the
# naive prediction 2.5 * stat(x) + 10.02 does NOT hold for them.
print(np.var(sca_arr1), 2.5 ** 2 * np.var(sca_arr))
print(np.std(sca_arr1), 2.5 * np.std(sca_arr))