# Continuous uniform samples on the half-open interval [0, 1), shape 3x3.
np.random.random_sample(size=(3, 3))
# Sample output:
# [[0.20966286 0.72581506 0.78926387]
# [0.85719525 0.00163033 0.45001818]
# [0.17630303 0.40184026 0.89585902]]

# Discrete uniform integers: 0 is inclusive, 10 is exclusive.
np.random.randint(low=0, high=10, size=(3, 3))
# Sample output:
# [[6 8 4]
# [1 3 3]
# [6 9 7]]

# Normal (Gaussian) samples with mean 5 and standard deviation 2.
np.random.normal(loc=5, scale=2, size=(3, 3))
# Sample output:
# [[3.8768528 5.73747086 3.63564872]
# [5.49814587 2.62757122 3.61948982]
# [3.36409537 7.86431236 5.16509868]]
from numpy import random
# Build a 2x4 array of floats drawn uniformly from [0.0, 1.0).
randArray = random.random_sample((2, 4))
# Output (sample):
# array([[0.93848018,0.42005976,0.81470729,0.98797783],[0.12242703,0.42756378,0.59705163,0.36619101]])
# Random train/test split.
# Each row gets an independent uniform draw from [0, 1); rows whose draw is
# below 0.8 go to `train`, the rest to `test`. The split is therefore about
# 80/20 on average, not exactly 80/20.
df = pd.read_csv('file_location')
mask = np.random.random_sample(len(df)) < 0.8
train, test = df[mask], df[~mask]