from sklearn.model_selection import train_test_split
# Feature matrix: every column of df except the 'target' column.
X = df.drop(columns=['target']).values  # independent features
# Label vector: the 'target' column on its own.
y = df['target'].values  # dependent variable
# test_size is the share of rows held out for testing (25% here);
# a fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)
import numpy as np
from sklearn.model_selection import train_test_split
# Toy data: a 5x2 matrix holding 0..9 and five matching labels 0..4.
X = np.arange(10).reshape((5, 2))
y = range(5)
# Hold out 33% of the samples for testing; the rest is the training set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)
# To get train / validation / test sets, call
# sklearn.model_selection.train_test_split twice: first split off the test
# set, then split the remaining training data into train and validation.
# Each call is written as a single parenthesised statement; putting the
# targets on one line and "= train_test_split(...)" on the next is a
# SyntaxError.

# Step 1: hold out 20% of the data as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Step 2: take 25% of the remaining 80% as the validation set,
# i.e. 0.25 x 0.8 = 0.2 of the original data.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=1
)
from sklearn.model_selection import train_test_split
# Stratified 80/20 train/test split: passing stratify=y asks the splitter
# to keep the class proportions of y in both subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
##sklearn train test split
from sklearn.model_selection import train_test_split
# Feature matrix: every column of df except the 'target' column.
X = df.drop(columns=['target']).values  # independent features
# Label vector: the 'target' column on its own.
y = df['target'].values  # dependent variable
# test_size is the share of rows held out for testing (25% here).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Alternative: randomly split the whole DataFrame by percentage instead of
# separating out a target variable first.
# Sample 80% of the rows for training; random_state=25 makes it reproducible.
training_data = df.sample(frac=0.8, random_state=25)
# The rows that were not sampled (the remaining 20%) form the testing sample.
testing_data = df.drop(index=training_data.index)
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
# Load the California housing dataset.
cal_housing = fetch_california_housing()

# Target values, centred by subtracting their mean. The subtraction is done
# in place on the object returned by cal_housing.target (y is that same object).
y = cal_housing.target
y -= y.mean()

# Features as a DataFrame whose columns carry the dataset's feature names.
X = pd.DataFrame(data=cal_housing.data, columns=cal_housing.feature_names)

# Hold out 10% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=0,
)