from sklearn.model_selection import train_test_split
# Separate the target column from the features. Note that pop() removes
# 'output' from df in place, and X is the same object as df (not a copy).
y = df.pop('output')
X = df
# Split the index labels instead of the rows themselves, so the feature frame
# is not copied; 20% of the labels go to the test side.
X_train, X_test, y_train, y_test = train_test_split(X.index, y, test_size=0.2)
# X_train holds index *labels*, so the lookup must be label-based (.loc).
# Positional .iloc only gives the right rows when the index happens to be the
# default 0..n-1 range.
X.loc[X_train]  # return dataframe train
# Draw a random 80% of the rows for training; random_state is the seed that
# makes the draw repeatable.
train = df.sample(frac=0.8, random_state=200)
# Whatever was not drawn (matched by index label) becomes the test set.
test = df.drop(index=train.index)
from sklearn.model_selection import train_test_split
# Let scikit-learn shuffle the rows of df and hold out 20% of them for testing.
train, test = train_test_split(
    df,
    test_size=0.2,
)
# Manual split: shuffle every row, then cut the shuffled frame at the point
# given by train_size (the fraction of rows kept for training).
train_size = 0.8
df_permutated = df.sample(frac=1)
train_end = int(train_size * len(df_permutated))
df_train, df_test = df_permutated[:train_end], df_permutated[train_end:]
# Dataframe splitting helper function
def SplitDataframe(df, y_column, test_size=3):
    """Split a DataFrame into train/test feature frames and target series.

    The rows are NOT shuffled: the first rows of ``df`` become the test set
    and the remaining rows the training set. Shuffle ``df`` beforehand
    (e.g. ``df.sample(frac=1)``) if a random split is wanted.

    Args:
        df: DataFrame holding the feature columns and the target column.
        y_column: Name of the target column; it is dropped from the returned
            feature frames and returned separately.
        test_size: Size of the test set in tenths of the data, so the default
            ``3`` holds out 30% of the rows. Must lie between 0 and 10.

    Returns:
        ``((train_X, train_y), (test_X, test_y))``.

    Raises:
        ValueError: If ``test_size`` is outside the range 0..10.
    """
    if not 0 <= test_size <= 10:
        raise ValueError("test_size is in tenths of the data and must be between 0 and 10")

    # Scale the fraction by the number of rows. Multiplying (rather than
    # dividing) by len(df) keeps the proportion right for any frame size and
    # lets an empty frame produce empty splits instead of a division by zero.
    test_count = int(round(len(df) * test_size / 10))

    # Positional slicing: leading rows are held out, the rest are for training.
    test_ds = df.iloc[:test_count]
    train_ds = df.iloc[test_count:]

    train_ds_X = train_ds.drop([y_column], axis=1)
    train_ds_y = train_ds[y_column]
    test_ds_X = test_ds.drop([y_column], axis=1)
    test_ds_y = test_ds[y_column]
    return (train_ds_X, train_ds_y), (test_ds_X, test_ds_y)