from sklearn.preprocessing import MultiLabelBinarizer

# Binarizer that turns each row's collection of labels into one 0/1
# indicator row, with one column per distinct label.
mlb = MultiLabelBinarizer()

# Get dummy array: split every pipe-delimited 'cat' string into a list of
# labels, then fit the binarizer and encode all rows in a single step.
# `df` is expected to be defined earlier (not visible in this snippet).
# NOTE(review): a missing value in df['cat'] stays NaN after .str.split and
# presumably makes fit_transform fail on the non-iterable float — confirm
# the column has no nulls before running this.
dummy = mlb.fit_transform(df['cat'].str.split('|'))

# Get category list: the labels learned during fitting, in the same order
# as the columns of `dummy`.
mlb.classes_