# NOTE(review): `df` is assumed to be a pandas DataFrame defined elsewhere.
# Number of rows flagged by duplicated() when all columns are compared.
df.duplicated().sum()
# Whether any value in the 'Student' column is flagged as a duplicate.
boolean = df['Student'].duplicated().any() # True
# Boolean mask of rows flagged as duplicates when only 'Id' is compared.
df.duplicated('Id')
# Total number of rows flagged as duplicates on 'Id'.
df.duplicated('Id').sum()
>>> df.duplicated(subset=['brand'])
0 False
1 True
2 False
3 True
4 True
dtype: bool
animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
>>> animals.duplicated()
0 False
1 False
2 True
3 False
4 True
dtype: bool
>>> df.duplicated(keep='last')
0 True
1 False
2 False
3 False
4 False
dtype: bool
In [28]:
# Count how often each distinct row occurs by grouping on every column.
# as_index is left at its default: with as_index=False, pandas 1.1+ returns a
# DataFrame with a "size" column instead of the MultiIndex Series shown below.
df.groupby(df.columns.tolist()).size()
Out[28]:
one    three  two
False  False  True     1
True   False  False    2
       True   True     1
dtype: int64
# Show every row whose "ID" occurs more than once, sorted so equal IDs are adjacent.
ids = df["ID"]
# keep=False flags all members of each duplicate group (first occurrence
# included), so no separate isin() lookup against the repeated IDs is needed.
df[ids.duplicated(keep=False)].sort_values("ID")