import pandas as pd
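# The snippets below operate on a DataFrame named df; a minimal example frame
# (assumed here, not part of the original) so the calls can actually run:
df = pd.DataFrame({"column": [1, 1, 2, 2], "column2": ["a", "a", "b", "c"]})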
# Drop all duplicates in the DataFrame
df = df.drop_duplicates()
# Drop all duplicates in a specific column of the DataFrame
df = df.drop_duplicates(subset="column")
# Drop rows that are duplicated across a pair of columns
df = df.drop_duplicates(subset=["column", "column2"])
# Display DataFrame
print(df)
# HOW TO REMOVE DUPLICATES FROM A LIST:
# 1) CREATE A LIST
my_list = [1, 2, 3, 4, 5, 5, 5, 1]
# 2) CONVERT IT TO A SET AND THEN BACK INTO A LIST
my_list = list(set(my_list))
# 3) DONE!
print(my_list)  # e.g. [1, 2, 3, 4, 5] -- note that set() does not preserve the original order
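# If the original order matters, one common alternative (not in the original snippet)
# is dict.fromkeys, since dicts preserve insertion order in Python 3.7+:
my_list = [1, 2, 3, 4, 5, 5, 5, 1]
ordered = list(dict.fromkeys(my_list))
print(ordered)  # [1, 2, 3, 4, 5]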
# removing duplicates from the list using a naive method
# initializing list
sam_list = [11, 13, 15, 16, 13, 15, 16, 11]
print("The list is: " + str(sam_list))
# remove duplicates from the list, preserving order
result = []
for i in sam_list:
    if i not in result:
        result.append(i)
# printing list after removal
print("The list after removing duplicates : " + str(result))
# ----- Create a list with no repeating elements ------ #
mylist = [67, 7, 89, 7, 2, 7]
newlist = []
for i in mylist:
    if i not in newlist:
        newlist.append(i)
print(newlist)  # [67, 7, 89, 2]
# Lists are unhashable, so set() cannot be used on a list of lists;
# a membership-check loop works instead
k = [[1, 2], [4], [5, 6, 2], [1, 2], [3], [4]]
new_k = []
for elem in k:
    if elem not in new_k:
        new_k.append(elem)
k = new_k
print(k)
# prints [[1, 2], [4], [5, 6, 2], [3]]
# Python 3 code to demonstrate
# removing duplicates from a list
# using a naive method
# initializing list
test_list = [1, 3, 5, 6, 3, 5, 6, 1]
print("The original list is : " + str(test_list))
# using the naive method to remove duplicates from the list
res = []
for i in test_list:
    if i not in res:
        res.append(i)
# printing list after removal
print("The list after removing duplicates : " + str(res))
def dedupe(items):
    # Yield items in their original order, skipping values already seen
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            seen.add(item)

a = [1, 5, 2, 1, 9, 1, 5, 10]
print(list(dedupe(a)))  # [1, 5, 2, 9, 10]
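# A possible extension (an assumption, not part of the original snippet): accept a
# key function so unhashable items such as dicts can also be de-duplicated.
def dedupe_by_key(items, key=None):
    # Same idea as dedupe(), but tracks key(item) instead of the item itself
    seen = set()
    for item in items:
        val = item if key is None else key(item)
        if val not in seen:
            yield item
            seen.add(val)

points = [{"x": 1, "y": 2}, {"x": 1, "y": 3}, {"x": 1, "y": 2}]
print(list(dedupe_by_key(points, key=lambda d: (d["x"], d["y"]))))
# [{'x': 1, 'y': 2}, {'x': 1, 'y': 3}]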
# In-place de-duplication: sort first, then delete adjacent duplicates
if mylist:
    mylist.sort()
    last = mylist[-1]
    # Walk backwards so deletions don't shift the indices still to be visited
    for i in range(len(mylist) - 2, -1, -1):
        if last == mylist[i]:
            del mylist[i]
        else:
            last = mylist[i]
# Quicker if all elements are hashable:
mylist = list(set(mylist))
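# A related one-liner (not in the original): when the elements are hashable and a
# sorted result is acceptable, sorted(set(...)) removes duplicates and orders them in one step.
mylist = [67, 7, 89, 7, 2, 7]
print(sorted(set(mylist)))  # [2, 7, 67, 89]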