# Visualization is the easiest way to get a sense of the overall data distribution and its outliers.
# Plotting graphs before treating outliers
for col in df.describe().columns:
    # One figure per column reported by df.describe(): three side-by-side
    # panels (density view, plain histogram, boxplot) in a 20x3 inch strip.
    fig, ax = plt.subplots(1, 3, figsize=(20, 3), constrained_layout=True)
    # `sns.distplot` is deprecated; its documented replacement is a
    # density-scaled histogram with a KDE overlay (cut=3 keeps the curve
    # extending past the data range the way distplot drew it).
    sns.histplot(
        df[col], kde=True, stat="density", kde_kws={"cut": 3}, ax=ax[0]
    ).set(title="Distplot")
    sns.histplot(df[col], ax=ax[1]).set(title="Histplot")
    # Pass the values as `x=` explicitly: the meaning of the first positional
    # argument of boxplot changed between seaborn releases (x -> data), which
    # silently changes the plot orientation.
    sns.boxplot(x=df[col], ax=ax[2]).set(title="Boxplot")
    fig.suptitle(f'{col.title()}', weight='bold')
    # plt.show() works on GUI and inline backends alike, whereas Figure.show()
    # only warns when the backend has no GUI event loop.
    plt.show()
# Define a function that annotates each bar with its percentage of the total count
def annot_percent(axes):
    """Label every bar of a countplot with its share of the total count.

    Args:
        axes: Object exposing ``patches`` (bars providing ``get_x``,
            ``get_width`` and ``get_height``) and an ``annotate`` method,
            e.g. the Axes a countplot was drawn on.

    Each label is the bar's height as a percentage of the summed bar heights,
    rounded to two decimals, centred horizontally on the bar and anchored at
    its top edge. Nothing is drawn when the heights sum to zero.
    """
    bars = axes.patches
    # Computed once instead of once per bar: one percent of the combined height.
    one_percent = sum(bar.get_height() for bar in bars) / 100
    if not one_percent:
        # No bars, or only zero-height bars: there is no share to report and
        # dividing would raise ZeroDivisionError.
        return
    for bar in bars:
        height = bar.get_height()
        percent = round(height / one_percent, 2)
        x = bar.get_x() + bar.get_width() / 2
        # Annotate the axes that was passed in, not a module-level global.
        axes.annotate(f'{percent}%', (x, height), ha='center', va='bottom')
# Number of rows needed for a 3-wide subplot grid. Ceiling division avoids the
# extra empty row the old `int(len/3 + 1)` produced whenever the column count
# was a multiple of 3; the floor of 1 keeps the figure height positive when
# the list is empty.
r = max(1, (len(catagorical_columns) + 2) // 3)
# Plotting a countplot for each column in catagorical_columns
plt.figure(figsize=(18, r * 3))
for n, column in enumerate(catagorical_columns):
    plot = plt.subplot(r, 3, n + 1)
    # Pass the values as `x=` explicitly: newer seaborn releases treat the
    # first positional argument as `data`, not `x`. The extra 15% vertical
    # margin leaves room for the labels placed on top of the bars.
    sns.countplot(x=df[column], ax=plot).margins(y=0.15)
    plot.set_title(f'{column.title()}', weight='bold')
    annot_percent(plot)
# Adjust the layout once, after every subplot and its labels exist.
plt.tight_layout()