| | |
| | import numpy as np |
| | import seaborn as sns |
| | import matplotlib.pyplot as plt |
| | import os |
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names; prefer the new name when this Matplotlib has it
# and fall back to the legacy one otherwise.
if 'seaborn-v0_8-colorblind' in plt.style.available:
    plt.style.use('seaborn-v0_8-colorblind')
else:
    plt.style.use('seaborn-colorblind')
| |
|
| | |
| |
|
| |
|
def get_dtypes(data, drop_col=None):
    """Split the columns of a pandas Dataframe into string and numeric groups.

    A column counts as numeric when its dtype compares equal to one of the
    integer / float types listed in ``numeric_types`` below; every other
    column is placed in the string group.

    Parameters
    ----------
    data : pandas Dataframe

    drop_col : iterable of column names to omit from every returned list.
        Defaults to None, meaning no column is omitted.

    Returns
    -------
    str_var_list, num_var_list, all_var_list
        Non-numeric column names, numeric column names, and the two lists
        concatenated (non-numeric names first). Each list keeps the column
        order of ``data``.

    """
    # np.int / np.float were plain aliases of the builtins and were removed
    # in NumPy 1.24; the builtins compare against a dtype in the same way.
    numeric_types = (int, np.int64, np.uint, np.int32, float,
                     np.float64, np.float32, np.double)

    # A None default avoids sharing one mutable list between calls.
    dropped = [] if drop_col is None else list(drop_col)

    str_var_list = []
    num_var_list = []
    for var in data.columns:
        if var in dropped:
            continue
        if data[var].dtypes in numeric_types:
            num_var_list.append(var)
        else:
            str_var_list.append(var)

    all_var_list = str_var_list + num_var_list
    return str_var_list, num_var_list, all_var_list
| |
|
| |
|
def describe(data, output_path=None):
    """Compute the general description of a pandas Dataframe.

    Parameters
    ----------
    data : pandas Dataframe

    output_path : directory in which 'describe.csv' is written.
        When None (the default) nothing is written to disk.

    Returns
    -------
    the result of ``data.describe(include='all')``
    """
    summary = data.describe(include='all')
    if output_path is None:
        return summary

    csv_file = os.path.join(output_path, 'describe.csv')
    summary.to_csv(csv_file)
    print('result saved at:', str(csv_file))
    return summary
| | |
| | |
def discrete_var_barplot(x, y, data, output_path=None):
    """Draw the barplot of a discrete variable x against y (target variable).

    Parameters
    ----------
    x, y : passed to ``sns.barplot`` as its ``x`` and ``y`` arguments

    data : passed to ``sns.barplot`` as its ``data`` argument

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    Returns
    -------
    None. With output_path set, the figure is saved as
    'Barplot_<x>_<y>.png'.
    """
    plt.figure(figsize=(15, 10))
    sns.barplot(x=x, y=y, data=data)
    if output_path is None:
        return

    file_name = ''.join(['Barplot_', str(x), '_', str(y), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
| | |
| | |
def discrete_var_countplot(x, data, output_path=None):
    """Draw the countplot of a discrete variable x.

    Parameters
    ----------
    x : passed to ``sns.countplot`` as its ``x`` argument

    data : passed to ``sns.countplot`` as its ``data`` argument

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    Returns
    -------
    None. With output_path set, the figure is saved as
    'Countplot_<x>.png'.
    """
    plt.figure(figsize=(15, 10))
    sns.countplot(x=x, data=data)
    if output_path is None:
        return

    file_name = ''.join(['Countplot_', str(x), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
| |
|
| |
|
def discrete_var_boxplot(x, y, data, output_path=None):
    """Draw the boxplot of a discrete variable x against y.

    Parameters
    ----------
    x, y : passed to ``sns.boxplot`` as its ``x`` and ``y`` arguments

    data : passed to ``sns.boxplot`` as its ``data`` argument

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    Returns
    -------
    None. With output_path set, the figure is saved as
    'Boxplot_<x>_<y>.png'.
    """
    plt.figure(figsize=(15, 10))
    sns.boxplot(x=x, y=y, data=data)
    if output_path is None:
        return

    file_name = ''.join(['Boxplot_', str(x), '_', str(y), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
| |
|
| |
|
def continuous_var_distplot(x, output_path=None, bins=None):
    """Draw the distplot (KDE curve disabled) of a continuous variable x.

    Parameters
    ----------
    x : the values to plot, passed to ``sns.distplot`` as ``a``.
        Its ``name`` attribute is used to build the file name when
        the figure is saved.

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    bins : passed to ``sns.distplot`` as its ``bins`` argument

    Returns
    -------
    None. With output_path set, the figure is saved as
    'Distplot_<x.name>.png'.
    """
    plt.figure(figsize=(15, 10))
    sns.distplot(a=x, kde=False, bins=bins)
    if output_path is None:
        return

    file_name = ''.join(['Distplot_', str(x.name), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
| | |
| | |
| | |
| |
|
def scatter_plot(x, y, data, output_path=None):
    """Draw the scatter-plot of two variables.

    Parameters
    ----------
    x, y : passed to ``sns.scatterplot`` as its ``x`` and ``y`` arguments.
        Either column names or objects carrying a ``name`` attribute.

    data : passed to ``sns.scatterplot`` as its ``data`` argument

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    Returns
    -------
    None. With output_path set, the figure is saved as
    'Scatter_plot_<x>_<y>.png'.
    """
    plt.figure(figsize=(15, 10))
    sns.scatterplot(x=x, y=y, data=data)
    if output_path is not None:
        # x / y are usually plain column names, which have no ``.name``;
        # use the attribute when present and the value itself otherwise.
        x_label = getattr(x, 'name', x)
        y_label = getattr(y, 'name', y)
        output = os.path.join(
            output_path,
            'Scatter_plot_' + str(x_label) + '_' + str(y_label) + '.png')
        plt.savefig(output)
        print('Image saved at', str(output))
| | |
| | |
def correlation_plot(data, output_path=None):
    """Draw the correlation plot between the numeric variables of data.

    Parameters
    ----------
    data : pandas Dataframe

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    Returns
    -------
    None. With output_path set, the figure is saved as 'Corr_plot.png'.
    """
    # pandas >= 2.0 raises on non-numeric columns unless numeric_only=True is
    # requested. The keyword does not exist before pandas 1.5, where corr()
    # already ignored such columns, so fall back to the bare call there.
    try:
        corrmat = data.corr(numeric_only=True)
    except TypeError:
        corrmat = data.corr()

    fig, ax = plt.subplots()
    fig.set_size_inches(11, 11)
    sns.heatmap(corrmat, cmap="YlGnBu", linewidths=.5, annot=True, ax=ax)
    if output_path is not None:
        output = os.path.join(output_path, 'Corr_plot' + '.png')
        fig.savefig(output)
        print('Image saved at', str(output))
| | |
| | |
def heatmap(data, output_path=None, fmt='d'):
    """Draw an annotated heatmap of data.

    Parameters
    ----------
    data : passed to ``sns.heatmap`` as the data to draw

    output_path : directory in which the PNG is written.
        When None (the default) the figure is not saved.

    fmt : passed to ``sns.heatmap`` as its ``fmt`` argument.
        Defaults to 'd'.

    Returns
    -------
    None. With output_path set, the figure is saved as 'Heatmap.png'.
    """
    figure, _ = plt.subplots()
    figure.set_size_inches(11, 11)
    sns.heatmap(data, cmap="YlGnBu", linewidths=.5, annot=True, fmt=fmt)
    if output_path is None:
        return

    target = os.path.join(output_path, 'Heatmap' + '.png')
    plt.savefig(target)
    print('Image saved at', str(target))