import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Distribution of a continuous variable (连续值分布)
# Minimal example: draw 100 samples from a standard normal distribution and
# plot them with seaborn's distplot.
x = np.random.normal(size=100)
sns.distplot(x)
# Overlay the distribution of one feature column for the two frames
# df_train0 and df_train1. The column is 'var_<i + j*16>'; df_train0,
# df_train1, i and j are expected to be defined as in the grouped-comparison
# loop further down in this file.
sns.distplot(df_train0['var_' + str(i + j * 16)], label='0')
sns.distplot(df_train1['var_' + str(i + j * 16)], label='1')
# Per-group comparison (分组对比)
import seaborn as sns
import matplotlib.pyplot as plt

# Split the training frame on the 'target' column so that every feature's
# distribution can be drawn once per target value.
df_train0 = df_train[df_train['target'] == 0]
df_train1 = df_train[df_train['target'] == 1]

# 13 pages of a 4x4 grid address indices 0..207; indices above 199 are
# skipped, so only var_0 .. var_199 are plotted.
for j in range(13):
    plt.figure(figsize=(15, 18))
    for i in range(16):
        var_idx = i + j * 16
        if var_idx > 199:
            continue
        col = 'var_' + str(var_idx)
        plt.subplot(4, 4, i + 1)
        sns.distplot(df_train0[col], label='0')
        sns.distplot(df_train1[col], label='1')
        plt.title(col)
        # Clear the x label; the subplot title already shows the column name.
        plt.xlabel('')
    # One figure per page of 16 features.
    plt.show()
class GridPlot:
    """Draw per-category distributions of several columns on a 4x4 grid.

    Attributes:
        category: distinct values of the grouping column, in order of
            first appearance (as returned by ``pd.unique``).
        subgraph: iterable of column names; each one gets its own subplot.
        data_grouped: mapping from each group key to the sub-frame of
            ``data`` belonging to that group.
    """

    def __init__(self, data, category, subgraph):
        """Group ``data`` by the ``category`` column and store the columns to plot.

        Args:
            data: pandas DataFrame containing ``category`` and every column
                named in ``subgraph``.
            category: name of the column to group by.
            subgraph: column names to plot, one subplot per column.
        """
        self.category = pd.unique(data[category])
        self.subgraph = subgraph
        self.data_grouped = {
            name: group for name, group in data.groupby(category)
        }

    def plot_subgraph(self):
        """Plot every column in ``self.subgraph`` with one curve per category.

        The grid is fixed at 4x4, so ``plt.subplot`` is only given valid
        positions for the first 16 columns.
        """
        plt.figure(figsize=(15, 18))
        for i, g in enumerate(self.subgraph):
            plt.subplot(4, 4, i + 1)
            for cat in self.category:
                sns.distplot(self.data_grouped[cat][g], label=str(cat))
            plt.title(g)
        plt.show()
def distplot_column(data, target, column_list, sub_figsize=(4, 4)):
    """Plot the per-group distribution of several columns on one figure.

    Args:
        data: pandas DataFrame containing ``target`` and every column in
            ``column_list``.
        target: name of the column to group by; each distinct value gets
            its own curve in every subplot.
        column_list: columns to plot, one subplot per column.
        sub_figsize: ``(rows, cols)`` of the subplot grid. The same pair,
            multiplied by ``len(column_list)``, is also used as the figure
            size in inches.

    Returns:
        None. The figure is displayed with ``plt.show()``. Nothing is drawn
        when ``column_list`` is empty.
    """
    # An empty column list would request a zero-sized figure; do nothing.
    if len(column_list) == 0:
        return

    data_groups = {name: group for name, group in data.groupby(target)}
    target_groups = pd.unique(data[target])

    # NOTE(review): sub_figsize doubles as grid shape and figure-size factor,
    # so the figure grows with the number of columns — confirm this is intended.
    plt.figure(figsize=np.multiply(sub_figsize, len(column_list)))
    for i, col in enumerate(column_list):
        plt.subplot(sub_figsize[0], sub_figsize[1], i + 1)
        for target_group in target_groups:
            sns.distplot(data_groups[target_group][col], label=str(target_group))
        # Title each subplot with the column it shows (previously the last
        # target group value was used, giving every subplot the same title).
        plt.title(col)
        plt.xlabel('')
    plt.show()
# t-SNE: reference implementation of parametric t-SNE in Keras
# https://github.com/zaburo-ch/Parametric-t-SNE-in-Keras/blob/master/mlp_param_tsne.py