本次综合实验以阿里巴巴天池的婴儿用品购买数据集为例,进行相关分析并提出建议。
数据来源说明:该数据集来自阿里天池,记录了2012年7月2日至2015年2月5日发生在淘宝、天猫交易平台上的婴幼儿商品交易数据,包含两个表格:mum_baby.csv 和 mum_baby_trade_history.csv。
1.
# Third-party libraries used throughout the analysis.
# NOTE: the pandas class is spelled `DataFrame`; `Dataframe` raises ImportError.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
2.
# Load mum_baby.csv from the current working directory.
df_gender = pd.read_csv('./mum_baby.csv')
df_gender  # notebook-style display of the loaded frame
3.
# Load mum_baby_trade_history.csv from the current working directory.
df_mums = pd.read_csv('./mum_baby_trade_history.csv')
df_mums  # notebook-style display of the loaded frame
4.
# Bar chart of the 20 most frequent `cat_id` values in the trade history.

# Use the SimHei font so the Chinese labels below can be rendered, and turn
# off the Unicode minus sign (see the matplotlib rcParams documentation).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# value_counts() returns counts sorted in descending order, so the first 20
# (label, count) pairs are the 20 most frequent categories. Despite the
# `_boy` suffix, these counts are taken over every row of df_mums.
data_boy = df_mums['cat_id'].value_counts()
label_boy = data_boy.index.tolist()
value_boy = data_boy.tolist()
data_pair_boy = list(zip(label_boy, value_boy))
data = data_pair_boy[:20]

# `pd.DataFrame` (capital F) is the constructor; `pd.Dataframe` does not exist.
df = pd.DataFrame(data, columns=['商品二级分类', '数量'])
df.plot(kind='bar', x='商品二级分类', y='数量', title='商品二级分类的前二十名')
plt.show()
5.
# Inner-join the trade history with the baby table. No `on=` is given, so
# pandas joins on the column names the two frames have in common
# (presumably the user id -- verify against the CSV headers).
df_mums_gender = pd.merge(df_mums, df_gender, how='inner')
df_mums_gender  # notebook-style display
6.
# Replace `birthday` with its first four characters as a string
# (the year, assuming the raw value is formatted YYYYMMDD -- TODO confirm).
df_mums_gender['birthday'] = df_mums_gender['birthday'].astype(str).str.slice(0, 4)
df_mums_gender  # notebook-style display
7.
# Replace `day` with characters 4-5 of its string form
# (the month, assuming the raw value is formatted YYYYMMDD -- TODO confirm).
df_mums_gender['day'] = df_mums_gender['day'].astype(str).str.slice(4, 6)
df_mums_gender  # notebook-style display
8.
# Build a new frame holding only the columns needed for the monthly analysis.
# `pd.DataFrame` (capital F) is the constructor; `pd.Dataframe` does not exist.
df_month_goods = pd.DataFrame(
    df_mums_gender,
    columns=['auction_id', 'cat_id', 'cat1', 'day', 'birthday', 'gender'],
)
df_month_goods  # notebook-style display
9.
# Rename the columns positionally: the fourth becomes `month` and the fifth
# becomes `birthday_year`; the other names are unchanged.
df_month_goods.columns = [
    'auction_id', 'cat_id', 'cat1', 'month', 'birthday_year', 'gender',
]
df_month_goods  # notebook-style display
10.
# Index labels of the merged rows whose `gender` equals 0
# (the rest of this analysis treats those rows as boys).
boy_list = df_mums_gender[df_mums_gender['gender'] == 0].index.tolist()
boy_list  # notebook-style display
11.
# Select the rows listed in `boy_list`. That list holds index *labels*, so the
# label-based `.loc` is the matching indexer; positional `.iloc` only gives
# the same rows while the index is the default 0..n-1 range.
df_boy = df_month_goods.loc[boy_list, :]
df_boy  # notebook-style display
12.
# For each month, count how often each `cat1` value occurs. The result is a
# Series indexed by (month, cat1) pairs.
df_month_onegoods = df_boy.groupby('month')['cat1'].value_counts()
df_month_onegoods  # notebook-style display
13.
# Index entries of the per-month counts as a plain Python list.
df_month_onegoods_label = df_month_onegoods.index.tolist()
df_month_onegoods_label  # notebook-style display
14.
import pandas as pd

# Turn the list of index entries into a two-column frame.
# `pd.DataFrame` (capital F) is the constructor; `pd.Dataframe` does not exist.
df_month_boy_one = pd.DataFrame(df_month_onegoods_label, columns=['month', 'boy_cat1'])
df_month_boy_one  # notebook-style display
15.
# Counts from the per-month Series as a plain Python list, in index order.
df_month_onegoods_value = df_month_onegoods.tolist()
df_month_onegoods_value  # notebook-style display
16.
# Attach the counts as a `numbers` column; the list must have one entry per
# row of df_month_boy_one.
df_month_boy_one['numbers'] = df_month_onegoods_value
df_month_boy_one  # notebook-style display
17.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Grouped bar chart: one group per month, one bar per `boy_cat1` value.

# Use the SimHei font so the Chinese labels below can be rendered, and turn
# off the Unicode minus sign (see the matplotlib rcParams documentation).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Reshape to months (rows) x categories (columns). pivot_table aggregates
# duplicates with the mean by default; this assumes each (month, boy_cat1)
# pair appears once, so the counts pass through unchanged.
dfp = df_month_boy_one.pivot_table(index='month', columns='boy_cat1', values='numbers')

dfp.plot(kind='bar', figsize=(15, 10), rot=0)
plt.xlabel("月份")  # x-axis label
plt.ylabel("数量")  # y-axis label
plt.legend(fontsize=10)
plt.title("男婴在不同月份喜欢购买的一级商品")  # chart title
plt.show()
18.总代码一
# Combined script 1: load both CSV files and plot the 20 most frequent
# `cat_id` values in the trade history.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# NOTE: the pandas class is spelled `DataFrame`; `Dataframe` raises ImportError.
from pandas import Series, DataFrame

df_gender = pd.read_csv('./mum_baby.csv')
df_mums = pd.read_csv('./mum_baby_trade_history.csv')

# Use the SimHei font so the Chinese labels below can be rendered, and turn
# off the Unicode minus sign (see the matplotlib rcParams documentation).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# value_counts() returns counts sorted in descending order, so the first 20
# (label, count) pairs are the 20 most frequent categories. Despite the
# `_boy` suffix, these counts are taken over every row of df_mums.
data_boy = df_mums['cat_id'].value_counts()
label_boy = data_boy.index.tolist()
value_boy = data_boy.tolist()
data_pair_boy = list(zip(label_boy, value_boy))
data = data_pair_boy[:20]

df = pd.DataFrame(data, columns=['商品二级分类', '数量'])
df.plot(kind='bar', x='商品二级分类', y='数量', title='商品二级分类的前二十名')
plt.show()
19.总代码二
# Combined script 2: per-month counts of first-level categories (`cat1`) for
# rows with gender == 0, drawn as a grouped bar chart.
# Expects `df_mums` and `df_gender` to have been loaded already.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Inner-join on the columns the two frames share (no `on=` is given).
df_mums_gender = pd.merge(df_mums, df_gender, how='inner')

# Keep the first four characters of `birthday` and characters 4-5 of `day`
# (year and month respectively, assuming YYYYMMDD values -- TODO confirm).
df_mums_gender['birthday'] = df_mums_gender['birthday'].astype(str).str.slice(0, 4)
df_mums_gender['day'] = df_mums_gender['day'].astype(str).str.slice(4, 6)

# Narrow to the columns of interest, then rename `day` -> `month` and
# `birthday` -> `birthday_year` (positional rename).
# `pd.DataFrame` (capital F) is the constructor; `pd.Dataframe` does not exist.
df_month_goods = pd.DataFrame(
    df_mums_gender,
    columns=['auction_id', 'cat_id', 'cat1', 'day', 'birthday', 'gender'],
)
df_month_goods.columns = [
    'auction_id', 'cat_id', 'cat1', 'month', 'birthday_year', 'gender',
]

# `boy_list` holds index labels, so select with label-based `.loc`; positional
# `.iloc` only matches while the index is the default 0..n-1 range.
boy_list = df_mums_gender[df_mums_gender['gender'] == 0].index.tolist()
df_boy = df_month_goods.loc[boy_list, :]

# Count `cat1` occurrences per month, then flatten the (month, cat1) index and
# the counts into a three-column frame.
df_month_onegoods = df_boy.groupby('month')['cat1'].value_counts()
df_month_onegoods_label = df_month_onegoods.index.tolist()
df_month_boy_one = pd.DataFrame(df_month_onegoods_label, columns=['month', 'boy_cat1'])
df_month_onegoods_value = df_month_onegoods.tolist()
df_month_boy_one['numbers'] = df_month_onegoods_value

# Use the SimHei font so the Chinese labels below can be rendered, and turn
# off the Unicode minus sign (see the matplotlib rcParams documentation).
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Months as rows, categories as columns. Each (month, boy_cat1) pair is unique
# here, so pivot_table's default mean aggregation leaves the counts unchanged.
dfp = df_month_boy_one.pivot_table(index='month', columns='boy_cat1', values='numbers')

dfp.plot(kind='bar', figsize=(15, 10), rot=0)
plt.xlabel("月份")  # x-axis label
plt.ylabel("数量")  # y-axis label
plt.legend(fontsize=10)
plt.title("男婴在不同月份喜欢购买的一级商品")  # chart title
plt.show()