天池大赛o2o优惠券第一名代码解读(3)

来源:互联网 发布:log4j.xml 打印sql 编辑:程序博客网 时间:2024/04/30 19:32

感谢大神!!!

# Merchant-level features for dataset 3.
#
# Reads `feature3` (defined elsewhere in this file) and writes one row per
# merchant to data/merchant3_feature.csv. Missing values in the input are
# encoded as the literal string 'null'.
merchant3 = feature3[['merchant_id', 'coupon_id', 'distance', 'date_received', 'date']]

# One row per distinct merchant; this is the left side of every merge below.
# drop_duplicates() returns a new frame, so the slice is never mutated in place.
t = merchant3[['merchant_id']].drop_duplicates()

# Row filters shared by the counts below.
sold_mask = merchant3.date != 'null'         # rows that carry a `date` value
coupon_mask = merchant3.coupon_id != 'null'  # rows that carry a coupon

# Number of sales per merchant (rows with a date).
# size() counts rows per group, which equals summing a helper column of ones
# but avoids assigning a new column onto a slice of `merchant3`.
t1 = (merchant3.loc[sold_mask, ['merchant_id']]
      .groupby('merchant_id').size().reset_index(name='total_sales'))

# Number of sales per merchant where a coupon was used (the positive samples).
t2 = (merchant3.loc[sold_mask & coupon_mask, ['merchant_id']]
      .groupby('merchant_id').size().reset_index(name='sales_use_coupon'))

# Number of coupon rows per merchant.
t3 = (merchant3.loc[coupon_mask, ['merchant_id']]
      .groupby('merchant_id').size().reset_index(name='total_coupon'))

# Distances for coupon-using sales. Explicit copy so the column rewrite below
# operates on an independent frame.
t4 = merchant3.loc[sold_mask & coupon_mask, ['merchant_id', 'distance']].copy()
# 'null' -> -1 so the column can be cast to int, then -1 -> NaN so unknown
# distances are skipped by the aggregations. Only `distance` is rewritten;
# merchant_id is left untouched.
t4['distance'] = (t4['distance']
                  .replace('null', -1)
                  .astype('int')
                  .replace(-1, np.nan))

# Per-merchant minimum distance.
t5 = t4.groupby('merchant_id').agg('min').reset_index()
t5.rename(columns={'distance': 'merchant_min_distance'}, inplace=True)

# Per-merchant maximum distance.
t6 = t4.groupby('merchant_id').agg('max').reset_index()
t6.rename(columns={'distance': 'merchant_max_distance'}, inplace=True)

# Per-merchant mean distance.
t7 = t4.groupby('merchant_id').agg('mean').reset_index()
t7.rename(columns={'distance': 'merchant_mean_distance'}, inplace=True)

# Per-merchant median distance.
t8 = t4.groupby('merchant_id').agg('median').reset_index()
t8.rename(columns={'distance': 'merchant_median_distance'}, inplace=True)

# Left-join every feature table onto the merchant list; merchants absent from
# a table get NaN in that table's column.
merchant3_feature = t
for feature_table in (t1, t2, t3, t5, t6, t7, t8):
    merchant3_feature = pd.merge(merchant3_feature, feature_table,
                                 on='merchant_id', how='left')

# Merchants with no coupon-using sale have 0 such sales.
merchant3_feature['sales_use_coupon'] = merchant3_feature['sales_use_coupon'].fillna(0)

# Coupon usage rate: coupon-using sales / coupon rows.
# Computed before total_coupon is zero-filled, so merchants with no coupon rows
# get NaN here rather than a division by zero.
merchant3_feature['merchant_coupon_transfer_rate'] = (
    merchant3_feature['sales_use_coupon'].astype('float') / merchant3_feature['total_coupon']
)

# Share of all sales that used a coupon.
merchant3_feature['coupon_rate'] = (
    merchant3_feature['sales_use_coupon'].astype('float') / merchant3_feature['total_sales']
)

# Merchants with no coupon rows have 0 coupons.
merchant3_feature['total_coupon'] = merchant3_feature['total_coupon'].fillna(0)

merchant3_feature.to_csv('data/merchant3_feature.csv', index=False)
阅读全文
0 0
原创粉丝点击