python pandas字符串过滤

来源:互联网 发布:万网域名备案在哪 编辑:程序博客网 时间:2024/06/06 01:36
import pandas as pd


def _contains_any(names, needles):
    """Return a boolean mask of rows whose name contains any of *needles*.

    Matching is a literal (non-regex) substring test. Rows whose name is
    missing never match: the previous ``str.find(...) != -1`` test evaluated
    to True for NaN names, which tagged such rows with every label.
    """
    mask = pd.Series(False, index=names.index)
    for needle in needles:
        mask = mask | names.str.contains(needle, regex=False, na=False)
    return mask


def tag_products(df):
    """Add ``motor``, ``group``, ``fr`` and ``lr`` tag columns to *df*.

    The ``product_name`` column is lower-cased in place and then searched for
    keyword substrings. All tag values are strings:

    * ``motor``: "1" if the name mentions a motor/power keyword, "0" if it
      says "without motor" or "manual" (this overrides "1"), "#" otherwise.
    * ``group``: "1" if the name mentions a pair/set keyword, else "0".
    * ``fr``: "f" for "front", "r" for "rear" (rear overrides front),
      "#" otherwise.
    * ``lr``: "r" for right-side keywords, "l" for left-side keywords
      (left overrides right), "#" otherwise.

    NOTE: these are plain substring tests, so a keyword also matches when it
    occurs inside a longer word.

    Args:
        df: DataFrame with a string ``product_name`` column. Modified in place.

    Returns:
        The same DataFrame, for convenient chaining.
    """
    # Create the tag columns with their defaults ("#" means unknown).
    df["motor"] = "#"
    df["group"] = "0"  # by default the product is not a set
    df["fr"] = "#"
    df["lr"] = "#"

    # Lower-case the names so that every keyword test is case-insensitive.
    df["product_name"] = df["product_name"].str.lower()
    names = df["product_name"]

    # Motor: positive keywords first, then the negative ones so they win.
    with_motor = _contains_any(
        names, ["with motor", "w/motor", "w/ motor", "& motor", "power"]
    )
    df.loc[with_motor, "motor"] = "1"
    without_motor = _contains_any(names, ["without motor", "manual"])
    df.loc[without_motor, "motor"] = "0"

    # Group: the product is sold as a pair/set.
    is_group = _contains_any(names, ["pair", "set", "2x", "of 2"])
    df.loc[is_group, "group"] = "1"

    # Front / rear: "rear" is applied last and therefore overrides "front".
    df.loc[_contains_any(names, ["front"]), "fr"] = "f"
    df.loc[_contains_any(names, ["rear"]), "fr"] = "r"

    # Left / right: left is applied last and therefore overrides right.
    df.loc[_contains_any(names, ["right", "rh", "passenger"]), "lr"] = "r"
    df.loc[_contains_any(names, ["left", "lh", "driver"]), "lr"] = "l"

    return df


def main():
    """Read ``together.csv``, tag every row and write ``together_tag.csv``."""
    df = pd.read_csv("together.csv", index_col=False)
    tag_products(df).to_csv("together_tag.csv", index=False)


if __name__ == "__main__":
    main()