Python修改数据中的字符串数据列
来源:互联网 发布:施耐德可编程编程软件 编辑:程序博客网 时间:2024/06/08 01:22
有时候,我们想修改数据中的字符串数据列。下面的方法供参考:
str.extract()
str.upper()
str.lower()
str.len()
str.split()
str.replace()
参考实例:
>>> import pandas as pd
>>> df = pd.DataFrame([['RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT'],
...                    ['RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT'],
...                    ['RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT'],
...                    ['COHOES CITY SCHOOL DISTRICT'],
...                    ['COHOES CITY SCHOOL DISTRICT']])
>>> df.columns = ['AREA NAME']
>>> df
----------
                                         AREA NAME
0  RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT
1  RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT
2  RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT
3                      COHOES CITY SCHOOL DISTRICT
4                      COHOES CITY SCHOOL DISTRICT
str.extract()
>>> df['AREA NAME'].str.extract(r'(\w+)', expand=False)  # 提取AREA NAME列字符串中的第一个单词(expand=False 时单个分组返回Series)
----------
0    RAVENA
1    RAVENA
2    RAVENA
3    COHOES
4    COHOES
Name: AREA NAME, dtype: object
>>> df['AREA NAME'].str.extract(r'(\w+)\s(\w+)')  # 将AREA NAME列中的前两个单词分别提取为单独的列
----------
        0         1
0  RAVENA  COEYMANS
1  RAVENA  COEYMANS
2  RAVENA  COEYMANS
3  COHOES      CITY
4  COHOES      CITY
str.upper()
>>>df['AREA NAME'].str.upper()#因为数据已经是大写,所以没有改变----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 COHOES CITY SCHOOL DISTRICT4 COHOES CITY SCHOOL DISTRICTName: AREA NAME, dtype: object
str.lower()
>>> df['AREA NAME'].str.lower()  # 将AREA NAME列中的字符串转化为小写
----------
0    ravena coeymans selkirk central school district
1    ravena coeymans selkirk central school district
2    ravena coeymans selkirk central school district
3                        cohoes city school district
4                        cohoes city school district
Name: AREA NAME, dtype: object
str.len()
>>> df['AREA NAME'].str.len()  # AREA NAME列中每个元素的长度
----------
0    47
1    47
2    47
3    27
4    27
Name: AREA NAME, dtype: int64
str.split()
>>> df['AREA NAME'].str.split(' ')  # 用空格分割AREA NAME列中的字符串
----------
0    [RAVENA, COEYMANS, SELKIRK, CENTRAL, SCHOOL, D...
1    [RAVENA, COEYMANS, SELKIRK, CENTRAL, SCHOOL, D...
2    [RAVENA, COEYMANS, SELKIRK, CENTRAL, SCHOOL, D...
3                     [COHOES, CITY, SCHOOL, DISTRICT]
4                     [COHOES, CITY, SCHOOL, DISTRICT]
dtype: object
str.replace()
>>> df['AREA NAME'].str.replace(r'DISTRICT$', 'DIST', regex=True)  # 将AREA NAME列中每个元素末尾的DISTRICT替换为DIST(模式是正则表达式,新版pandas需显式指定regex=True)
----------
0    RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DIST
1    RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DIST
2    RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DIST
3                        COHOES CITY SCHOOL DIST
4                        COHOES CITY SCHOOL DIST
Name: AREA NAME, dtype: object
str.cat()
>>>df['AREA NAME'].str.cat(['a', 'b', 'c', 'd', 'e'],sep=' ')#拼接字符串,在原有字符串后各拼接一个字符串----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT a1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT b2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT c3 COHOES CITY SCHOOL DISTRICT d4 COHOES CITY SCHOOL DISTRICT eName: AREA NAME, dtype: object>>>df['AREA NAME'].str.cat([['a', 'b', 'c', 'd', 'e'], ['1', '2', '3', '4', '5']], sep=' ')#在原有字符串后各拼接两个字符串----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRIC...1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRIC...2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRIC...3 COHOES CITY SCHOOL DISTRICT d 44 COHOES CITY SCHOOL DISTRICT e 5Name: AREA NAME, dtype: object>>>df['AREA NAME'].str.cat(sep=',')#将某一列拼接成一个完整的字符串----------RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT,RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT,RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT,COHOES CITY SCHOOL DISTRICT,COHOES CITY SCHOOL DISTRICT
str.get()
>>> df['AREA NAME'].str.get(0)  # 获取每个字符串中指定位置的字符(此处为第一个字符)
----------
0    R
1    R
2    R
3    C
4    C
Name: AREA NAME, dtype: object
str.contains()
>>>df['AREA NAME'].str.contains('RAVENA')#是否包含表达式----------0 True1 True2 True3 False4 FalseName: AREA NAME, dtype: bool
str.pad()
>>>df['AREA NAME'].str.pad(47, fillchar='?')#左补齐----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 ????????????????????COHOES CITY SCHOOL DISTRICT4 ????????????????????COHOES CITY SCHOOL DISTRICTName: AREA NAME, dtype: object>>>df['AREA NAME'].str.pad(47, side='right', fillchar='?')#右补齐----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 COHOES CITY SCHOOL DISTRICT????????????????????4 COHOES CITY SCHOOL DISTRICT????????????????????Name: AREA NAME, dtype: object
str.center()
>>>df['AREA NAME'].str.center(47, fillchar=' ')#中间补齐----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 COHOES CITY SCHOOL DISTRICT 4 COHOES CITY SCHOOL DISTRICT Name: AREA NAME, dtype: object
str.ljust()
>>>df['AREA NAME'].str.ljust(47, fillchar='?')#右边补齐----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 COHOES CITY SCHOOL DISTRICT????????????????????4 COHOES CITY SCHOOL DISTRICT????????????????????Name: AREA NAME, dtype: object
str.rjust()
>>>df['AREA NAME'].str.rjust(47, fillchar='?')#左边补齐----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 ????????????????????COHOES CITY SCHOOL DISTRICT4 ????????????????????COHOES CITY SCHOOL DISTRICTName: AREA NAME, dtype: object
str.zfill()
>>>df['AREA NAME'].str.zfill(47)#左边补0----------0 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT1 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT2 RAVENA COEYMANS SELKIRK CENTRAL SCHOOL DISTRICT3 00000000000000000000COHOES CITY SCHOOL DISTRICT4 00000000000000000000COHOES CITY SCHOOL DISTRICTName: AREA NAME, dtype: object
str.slice()
>>>df['AREA NAME'].str.slice(8,23)#按给定的开始结束位置切割字符串----------0 OEYMANS SELKIRK1 OEYMANS SELKIRK2 OEYMANS SELKIRK3 ITY SCHOOL DIST4 ITY SCHOOL DISTName: AREA NAME, dtype: object
str.slice_replace()
>>>df['AREA NAME'].str.slice_replace(8, 23, '??')#使用给定的字符串,替换指定位置的字符----------0 RAVENA C?? CENTRAL SCHOOL DISTRICT1 RAVENA C?? CENTRAL SCHOOL DISTRICT2 RAVENA C?? CENTRAL SCHOOL DISTRICT3 COHOES C??RICT4 COHOES C??RICTName: AREA NAME, dtype: object
str.count()
>>> df['AREA NAME'].str.count('A')  # 统计给定模式(此处为字符A)在每个字符串中出现的次数
----------
0    4
1    4
2    4
3    0
4    0
Name: AREA NAME, dtype: int64
str.startswith()
>>>df['AREA NAME'].str.startswith('R')#判断是否以给定的字符串开头----------0 True1 True2 True3 False4 FalseName: AREA NAME, dtype: bool
str.endswith()
>>>df['AREA NAME'].str.endswith('T')#判断是否以给定的字符串结束----------0 True1 True2 True3 True4 TrueName: AREA NAME, dtype: bool
str.findall()
>>>df['AREA NAME'].str.findall('[A-D]')#查找所有符合正则表达式的字符,以数组形式返回----------0 [A, A, C, A, C, A, C, D, C]1 [A, A, C, A, C, A, C, D, C]2 [A, A, C, A, C, A, C, D, C]3 [C, C, C, D, C]4 [C, C, C, D, C]Name: AREA NAME, dtype: object
str.match()
>>> df['AREA NAME'].str.match('[A-D]')  # 检测每个字符串的开头是否匹配给定的正则表达式(相当于re.match,并非要求整个字符串完全匹配;完全匹配请用str.fullmatch)
----------
0    False
1    False
2    False
3     True
4     True
Name: AREA NAME, dtype: bool
str.isalnum()
>>>df['AREA NAME'].str.isalnum()#是否全部是数字和字母组成----------0 False1 False2 False3 False4 FalseName: AREA NAME, dtype: bool
str.swapcase()
>>>df['AREA NAME'].str.swapcase()#大小写互换----------0 ravena coeymans selkirk central school district1 ravena coeymans selkirk central school district2 ravena coeymans selkirk central school district3 cohoes city school district4 cohoes city school districtName: AREA NAME, dtype: object
阅读全文
0 0
- Python修改数据中的字符串数据列
- 动态列数据修改专用!
- MySQL 添加列,修改列,删除列,修改。删除数据,
- 【python 数据框重命名列名】Pandas中DateFrame修改列名
- 自定义列中的数据更新
- bootstrapTable 数据绑定和修改列
- Python中的数据对象
- Python中的数据标准化
- python中的数据标准化
- 数据表中列数据转换成字符串
- ALV根据显示列动态修改行汇总列数据
- python 批量修改/替换数据
- VC 修改SQL中的数据
- oracle修改数据库中的数据
- 取一行多列数据中的最大值
- GridView 模板列中的数据绑定
- 合并数据库中某一列中的数据
- sscanf 取出字符串中的数据
- 计算几何之线段相交问题模板
- sql查询语句27到练习
- [新手入门]什么是量化策略
- 2017 Multi-University Training Contest
- HDU 6053 容斥dp 或 莫比乌斯反演
- Python修改数据中的字符串数据列
- 莫比乌斯函数
- oracle
- 针对百家号抓取项目
- hdu 6053 TrickGCD 筛法+莫比乌斯函数+分块处理
- 数据库优化
- 剑指offer——链表中倒数第k个节点
- 日常学习2017.7.28
- 多态&多态对象模型