Python_正则表达式入门(实例讲解)

来源:互联网 发布:站长之家素材源码 编辑:程序博客网 时间:2024/06/05 05:49
import re# compile   #编译成正则# findall   #寻找所有符合条件的字符,返回列表# match     #开头匹配,返回最前面匹配到的内容,通过group调用# search    #全局匹配,返回最前面匹配到的内容,通过group调用# split     #用正则分割字符串,功能更强大,返回列表# sub       #替换,返回替换后的字符# subn      #替换,返回替换后的字符和替换次数,用元组存储返回


#最简单的实例,findall找到所有符合条件的字符串,生成一个列表# pattern='fool'# s1='I am a fool'# s2='You are a idiot'# s3='fools is more than fool'# print re.findall(pattern,s1)# print re.findall(pattern,s2)# print re.findall(pattern,s3)#不过更建议写成这样,速度更快,解析更准确# pattern=re.compile(r'fool')# pattern=re.compile('fool')# s3='fools is more than fool'# print re.findall(pattern,s3)#字符集合匹配# pattern1=re.compile('f[abc]')#包含在内# pattern2=re.compile('f[^abc]')#去除# pattern3=re.compile('f[a-z]')#包含范围# pattern4=re.compile('f[a-z]{2}')# s1='father is fb while mother is dfc and dfv'# print pattern1.findall(s1)# print pattern2.findall(s1)# print pattern3.findall(s1)# print pattern4.findall(s1)#开头匹配# pattern=re.compile('^egg')# s1='egg belongs to you'# s2='the egg belongs to you'# print pattern.findall(s1)# print pattern.findall(s2)#结尾匹配# pattern=re.compile('you$')# s1='what are you'# s2='what you are'# print pattern.findall(s1)# print pattern.findall(s2)#匹配个数# pattern1=re.compile('abc{4}')# pattern2=re.compile('abc{2,4}')# pattern3=re.compile('abc{2,}')# pattern4=re.compile('abc*')#大于等于0,贪婪模式,等于{0,}# pattern5=re.compile('abc+')#大于等于1,贪婪模式,等于{1,}# s1='ab abc abcc abccc abcccc'# print pattern1.findall(s1)# print pattern2.findall(s1)# print pattern3.findall(s1)# print pattern4.findall(s1)# print pattern5.findall(s1)#非贪婪模式# pattern1=re.compile('abc+?')#非贪婪# pattern2=re.compile('abc+')#贪婪# s1='abc abcc abccc abcccc'# print pattern1.findall(s1)# print pattern2.findall(s1)#转义字符# re.findall('\d','dert5322ws')#十进制数字# re.findall('\D','defds846sdbv')#非十进制数字# re.findall('\s','se4 se23e 21342 d44,s')#空白字符# re.findall('\S','se4 se23e 21342 d44,s')#非空字符# re.findall('\w','se4 se23e 21342 d44,s')#数字字母字符# re.findall('\W','se4 se23e a42 d44,s')#非数字字母字符# re.findall('s.{4}','se4 se23e a42 d44,s')#.可表示任意字符#筛选有用信息# pattern1=re.compile('name=(.+),')# pattern2=re.compile('name=(.+),age=(\d+)')# s1='name=zhangweiguo,age=18'# print pattern1.findall(s1)# print pattern2.findall(s1)#多个筛选通道# pattern1=re.compile('(good|bad)')# s1='apple is 
good, while weilong is bad'# print pattern1.findall(s1)#搜索选项# pattern1=re.compile('zw[123]',re.I)#不区分大小写# pattern2=re.compile('zw[123]')# s1='Zw1 zw2 ZW3'# print pattern1.findall(s1)# print pattern2.findall(s1)## pattern3=re.compile('^Begin.+end$',re.M)#每一行当作一个元素# pattern4=re.compile('^Begin.+end$',re.S)#让.也能匹配换行符,从而跨行匹配# pattern5=re.compile('^Begin.+end$')# s2='Begin You do end\nBegin I do end'# print pattern3.findall(s2)# print pattern4.findall(s2)# print pattern5.findall(s2)#有时一次性找出所有的太占内存,采用迭代器finditer# pattern1=re.compile('(Liu.+?)\s',re.I|re.S)# s1='Liusan and Liusi are all named by \nLiuke and liumei '# S=pattern1.finditer(s1)# for string in S:#     print string.group(),string.start(),string.end()#有时并不需要findall,那么来试试match和search# pattern1=re.compile('Chinese',re.I)# s1='Do you know Chinese is a language, and chinese is good'# s2='Chinese is a kind language'# p1=pattern1.search(s1)#全局匹配,无则返回None# p2=pattern1.match(s1)#开头匹配,无则返回None,有则返回匹配对象# p3=pattern1.match(s2)# print p1.span(),p1.start(),p1.end(),p1.group()# print p2# print p3.span(),p3.group()#还有替换与分割的功能:split、sub(subn)# pattern1=re.compile('go+d')# s1='the best good food is made by god'# print pattern1.sub('fool',s1)#返回更改后的字符串# print pattern1.subn('fool_copy',s1)#返回更改后的字符串与更换次数#约等价于以下# s1=s1.replace('good','fool')# s1=s1.replace('god','fool')# pattern2=re.compile('[\+\-\*/]')# pattern3=re.compile('<.*?>')# s2='12+13*34/34'# s3='Bod<thin>Amily<fat>Bill<>Youself<the best>Newton'# ss=pattern2.split(s2)# print ss# print pattern3.split(s3)

# 更高级的功能(一):生成字典(?P<name>)# s=[r'^(?P<first_word>\w+)',#    r'[\s,\.]*(\w+)[\s,\.]*',#    r'(?P<last_word>\w+)\.$']# S='I am a student, and you are my teacher.'# for i in s:#     pattern=re.compile(i)#     p1=pattern.findall(S)#     p2=pattern.search(S)#     print '模式:%-10s;findall匹配到:%-10s'%(i,p1)#     print 'search匹配到:%-10s,生成的字典是:%-10s'%(p2.group(),p2.groupdict())# 更高级的功能(二):前向或后向,你希望匹配的字符前面或后面出现的东西# pattern1=re.compile('(?<=name=)\w+')#目的字符前面出现的内容,这里是:name=# pattern2=re.compile('\d+(?=,)')#目的字符后面应出现的内容,这里是:,# pattern3=re.compile('(?<!,)\w+(?=,)')#目的字符前面不出现',',后面出现','# pattern4=re.compile('\d+(?!,)')#目的字符后面不出现的内容,这里是:,# s1='myinfo:name=zhangweiguo,age=12,id=100,sex=female'# print pattern1.search(s1).group(),pattern1.findall(s1)# print pattern2.search(s1).group(),pattern2.findall(s1)# print pattern3.search(s1).group(),pattern3.findall(s1)# print pattern4.search(s1).group(),pattern4.findall(s1)

# 下面是几个简单的实例# 实例一:电话号码格式# pattern1=re.compile('^\d{3}-\d{7}$')# s1='021-2823456'# s2='201-3456789'# s3='12d-1235675'# print(pattern1.findall(s1))# print(pattern1.search(s2))# print(pattern1.match(s3))# 实例二:密码规则# 要求:只能由大写字母、小写字母、数字组成,8位以上(注意:该模式未以$结尾,只校验开头的8位以上字符,也不强制三类字符都出现)# pattern=re.compile('[A-Za-z\d]{8,}')# s1='Python520'# s2='#python520'# s3='python5'# print pattern.match(s1)# print pattern.match(s2)# print pattern.match(s3)# 实例三:邮箱格式验证# pattern=re.compile('[\d\w-]+@(qq|163|126|gmail|sina|Outlook|yahoo)\.com')# s1='420943404@qq.com'# s2='dehu_des@163.com'# p1=pattern.findall(s1)#findall只返回括号内的内容,并且多个括号时返回元组的形式# p2=pattern.match(s1)#match和search返回全部匹配的内容# p3=pattern.search(s2)# print p1# print p2.group()# print p3.group()



1 0