python自然语言处理 第四章基本语法
来源:互联网 发布:java字符串比较大小 编辑:程序博客网 时间:2024/06/03 09:25
# Study notes for chapter 4 (basic Python syntax) of
# "Natural Language Processing with Python", written for Python 3.
#
# The three helper functions are defined at module level so they can be
# imported on their own.  The walkthrough below them mirrors an interactive
# session: bare expressions are followed by a comment holding the value the
# original notes recorded for them.  It only runs when the file is executed
# as a script, and it needs NLTK plus its tokenizer/corpus data.

import random


# Function words and punctuation that is_content_word() rejects.
_NON_CONTENT_WORDS = frozenset(['a', 'of', 'the', 'and', 'will', ',', '.'])


def permutations(seq):
    """Generate every permutation of a sliceable sequence.

    Works on any sequence that supports slicing and ``+`` (lists, tuples,
    strings).  Each permutation is produced by inserting the first item of
    ``seq`` at every position of each permutation of the remaining items.

    Args:
        seq: The sequence to permute.

    Yields:
        Sequences of the same type as ``seq``.  A sequence of length 0 or 1
        is yielded unchanged as its only permutation.
    """
    if len(seq) <= 1:
        yield seq
    else:
        for perm in permutations(seq[1:]):
            for i in range(len(perm) + 1):
                # seq[0:1] (not seq[0]) keeps the piece the same type as seq.
                yield perm[:i] + seq[0:1] + perm[i:]


def is_content_word(word):
    """Return True unless ``word`` is one of a few function words or marks.

    The comparison is case-insensitive.

    Args:
        word: The token to test.

    Returns:
        False for 'a', 'of', 'the', 'and', 'will', ',' and '.', True for
        everything else.
    """
    return word.lower() not in _NON_CONTENT_WORDS


def factorial2(n):
    """Return n! computed recursively.

    Args:
        n: A non-negative integer.

    Returns:
        The factorial of ``n``; 0! and 1! are both 1.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("factorial2() is not defined for negative values")
    # n <= 1 (rather than n == 1) so that factorial2(0) terminates.
    if n <= 1:
        return 1
    return n * factorial2(n - 1)


if __name__ == "__main__":
    # ---- Assignment ----
    foo = 'Monty'
    bar = foo
    foo = 'Python'
    bar  # 'Monty'

    foo = ['Monty', 'Python']
    bar = foo
    foo[1] = 'Bodkin'
    bar  # ['Monty', 'Bodkin']

    empty = []
    nested = [empty, empty, empty]
    nested  # [[], [], []]
    nested[1].append('Python')
    nested  # [['Python'], ['Python'], ['Python']]

    nested = [[]] * 3
    nested[1].append('aaa')
    nested  # [['aaa'], ['aaa'], ['aaa']]
    nested[1] = ['Monty']
    nested  # [['aaa'], ['Monty'], ['aaa']]

    # ---- Equality ----
    size = 5
    python = ['Python']
    snake_nest = [python] * size
    snake_nest  # [['Python'], ['Python'], ['Python'], ['Python'], ['Python']]
    snake_nest[0] == snake_nest[1] == snake_nest[2] == snake_nest[3] == snake_nest[4]  # True
    snake_nest[0] is snake_nest[1] is snake_nest[2] is snake_nest[3] is snake_nest[4]  # True

    position = random.choice(range(size))
    snake_nest[position] = ['Python']
    snake_nest  # [['Python'], ['Python'], ['Python'], ['Python'], ['Python']]
    snake_nest[0] == snake_nest[1] == snake_nest[2] == snake_nest[3] == snake_nest[4]  # True
    snake_nest[0] is snake_nest[1] is snake_nest[2] is snake_nest[3] is snake_nest[4]  # False
    [id(snake) for snake in snake_nest]  # [212336032, 212336032, 212128032, 212336032, 212336032]

    # ---- Conditionals: if ... elif ...; when the `if` test is true the
    # `elif` branch is not executed ----
    mixed = ['cat', '', ['dog'], []]
    for element in mixed:
        if element:
            print(element)

    animals = ['cat', 'dog']
    if 'rabbit' in animals:
        print(1)
    elif 'dog' in animals:
        print(2)

    sent = ['No', 'good', 'fish', 'goes', 'anywhere', 'without', 'a', 'porpoise', '.']
    all(len(w) > 4 for w in sent)  # False: tells whether every item satisfies the test
    any(len(w) > 4 for w in sent)  # True: tells whether at least one item satisfies it

    # ---- Sequences ----
    t = 'walk', 'fem', 3
    t  # ('walk', 'fem', 3)
    t[0]  # 'walk'
    t[1:]  # ('fem', 3)
    len(t)  # 3

    # ---- Comparing strings, tuples and lists ----
    raw = 'I turned off the spectroroute'
    text = ['I', 'turned', 'off', 'the', 'spectroroute']
    pair = (6, 'turned')
    raw[2], text[3], pair[1]  # ('t', 'the', 'turned')
    raw[-3:], text[-3:], pair[-3:]  # ('ute', ['off', 'the', 'spectroroute'], (6, 'turned'))
    len(raw), len(text), len(pair)  # (29, 5, 2)

    # Imported here rather than at the top so the helper functions above
    # stay importable on machines without NLTK.
    import nltk

    raw = 'Red lorry, yellow lorry, red lorry, yellow lorry'
    text = nltk.word_tokenize(raw)
    fdist = nltk.FreqDist(text)
    # Output as recorded in the original notes; the key order may differ
    # between NLTK versions.
    list(fdist)  # ['yellow', 'red', 'lorry', 'Red', ',']
    for key in fdist:
        print(fdist[key], end=' ')  # 2 1 4 1 3: two 'yellow', four 'lorry'

    words = ['I', 'turned', 'off', 'the', 'spectroroute']
    words[2], words[3], words[4] = words[3], words[4], words[2]
    words  # ['I', 'turned', 'the', 'spectroroute', 'off']

    tmp = words[2]
    words[2] = words[3]
    words[3] = words[4]
    words[4] = tmp
    words  # ['I', 'turned', 'spectroroute', 'off', 'the']

    words = ['I', 'turned', 'off', 'the', 'spectroroute']
    tags = ['noun', 'verb', 'prep', 'det', 'noun']
    # zip takes the items of two or more sequences and "zips" them together
    # into pairs; list() materialises the pairs under Python 3.
    list(zip(words, tags))
    # [('I', 'noun'),
    #  ('turned', 'verb'),
    #  ('off', 'prep'),
    #  ('the', 'det'),
    #  ('spectroroute', 'noun')]

    # ---- Splitting data ----
    text = nltk.corpus.nps_chat.words()
    cut = int(0.9 * len(text))
    training_data, test_data = text[:cut], text[cut:]
    text == training_data + test_data  # True
    len(training_data) // len(test_data)  # 9 (floor division keeps the integer ratio)

    # ---- Combining different sequence types ----
    words = 'I turned off the spectroroute'.split()
    wordlens = [(len(word), word) for word in words]
    wordlens  # [(1, 'I'), (6, 'turned'), (3, 'off'), (3, 'the'), (12, 'spectroroute')]
    wordlens.sort()
    ' '.join(w for (_, w) in wordlens)  # 'I off the turned spectroroute'

    # ---- Generator expressions ----
    text = """When I use a word,"Humpty Dumpty said in rather a acornful tone,"it means just what I choose it to mean - neither more nor less."""
    [w.lower() for w in nltk.word_tokenize(text)]
    # NOTE: how to tokenize Chinese text is still an open question in these notes.
    max([w.lower() for w in nltk.word_tokenize(text)])
    min([w.lower() for w in nltk.word_tokenize(text)])

    # ---- All permutations of a list of words ----
    list(permutations(['police', 'fish', 'buffalo']))

    # ---- Checking whether a word belongs to an open content-word class ----
    sent = ['Take', 'care', 'of', 'the', 'sense', ',', 'and', 'the', 'sounds', 'will', 'take', 'care',
            'of', 'themeselves', '.']
    list(filter(is_content_word, sent))
    [w for w in sent if is_content_word(w)]

    nltk.metrics.distance.__file__

    factorial2(5)
阅读全文
0 0
- python自然语言处理 第四章基本语法
- 《Python自然语言处理》学习笔记-第四章
- python 自然语言处理 第二章
- python 自然语言处理 第三章
- python 自然语言处理 第五章
- python 自然语言处理第三章
- python自然语言处理学习笔记第二章
- python自然语言处理学习笔记第三章
- python自然语言处理笔记-第三章
- Python自然语言处理第二章部分习题
- python自然语言处理 第二章(上)
- 《Python自然语言处理》学习笔记-第二章
- 《Python自然语言处理》学习笔记-第五章
- python自然语言处理第二章笔记
- Python 自然语言处理 一
- 《python自然语言处理》笔记
- 《Python自然语言处理》
- PYTHON 自然语言处理
- 解决OneNote“未安装桌面体验功能”问题
- 抽象代数学习笔记(3)映射
- 59 linux i2c设备驱动之dht12驱动
- (1)Linux的初步了解
- 低延时的RTMP网络直播
- python自然语言处理 第四章基本语法
- maven-assembly-plugin打包插件配置
- 20年程序员的碎碎念
- android 获取其他应用程序的缓存大小以及清理应用缓存
- 二维码好高大尚,我也想生成一个
- 加固+多渠道打包+Bugly统计+热修复
- ACM (4) ASCII码排序
- hibernate使用MySQLInnoDBDialect不能自动建表的问题
- 【Java并发】- 使用CompletionService异步收集任务结果