NLTK10《Python自然语言处理》code09 建立基于特征的文法
来源:互联网 发布:淘宝挂机是什么意思 编辑:程序博客网 时间:2024/06/10 10:44
建立基于特征的文法
# -*- coding: utf-8 -*-
# win10 python3.5.3/python3.6.1 nltk3.2.4
# NLTK Book, Chapter 9: Building Feature-Based Grammars
# pnlp09.py
#
# NOTE(review): this file was recovered from a whitespace-mangled web
# scrape; line structure has been restored.  The triple-quoted string
# blocks are inert transcripts of the expected interpreter output.
import nltk

# 9.1 Grammatical Features
kim = {'CAT': 'NP', 'ORTH': 'Kim', 'REF': 'k'}
chase = {'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase'}
# kim and chase share the features CAT (grammatical category) and ORTH
# (orthography, i.e. spelling).  Each also carries a semantically
# oriented feature: kim['REF'] is kim's referent, while chase['REL'] is
# the relation that chase expresses.
chase['AGT'] = 'sbj'  # sbj: subject
chase['PAT'] = 'obj'  # obj: object

sent = "Kim chased Lee"
tokens = sent.split()
lee = {'CAT': 'NP', 'ORTH': 'Lee', 'REF': 'l'}


def lex2fs(word):
    """Return the lexical feature structure whose ORTH equals *word*.

    Falls through (returning None) for an unknown word; the calls below
    only ever pass words present in the tiny lexicon [kim, lee, chase].
    """
    for fs in [kim, lee, chase]:
        if fs['ORTH'] == word:
            return fs


subj, verb, obj = lex2fs(tokens[0]), lex2fs(tokens[1]), lex2fs(tokens[2])
verb['AGT'] = subj['REF']  # agent of 'chase' is Kim
verb['PAT'] = obj['REF']   # patient of 'chase' is Lee
for k in ['ORTH', 'REL', 'AGT', 'PAT']:  # check featstruct of 'chase'
    print("%-5s => %s" % (k, verb[k]))
"""
ORTH  => chased
REL   => chase
AGT   => k
PAT   => l
"""

surprise = {'CAT': 'V', 'ORTH': 'surprised', 'REL': 'surprise',
            'SRC': 'sbj', 'EXP': 'obj'}

# Syntactic agreement
# Using attributes and constraints
# Example 9-1: a feature-based grammar
nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
"""
% start S
# ###################
# Grammar Productions
# ###################
# S expansion productions
S -> NP[NUM=?n] VP[NUM=?n]
# NP expansion productions
NP[NUM=?n] -> N[NUM=?n]
NP[NUM=?n] -> PropN[NUM=?n]
NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
NP[NUM=pl] -> N[NUM=pl]
# VP expansion productions
VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
# ###################
# Lexical Productions
# ###################
Det[NUM=sg] -> 'this' | 'every'
Det[NUM=pl] -> 'these' | 'all'
Det -> 'the' | 'some' | 'several'
PropN[NUM=sg]-> 'Kim' | 'Jody'
N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
IV[TENSE=pres, NUM=sg] -> 'disappears' | 'walks'
TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
IV[TENSE=pres, NUM=pl] -> 'disappear' | 'walk'
TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
IV[TENSE=past] -> 'disappeared' | 'walked'
TV[TENSE=past] -> 'saw' | 'liked'
"""

# Example 9-2: tracing a feature-based chart parser
tokens = 'Kim likes children'.split()
from nltk import load_parser
cp = load_parser('grammars/book_grammars/feat0.fcfg', trace=2)
trees = cp.parse(tokens)
"""
|.Kim .like.chil.|
Leaf Init Rule:
|[----]    .    .| [0:1] 'Kim'
|.    [----]    .| [1:2] 'likes'
|.    .    [----]| [2:3] 'children'
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
Feature Bottom Up Predict Combine Rule:
|[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
Feature Bottom Up Predict Combine Rule:
|[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
Feature Bottom Up Predict Combine Rule:
|.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
Feature Bottom Up Predict Combine Rule:
|.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
Feature Bottom Up Predict Combine Rule:
|.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
Feature Bottom Up Predict Combine Rule:
|.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
Feature Single Edge Fundamental Rule:
|.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
Feature Single Edge Fundamental Rule:
|[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
"""
for tree in trees:
    print(tree)
"""
(S[]
  (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
  (VP[NUM='sg', TENSE='pres']
    (TV[NUM='sg', TENSE='pres'] likes)
    (NP[NUM='pl'] (N[NUM='pl'] children))))
"""

# Terminology

# 9.2 Processing Feature Structures
fs1 = nltk.FeatStruct(TENSE='past', NUM='sg')
print(fs1)
"""
[ NUM   = 'sg'   ]
[ TENSE = 'past' ]
"""

fs1 = nltk.FeatStruct(PER=3, NUM='pl', GND='fem')
print(fs1['GND'])  # fem
fs1['CASE'] = 'acc'
fs2 = nltk.FeatStruct(POS='N', AGR=fs1)
print(fs2)
"""
[       [ CASE = 'acc' ] ]
[ AGR = [ GND  = 'fem' ] ]
[       [ NUM  = 'pl'  ] ]
[       [ PER  = 3     ] ]
[                        ]
[ POS = 'N'              ]
"""
print(fs2['AGR'])
"""
[ CASE = 'acc' ]
[ GND  = 'fem' ]
[ NUM  = 'pl'  ]
[ PER  = 3     ]
"""
print(fs2['AGR']['PER'])  # 3

# Feature structures can also be built from bracketed string notation.
print(nltk.FeatStruct("[POS='N', AGR=[PER=3, NUM='pl', GND='fem']]"))
"""
[       [ GND = 'fem' ] ]
[ AGR = [ NUM = 'pl'  ] ]
[       [ PER = 3     ] ]
[                       ]
[ POS = 'N'             ]
"""
print(nltk.FeatStruct(name='Lee', telno='01 27 86 42 96', age=33))
"""
[ age   = 33               ]
[ name  = 'Lee'            ]
[ telno = '01 27 86 42 96' ]
"""

# Structure sharing: (1) tags a value; ADDRESS->(1) re-uses (points to)
# the same tagged value rather than copying it.
print(nltk.FeatStruct("""[NAME='Lee', ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
SPOUSE=[NAME='Kim', ADDRESS->(1)]]"""))
"""
[ ADDRESS = (1) [ NUMBER = 74           ] ]
[               [ STREET = 'rue Pascal' ] ]
[                                         ]
[ NAME    = 'Lee'                         ]
[                                         ]
[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
[           [ NAME    = 'Kim' ]           ]
"""
print(nltk.FeatStruct("[A='a', B=(1)[C='c'], D->(1), E->(1)]"))
"""
[ A = 'a'             ]
[                     ]
[ B = (1) [ C = 'c' ] ]
[                     ]
[ D -> (1)            ]
[ E -> (1)            ]
"""

# Subsumption and unification
fs1 = nltk.FeatStruct(NUMBER=74, STREET='rue Pascal')
fs2 = nltk.FeatStruct(CITY='Paris')
print(fs1.unify(fs2))
"""
[ CITY   = 'Paris'      ]
[ NUMBER = 74           ]
[ STREET = 'rue Pascal' ]
"""
print(fs2.unify(fs1))  # unification is symmetric: same result
"""
[ CITY   = 'Paris'      ]
[ NUMBER = 74           ]
[ STREET = 'rue Pascal' ]
"""

# Conflicting atomic values make unification fail (result is None).
fs0 = nltk.FeatStruct(A='a')
fs1 = nltk.FeatStruct(A='b')
fs2 = fs0.unify(fs1)
print(fs2)  # None

# Without structure sharing, Lee's address and Kim's address are two
# independent copies (note 'number' vs 'NUMBER' in the source string).
fs0 = nltk.FeatStruct("""[NAME=Lee,
ADDRESS=[NUMBER=74,STREET='rue Pascal'],
SPOUSE=[NAME=Kim, ADDRESS=[number=74,STREET='rue Pascal']]]""")
print(fs0)
"""
[ ADDRESS = [ NUMBER = 74           ]               ]
[           [ STREET = 'rue Pascal' ]               ]
[                                                   ]
[ NAME    = 'Lee'                                   ]
[                                                   ]
[           [ ADDRESS = [ STREET = 'rue Pascal' ] ] ]
[ SPOUSE  = [           [ number = 74           ] ] ]
[           [                                     ] ]
[           [ NAME    = 'Kim'                     ] ]
"""
fs1 = nltk.FeatStruct("[SPOUSE=[ADDRESS=[CITY=Paris]]]")
# CITY is only added to the SPOUSE copy of the address.
print(fs1.unify(fs0))
"""
[ ADDRESS = [ NUMBER = 74           ]               ]
[           [ STREET = 'rue Pascal' ]               ]
[                                                   ]
[ NAME    = 'Lee'                                   ]
[                                                   ]
[           [           [ CITY   = 'Paris'      ] ] ]
[           [ ADDRESS = [ STREET = 'rue Pascal' ] ] ]
[ SPOUSE  = [           [ number = 74           ] ] ]
[           [                                     ] ]
[           [ NAME    = 'Kim'                     ] ]
"""
# With structure sharing, both ADDRESS paths point at the same node.
fs2 = nltk.FeatStruct("""[NAME=Lee, ADDRESS=(1)[NUMBER=74, STREET='rue Pascal'],
SPOUSE=[NAME=Kim, ADDRESS->(1)]]""")
print(fs1.unify(fs2))
"""
[ ADDRESS = (1) [ NUMBER = 74           ] ]
[               [ STREET = 'rue Pascal' ] ]
[                                         ]
[ NAME    = 'Lee'                         ]
[                                         ]
[ SPOUSE  = [ ADDRESS -> (1)  ]           ]
[           [ NAME    = 'Kim' ]           ]
"""

# Variables (?x) induce structure sharing during unification.
fs1 = nltk.FeatStruct("[ADDRESS1=[NUMBER=74, STREET='rue Pascal']]")
fs2 = nltk.FeatStruct("[ADDRESS1=?x, ADDRESS2=?x]")
print(fs2)
"""
[ ADDRESS1 = ?x ]
[ ADDRESS2 = ?x ]
"""
print(fs2.unify(fs1))
"""
[ ADDRESS1 = (1) [ NUMBER = 74           ] ]
[                [ STREET = 'rue Pascal' ] ]
[                                          ]
[ ADDRESS2 -> (1)                          ]
"""

# 9.3 Extending a Feature-Based Grammar
# Subcategorization
# Heads
# Auxiliary verbs and inversion
# Unbounded dependency constructions
# Example 9-3: a grammar with productions for inverted clauses and
# long-distance dependencies, using slash categories.
nltk.data.show_cfg('grammars/book_grammars/feat1.fcfg')
"""
% start S
# ###################
# Grammar Productions
# ###################
S[-INV] -> NP VP
S[-INV]/?x -> NP VP/?x
S[-INV] -> NP S/NP
S[-INV] -> Adv[+NEG] S[+INV]
S[+INV] -> V[+AUX] NP VP
S[+INV]/?x -> V[+AUX] NP VP/?x
SBar -> Comp S[-INV]
SBar/?x -> Comp S[-INV]/?x
VP -> V[SUBCAT=intrans, -AUX]
VP -> V[SUBCAT=trans, -AUX] NP
VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
VP -> V[SUBCAT=clause, -AUX] SBar
VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
VP -> V[+AUX] VP
VP/?x -> V[+AUX] VP/?x
# ###################
# Lexical Productions
# ###################
V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
V[SUBCAT=trans, -AUX] -> 'see' | 'like'
V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
V[+AUX] -> 'do' | 'can'
NP[-WH] -> 'you' | 'cats'
NP[+WH] -> 'who'
Adv[+NEG] -> 'rarely' | 'never'
NP/NP ->
Comp -> 'that'
"""

tokens = 'who do you claim that you like'.split()
cp = load_parser('grammars/book_grammars/feat1.fcfg')
for tree in cp.parse(tokens):
    print(tree)
"""
(S[-INV]
  (NP[+WH] who)
  (S[+INV]/NP[]
    (V[+AUX] do)
    (NP[-WH] you)
    (VP[]/NP[]
      (V[-AUX, SUBCAT='clause'] claim)
      (SBar[]/NP[]
        (Comp[] that)
        (S[-INV]/NP[]
          (NP[-WH] you)
          (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
"""
tokens = 'you claim that you like cats'.split()
for tree in cp.parse(tokens):
    print(tree)
"""
(S[-INV]
  (NP[-WH] you)
  (VP[]
    (V[-AUX, SUBCAT='clause'] claim)
    (SBar[]
      (Comp[] that)
      (S[-INV]
        (NP[-WH] you)
        (VP[] (V[-AUX, SUBCAT='trans'] like) (NP[-WH] cats))))))
"""
tokens = 'rarely do you sing'.split()
for tree in cp.parse(tokens):
    print(tree)
"""
(S[-INV]
  (Adv[+NEG] rarely)
  (S[+INV]
    (V[+AUX] do)
    (NP[-WH] you)
    (VP[] (V[-AUX, SUBCAT='intrans'] sing))))
"""

# Example 9-4: a feature-based grammar for German (case and agreement)
nltk.data.show_cfg('grammars/book_grammars/german.fcfg')
"""
% start S
# Grammar Productions
S -> NP[CASE=nom, AGR=?a] VP[AGR=?a]
NP[CASE=?c, AGR=?a] -> PRO[CASE=?c, AGR=?a]
NP[CASE=?c, AGR=?a] -> Det[CASE=?c, AGR=?a] N[CASE=?c, AGR=?a]
...
"""
tokens = 'ich folge den Katzen'.split()
cp = nltk.load_parser('grammars/book_grammars/german.fcfg')
for tree in cp.parse(tokens):
    print(tree)
"""
(S[]
  (NP[AGR=[NUM='sg', PER=1], CASE='nom']
    (PRO[AGR=[NUM='sg', PER=1], CASE='nom'] ich))
  (VP[AGR=[NUM='sg', PER=1]]
    (TV[AGR=[NUM='sg', PER=1], OBJCASE='dat'] folge)
    (NP[AGR=[GND='fem', NUM='pl', PER=3], CASE='dat']
      (Det[AGR=[NUM='pl', PER=3], CASE='dat'] den)
      (N[AGR=[GND='fem', NUM='pl', PER=3]] Katzen))))
"""

# An ungrammatical input ('den Katze' mixes a plural/dative determiner
# with a singular noun): the trace shows where unification blocks the
# parse, and no tree is printed.
tokens = 'ich folge den Katze'.split()
cp = nltk.load_parser('grammars/book_grammars/german.fcfg', trace=2)
for tree in cp.parse(tokens):
    print(tree)
"""
|.ich.fol.den.Kat.|
Leaf Init Rule:
|[---]   .   .   .| [0:1] 'ich'
|.   [---]   .   .| [1:2] 'folge'
|.   .   [---]   .| [2:3] 'den'
|.   .   .   [---]| [3:4] 'Katze'
Feature Bottom Up Predict Combine Rule:
|[---]   .   .   .| [0:1] PRO[AGR=[NUM='sg', PER=1], CASE='nom'] -> 'ich' *
Feature Bottom Up Predict Combine Rule:
|[---]   .   .   .| [0:1] NP[AGR=[NUM='sg', PER=1], CASE='nom'] -> PRO[AGR=[NUM='sg', PER=1], CASE='nom'] *
Feature Bottom Up Predict Combine Rule:
|[--->   .   .   .| [0:1] S[] -> NP[AGR=?a, CASE='nom'] * VP[AGR=?a] {?a: [NUM='sg', PER=1]}
Feature Bottom Up Predict Combine Rule:
|.   [---]   .   .| [1:2] TV[AGR=[NUM='sg', PER=1], OBJCASE='dat'] -> 'folge' *
Feature Bottom Up Predict Combine Rule:
|.   [--->   .   .| [1:2] VP[AGR=?a] -> TV[AGR=?a, OBJCASE=?c] * NP[CASE=?c] {?a: [NUM='sg', PER=1], ?c: 'dat'}
Feature Bottom Up Predict Combine Rule:
|.   .   [---]   .| [2:3] Det[AGR=[GND='masc', NUM='sg', PER=3], CASE='acc'] -> 'den' *
|.   .   [---]   .| [2:3] Det[AGR=[NUM='pl', PER=3], CASE='dat'] -> 'den' *
Feature Bottom Up Predict Combine Rule:
|.   .   [--->   .| [2:3] NP[AGR=?a, CASE=?c] -> Det[AGR=?a, CASE=?c] * N[AGR=?a, CASE=?c] {?a: [NUM='pl', PER=3], ?c: 'dat'}
Feature Bottom Up Predict Combine Rule:
|.   .   [--->   .| [2:3] NP[AGR=?a, CASE=?c] -> Det[AGR=?a, CASE=?c] * N[AGR=?a, CASE=?c] {?a: [GND='masc', NUM='sg', PER=3], ?c: 'acc'}
Feature Bottom Up Predict Combine Rule:
|.   .   .   [---]| [3:4] N[AGR=[GND='fem', NUM='sg', PER=3]] -> 'Katze' *
"""
阅读全文
0 0
- NLTK10《Python自然语言处理》code09 建立基于特征的文法
- Python自然语言处理 9 建立基于特征的文法
- 【译文】基于Python的自然语言处理指南
- 基于 Python 的简单自然语言处理
- Python与自然语言处理(二)基于Gensim的Word2Vec
- 自然语言处理学习(二):概率化的上下文无关文法
- 七、文法分析还是基于特征好啊
- 基于统计的自然语言处理和基于规则的自然语言处理的一些个人看法
- Python 自然语言处理 一
- 《python自然语言处理》笔记
- 《Python自然语言处理》
- PYTHON 自然语言处理
- python与自然语言处理
- Python自然语言处理
- python自然语言处理-WordNet
- python自然语言处理
- python 自然语言处理
- Python 自然语言处理 第一章
- Servlet接收Post请求以及回复请求
- CSDN上如何转载他人文章
- Linux学习笔记——静态库创建
- emmet官方语法速查
- 深圳P2P:有了银行存管也不一定安全
- NLTK10《Python自然语言处理》code09 建立基于特征的文法
- Python 字符串大小写转换,值域范围
- 全国环游怎么走----蚁群算法
- (项目笔记)<include>与<jsp:param>配合 传值
- 关于Git管理工具,在从服务器端pull代码时,发生的异常信息如下!
- 剑指Offer-60
- HttpURLConnection从客户端向服务器发送Http请求以及服务器响应全过程解析
- Groovy方法注入
- call,apply,bind的联系与区别