两个节点在依存解析树上的最短子树(Python)

来源:互联网 发布:织梦返利系统源码 编辑:程序博客网 时间:2024/05/18 03:38

get_node_father 前提条件:根据 StanfordParser 解析出的结果(调用现成的结果),针对给出的两个节点找其最短依存子树。
shortest_dependency_tree.py

"""Find the shortest dependency subtree that joins two words of a sentence.

The input is a list of Stanford-style typed dependencies such as
``"nmod(inhibited-8, cells-5)"`` (relation, governor-index, dependent-index).
Both words are climbed towards the root one governor at a time; as soon as the
governor just reached by one word already lies on the other word's ancestor
chain, the two chains up to that shared node form the answer.
"""
# NOTE(review): on Python <= 3.9 the top-level name ``parser`` is also a
# standard-library module; confirm the project package is the one imported.
import parser.dependent_tree_rela as dt_rela

# Sample parse used by testResult().
parseTree = [
    "case(cells-5, In-1)",
    "amod(cells-5, mature-2)",
    "amod(cells-5, human-3)",
    "compound(cells-5, B-4)",
    "nmod(inhibited-8, cells-5)",
    "nsubj(inhibited-8, BMP-6-7)",
    "root(ROOT-0, inhibited-8)",
    "compound(growth-10, cell-9)",
    "dobj(inhibited-8, growth-10)",
    "cc(inhibited-8, and-12)",
    "advmod(induced-14, rapidly-13)",
    "conj(inhibited-8, induced-14)",
    "dobj(induced-14, phosphorylation-15)",
    "case(Smad1/5/8-17, of-16)",
    "nmod(phosphorylation-15, Smad1/5/8-17)",
    "acl(Smad1/5/8-17, followed-18)",
    "case(upregulation-21, by-19)",
    "det(upregulation-21, an-20)",
    "nmod(followed-18, upregulation-21)",
    "case(Id1-23, of-22)",
    # The original literal ended with a stray "]" inside the string.
    "nmod(upregulation-21, Id1-23)",
]

# Ancestor chains of the two query words. Position 0 is the query word itself;
# ``*_rela[k]`` is the relation linking chain element k to element k + 1.
word1_father_index = []
word1_father_word = []
word1_father_rela = []
word2_father_index = []
word2_father_word = []
word2_father_rela = []

# Index returned by get_node_father() when a node has no governor.
_NO_FATHER = -1


def _format_branch(words, indices, relations, end):
    """Render chain elements 0..end as ``word_idx__(rela)__word_idx...``."""
    pieces = []
    for k in range(end + 1):
        node = words[k] + "_" + str(indices[k])
        if k == end:
            pieces.append(node)
        else:
            pieces.append(node + "__(" + relations[k] + ")__")
    return "".join(pieces)


def _last_position(indices, wanted):
    """Return the position of the last occurrence of ``wanted`` in ``indices``."""
    return len(indices) - 1 - indices[::-1].index(wanted)


def get_shortest_dependent_path(word1, word1_index, word2, word2_index, tdl, is_first):
    """Print and return the two branches of the shortest dependency subtree.

    Args:
        word1, word1_index: surface form and token index of the first node.
        word2, word2_index: surface form and token index of the second node.
        tdl: dependency relations as produced by ``changeFormat``.
        is_first: pass ``True`` for a fresh query; the function passes
            ``False`` to itself while climbing.

    Returns:
        ``(word1_branch, word2_branch)`` - each branch runs from the query
        word up to the shared node, e.g.
        ``("cells_5__(nmod)__inhibited_8", "growth_10__(dobj)__inhibited_8")``.
        Both strings are also printed, word1's branch first.

    Raises:
        ValueError: if neither node has a governor left to climb to and no
            shared node was found.
    """
    if is_first:
        # Start from clean chains so an earlier query cannot leak into this one.
        for chain in (word1_father_index, word1_father_word, word1_father_rela,
                      word2_father_index, word2_father_word, word2_father_rela):
            del chain[:]
        # The query words themselves are the first element of each chain.
        word1_father_index.append(word1_index)
        word1_father_word.append(word1)
        word2_father_index.append(word2_index)
        word2_father_word.append(word2)
        if word1_index == word2_index:
            # Same node twice: the subtree is the node alone.
            same1 = _format_branch(word1_father_word, word1_father_index, word1_father_rela, 0)
            same2 = _format_branch(word2_father_word, word2_father_index, word2_father_rela, 0)
            print(same1)
            print(same2)
            return same1, same2

    word1_father_value, word1_father_id = get_node_father(
        word1_index, tdl, word1_father_word, word1_father_index, word1_father_rela)
    word2_father_value, word2_father_id = get_node_father(
        word2_index, tdl, word2_father_word, word2_father_index, word2_father_rela)

    # Candidate 1: word1's new governor already lies on word2's chain.
    path1 = None
    if word1_father_id != _NO_FATHER and word1_father_id in word2_father_index:
        end1 = _last_position(word1_father_index, word1_father_id)
        end2 = word2_father_index.index(word1_father_id)
        path1 = (
            end1 + end2 + 2,
            _format_branch(word1_father_word, word1_father_index, word1_father_rela, end1),
            _format_branch(word2_father_word, word2_father_index, word2_father_rela, end2),
        )

    # Candidate 2: word2's new governor already lies on word1's chain.
    path2 = None
    if word2_father_id != _NO_FATHER and word2_father_id in word1_father_index:
        end2 = _last_position(word2_father_index, word2_father_id)
        end1 = word1_father_index.index(word2_father_id)
        path2 = (
            end1 + end2 + 2,
            _format_branch(word1_father_word, word1_father_index, word1_father_rela, end1),
            _format_branch(word2_father_word, word2_father_index, word2_father_rela, end2),
        )

    if path1 is None and path2 is None:
        if word1_father_id == _NO_FATHER and word2_father_id == _NO_FATHER:
            # Neither chain can grow any more, so recursing would never end.
            raise ValueError("no common ancestor found for the two nodes")
        # No shared node yet: climb one level on both sides and try again.
        return get_shortest_dependent_path(
            word1_father_value, word1_father_id,
            word2_father_value, word2_father_id, tdl, False)

    # Prefer the strictly shorter candidate; on a tie keep candidate 2.
    if path2 is None or (path1 is not None and path1[0] < path2[0]):
        chosen = path1
    else:
        chosen = path2
    print(chosen[1])
    print(chosen[2])
    return chosen[1], chosen[2]


# Find the governor (father) of a node.
def get_node_father(wordId, tdl, father_node_word, father_node_index, rela_set):
    """Look up the governor of token ``wordId`` and append it to the chain.

    Args:
        wordId: token index of the dependent whose governor is wanted.
        tdl: iterable of relation objects exposing ``dep_index``,
            ``gov_word``, ``gov_index`` and ``dep_rela``.
        father_node_word, father_node_index, rela_set: chain lists that
            receive the governor's word, its index and the relation name.

    Returns:
        ``(gov_word, gov_index)`` of the governor, or ``("", -1)`` when the
        token has no governor in ``tdl``.
    """
    father_word = ""
    father_index = _NO_FATHER
    for tdp in tdl:
        if wordId != tdp.dep_index:
            continue
        # Skip an edge that points back at the node the chain already ends on.
        # The index is compared with the index chain, not the word chain.
        points_at_chain_end = (
            bool(father_node_word) and bool(father_node_index)
            and tdp.gov_word == father_node_word[-1]
            and tdp.gov_index == father_node_index[-1]
        )
        if points_at_chain_end:
            continue
        father_node_word.append(tdp.gov_word)
        father_node_index.append(tdp.gov_index)
        rela_set.append(tdp.dep_rela)
        father_word = tdp.gov_word
        father_index = tdp.gov_index
        break
    return father_word, father_index


def changeFormat(parseTree):
    """Parse ``rela(gov-i, dep-j)`` strings into relation objects.

    Args:
        parseTree: iterable of typed-dependency strings.

    Returns:
        A list of ``dt_rela.dependent_tree_rela`` objects with integer indices.

    Raises:
        ValueError: if an element lacks the brackets, comma or ``-index``
            suffix, or if an index is not an integer.
    """
    dependent_tree = []
    for element in parseTree:
        lbracket = element.index('(')
        rbracket = element.rindex(')')
        # Prefer the ", " separator so a token containing a bare comma
        # (e.g. "1,000") does not split the governor in the wrong place.
        comma_index = element.find(', ', lbracket)
        if comma_index == -1 or comma_index > rbracket:
            comma_index = element.index(',', lbracket)
        # strip() removes the blank after the separator from the dependent.
        part_one = element[lbracket + 1:comma_index].strip()
        part_two = element[comma_index + 1:rbracket].strip()
        # The index follows the LAST hyphen; the word itself may contain
        # hyphens (e.g. "BMP-6-7").
        line1_loc = part_one.rindex('-')
        line2_loc = part_two.rindex('-')

        dep_relation = dt_rela.dependent_tree_rela()
        dep_relation.gov_word = part_one[0:line1_loc]
        dep_relation.gov_index = int(part_one[line1_loc + 1:])
        dep_relation.dep_word = part_two[0:line2_loc]
        dep_relation.dep_index = int(part_two[line2_loc + 1:])
        dep_relation.dep_rela = element[0:lbracket]
        dependent_tree.append(dep_relation)
    return dependent_tree


def testResult():
    """Run the sample query on the bundled parse and print both branches."""
    parse_result = changeFormat(parseTree)
    # Other queries that can be tried on the same parse:
    #   ("BMP-6", 7, "inhibited", 8) and ("In", 1, "Smad1/5/8", 17)
    get_shortest_dependent_path("cells", 5, "growth", 10, parse_result, True)


if __name__ == '__main__':
    testResult()

dependent_tree_rela.py

# Representation of a single dependency relation.
class dependent_tree_rela:
    """One typed dependency edge: ``dep_rela(gov_word-gov_index, dep_word-dep_index)``.

    Instances can be created empty and filled in attribute by attribute, or
    built in one step through the keyword arguments of the constructor.
    """

    # Class-level defaults, kept so code reading them off the class still works.
    dep_word = ""
    dep_index = -1
    gov_word = ""
    gov_index = -1
    dep_rela = ""

    def __init__(self, dep_word="", dep_index=-1, gov_word="", gov_index=-1, dep_rela=""):
        """Store the dependent, the governor and the relation name.

        Every argument is optional and defaults to the class-level value.
        """
        self.dep_word = dep_word
        self.dep_index = dep_index
        self.gov_word = gov_word
        self.gov_index = gov_index
        self.dep_rela = dep_rela

    def __repr__(self):
        """Return the edge in ``rela(gov-i, dep-j)`` notation for debugging."""
        return "dependent_tree_rela({}({}-{}, {}-{}))".format(
            self.dep_rela, self.gov_word, self.gov_index,
            self.dep_word, self.dep_index)
0 0
原创粉丝点击