AbstractKnowledgeGraph icon indicating copy to clipboard operation
AbstractKnowledgeGraph copied to clipboard

代码在 Python 3.6.2 上有些问题，把我改完后能运行的版本贴一下

Open onewaymyway opened this issue 6 years ago • 0 comments

# coding = utf-8

import os

import networkx as nx
import numpy as np
import matplotlib
# Select the TkAgg backend before pyplot is imported.
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

class ConceptNet:
    """Build and query abstraction paths over a concept hierarchy.

    Two UTF-8 text files, located in a ``dict`` directory next to this
    source file, are read:

    * ``hiearchy.txt`` -- one edge per line, two space-separated fields;
      only the part after the last ``|`` of each field is used.
    * ``concept_total.txt`` -- tab-separated lines whose first field is a
      word and whose last field is a comma-separated list of concepts
      (again, only the part after the last ``|`` of each concept is used).
    """

    def __init__(self):
        """Resolve the data file paths relative to this source file."""
        # os.path.dirname works with any path separator; splitting on '/'
        # yields an empty directory on platforms that use backslashes.
        cur = os.path.dirname(os.path.abspath(__file__))
        self.hiearchy_file = os.path.join(cur, "dict/hiearchy.txt")
        self.concept_file = os.path.join(cur, "dict/concept_total.txt")

    def load_concept_edges(self, ):
        """Load the concept edges from ``self.hiearchy_file``.

        Returns:
            A list of ``(to_, from_)`` tuples, where ``from_`` comes from
            the first field of a line and ``to_`` from the second. Lines
            with fewer than two space-separated fields are skipped.
        """
        edges = []
        # Explicit encoding: the file is not read correctly with the
        # platform default encoding.
        with open(self.hiearchy_file, "r", encoding="utf-8") as handle:
            for line in handle:
                fields = line.strip().split(' ')
                if len(fields) < 2:
                    continue
                from_ = fields[0].split('|')[-1]
                to_ = fields[1].split('|')[-1]
                edges.append((to_, from_))

        return edges

    def build_graph(self, edges):
        """Build a networkx directed graph from ``edges``.

        Args:
            edges: Iterable of ``(source, target)`` pairs.

        Returns:
            An ``nx.DiGraph`` containing those edges.
        """
        G = nx.DiGraph()
        G.add_edges_from(edges)
        return G

    def build_basic_concept(self):
        """Build the base concept dictionary.

        For every node of the hierarchy graph, the longest of the shortest
        paths starting at that node is kept (the first one encountered when
        several share the maximum length).

        Returns:
            A dict mapping each node to that path, as a list of nodes.
        """
        concept_dict = {}
        print("loading concept edges")
        edges = self.load_concept_edges()
        print("build grpah")
        graph = self.build_graph(edges)

        all_paths = nx.all_pairs_shortest_path(graph)
        # networkx 1.x returns a dict keyed by source node; 2.x and later
        # return an iterator of (source, paths) pairs, which cannot be
        # subscripted. Normalise both to an iterable of pairs.
        if isinstance(all_paths, dict):
            all_paths = all_paths.items()

        for wd, path_dict in all_paths:
            if not path_dict:
                continue
            # max() returns the first maximal element, matching a stable
            # descending sort by path length.
            longest_path = max(path_dict.values(), key=len)
            if not longest_path:
                continue
            concept_dict[wd] = longest_path

        return concept_dict

    def build_all_concepts(self):
        """Collect the abstraction paths of every word in the concept file.

        Returns:
            A dict mapping each word to a list of paths, each path being a
            list of strings. A word that is itself a node of the hierarchy
            gets its single base path; any other word gets one path per
            listed concept, prefixed with the word itself.
        """
        all_dict = {}
        concept_dict = self.build_basic_concept()
        print('building all concepts')
        # Explicit encoding: the file is not read correctly with the
        # platform default encoding.
        with open(self.concept_file, "r", encoding="utf-8") as handle:
            for line in handle:
                fields = line.strip().split('\t')
                wd = fields[0]
                if not wd:
                    # Blank line: nothing to index.
                    continue
                basic_path = concept_dict.get(wd)
                if basic_path:
                    # Pure concept (e.g. "打"): wrap the single flat path so
                    # every value has the same list-of-paths shape;
                    # otherwise the search loop would join the characters
                    # of each node instead of the nodes themselves.
                    all_dict[wd] = [basic_path]
                    continue
                concepts = [c.split('|')[-1] for c in fields[-1].split(',')]
                all_dict[wd] = [
                    [wd] + concept_dict.get(c, [c]) for c in concepts
                ]

        return all_dict

    def search_hiearchy(self):
        """Run the interactive lookup loop.

        Builds the full dictionary, prints the elapsed build time in
        seconds, then repeatedly prompts for a word and prints each of its
        abstraction paths. Typing ``quit`` or closing the input stream ends
        the loop.
        """
        import time
        start_time = time.time()
        all_dict = self.build_all_concepts()
        print(time.time() - start_time)
        while True:
            try:
                wd = input('enter an wd to search:').strip()
            except EOFError:
                # Input stream closed (e.g. piped input exhausted).
                break
            if wd == "quit":
                break
            for path in all_dict.get(wd, ()):
                print(wd, '抽象路径为:', '->'.join(path))

# Script entry point: start the interactive search when run directly.
if __name__ == '__main__':
    handler = ConceptNet()
    handler.search_hiearchy()

onewaymyway avatar Oct 15 '19 15:10 onewaymyway