# Entity labels currently in use:
#   书名 (book title), 作品补充 (work supplement), 作者 (author),
#   几年级上下册 (grade / volume), 出版社 (publisher), 推荐 (recommendation),
#   时期 (period), 标题 (title), 正文 (body text), 科目 (subject),
#   课文序列号 (lesson number)
# Relationship types currently in use:
#   主要作品, 作者, 全文, 出版, 出版社, 属于, 推荐书籍, 选自
import json

# Maps a predicate identifier from the source JSON to the node label that
# json2triple() gives to the *object* (end node) of a property triple.
class_dict = {
    "ns0__description": "描述",
    "edukg_prop_history__main-P43": "时间",
    "edukg_prop_history__main-P33": "时间",
    "rdfs__label": "描述",
    "edukg_prop_history__main-P272": "时间",
    "edukg_prop_chinese__main-P25": "描述",
    "edukg_prop_history__main-P163": "地址",
    "edukg_prop_chinese__main-P24": "职业",
    "edukg_prop_history__main-P144": "描述",
    "edukg_prop_chinese__main-P17": "时间",
    "edukg_prop_history__main-P399": "描述",
    "edukg_prop_chinese__main-P19": "时间",
    "edukg_prop_common__main-P3": "描述",
    "edukg_prop_chinese__main-P5": "时间",
    "edukg_prop_chinese__main-P7": "描述",
    "edukg_prop_chinese__main-P9": "描述",
    "edukg_prop_chinese__main-R2": "书名",
    "edukg_prop_history__main-P65": "地址",
    "edukg_prop_history__main-P1": "地址",
    "edukg_prop_history__main-P283": "时期",
    "edukg_prop_history__main-P400": "描述",
    "edukg_prop_history__main-P227": "时代",
    "edukg_prop_common__main-P4": "描述",
    "edukg_prop_history__main-P509": "风格",
    "edukg_prop_history__main-P70": "评价",
    "edukg_prop_common__main-P1": "别名",
    "edukg_prop_common__main-P6": "风格",
    # NOTE(review): this key lacks the leading "e"; presumably it mirrors a
    # misspelled predicate in the data -- confirm before removing it.
    "dukg_prop_history__main-P35": "评价",
    "edukg_prop_history__main-P35": "评价",
    "edukg_prop_history__main-P270": "地址",
    "edukg_prop_history__main-P254": "评价",
    "edukg_prop_history__main-P216": "描述",
    "edukg_prop_history__main-P416": "主要作品",
    "edukg_prop_history__main-P62": "代表作",
    "edukg_prop_common__main-P21": "评价",
}


def read_json(json_file_name):
    """Load a JSON file and return the parsed content.

    Parameters:
        json_file_name (str): path of the file to read.

    Return:
        The parsed JSON document (the callers in this file expect a dict).

    The file is decoded as UTF-8 with ``errors='replace'``, so undecodable
    bytes become replacement characters instead of raising.
    """
    with open(json_file_name, 'r', encoding='utf-8', errors='replace') as file:
        return json.load(file)


def json2triple(data):
    """Convert one parsed record into a list of graph triples.

    Parameters:
        data (dict): record that may contain an ``instanceInfo`` section
            (with ``property`` and ``relation`` lists) and a ``bookList``
            section (with a ``data`` list).

    Return:
        list: triples of the form
        ``[{start_label: start_value}, relationship, {end_label: end_value}]``
        in the order: property triples, relation triples, then three triples
        (subject, edition time, edition) per book.

    Raises:
        KeyError: if a property item uses a predicate that is not present in
            ``class_dict``, or an item lacks one of the expected keys.

    Sections that are missing, empty or ``null`` contribute no triples.
    """
    final_result = []

    instance_info = data.get("instanceInfo") or {}
    for item in instance_info.get("property") or ():
        # The end-node label depends on the predicate id.
        end_label = class_dict[item["predicate"]]
        final_result.append(
            [{"作者": item["subject"]}, item["predicateLabel"], {end_label: item["object"]}]
        )
    for item in instance_info.get("relation") or ():
        final_result.append(
            [{"标题": item["subject"]}, item["predicateLabel"], {"作者": item["object"]}]
        )

    book_list = data.get("bookList") or {}
    for item in book_list.get("data") or ():
        book_name = item["bookName"]
        final_result.append([{"几年级上下册": book_name}, "学科属于", {"学科": item["subject"]}])
        final_result.append([{"几年级上下册": book_name}, "出版时间", {"时间": item["editionTime"]}])
        final_result.append([{"几年级上下册": book_name}, "版本属于", {"版本": item["edition"]}])

    return final_result


if __name__ == "__main__":
    print(json2triple(read_json('data//author//json//ywl.json')["data"]))

# Education_Graph

from py2neo import Graph, Node, Relationship,NodeMatcher, RelationshipMatcher
from json2triple import json2triple,read_json
class EducationGraph:
    """Small convenience wrapper around a py2neo ``Graph`` connection.

    Node lookups performed by :meth:`create_node` and :meth:`match_node` use
    the node label together with the ``name`` property as the search key.
    """

    def __init__(self, host, port, username, password):
        """Connect to Neo4j; see :meth:`neo4j_connection` for the arguments."""
        self.neo4j_connection(host, port, username, password)

    def neo4j_connection(self, host, port, username, password):
        """Create the py2neo ``Graph`` handle and store it as ``self.driver``.

        Args:
            host: address of the server running Neo4j, e.g. ``"127.0.0.1"``.
            port: port the Neo4j server listens on.
            username: database user name (``neo4j`` unless it was changed).
            password: password of that user.
        """
        self.driver = Graph(
            host=host,
            port=port,
            user=username,
            password=password,
        )

    def create_node(self, label, attrs):
        """Return the node ``label``/``attrs["name"]``, creating it if absent.

        Args:
            label: node label.
            attrs: node properties; must contain ``"name"``.

        Returns:
            The existing node when one with the same label and name is found
            (its properties are left untouched), otherwise the newly created
            node built from all of ``attrs``.
        """
        existing = self.match_node(label, attrs)
        if existing is not None:
            return existing
        node = Node(label, **attrs)
        self.driver.create(node)
        return node

    def create_relationship(self, label1, attrs1, label2, attrs2, r_name):
        """Create ``(label1 node)-[r_name]->(label2 node)`` between existing nodes.

        Both endpoints are looked up by label and ``name``; nothing is created
        when either one is missing.

        Returns:
            ``False`` if an endpoint was not found, ``True`` once the
            relationship has been created.
        """
        # match_node() is a bound method: the graph handle comes from self,
        # so only the label and the attributes are passed.
        value1 = self.match_node(label1, attrs1)
        value2 = self.match_node(label2, attrs2)
        if value1 is None or value2 is None:
            return False
        self.driver.create(Relationship(value1, r_name, value2))
        return True

    def create_relationship_by_node(self, node1, rel, node2):
        """Create ``(node1)-[rel]->(node2)`` unless that relationship exists."""
        if self.has_relationship(node1, node2, rel):
            return
        self.driver.create(Relationship(node1, rel, node2))

    def match_node(self, label, attrs):
        """Return the first node with ``label`` and ``name == attrs["name"]``.

        The name is passed to the matcher as a property value rather than
        being spliced into a WHERE string, so quotes inside the name cannot
        break the query.

        Returns:
            The matching node, or ``None`` when there is none.
        """
        matcher = NodeMatcher(self.driver)
        return matcher.match(label, name=attrs["name"]).first()

    def update_node(self, label, attrs, new_attrs):
        """Merge ``new_attrs`` into the node found by ``label``/``attrs["name"]``.

        Returns:
            ``False`` when no such node exists, ``True`` after the change has
            been pushed to the database.
        """
        node = self.match_node(label, attrs)
        if node is None:
            return False
        node.update(new_attrs)
        # Without push() the modification stays local and is never stored.
        self.driver.push(node)
        return True

    def search_nodes(self, label, attrs):
        """Return the py2neo match object for nodes with ``label`` and ``attrs``."""
        matcher = NodeMatcher(self.driver)
        return matcher.match(label, **attrs)

    def search_relationships(self, r_type):
        """Return the py2neo match object for relationships of type ``r_type``."""
        matcher = RelationshipMatcher(self.driver)
        return matcher.match(r_type=r_type)

    def has_relationship(self, firstNode, finalNode, rel):
        """Tell whether ``(firstNode)-[rel]->(finalNode)`` already exists.

        The lookup goes through ``RelationshipMatcher`` so that the endpoints
        are matched by node identity and the relationship type is not pasted
        into a hand-built Cypher string.

        Returns:
            ``True`` (after printing a notice) if such a relationship exists,
            otherwise ``False``. Nodes that are not bound to the database
            cannot take part in a stored relationship, so they yield ``False``.
        """
        if firstNode.identity is None or finalNode.identity is None:
            return False
        matcher = RelationshipMatcher(self.driver)
        # A (start, end) tuple asks for a directed match.
        found = matcher.match((firstNode, finalNode), r_type=rel).first()
        if found is None:
            return False
        print('关系已经存在')
        return True
if __name__ == '__main__':
    import os

    data = json2triple(read_json('data//author//json//鲍圭埃特.json')["data"])

    # Neo4j connection settings are read from the environment so the script
    # runs without editing the source:
    #   NEO4J_HOST      (default "127.0.0.1")
    #   NEO4J_PORT      (default 7687)
    #   NEO4J_USER      (default "neo4j")
    #   NEO4J_PASSWORD  (required; a KeyError is raised when it is not set)
    education_graph = EducationGraph(
        host=os.environ.get("NEO4J_HOST", "127.0.0.1"),
        port=int(os.environ.get("NEO4J_PORT", "7687")),
        username=os.environ.get("NEO4J_USER", "neo4j"),
        password=os.environ["NEO4J_PASSWORD"],
    )

    for start, relationship, end in data:
        # Each endpoint is a one-entry dict {label: name}; unpacking this way
        # fails loudly if a triple does not have exactly one entry.
        [(start_label, start_name)] = start.items()
        [(end_label, end_name)] = end.items()
        start_node = education_graph.create_node(start_label, {"name": start_name})
        end_node = education_graph.create_node(end_label, {"name": end_name})
        education_graph.create_relationship_by_node(start_node, relationship, end_node)