基于SpringBoot+Neo4j+Spark实现的论文智能分析问答系统(采用朴素贝叶斯分类器)

写在前面

分析了一下这个电影知识问答系统,底层功能的实现是执行 Cypher 语句,前台的业务流程为:1. 使用汉语分词器 HanLP 将原始语句分词;2. 语句抽象化(提高匹配问题模板标签的准确率);3. 获取模板标签,使用模板将句子转化成系统可以识别的结果;4. 执行 Cypher 语句获取结果并返回前台。既然涉及问答系统,中途也看了微软小冰和其他语料库的资料,感觉自己做出一个偏向应用的石油相关智能问答系统的可能性不大:首先,自己不做爬虫的话,语料库这个问题就解决不了;而要是真有现成的语料库,那也就没有我做的必要了。

区别

对比自己想做的石油论文智能分析系统:我的数据来源都是国外网站,用户的原始语句是英文,因此用不到分词;但词汇库就复杂了,需要自己去找英文人名词汇表,并提取论文信息生成全文搜索词汇表。所以对这个项目我抱的期望不是很大,先罗列几个比较困难的点,做出来之后再更新: 1. 项目中通过稠密向量来生成训练集,而每个局部向量是由词汇表来确定的。电影知识问答系统中用的是一个 190 词的电影相关汉语词汇表,但石油相关的数据都来自英语网站,所以词汇表内容都是英语词汇。解决办法是在有了一些数据之后再生成这个表,但是负责爬虫的同学还在准备期中考试。 2. 问题归类:用英语做比较吃力。同样的一个问题可以怎么问?同一个问题预设的问法越多,模型在学习后识别同类问题的准确率才会越高。 3. ……

/*
 * Import author.csv into Neo4j, creating (or updating) one Author node per id.
 *
 * The node is matched on its key (id) only and the remaining columns are
 * written with SET. Merging on every property at once fails with a
 * "null property value" error as soon as one field of a row is missing, and
 * it creates a second node for the same id whenever any field differs on a
 * later import.
 */
LOAD CSV WITH HEADERS FROM "file:///author.csv" AS line
// Skip rows whose id is missing or not numeric: MERGE rejects a null key.
WITH line, toInteger(line.id) AS authorId
WHERE authorId IS NOT NULL
MERGE (p:Author {id: authorId})
// Setting a property to null simply leaves it absent on the node.
SET p.name = line.name,
    p.email = line.email,
    p.birth = line.birth;

/*
 * Import paper.csv into Neo4j, creating (or updating) one Paper node per id.
 *
 * The node is matched on its key (id) only and the remaining columns are
 * written with SET. Papers frequently lack a DOI, summary or introduction;
 * merging on every property at once aborts on the first such row with a
 * "null property value" error and duplicates a paper whenever any field
 * differs on a later import.
 *
 * NOTE(review): the CSV is expected to carry name/summary/introduction
 * columns, which the MySQL `paper` table in this file does not define —
 * confirm how paper.csv is produced.
 */
LOAD CSV WITH HEADERS FROM "file:///paper.csv" AS line
// Skip rows whose id is missing or not numeric: MERGE rejects a null key.
WITH line, toInteger(line.id) AS paperId
WHERE paperId IS NOT NULL
MERGE (p:Paper {id: paperId})
// Setting a property to null simply leaves it absent on the node.
SET p.name = line.name,
    p.doi = line.doi,
    p.document_id = line.document_id,
    p.publisher = line.publisher,
    p.publication_date = line.publication_date,
    p.summary = line.summary,
    p.introduction = line.introduction;

/*
 * Import keyword.csv into Neo4j, creating (or updating) one Keyword node per id.
 *
 * The node is matched on its key (id) only; the name is written with SET so
 * that a missing name does not abort the import and a corrected name updates
 * the existing node instead of creating a duplicate.
 */
LOAD CSV WITH HEADERS FROM "file:///keyword.csv" AS line
// Skip rows whose id is missing or not numeric: MERGE rejects a null key.
WITH line, toInteger(line.id) AS keywordId
WHERE keywordId IS NOT NULL
MERGE (p:Keyword {id: keywordId})
SET p.name = line.name;

/*
 * Import author_paper.csv into Neo4j: for every row, link the existing Author
 * node to the existing Paper node with a `create` relationship that carries
 * both ids as properties. Rows whose author or paper is not found produce
 * no relationship.
 */
LOAD CSV WITH HEADERS FROM "file:///author_paper.csv" AS row
WITH toInteger(row.author_id) AS authorId,
     toInteger(row.paper_id) AS paperId
MATCH (author:Author {id: authorId})
MATCH (paper:Paper {id: paperId})
MERGE (author)-[:create {author_id: authorId, paper_id: paperId}]->(paper);

/*
 * Import paper_keyword.csv into Neo4j: for every row, link the existing Paper
 * node to the existing Keyword node with an `attribute` relationship that
 * carries both ids as properties. Rows whose paper or keyword is not found
 * produce no relationship.
 */
LOAD CSV WITH HEADERS FROM "file:///paper_keyword.csv" AS row
WITH toInteger(row.paper_id) AS paperId,
     toInteger(row.keyword_id) AS keywordId
MATCH (paper:Paper {id: paperId})
MATCH (keyword:Keyword {id: keywordId})
MERGE (paper)-[:attribute {paper_id: paperId, keyword_id: keywordId}]->(keyword);

/* mysql数据库的SQL */

-- Entity tables

-- Paper authors: one row per author, keyed by an auto-increment id.
CREATE TABLE `author` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100) NULL DEFAULT NULL,
    `email` VARCHAR(50) NULL DEFAULT NULL,
    -- NOTE(review): stored as an integer, presumably a birth year — confirm.
    `birth` INT(11) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文作者'
-- utf8mb4 rather than latin1: author names containing characters outside
-- Latin-1 (CJK, Cyrillic, many accented letters) cannot be stored in latin1.
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
AUTO_INCREMENT=4
;

-- Paper details: one row per paper, keyed by an auto-increment id.
CREATE TABLE `paper` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `doi` VARCHAR(50) NULL DEFAULT NULL,
    `document_id` VARCHAR(50) NULL DEFAULT NULL,
    `publisher` VARCHAR(50) NULL DEFAULT NULL,
    -- NOTE(review): the date is kept as free text; confirm the format used
    -- by the data source before relying on ordering or range filters.
    `publication_date` VARCHAR(50) NULL DEFAULT NULL,
    -- TEXT rather than VARCHAR(255): abstracts routinely exceed 255
    -- characters and would otherwise be rejected or truncated on insert.
    `abstract` TEXT NULL,
    -- NOTE(review): looks like a delimited keyword list in a single column —
    -- confirm; the Neo4j import models keywords as separate nodes.
    `keywords` VARCHAR(100) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文详细信息'
-- utf8mb4 rather than latin1 so publisher names and abstract text with
-- characters outside Latin-1 are stored intact.
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
AUTO_INCREMENT=2
;

-- Paper categories: one row per category, keyed by an auto-increment id.
CREATE TABLE `genre` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `type` VARCHAR(255) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文类别'
-- utf8mb4 rather than latin1 so category names with characters outside
-- Latin-1 are stored intact.
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
AUTO_INCREMENT=2
;

-- Meetings (conferences) that papers take part in, keyed by an
-- auto-increment id.
CREATE TABLE `meeting` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `location` VARCHAR(100) NULL DEFAULT NULL,
    -- NOTE(review): the date is kept as free text; confirm the format used
    -- by the data source before relying on ordering or range filters.
    `date` VARCHAR(100) NULL DEFAULT NULL,
    `name` VARCHAR(100) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文参与的会议'
-- utf8mb4 rather than latin1 so meeting names and locations with characters
-- outside Latin-1 are stored intact.
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
AUTO_INCREMENT=2
;

-- Organizations that authors belong to, keyed by an auto-increment id.
CREATE TABLE `origination` (
    `id` INT(11) NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(255) NULL DEFAULT NULL,
    `location` VARCHAR(255) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='作者属于的组织'
-- utf8mb4 rather than latin1 so organization names and locations with
-- characters outside Latin-1 are stored intact.
DEFAULT CHARSET=utf8mb4
COLLATE=utf8mb4_unicode_ci
ENGINE=InnoDB
AUTO_INCREMENT=2
;

-- Link tables; each one becomes the corresponding relationship when imported into Neo4j

-- Link table between authors and papers (many-to-many): one row per
-- (author, paper) pair.
CREATE TABLE `author_paper` (
    `author_id` INT(11) NOT NULL,
    -- A link row without a paper is meaningless, so both sides are required.
    `paper_id` INT(11) NOT NULL,
    -- The pair is the key. A UNIQUE index on author_id alone would allow each
    -- author to be linked to at most one paper.
    PRIMARY KEY (`author_id`, `paper_id`),
    -- Supports lookups by paper and backs the paper foreign key; the author
    -- foreign key is backed by the leading column of the primary key.
    INDEX `paper_id` (`paper_id`),
    CONSTRAINT `FK__author_paper_author` FOREIGN KEY (`author_id`) REFERENCES `author` (`id`),
    CONSTRAINT `FK__author_paper_paper` FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
COLLATE='latin1_swedish_ci'
ENGINE=InnoDB
;

-- Link table between papers and genres.
-- The unique key on paper_id allows at most one row, and therefore at most
-- one genre, per paper.
CREATE TABLE `paper_genre` (
    `paper_id` INT(11) NOT NULL,
    `genre_id` INT(11) DEFAULT NULL,
    UNIQUE KEY `paper_id` (`paper_id`),
    KEY `genre_id` (`genre_id`),
    CONSTRAINT `FK__paper_genre_genre`
        FOREIGN KEY (`genre_id`) REFERENCES `genre` (`id`),
    CONSTRAINT `FK__paper_genre_paper`
        FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
ENGINE=InnoDB COLLATE='latin1_swedish_ci'
;

-- Link table between papers and meetings.
-- The unique key on paper_id allows at most one row, and therefore at most
-- one meeting, per paper.
CREATE TABLE `paper_meeting` (
    `paper_id` INT(11) NOT NULL,
    `meeting_id` INT(11) NOT NULL,
    UNIQUE KEY `paper_id` (`paper_id`),
    KEY `meeting_id` (`meeting_id`),
    CONSTRAINT `FK__paper_meeting_meeting`
        FOREIGN KEY (`meeting_id`) REFERENCES `meeting` (`id`),
    CONSTRAINT `FK__paper_meeting_paper`
        FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
ENGINE=InnoDB COLLATE='latin1_swedish_ci'
;

-- Link table between authors and the organizations they belong to.
-- The unique key on author_id allows at most one row, and therefore at most
-- one organization, per author.
CREATE TABLE `author_origination` (
    `author_id` INT(11) NOT NULL,
    `origination_id` INT(11) NOT NULL,
    UNIQUE KEY `author_id` (`author_id`),
    KEY `origination_id` (`origination_id`),
    CONSTRAINT `FK__author_origination_author`
        FOREIGN KEY (`author_id`) REFERENCES `author` (`id`),
    CONSTRAINT `FK__author_origination_origination`
        FOREIGN KEY (`origination_id`) REFERENCES `origination` (`id`)
)
ENGINE=InnoDB COLLATE='latin1_swedish_ci'
;

精彩链接

评论可见,请评论后查看内容,谢谢!!!
 您阅读本篇文章共花了: