Traffic profiling with manual labeling

The Maven build below sets up the project: IK Analyzer for Chinese word segmentation, the Spark 2.4.5 / Scala 2.11 stack, and the MySQL JDBC connector.
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>AnalysisHotWords</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.5</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.guava</groupId>
                    <artifactId>guava</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>14.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.18</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <configuration>
                    <recompileMode>modified-only</recompileMode>
                </configuration>
                <executions>
                    <execution>
                        <id>main-scalac</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <appendAssemblyId>false</appendAssemblyId>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>com.xxx.uploadFile</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>assembly</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <directory>target</directory>
        <outputDirectory>target/classes</outputDirectory>
        <testOutputDirectory>target/test-classes</testOutputDirectory>
        <sourceDirectory>src</sourceDirectory>
    </build>
</project>
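Note that although the POM pulls in IK Analyzer, the job below never tokenizes: it matches whole keywords with String.contains. The dependency only matters if you extend the matching to real Chinese word segmentation. As a reference point, here is a minimal sketch of how the 2012_u6 artifact is usually driven; it assumes the standard org.wltea.analyzer.core.IKSegmenter API shipped in that artifact, and the tokenize helper is illustrative, not part of the original project.

import java.io.StringReader
import org.wltea.analyzer.core.IKSegmenter

// Segment a Chinese string into words with IK Analyzer.
// useSmart = true requests coarse-grained ("smart") segmentation.
def tokenize(text: String): Seq[String] = {
  val segmenter = new IKSegmenter(new StringReader(text), true)
  Iterator
    .continually(segmenter.next()) // next() yields Lexeme objects, null at end of input
    .takeWhile(_ != null)
    .map(_.getLexemeText)
    .toSeq
}

The job itself reads the last 30 days of hotline complaints from MySQL, counts keyword hits for each hand-picked label, and appends the totals to MySQL as a single JSON row: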
package com.jsptpd.anylysishotwords

import java.util.Properties

import org.apache.spark.sql.{Dataset, SparkSession}

import scala.collection.mutable
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

// One result row: the JSON-encoded label counts plus the size of the
// time window in days ("30").
case class ParticiplelabelAnalysis(item: String, updatatype: String)

object ParticiplelabelAnalysis {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ParticiplelabelAnalysis")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val prop = new Properties()
    prop.put("user", "xxxxxxxxxxxxxxxxx")
    prop.put("password", "xxxxxxxxxxxxx")
    prop.put("driver", "com.mysql.cj.jdbc.Driver") // 8.x driver class; com.mysql.jdbc.Driver is deprecated

    // Complaints from the last 30 days, restricted to transport-related agencies.
    // Filter before projecting so the predicate can reference the cagou column.
    val dataFrame = spark.read
      .jdbc("jdbc:mysql://xxxxxxxxxxxxxxxxxxxx1/city?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC",
        "xxxxxxxxxxxx", prop)
      .where("DATE_SUB(current_date,30) < rqsttime and cagou in ('市消防局','市交通局','市交通运输局','市政务办','市城建集团'," +
        "'市民卡公司','南京市公共交通集团','12345','市地铁集团','信息中心','市城管局','市交管局','市应急管理局'," +
        "'市大数据局','市公安局交管局','市公交集团','市气象局','南京港集团','市公安局','南京地铁集团有限公司','市交通集团'," +
        "'南京公交集团','南京地铁集团')")
      .select("rqsttime", "rqstcontent")

    // Keep only the complaint text, trimmed and with newlines stripped.
    // Cached because it is scanned once per keyword below.
    val iteminfo: Dataset[String] = dataFrame
      .map(row => row(1).toString.trim.replace("\n", ""))
      .cache()

    // Manually curated labels: a complaint that contains one of a label's
    // keywords counts toward that label.
    val stringToStrings: mutable.Map[String, Set[String]] = mutable.Map(
      "设施不合理" -> Set("不合理", "设施", "规划", "设置"),
      "施工影响大" -> Set("工程", "建设", "影响", "施工"),
      "交通安全隐患" -> Set("安全隐患", "维修", "坏了"),
      "相关部门不作为" -> Set("事故", "不处理", "不作为", "不解决"),
      "公交不准时" -> Set("公交", "不准时", "晚点", "时间长"),
      "公交不按线路行驶" -> Set("公交", "站台", "线路", "溜站"),
      "交通拥堵" -> Set("堵塞", "拥堵", "车多"),
      "占道违停" -> Set("占道", "违停"),
      "出租车乱收费" -> Set("出租车", "费用", "收费", "打车", "计费"),
      "改善地铁设施" -> Set("地铁", "不方便", "坏了"),
      "处罚不认可" -> Set("曝光", "不认可", "罚单", "处罚"),
      "增加地铁站点" -> Set("地铁", "线路", "站点", "规划"))

    // For each label, sum the number of complaints containing each keyword.
    // A complaint matching several keywords of one label counts once per keyword.
    var infoitem: Map[String, Long] = Map[String, Long]()
    for ((label, keywords) <- stringToStrings) {
      var sum: Long = 0L
      for (keyword <- keywords) {
        sum += iteminfo.filter(_.contains(keyword)).count()
      }
      infoitem += (label -> sum)
    }

    // Serialize the counts once, after all labels are tallied, and append
    // the snapshot to the result database.
    val jsons1 = compact(render(infoitem))
    println(jsons1)

    val propitem = new Properties()
    val db2url = "jdbc:mysql://xxxxxxxxxxxxxxxxxx/pc?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
    propitem.setProperty("user", "xxxxxxxxxxxxxxxxx")
    propitem.setProperty("password", "xxxxxxxxxxxxxxxxxxxx")

    val value = Seq(ParticiplelabelAnalysis(jsons1, "30")).toDS()
    value.write.mode("append").jdbc(db2url, "t_particpanalysis", propitem)

    spark.stop()
  }
}
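To make the counting rule concrete, here is a tiny self-contained illustration (the complaint string is synthetic, not from the real database). Because the loop above adds one count per matching keyword, a single complaint can contribute more than once to the same label:

object CountingRuleDemo {
  def main(args: Array[String]): Unit = {
    val complaint = "早高峰中山路堵塞严重，车多难走" // synthetic complaint text
    val keywords = Set("堵塞", "拥堵", "车多")       // the "交通拥堵" keyword set from above

    // One count per keyword found in the text, mirroring the job's inner loop.
    val hits = keywords.count(complaint.contains)
    println(hits) // prints 2: the text matches "堵塞" and "车多", so it adds 2, not 1
  }
}

If each complaint should count at most once per label, filter on any-keyword instead, e.g. iteminfo.filter(line => keywords.exists(line.contains)).count(). Either way, the row appended to t_particpanalysis holds the JSON object of label totals (e.g. {"交通拥堵":2,...}) in item, with updatatype recording the 30-day window.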