人工打标实现交通画像

<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the hot-word / complaint-label analysis Spark job
     (Scala 2.11 binary, Spark 2.4.5, MySQL source and sink). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>AnalysisHotWords</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <!-- IK Analyzer: Chinese word-segmentation library (presumably used by
             other classes in this project for hot-word extraction). -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
        <!-- Spark core, with guava excluded here and pinned explicitly below
             to avoid a transitive version clash. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.5</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.guava</groupId>
                    <artifactId>guava</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Guava pinned to 14.0.1 — presumably the version Spark 2.4.x
             expects on its classpath; confirm before bumping. -->
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>14.0.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_2.11</artifactId>
            <version>2.4.5</version>
        </dependency>
        <!-- NOTE(review): with Connector/J 8.x the legacy driver class name
             com.mysql.jdbc.Driver (used in the job code) is deprecated in
             favor of com.mysql.cj.jdbc.Driver. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.18</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- NOTE(review): org.scala-tools:maven-scala-plugin is the legacy
                 plugin, long superseded by net.alchim31.maven:scala-maven-plugin;
                 confirm toolchain before upgrading. -->
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <configuration>
                    <!-- Only recompile Scala sources that changed. -->
                    <recompileMode>modified-only</recompileMode>
                </configuration>
                <executions>
                    <execution>
                        <id>main-scalac</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds a single fat jar ("jar-with-dependencies") at package time.
                 NOTE(review): no plugin version is pinned, and in recent
                 maven-assembly-plugin versions the `assembly` goal is removed
                 in favor of `single` — confirm the resolved plugin version. -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <appendAssemblyId>false</appendAssemblyId>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- NOTE(review): com.xxx.uploadFile looks like a
                                 placeholder; this job's entry point is
                                 ParticiplelabelAnalysis — verify. -->
                            <mainClass>com.xxx.uploadFile</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>assembly</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Java compilation; NOTE(review): no <source>/<target> levels are
                 configured, so the plugin defaults apply. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <directory>target</directory>
        <outputDirectory>target/classes</outputDirectory>
        <testOutputDirectory>target/test-classes</testOutputDirectory>
        <!-- Sources live directly under src/ (no src/main/scala split). -->
        <sourceDirectory>src</sourceDirectory>
    </build>
</project>
package main.scala.com.jsptpd.anylysishotwords
import java.util.Properties
import com.jsptpd.anylysishotwords.HostWordInfo
import org.apache.spark.sql.{Dataset, SparkSession}
import scala.collection.mutable
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._
// Row written to the MySQL sink table t_particpanalysis: `item` holds the
// JSON label->count payload, `updatatype` the aggregation window tag
// (presumably days, e.g. "30" — confirm against downstream consumers).
// NOTE(review): field name `updatatype` (sic) maps to a DB column; do not rename.
case class ParticiplelabelAnalysis(item:String,updatatype:String)
object ParticiplelabelAnalysis {

  /**
   * Entry point. Reads the last 30 days of hotline requests from the source
   * MySQL database (restricted to traffic-related agencies), counts keyword
   * hits per complaint label, and appends the JSON summary to the sink
   * MySQL table `t_particpanalysis`.
   *
   * Fixes vs. the original paste: the unclosed Map literal / loops / braces
   * are repaired, and the deprecated `com.mysql.jdbc.Driver` class name is
   * replaced with `com.mysql.cj.jdbc.Driver` (required name for the
   * mysql-connector-java 8.x dependency declared in the POM).
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ParticiplelabelAnalysis")
      .master("local[*]")
      .getOrCreate()

    // Source-database connection settings (credentials redacted).
    val prop = new Properties()
    prop.put("user", "xxxxxxxxxxxxxxxxx")
    prop.put("password", "xxxxxxxxxxxxx")
    // Connector/J 8.x driver class; the legacy com.mysql.jdbc.Driver is deprecated.
    prop.put("driver", "com.mysql.cj.jdbc.Driver")

    // Requests from the last 30 days, limited to traffic-related agencies.
    val dataFrame = spark.read
      .jdbc("jdbc:mysql://xxxxxxxxxxxxxxxxxxxx1/city?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC", "xxxxxxxxxxxx", prop)
      .select("rqsttime", "rqstcontent")
      .where("DATE_SUB(current_date,30) < rqsttime and cagou in ('市消防局','市交通局','市交通运输局', '市政务办','市城建集团'," +
        "'市民卡公司','南京市公共交通集团','12345','市地铁集团','信息中心','市城管局','市交管局','市应急管理局'," +
        "'市大数据局','市公安局交管局','市公交集团','市气象局','南京港集团','市公安局','南京地铁集团有限公司','市交通集团'," +
        "'南京公交集团','南京地铁集团')")

    import spark.implicits._
    // Request content only, trimmed and with embedded newlines removed.
    val iteminfo: Dataset[String] =
      dataFrame.map(row => row(1).toString.trim.replace("\n", ""))

    // Complaint label -> keyword set used for naive substring matching.
    // (Immutable Map: the original used mutable.Map but never mutated it.)
    val labelKeywords: Map[String, Set[String]] = Map(
      "设施不合理" -> Set("不合理", "设施", "规划", "设置"),
      "施工影响大" -> Set("工程", "建设", "影响", "施工"),
      "交通安全隐患" -> Set("安全隐患", "维修", "坏了"),
      "相关部门不作为" -> Set("事故", "不处理", "不作为", "不解决"),
      "公交不准时" -> Set("公交", "不准时", "晚点", "时间长"),
      "公交不按线路行驶" -> Set("公交", "站台", "线路", "溜站"),
      "交通拥堵" -> Set("堵塞", "拥堵", "车多"),
      "占道违停" -> Set("占道", "违停"),
      "出租车乱收费" -> Set("出租车", "费用", "收费", "打车", "计费"),
      "改善地铁设施" -> Set("地铁", "不方便", "坏了"),
      "处罚不认可" -> Set("曝光", "不认可", "罚单", "处罚"),
      "增加地铁站点" -> Set("地铁", "线路", "站点", "规划")
    )

    // Per-label score: sum of the per-keyword match counts. A request that
    // contains several keywords contributes once per keyword (this matches
    // the original accumulator semantics; it is NOT a distinct-request count).
    val infoitem: Map[String, Long] = labelKeywords.map { case (label, keywords) =>
      val score = keywords.toSeq
        .map(kw => iteminfo.filter(_.contains(kw)).count())
        .sum
      label -> score
    }

    // Serialize the label->count map as compact JSON, e.g. {"交通拥堵":12,...}.
    val jsons1 = compact(render(infoitem))
    println(jsons1)

    // Sink-database connection settings (credentials redacted).
    val propitem = new Properties()
    val db2url = "jdbc:mysql://xxxxxxxxxxxxxxxxxx/pc?useUnicode=true&characterEncoding=UTF-8&serverTimezone=UTC"
    propitem.setProperty("user", "xxxxxxxxxxxxxxxxx")
    propitem.setProperty("password", "xxxxxxxxxxxxxxxxxxxx")

    // Append one summary row; "30" tags the 30-day aggregation window.
    val value = Seq(ParticiplelabelAnalysis(jsons1, "30")).toDS()
    value.write.mode("append").jdbc(db2url, "t_particpanalysis", propitem)

    spark.stop()
  }
}