<?xml version="1.0" encoding="UTF-8"?>
<IPC2547Event><ProcessSessionStart dateTime="2017-09-07T11:37:23.00+08:00" sessionId="S_20200008618100004_1504755459">
<Product itemType="852RN5691MB2003"/>
<Entity stationId="TBA7150495" stage="AOI"/>
<Recipe recipeId="852RN5691MB2003.tst" revision="2017-09-07T09:42:35.00+08:00">
<RecipeModule moduleId="456.bib" revision="2017-09-07T10:48:48.00+08:00"/></Recipe></ProcessSessionStart>
<ItemProcessStatus dateTime="2017-09-07T11:37:23.00+08:00" itemInstanceId="20200008618100004" sessionRef="S_20200008618100004_1504755459" itemProcessId="20200008618100004_1504755459" status="PASSED">
<ItemEventCount eventType="PROCESSSTEPSTATUS" count="2"/>
</ItemProcessStatus>
<ProcessStepStatus dateTime="2017-09-07T11:37:23.00+08:00" itemInstanceId="20200008618100004" sessionRef="S_20200008618100004_1504755459" itemProcessRef="20200008618100004_1504755459" processStepId="20200008618100004_1" status="PASSED" imageId="1"/>
<ProcessStepStatus dateTime="2017-09-07T11:37:23.00+08:00" itemInstanceId="20200008618100004" sessionRef="S_20200008618100004_1504755459" itemProcessRef="20200008618100004_1504755459" processStepId="20200008618100003" status="PASSED" imageId="2"/>
</IPC2547Event>
The parsing configuration is as follows:
input {
  stdin { }    # note: each stdin line becomes one event, so the XML body must arrive on a single line
}

# parse the XML held in the "message" field
filter {
  xml {
    remove_namespaces => true
    source => "message"
    target => "doc"
    xpath => [
      "/IPC2547Event/ItemProcessStatus/@dateTime", "dateTime",
      "/IPC2547Event/ItemProcessStatus/@itemInstanceId", "itemInstanceId",
      "/IPC2547Event/ItemProcessStatus/@sessionRef", "sessionRef",
      "/IPC2547Event/ItemProcessStatus/@itemProcessId", "itemProcessId",
      "/IPC2547Event/ItemProcessStatus/@status", "status",
      "/IPC2547Event/ItemProcessStatus/ItemEventCount/@eventType", "eventType",
      "/IPC2547Event/ItemProcessStatus/ItemEventCount/@count", "count",
      "/IPC2547Event/ProcessStepStatus[1]/@dateTime", "dateTime",
      "/IPC2547Event/ProcessStepStatus[1]/@itemInstanceId", "itemInstanceId",
      "/IPC2547Event/ProcessStepStatus[1]/@sessionRef", "sessionRef",
      "/IPC2547Event/ProcessStepStatus[1]/@itemProcessRef", "itemProcessRef",
      "/IPC2547Event/ProcessStepStatus[1]/@processStepId", "processStepId",
      "/IPC2547Event/ProcessStepStatus[1]/@status", "status",
      "/IPC2547Event/ProcessStepStatus[1]/@imageId", "imageId",
      "/IPC2547Event/ProcessStepStatus[2]/@dateTime", "dateTime",
      "/IPC2547Event/ProcessStepStatus[2]/@itemInstanceId", "itemInstanceId",
      "/IPC2547Event/ProcessStepStatus[2]/@sessionRef", "sessionRef",
      "/IPC2547Event/ProcessStepStatus[2]/@itemProcessRef", "itemProcessRef",
      "/IPC2547Event/ProcessStepStatus[2]/@processStepId", "processStepId",
      "/IPC2547Event/ProcessStepStatus[2]/@status", "status",
      "/IPC2547Event/ProcessStepStatus[2]/@imageId", "imageId"
    ]
  }
}

output {
  stdout {
    codec => rubydebug
  }
}
Writing the data into Elasticsearch:
# the input section is the same stdin input as above
input {
  stdin { }
}

filter {
  xml {
    remove_namespaces => true
    source => "message"
    target => "doc"
    xpath => [
      "/IPC2547Event/ItemProcessStatus/@dateTime", "dateTime",
      "/IPC2547Event/ItemProcessStatus/@itemInstanceId", "itemInstanceId",
      "/IPC2547Event/ItemProcessStatus/@sessionRef", "sessionRef",
      "/IPC2547Event/ItemProcessStatus/@itemProcessId", "itemProcessId",
      "/IPC2547Event/ItemProcessStatus/@status", "status",
      "/IPC2547Event/ItemProcessStatus/ItemEventCount/@eventType", "eventType",
      "/IPC2547Event/ItemProcessStatus/ItemEventCount/@count", "count",
      "/IPC2547Event/ProcessStepStatus[1]/@dateTime", "dateTime",
      "/IPC2547Event/ProcessStepStatus[1]/@itemInstanceId", "itemInstanceId",
      "/IPC2547Event/ProcessStepStatus[1]/@sessionRef", "sessionRef",
      "/IPC2547Event/ProcessStepStatus[1]/@itemProcessRef", "itemProcessRef",
      "/IPC2547Event/ProcessStepStatus[1]/@processStepId", "processStepId",
      "/IPC2547Event/ProcessStepStatus[1]/@status", "status",
      "/IPC2547Event/ProcessStepStatus[1]/@imageId", "imageId",
      "/IPC2547Event/ProcessStepStatus[2]/@dateTime", "dateTime",
      "/IPC2547Event/ProcessStepStatus[2]/@itemInstanceId", "itemInstanceId",
      "/IPC2547Event/ProcessStepStatus[2]/@sessionRef", "sessionRef",
      "/IPC2547Event/ProcessStepStatus[2]/@itemProcessRef", "itemProcessRef",
      "/IPC2547Event/ProcessStepStatus[2]/@processStepId", "processStepId",
      "/IPC2547Event/ProcessStepStatus[2]/@status", "status",
      "/IPC2547Event/ProcessStepStatus[2]/@imageId", "imageId"
    ]
  }
}

output {
  elasticsearch {
    index => "log-%{+YYYY.MM.dd}"
    hosts => ["10.2.1.238:9200", "10.2.1.237:9200", "10.2.1.241:9200"]
  }
  stdout {
    codec => rubydebug
  }
}
The parsed result is shown below. (Because stdin reads input line by line, the standalone XML declaration line produces an event tagged _xmlparsefailure, while the XML body, fed as a single line, is parsed successfully.)
"@version" => "1",
"host" => "localhost.localdomain",
"@timestamp" => 2018-02-02T06:38:24.163Z,
"message" => "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
"tags" => [
[0] "_xmlparsefailure"
"dateTime" => [
[0] "2017-09-07T11:37:23.00+08:00",
[1] "2017-09-07T11:37:23.00+08:00",
[2] "2017-09-07T11:37:23.00+08:00"
"itemInstanceId" => [
[0] "20200008618100004",
[1] "20200008618100004",
[2] "20200008618100004"
"imageId" => [
[0] "1",
[1] "2"
"count" => [
[0] "2"
"eventType" => [
[0] "PROCESSSTEPSTATUS"
"message" => "<IPC2547Event><ProcessSessionStart dateTime=\"2017-09-07T11:37:23.00+08:00\" sessionId=\"S_20200008618100004_1504755459\"><Product itemType=\"852RN5691MB2003\"/><Entity stationId=\"TBA7150495\" stage=\"AOI\"/><Recipe recipeId=\"852RN5691MB2003.tst\" revision=\"2017-09-07T09:42:35.00+08:00\"><RecipeModule moduleId=\"456.bib\" revision=\"2017-09-07T10:48:48.00+08:00\"/></Recipe></ProcessSessionStart><ItemProcessStatus dateTime=\"2017-09-07T11:37:23.00+08:00\" itemInstanceId=\"20200008618100004\" sessionRef=\"S_20200008618100004_1504755459\" itemProcessId=\"20200008618100004_1504755459\" status=\"PASSED\"><ItemEventCount eventType=\"PROCESSSTEPSTATUS\" count=\"2\"/></ItemProcessStatus><ProcessStepStatus dateTime=\"2017-09-07T11:37:23.00+08:00\" itemInstanceId=\"20200008618100004\" sessionRef=\"S_20200008618100004_1504755459\" itemProcessRef=\"20200008618100004_1504755459\" processStepId=\"20200008618100004_1\" status=\"PASSED\" imageId=\"1\"/><ProcessStepStatus dateTime=\"2017-09-07T11:37:23.00+08:00\" itemInstanceId=\"20200008618100004\" sessionRef=\"S_20200008618100004_1504755459\" itemProcessRef=\"20200008618100004_1504755459\" processStepId=\"20200008618100003\" status=\"PASSED\" imageId=\"2\"/></IPC2547Event>",
"sessionRef" => [
[0] "S_20200008618100004_1504755459",
[1] "S_20200008618100004_1504755459",
[2] "S_20200008618100004_1504755459"
"processStepId" => [
[0] "20200008618100004_1",
[1] "20200008618100003"
"@timestamp" => 2018-02-02T06:38:24.166Z,
"@version" => "1",
"host" => "localhost.localdomain",
"doc" => {
"ItemProcessStatus" => [
[0] {
"dateTime" => "2017-09-07T11:37:23.00+08:00",
"sessionRef" => "S_20200008618100004_1504755459",
"itemInstanceId" => "20200008618100004",
"itemProcessId" => "20200008618100004_1504755459",
"ItemEventCount" => [
[0] {
"count" => "2",
"eventType" => "PROCESSSTEPSTATUS"
"status" => "PASSED"
"ProcessSessionStart" => [
[0] {
"dateTime" => "2017-09-07T11:37:23.00+08:00",
"Product" => [
[0] {
"itemType" => "852RN5691MB2003"
"Entity" => [
[0] {
"stage" => "AOI",
"stationId" => "TBA7150495"
"sessionId" => "S_20200008618100004_1504755459",
"Recipe" => [
[0] {
"RecipeModule" => [
[0] {
"moduleId" => "456.bib",
"revision" => "2017-09-07T10:48:48.00+08:00"
"recipeId" => "852RN5691MB2003.tst",
"revision" => "2017-09-07T09:42:35.00+08:00"
"ProcessStepStatus" => [
[0] {
"dateTime" => "2017-09-07T11:37:23.00+08:00",
"sessionRef" => "S_20200008618100004_1504755459",
"processStepId" => "20200008618100004_1",
"itemInstanceId" => "20200008618100004",
"imageId" => "1",
"itemProcessRef" => "20200008618100004_1504755459",
"status" => "PASSED"
[1] {
"dateTime" => "2017-09-07T11:37:23.00+08:00",
"sessionRef" => "S_20200008618100004_1504755459",
"processStepId" => "20200008618100003",
"itemInstanceId" => "20200008618100004",
"imageId" => "2",
"itemProcessRef" => "20200008618100004_1504755459",
"status" => "PASSED"
"itemProcessId" => [
[0] "20200008618100004_1504755459"
"status" => [
[0] "PASSED",
[1] "PASSED",
[2] "PASSED"
"itemProcessRef" => [
[0] "20200008618100004_1504755459",
[1] "20200008618100004_1504755459"
JSON is a very popular storage format today, but in practice plenty of data still arrives as XML. How do we process XML files and import them into Elasticsearch? The example above shows one way: using Logstash's xml filter. If you need to import your own custom XML file into Elasticsearch, Logstash can help; prepare a sample XML file like the one at the top of this article and feed it through the same kind of configuration.
Logstash supports many different data sources, and as data travels from its source to its destination, Logstash can transform it along the way. These transformations are configured in the filter section.
About installation: see the earlier article on installing Logstash and building a simple log-collection pipeline.
How Logstash filters handle different data formats: whatever the format, every Logstash event carries a few standard fields, and the main ones are listed below.
@timestamp marks the time the event occurred. Because this field is used in Logstash's internal pipeline it must be a Joda time object; if you try to rename a plain string field to @timestamp yourself, Logstash will throw an error, so use the date filter plugin to manage this field (see the sketch after this list).
type marks the type of the event.
host marks where the event happened.
tags marks attributes of the event. It is simply an array, and one event can carry several tags.
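
Since the dateTime values extracted by xpath above arrive as an array of strings, @timestamp is not set from them automatically. Below is a minimal sketch of doing that with the date plugin; the mutate step that flattens the array is my own addition for illustration, not part of the original configuration.

filter {
  # take the first extracted dateTime value (the xpath result is an array)
  mutate {
    replace => { "dateTime" => "%{[dateTime][0]}" }
  }
  # parse it as ISO8601 and write it into @timestamp
  date {
    match  => ["dateTime", "ISO8601"]
    target => "@timestamp"
  }
}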
Now to the main topic. The previous article introduced Logstash basics with a getting-started demo; this one covers a few commonly used plugins and examples. From that introduction we already know the overall flow of Logstash log processing:
input => filter => output
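
A minimal skeleton of that flow looks like this (placeholders only; the real plugins used in this article appear in the configurations above):

input {
  stdin { }                        # where events come from
}
filter {
  # processing plugins go here: xml, date, mutate, grok, ...
}
output {
  stdout { codec => rubydebug }    # where events go
}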
Besides the basic plugins above, ruby, mutate, grok, and codec are also used frequently.
Online grok pattern debugger: http://grokdebug.herok
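
As a small illustration of grok and mutate (the log line format here is a made-up example, not data from this article):

filter {
  # grok: extract structured fields from an unstructured line such as
  # "2018-02-02 14:38:24 ERROR disk full on /dev/sda1"
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:ts} %{LOGLEVEL:level} %{GREEDYDATA:detail}" }
  }
  # mutate: rename, convert, or drop fields after parsing
  mutate {
    rename => { "detail" => "description" }
  }
}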
Logstash is a data-processing tool whose main purpose is analyzing logs. The whole stack can be thought of as an MVC application: Logstash is the controller layer, Elasticsearch the model layer, and Kibana the view layer. Data is first sent to Logstash, which filters and formats it (converting it to JSON), then hands it to Elasticsearch for storage and search indexing, while Kibana provides the front-end...