一个正则表达式导致 CPU 高的问题排查过程

这篇文章记录一个正则表达是导致 CPU 高的问题排查。由于无法直接使用线上的代码测试,所以我自己把代码整理了下来,具体代码如下:
public class AppMain {
public static void main(String[] args) throws InterruptedException {
final String regex="^([a-z0-9A-Z]+[-|\\.]?)+[a-z0-9A-Z]@([a-z0-9A-Z]+(-[a-z0-9A-Z]+)?\\.)+[a-zA-Z]{2,}$";
final String email="blog.laofu.online.fuweilao@vip.qq.com#";
for (int i = 0; i < 1000000; i++) {
Matcher matcher = RegexUtils.matcher(regex, email);
matcher.find();
Thread.sleep(10);
// matcher.group();
}
当运行程序的时候,我们可以看到 java 的进程占用了 CPU 了
82.1%
,由于我使用的服务器是 1核+2G, 所以 load avg 占用也很高。

使用 top -H -p 4214 查看各个线程占用的情况

使用
printf '%x\n' 4217
把进程转成 16 进制值为 1079。执行
jstack 4214|grep 1079 -A 100
得到线程的堆栈信息:
"main" #1 prio=5 os_prio=0 tid=0x00007f943004c800 nid=0x1079 runnable [0x00007f9439fe0000]
java.lang.Thread.State: RUNNABLE
at java.util.regex.Pattern$Curly.match0(Pattern.java:4264)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
// at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4195)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4293)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4195)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4293)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4195)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4293)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.match(Pattern.java:4799)
at java.util.regex.Pattern$GroupTail.match(Pattern.java:4731)
at java.util.regex.Pattern$Ques.match(Pattern.java:4196)
at java.util.regex.Pattern$Curly.match0(Pattern.java:4286)
at java.util.regex.Pattern$Curly.match(Pattern.java:4248)
at java.util.regex.Pattern$GroupHead.match(Pattern.java:4672)
at java.util.regex.Pattern$Loop.matchInit(Pattern.java:4815)
at java.util.regex.Pattern$Prolog.match(Pattern.java:4755)
at java.util.regex.Pattern$Begin.match(Pattern.java:3539)
at java.util.regex.Matcher.search(Matcher.java:1248)
at java.util.regex.Matcher.find(Matcher.java:637)
at org.rz.search.spider.AppMain.main(AppMain.java:13)
"VM Thread" os_prio=0 tid=0x00007f94300cc800 nid=0x1079 runnable