1 public String readfile(String filePath){
2 File file = new File(filePath);
3 InputStream input = null;
4 try {
5 input = new FileInputStream(file);
6 } catch (FileNotFoundException e) {
7 e.printStackTrace();
9 StringBuffer buffer = new StringBuffer();
10 byte[] bytes = new byte[1024];
11 try {
12 for(int n ; (n = input.read(bytes))!=-1 ; ){
13 buffer.append(new String(bytes,0,n,"GBK"));
14 }
15 } catch (IOException e) {
16 e.printStackTrace();
17 }
18 // System.out.println(buffer);
19 return buffer.toString();
20 }
22 public String getBody(String val) {
23 String start = "<body>";
24 String end = "</body>";
25 int s = val.indexOf(start) + start.length();
26 int e = val.indexOf(end);
27 return val.substring(s, e);
28 }
1 public static void main(String [] args){
2 OaDao m = new OaDao();
3 // String sql = "SELECT sth,xdh FROM TK_ST_0331 where sth='022012050101131000100' and rownum <=10";
4 String sql = "select t.sth , t.stgjz ,t.stly, x.mc from TK_ST_0331 t ,TK_STK_ST_0331 k,TK_TX X where t.sth = k.sth AND X.BH = t.tx and rownum <10 ";
5 List<OaVo> datalist= m.findAll(sql);
6 for(OaVo vo : datalist){
7 System.out.println(vo.getVal1()+"///"+vo.getVal2());
9 // String sth = "022012010100000100100";
10 String sth = vo.getVal1();
11 String kmh = sth.substring(0, 2); //科目号
12 String nf = sth.substring(2, 6); //年份
13 String yf = sth.substring(6,10); //月份
14 String serialno = sth.substring(10, 16); //序列号
15 String stxl = sth.substring(16, 19); //题型
16 String path ="/"+kmh+"/"+nf+"/"+yf+"/"+serialno+"/"+stxl+"/";
18 String tm_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_tm.htm";
19 String da_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_da.htm";
20 String jx_path ="H:/tk_source/"+kmh+"/"+yf+"/"+serialno+"/"+stxl+"/"+sth+"_jx.htm";
22 // String path = "H:/tk_source/02/0101/000001/001/022012010100000100100_da.htm";
24 String tm = m.getBody(m.readfile(tm_path));
25 System.out.println("----------------------题目------------------------------");
26 System.out.println(tm);
28 String da = m.getBody(m.readfile(da_path));
29 System.out.println("----------------------答案------------------------------");
30 System.out.println(da);
33 String jx = m.getBody(m.readfile(da_path));
34 System.out.println("----------------------解析------------------------------");
35 System.out.println(jx);
36 }
1 /**
2 * 从HTML源码中提取图片路径,最后以一个 String 类型的 List 返回,如果不包含任何图片,则返回一个 size=0 的List
3 * 需要注意的是,此方法只会提取以下格式的图片:.jpg|.bmp|.eps|.gif|.mif|.miff|.png|.tif|.tiff|.svg|.wmf|.jpe|.jpeg|.dib|.ico|.tga|.cut|.pic
4 * @param htmlCode HTML源码
5 * @return <img>标签 src 属性指向的图片地址的List集合
6 * @author Carl He
7 */
8 public static List<String> getImageSrc(String htmlCode) {
9 List<String> imageSrcList = new ArrayList<String>();
10 Pattern p = Pattern.compile("<img//b[^>]*//bsrc//b//s*=//s*('|/")?([^'/"/n/r/f>]+(//.jpg|//.bmp|//.eps|//.gif|//.mif|//.miff|//.png|//.tif|//.tiff|//.svg|//.wmf|//.jpe|//.jpeg|//.dib|//.ico|//.tga|//.cut|//.pic)//b)[^>]*>", Pattern.CASE_INSENSITIVE);
11 Matcher m = p.matcher(htmlCode);
12 String quote = null;
13 String src = null;
14 while (m.find()) {
15 quote = m.group(1);
16 src = (quote == null || quote.trim().length() == 0) ? m.group(2).split("//s+")[0] : m.group(2);
17 imageSrcList.add(src);
18 }
19 return imageSrcList;