html javascript 转成PDF html转成pdf_davisl的技术博客_

目录

将html文件转成pdf文件

依赖
代码实现
工具类
总结

将html文件转成pdf文件

需求场景：后端根据数据库数据，生成产品所需要的pdf样式。
本来以为只需要制作一个pdf模板，然后填充数据即可；后来发现远没有这么简单：所需的pdf文件中有一部分内容是html页面代码块，使用模板填充很难把这部分html正确渲染出来。于是更换思路，把全部内容拼接成html代码，然后整体转成pdf，简单高效。

依赖

我的实现使用的是iText库（也可以根据个人喜好选用其他方案），以下是pom文件中导入的依赖。

<!-- PDF generation: iText 5 core plus XML Worker (XHTML parsing); itext-asian presumably supplies the CJK font resources (e.g. STSong-Light) - verify -->
        <dependency>
            <groupId>com.itextpdf.tool</groupId>
            <artifactId>xmlworker</artifactId>
            <version>5.5.13</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>
        <!-- JTidy: converts loosely-formed HTML into well-formed XHTML before it is parsed -->
        <dependency>
            <groupId>net.sf.jtidy</groupId>
            <artifactId>jtidy</artifactId>
            <version>r938</version>
        </dependency>

代码实现

我将逻辑写在了实现层（impl），controller层只需简单调用service即可。ChecklistInfoDto、SpecialSituation都是普通的bean，按照自己的查询结果替换即可：

// Load the checklist record; fail fast when the id matches nothing instead of
        // hitting a NullPointerException on the first getter.
        ChecklistInfoDto checkListInfo = checklistMapper.getContByCheckListId(id);
        if (checkListInfo == null) {
            throw new RuntimeException("XXX" + id + "未查询到XXXXX信息");
        }

        // Load the special-situation record that belongs to this checklist.
        String checkListId = checkListInfo.getChecklistId();
        SpecialSituation cont = specialSituationMapper.getContByTemplateId(checkListId);
        if (cont == null || ObjectUtils.isEmpty(cont)) {
            logger.info("当前id{}未查询到XXXX信息",checkListId);
            throw new RuntimeException("XXX"+checkListId+"未查询到XXXXX信息");
        }

        // Two PDF layouts are produced; only the table rows differ between them.
        // NOTE(review): the values below are concatenated into XHTML without escaping;
        // a '<' or '&' in the data would break parsing - confirm the data is safe.
        String rows;
        if (EMERGENCY.equals(checkListInfo.getTypeName())) {
            rows = "    <tr>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + cont.getArcId() + "</td>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + cont.getSsr() + "</td>\n" +
                    "    </tr>\n" +
                    "    <tr>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + cont.getTypeName() + "</td>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + cont.getHappenTime() + "</td>\n" +
                    "    </tr>\n" +
                    "    <tr>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + cont.getAdep() + "</td>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + cont.getAdes() + "</td>\n" +
                    "    </tr>\n";
        } else {
            rows = "    <tr>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + checkListInfo.getTypeName() + "</td>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + checkListInfo.getSeatName() + "</td>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + checkListInfo.getConsuming() + "</td>\n" +
                    "    </tr>\n" +
                    "    <tr>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + checkListInfo.getUserName() + "</td>\n" +
                    "        <td style='font-weight: bold;line-height: 1.5em'>XXXXX：</td>\n" +
                    "        <td style='line-height: 1.5em'>" + checkListInfo.getEndTime() + "</td>\n" +
                    "    </tr>\n";
        }

        // Shared page skeleton: doctype, centred title, the table, then the stored HTML body.
        String content = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" +
                "<html>\n" +
                "<body>\n" +
                "<h2 style=\"text-align: center\"> " + checkListInfo.getChecklistName() + " </h2>" +
                "<table style='width: 100%;border: 1px solid red;text-align: left'>\n" +
                rows +
                "</table>" + checkListInfo.getContent() + "</body>\n" +
                "</html>";

        // Overwrite (not append): appending would stack a second full HTML document
        // onto the previous one on every call. The file is written as UTF-8 because
        // PDFUtils parses it as UTF-8. Fully-qualified names avoid needing new imports.
        try (java.io.Writer htmlWriter = new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(filePath), java.nio.charset.StandardCharsets.UTF_8)) {
            htmlWriter.write(content);
        } catch (IOException ex) {
            // Do not continue to the conversion step with a missing or stale HTML file.
            throw new RuntimeException("html文件写入失败：" + filePath, ex);
        }

        // Remove a previously generated PDF, if any, before writing the new one.
        String fileName = "checkList.pdf";
        String pdfPath = upLoadPath + "/" + fileName;
        List<String> fileNames = readAllFile(upLoadPath);
        if (fileNames != null && fileNames.contains(fileName)) {
            Boolean deleted = deleteUploadfile(pdfPath);
            logger.info("删除操作{}", deleted);
            // Boolean.TRUE.equals also treats a null result as "not deleted".
            if (!Boolean.TRUE.equals(deleted)) {
                throw new RuntimeException("当前文件下存在旧的pdf文件，删除失败！");
            }
        }

        // Convert the written HTML page to PDF.
        // NOTE(review): this reads a hard-coded path rather than filePath; presumably
        // the two are meant to be the same file - confirm and pass filePath here.
        PDFUtils.convertHtmlToPdf(
                "logs/atcmis-logs/flow/flowBase/2022-07-19/test.html",
                pdfPath);

工具类

上面的代码，使用到了转换的工具类，代码如下：

public class PDFUtils {
    private static final Logger logger = LoggerFactory.getLogger(PDFUtils.class);
    private static String path = "C:\\11\\" + new Random().nextInt() + ".pdf";; // 生成PDF后的存放路径
    public static void main(String[] args) {
        try {
            convertHtmlToPdf("logs/atcmis-logs/flow/flowBase/2022-07-19/test.html",
                    path);
        } catch (IOException | DocumentException e) {
            e.printStackTrace();
    /*把html文件转换为pdf文件输出流*/
    public static byte[] convertHtmlToPdf(byte[] html) throws IOException, DocumentException {
        String pdfName = UUID.randomUUID()+".pdf";
        String pdfPath = path+pdfName;
        convertHtmlToPdf(html,pdfPath);
        File file = new File(pdfPath);
        InputStream inputStream = new FileInputStream(file);
        byte[] buffer = getByteByInputStream(inputStream);
        inputStream.close();
        file.delete();
        return buffer;
    /*把html文件转换为pdf文件*/
    public static void convertHtmlToPdf(byte[] html,String pdfPath) throws IOException, DocumentException {
        logger.info("源html文件传入字节流!");
        OutputStream outputStream = new FileOutputStream(pdfPath);
        Rectangle rectPageSize = new Rectangle(PageSize.A4);// A4纸张
        Document document = new Document(rectPageSize, 40, 40, 40, 40);// 上、下、左、右间距
        PdfWriter pdfWriter = PdfWriter.getInstance(document,outputStream);
        document.open();
        ByteArrayInputStream bin = new ByteArrayInputStream(htmlFormat(html));
        logger.info("源html文件读取为缓冲字节流!");
        XMLWorkerHelper wh = XMLWorkerHelper.getInstance();
        wh.parseXHtml(pdfWriter, document, bin, null,Charset.forName("UTF-8"), new ChinaFontProvide());
        logger.info("源html文件转换为pdf文件!");
        document.close();
    /*把html文件转换为pdf文件*/
    public static void convertHtmlToPdf(String htmlPath,String pdfPath) throws IOException, DocumentException {
        InputStream htmlFileStream = new FileInputStream(htmlPath);
        byte[] buffer = getByteByInputStream(htmlFileStream);
        ByteArrayInputStream bin = new ByteArrayInputStream(htmlFormat(buffer));
        // 创建一个document对象实例
        Document document = new Document();
        // 为该Document创建一个Writer实例
        PdfWriter pdfwriter = PdfWriter.getInstance(document,new FileOutputStream(pdfPath));
        pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);
        // 打开当前的document
        document.open();
        XMLWorkerHelper wh = XMLWorkerHelper.getInstance();
        wh.parseXHtml(pdfwriter, document,bin,null,Charset.forName("UTF-8"),new ChinaFontProvide());
        htmlFileStream.close();
        document.close();
    /*创建pdf 返回pdf全路径*/
    public static String createPdf(String pdfPath) throws DocumentException, IOException {
        // 生成pdf 文件
        String pdfName = UUID.randomUUID()+".pdf";
        File outPdf = new File(pdfPath,pdfName);
        OutputStream file = new FileOutputStream(outPdf);
        // 创建pdf Document
        Document document = new Document();
        PdfWriter writer = PdfWriter.getInstance(document, file);
        //打开Document写入内容
        document.open();
        document.add(new Paragraph("Hello World, iText"));
        document.add(new Paragraph(new Date().toString()));
        document.close();
        file.close();
        // 返回生成的pdf路径
        return outPdf.getPath();
    /*将不标准html转换为标准xhtml格式*/
    public static byte[] htmlFormat(byte[] html) throws FileNotFoundException{
        //输出为xhtml
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // 实例化Tidy对象
        Tidy tidy = new Tidy();
        // 设置输入
        tidy.setInputEncoding("UTF-8");
        // 如果是true 不输出注释，警告和错误信息
        tidy.setQuiet(true);
        // 设置输出
        tidy.setOutputEncoding("UTF-8");
        // 不显示警告信息
        tidy.setShowWarnings(false);
        // 缩进适当的标签内容。
        tidy.setIndentContent(true);
        // 内容缩进
        tidy.setSmartIndent(true);
        tidy.setIndentAttributes(false);
        // 只输出body内部的内容
//        tidy.setPrintBodyOnly(true);
        // 多长换行
        tidy.setWraplen(1024);
        // 输出为xhtml
        tidy.setXHTML(true);
        // 去掉没用的标签
        tidy.setMakeClean(true);
        // 清洗word2000的内容
        tidy.setWord2000(true);
        // 设置错误输出信息
        tidy.setErrout(new PrintWriter(System.out));
        tidy.parseDOM(new ByteArrayInputStream(html), baos);
        return baos.toByteArray();
    /*输入流中获取字节数组 */
    public static byte[] getByteByInputStream(InputStream inputStream) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        byte[] b = new byte[1024];
        int n;
        //每次从fis读1024个长度到b中，fis中读完就会返回-1
        while ((n = inputStream.read(b)) != -1)
            bos.write(b, 0, n);
        bos.close();
        return bos.toByteArray();
    /** 解决中文字体   */
    public static final class ChinaFontProvide implements FontProvider {
        @Override
        public Font getFont(String arg0, String arg1, boolean arg2, float arg3, int arg4, BaseColor arg5) {
            BaseFont bfChinese = null;
            try {
                bfChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            } catch (Exception e) {
                e.printStackTrace();
            Font FontChinese = new Font(bfChinese, arg3, arg4);
            return FontChinese;
        @Override
        public boolean isRegistered(String arg0) {
            return false;
}

总结

以上就是全部的代码。没有接触过的话可能会觉得较难，但实际开发起来很容易上手。至于iText的局限性，网上有很多相关资料，感兴趣可以自行搜索。最后希望看到本文的大佬多多斧正，谢谢啦。