相关文章推荐
想发财的大脸猫  ·  Application ...·  8 月前    · 
寂寞的台灯  ·  Get-SecureRandom ...·  1 年前    · 
<!-- PDFBox (declared further down) is what this article uses to count total pages and render thumbnails -->
<!-- https://mvnrepository.com/artifact/com.sleepycat/je -->
<!-- NOTE(review): Berkeley DB Java Edition; the PdfUtil code shown in this article imports nothing from com.sleepycat - confirm whether this dependency is needed -->
<dependency>
    <groupId>com.sleepycat</groupId>
    <artifactId>je</artifactId>
    <version>5.0.73</version>
</dependency>
<!-- Apache PDFBox: PDF parsing, text extraction and page rendering -->
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.8</version>
</dependency>

二、实现代码

import lombok.extern.slf4j.Slf4j; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.text.PDFTextStripper; import javax.imageio.IIOImage; import javax.imageio.ImageIO; import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream; import java.awt.image.BufferedImage; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.util.Iterator; @Slf4j public class PdfUtil { * 通过PDFbox获取文章总页数 * @param filePath:文件路径 * @return * @throws IOException public static int getNumberOfPages(String filePath) throws IOException, InterruptedException { File file = new File(filePath); PDDocument pdDocument = PDDocument.load(new File(filePath)); int pages = pdDocument.getNumberOfPages(); pdDocument.close(); return pages; * 通过PDFbox获取文章内容 * @param filePath * @return public static String getContent(String filePath) throws IOException { PDFParser pdfParser = new PDFParser(new org.apache.pdfbox.io.RandomAccessFile(new File(filePath), "rw")); pdfParser.parse(); PDDocument pdDocument = pdfParser.getPDDocument(); String text = new PDFTextStripper().getText(pdDocument); pdDocument.close(); return text; * 通过PDFbox生成文件的缩略图 * @param filePath:文件路径 * @param outPath:输出图片路径 * @throws IOException public static void getThumbnails(String filePath, String outPath) throws IOException { // 利用PdfBox生成图像 PDDocument pdDocument = PDDocument.load(new File(filePath)); PDFRenderer renderer = new PDFRenderer(pdDocument); // 构造图片 BufferedImage img_temp = renderer.renderImageWithDPI(0, 30, ImageType.RGB); // 设置图片格式 Iterator<ImageWriter> it = ImageIO.getImageWritersBySuffix("png"); // 将文件写出 ImageWriter writer = (ImageWriter) it.next(); ImageOutputStream imageout = ImageIO.createImageOutputStream(new FileOutputStream(outPath)); writer.setOutput(imageout); writer.write(new IIOImage(img_temp, null, null)); 
img_temp.flush(); imageout.flush(); imageout.close(); //Warning: You did not close a PDF Document pdDocument.close();

三、测试类--Main

public class Main { public static void main(String[] args) throws IOException, InterruptedException { int numberOfPages = getNumberOfPages("D:\\Desktop\\DocCloud\\testDir\\hadoopClientCode.pdf"); System.out.println(numberOfPages); String content = getContent(""); System.out.println(content); getThumbnails("D:\\Desktop\\DocCloud\\testDir\\hadoopClientCoed.pdf", "D:\\Desktop\\DocCloud\\testDir\\hadoopClientCoed.pdf.png");

1>首先测试获取PDF文件的总页数,在控制台可以看到输出的页数

2>测试获取PDF文件的内容,在控制台可以看到--你自己PDF文件中的内容

3>测试生成PDF缩略图

缩略图的大小可以在代码中修改:调整 getThumbnails 中 renderImageWithDPI 的第二个参数(DPI,示例中为 30),数值越大,生成的图片越大