引用借鉴博客地址:
https://blog.csdn.net/yjclsx/article/details/51441632
https://blog.csdn.net/qq_36903131/article/details/82529676
直接上代码了:
package com.allen.utils;
import java.awt.Color;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.util.Date;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFShape;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
import org.apache.poi.xslf.usermodel.XSLFTextRun;
import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.w3c.dom.Document;
/* 通过 poi 实现 word、excel、ppt 转 html
 * poi 版本用的 org.apache.poi 4.1.0
 * 包名分别为:
 * poi
 * poi-ooxml
 * poi-ooxml-schemas
 * poi-scratchpad
 */
public class WordExcelPptToHtml {
// 适用格式为*.doc,即Word 97-2003文档
public static void WordToHtml(String filePath,String fileName){
InputStream input;
try {
input = new FileInputStream(filePath + fileName);
HWPFDocument wordDocument = new HWPFDocument(input);
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(
new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType,
String suggestedName, float widthInches, float heightInches) {
return suggestedName;
wordToHtmlConverter.processDocument(wordDocument);
List pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {
pic.writeImageContent(new FileOutputStream(filePath+ pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
String content = new String(outStream.toByteArray());
FileUtils.writeStringToFile(new File(filePath, "WorldToHtml.html"), content, "utf-8");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
// 适用格式 为 *.xls,即EXCEL 97-2003文档, 格式为.xlsx 的不行
public static void ExcelToHtml(String filePath,String fileName){
InputStream input;
try {
input = new FileInputStream(filePath+fileName);
HSSFWorkbook excelBook=new HSSFWorkbook(input);
ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() );
excelToHtmlConverter.processWorkbook(excelBook);
List pics = excelBook.getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get (i);
try {
pic.writeImageContent (new FileOutputStream (filePath + pic.suggestFullFileName() ) );
} catch (FileNotFoundException e) {
e.printStackTrace();
Document htmlDocument =excelToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource (htmlDocument);
StreamResult streamResult = new StreamResult (outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty (OutputKeys.INDENT, "yes");
serializer.setOutputProperty (OutputKeys.METHOD, "html");
serializer.transform (domSource, streamResult);
outStream.close();
String content = new String (outStream.toByteArray() );
FileUtils.writeStringToFile(new File (filePath, "ExcelToHtml.html"), content, "utf-8");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
// 适用PPT格式为 *.pptx ,ppt转图片插入html 即可
public static boolean PPTtoImage(String filePath,String fileName){
File file = new File(filePath+fileName);
boolean isppt = checkFile(file);
if (!isppt) {
System.out.println("The image you specify don't exit!");
return false;
XMLSlideShow ppt = null;
try {
ppt = new XMLSlideShow(new FileInputStream(filePath+fileName));
Dimension pgsize = ppt.getPageSize();
List<XSLFSlide> slides = ppt.getSlides();
// 遍历幻灯片
for (XSLFSlide slide : slides) {
// for(int i=0;i<slides.size();i++){
// 获取幻灯片中的所有图形(文本框、表格、图形...)
List<XSLFShape> shapes = slide.getShapes();
// 遍历图形
for (XSLFShape shape : shapes) {
// 判断该图形类是否是文本框类
if (shape instanceof XSLFTextShape) {
// 将图像类强制装换成文本框类
XSLFTextShape ts = (XSLFTextShape) shape;
// 获取文本框内的文字
String str = ts.getText();
System.out.println(str);
// 若想对文本框内的文字进行更改,还需要进行如下步骤
List<XSLFTextParagraph> textParagraphs = ts.getTextParagraphs();
for (XSLFTextParagraph tp : textParagraphs) {
List<XSLFTextRun> textRuns = tp.getTextRuns();
for (XSLFTextRun r : textRuns) {
if ("fuck you".equals(r.getRawText())) {
// 对匹配到的字符串进行更改
r.setText("I love you");
// 设置字体颜色
r.setFontColor(Color.RED);
BufferedImage img = new BufferedImage(pgsize.width,pgsize.height, BufferedImage.TYPE_INT_RGB);
Graphics2D graphics = img.createGraphics();
graphics.setPaint(Color.BLUE);
graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
slide.draw(graphics);
// 这里设置图片的存放路径和图片的格式(jpeg,png,bmp等等),注意生成文件路径
FileOutputStream out = new FileOutputStream(filePath+"ppt_"+ new Date().getTime() + ".jpeg");
javax.imageio.ImageIO.write(img, "jpeg", out);
out.close();
System.out.println("success..........");
return true;
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
return false;
// function 检查文件是否为PPT
public static boolean checkFile(File file) {
boolean isppt = false;
String filename = file.getName();
String suffixname = null;
if (filename != null && filename.indexOf(".") != -1) {
suffixname = filename.substring(filename.indexOf("."));
if (suffixname.equals(".pptx")) {
isppt = true;
return isppt;
} else {
return isppt;
public static void main(String[] args) throws Throwable {
final String filePath = "G:\\";
final String worldFileName = "Word_test.doc";
// WordToHtml( filePath, worldFileName);
String excelFileName="123.xls";
// ExcelToHtml(filePath, excelFileName);
PPTtoImage(filePath,"testPPT.pptx");
pom 引用包:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
引用借鉴博客地址: https://blog.csdn.net/yjclsx/article/details/51441632 、 https://blog.csdn.net/qq_36903131/article/details/82529676 。直接上代码了: package com.allen.utils; import java.awt.Color; import java.awt...
把jacob.jar加载到工程里。
在C:\WINDOWS\system32;C:\Program Files\Java\jdk1.5.0_04\bin;C:\Program Files\Java\jdk1.5.0_04\jre\bin;下添加jacob.dll文件。
OfficeToXML.java文件是实现代码。
我自己工程用到的代码,现共享出来给大家!
(附件以二进制的形式存储在数据库将其转file,file转html),支持doc\docx\wps\xls\xlsx\et\ppt\dps\txt多种格式的文件预览
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStrea
3. 创建XHTMLContentHandler对象,用于处理Word文档中的内容。
4. 创建WordToHtmlConverter对象,并将XWPFDocument和XHTMLContentHandler作为参数传入。
5. 调用WordToHtmlConverter的processDocument方法,将Word文档转换为HTML。
6. 获取HTML内容,可以将其保存到文件或输出到页面。
以下是一个简单的代码示例:
```java
import java.io.*;
import org.apache.poi.xwpf.converter.core.*;
import org.apache.poi.xwpf.converter.xhtml.*;
import org.apache.poi.xwpf.usermodel.*;
// Sample: intended to convert "input.docx" to HTML in "output.html" and echo the markup.
// NOTE(review): this fragment is truncated (closing braces are missing) and does not compile
// as shown: WordToHtmlConverter and DocumentBuilderFactory are not covered by the imports
// above it. Treat it as pseudo-code until verified against the actual library API.
public class WordToHtml {
// Entry point; any failure propagates to the caller as an exception.
public static void main(String[] args) throws Exception {
// Read the Word document
XWPFDocument document = new XWPFDocument(new FileInputStream("input.docx"));
// Create the XHTMLContentHandler object
// NOTE(review): the FileInputStream above is never closed explicitly.
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream("output.html"), "UTF-8");
XHTMLContentHandler contentHandler = new XHTMLContentHandler(writer, null);
// Create the WordToHtmlConverter object
// NOTE(review): in Apache POI, WordToHtmlConverter belongs to the HWPF (.doc) converter
// package; whether it can process an XWPFDocument (.docx) needs to be confirmed.
WordToHtmlConverter converter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
converter.setParagraphCssStyle("text-align: justify;"); // set the paragraph style
// Use the XHTMLContentHandler as the converter's content handler
converter.processDocument(document, contentHandler);
// Get the HTML content and print it
// NOTE(review): OutputStreamWriter does not buffer what was written for toString();
// this presumably prints the object's default string form, not the HTML — confirm.
String html = writer.toString();
System.out.println(html);
// Close the streams
writer.close();
document.close();
注意:该示例代码中的XWPFDocument仅适用于处理.docx格式的Word文档,如果需要处理.doc格式的Word文档,需要使用HWPFDocument类。
码上去学:
Cannot get a STRING value from a NUMERIC cell poi异常解决
m0_71138235:
将excel按照某一列拆分成多个单独文件
jj_y_69:
解决:连接不上 docker中的mysql
AraneidaSword: