package com.steerinfo.dil.util; import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager; import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter; import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.converter.WordToHtmlUtils; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.xwpf.usermodel.*; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber; import org.w3c.dom.Document; import org.w3c.dom.Element; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.*; import java.math.BigInteger; import java.util.Base64; import java.util.Date; import java.util.List; /** * POIExcelToHtml 文件转换: * * @author generator * @version 1.0-SNAPSHORT 2021-08-09 18:06 * 类描述 * 修订历史: * 日期:2021-08-09 * 作者:shadow * 参考:https://blog.csdn.net/u013585096/article/details/85104888 * 描述:Execl转HTML * @Copyright 湖南视拓信息技术股份有限公司. All rights reserved. * @see null */ public class POIWordToHtml { private static final String ENCODING = "GB2312";// UTF-8 public String docToHtml(InputStream input) throws Exception { String htmlData = "预览失败"; try { HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(input); WordToHtmlConverter wordToHtmlConverter = new ImageConverter( DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() ); //wordToHtmlConverter.setPicturesManager(new PicturesManager() { // @Override // public String savePicture(byte[] content, // PictureType pictureType, String suggestedName, // float widthInches, float heightInches) { // //给生成的页面写图片的路径 // return "word/media/" + suggestedName; // } //}); wordToHtmlConverter.processDocument(wordDocument); Document htmlDocument = wordToHtmlConverter.getDocument(); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource(htmlDocument); StreamResult streamResult = new StreamResult(outStream); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty(OutputKeys.INDENT, "yes"); serializer.setOutputProperty(OutputKeys.METHOD, "html"); serializer.transform(domSource, streamResult); htmlData = outStream.toString(); outStream.close(); } catch (Exception e){ e.printStackTrace(); } return htmlData; } public String docxToHtml(InputStream inputStream) throws IOException { String htmlData = "预览失败"; try{ XWPFDocument docxDocument = new XWPFDocument(inputStream); XHTMLOptions options = XHTMLOptions.create().indent(4); //获取文档中的图片 //List allPictures = docxDocument.getAllPictures(); //for (XWPFPictureData xwpfPictureData : allPictures) { // String name = xwpfPictureData.getFileName(); // byte[] data = xwpfPictureData.getData(); // InputStream input = new ByteArrayInputStream(data); // TODO 图片处理 // //} //final String imageUrl = ""; //不把图片生成出来 options.setExtractor(null); options.setIgnoreStylesIfUnused(false); options.setFragment(true); //options.URIResolver(new IURIResolver() { // @Override // public String resolve(String uri) { // return imageUrl + uri; // } //}); // 图片转base64 新版本支持 options.setImageManager(new Base64EmbedImgManager()); //转换htm1 ByteArrayOutputStream htmlStream = new ByteArrayOutputStream(); XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options); htmlData = htmlStream.toString(); htmlStream.close(); } catch(Exception e) { e.printStackTrace(); } return htmlData; } /** * 图片处理 * * */ public class ImageConverter extends WordToHtmlConverter { public ImageConverter(Document document) { super(document); } @Override protected void processImageWithoutPicturesManager(Element currentBlock, boolean inlined, Picture picture) { Element imgNode = currentBlock.getOwnerDocument().createElement("img"); StringBuffer sb = new StringBuffer(); sb.append(Base64.getMimeEncoder().encodeToString(picture.getRawContent())); sb.insert(0, "data:" + picture.getMimeType() + ";base64,"); imgNode.setAttribute("src", sb.toString()); currentBlock.appendChild(imgNode); } } /** * 读取word中的文本内容(段落、表格、图片分开处理)转HTML docx后缀名的Word * @param * @throws IOException */ public String readWordImgToHtml(InputStream inputStream) throws IOException{ String htmlData = "预览失败"; XWPFDocument document = new XWPFDocument(inputStream); String htmlText=""; try { // 获取word中的所有段落与表格 List elements = document.getBodyElements(); for (IBodyElement element : elements) { // 段落 if (element instanceof XWPFParagraph) { htmlText+=getParagraphHtmlText((XWPFParagraph) element); } // 表格 else if (element instanceof XWPFTable) { htmlText+=getTabelHtmlText((XWPFTable) element); } } XHTMLOptions options = XHTMLOptions.create().indent(4); options.setExtractor(null); options.setIgnoreStylesIfUnused(false); options.setFragment(true); options.setImageManager(new Base64EmbedImgManager()); ByteArrayOutputStream htmlStream = new ByteArrayOutputStream(); XHTMLConverter.getInstance().convert(document, htmlStream, options); htmlData = htmlStream.toString(); htmlStream.close(); //获取word中的所有图片 List picLists= document.getAllPictures(); for(XWPFPictureData pic:picLists) { System.out.println("图片名称:\t" + pic.getFileName()); System.out.println("图片类型:\t" + pic.getPictureType()); byte[] data = pic.getData(); System.out.println(data); //字节流图片上传,并返回服务器地址 String imgUrl = getImageUrl(data, pic.getFileName()); System.out.println("图片服务器地址:" + imgUrl); //组装img htmlText += "

"; } } catch (Exception e) { e.printStackTrace(); } return htmlData; } /** * 获取段落内容并组装段落HTML * @param paragraph */ private static String getParagraphHtmlText(XWPFParagraph paragraph) { // 获取段落中所有内容 List runs = paragraph.getRuns(); if (runs.size() == 0) { return ""; } StringBuffer runText = new StringBuffer(); for (XWPFRun run : runs) { runText.append(run.text()); } return "

"+runText.toString()+"

"; } /** * 获取表格内容并组装表格HTML * @param table */ private static String getTabelHtmlText(XWPFTable table) { String result=""; //获取表格数据行 List rows = table.getRows(); if(rows.size()>0){ result+=""; //遍历 for (int i=0;i cells = row.getTableCells(); for (XWPFTableCell cell : cells) { //获取单元格跨列个数 BigInteger gridSpanNum = getCellGridSpanNum(cell); result+=""; } result+=""; } result+="
"; String cellText=""; // 简单获取内容(简单方式是不能获取字体对齐方式的) // System.out.println(cell.getText()); // 一个单元格可以理解为一个word文档,单元格里也可以加段落与表格 List paragraphs = cell.getParagraphs(); for (XWPFParagraph paragraph : paragraphs) { cellText+="

"+getParagraphText(paragraph)+"

"; } result+=cellText; result+="
"; } return result; } /** * 获取段落内容( docx后缀名的Word) * @param paragraph */ private static String getParagraphText(XWPFParagraph paragraph) { // 获取段落中所有内容 List runs = paragraph.getRuns(); if (runs.size() == 0) { //System.out.println("按了回车(新段落)"); return ""; } StringBuffer runText = new StringBuffer(); for (XWPFRun run : runs) { runText.append(run.text()); } // if (runText.length() > 0) { // runText.append(",对齐方式:").append(paragraph.getAlignment().name()); // System.out.println(runText); // } return runText.toString(); } /** * 字节流图片上传 * @param data:图片字节流 * @param fileName:图片名称 */ public static String getImageUrl(byte[] data,String fileName) throws Exception{ String imgUrl=""; Long res =new Date().getTime(); //设置文件存储路径,可以存放在你想要指定的路径里面 String rootPath="D:/mimi/"+File.separator+"upload/images/"; // 新文件名 String newFileName =res + fileName.substring(fileName.lastIndexOf(".")); //新文件 File newFile=new File(rootPath+File.separator+newFileName); //判断文件目录是否存在 if(!newFile.getParentFile().exists()){ //如果目标文件所在的目录不存在,则创建父目录 newFile.getParentFile().mkdirs(); } //-------把图片文件写入磁盘 start ---------------- FileOutputStream fos = new FileOutputStream(newFile); fos.write(data); fos.close(); //-------把图片文件写入磁盘 end ---------------- //服务器图片地址 String baseURL = "http://192.168.0.76:8080/mimi/upload/images/"; imgUrl=baseURL+newFileName; return imgUrl; } /** * 获取单元格跨列个数 * @return */ public static BigInteger getCellGridSpanNum(XWPFTableCell cell){ BigInteger gridSpanNum =null; //获取单元格跨列 CTDecimalNumber gridSpanXml = cell.getCTTc().getTcPr().getGridSpan(); if(gridSpanXml!=null){ gridSpanNum = gridSpanXml.getVal(); System.out.println("gridSpanNum:"+gridSpanNum); } return gridSpanNum; } }