|
@@ -1,26 +1,37 @@
|
|
package com.steerinfo.ftp.uploadfile.utils;
|
|
package com.steerinfo.ftp.uploadfile.utils;
|
|
|
|
|
|
|
|
+import com.itextpdf.text.*;
|
|
|
|
+import com.itextpdf.text.pdf.BaseFont;
|
|
|
|
+import com.itextpdf.text.pdf.PdfWriter;
|
|
import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
|
|
import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
|
|
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
|
|
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
|
|
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
|
|
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
|
|
import org.apache.poi.hwpf.HWPFDocumentCore;
|
|
import org.apache.poi.hwpf.HWPFDocumentCore;
|
|
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
|
|
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
|
|
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
|
|
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
|
|
|
|
+import org.apache.poi.hwpf.extractor.WordExtractor;
|
|
import org.apache.poi.hwpf.usermodel.Picture;
|
|
import org.apache.poi.hwpf.usermodel.Picture;
|
|
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
|
|
|
|
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
|
|
|
+import org.apache.poi.xwpf.usermodel.*;
|
|
|
|
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
|
|
import org.w3c.dom.Document;
|
|
import org.w3c.dom.Document;
|
|
import org.w3c.dom.Element;
|
|
import org.w3c.dom.Element;
|
|
|
|
|
|
|
|
+import javax.servlet.ServletOutputStream;
|
|
|
|
+import javax.servlet.http.HttpServletResponse;
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
import javax.xml.transform.OutputKeys;
|
|
import javax.xml.transform.OutputKeys;
|
|
import javax.xml.transform.Transformer;
|
|
import javax.xml.transform.Transformer;
|
|
import javax.xml.transform.TransformerFactory;
|
|
import javax.xml.transform.TransformerFactory;
|
|
import javax.xml.transform.dom.DOMSource;
|
|
import javax.xml.transform.dom.DOMSource;
|
|
import javax.xml.transform.stream.StreamResult;
|
|
import javax.xml.transform.stream.StreamResult;
|
|
-import java.io.ByteArrayOutputStream;
|
|
|
|
-import java.io.IOException;
|
|
|
|
-import java.io.InputStream;
|
|
|
|
|
|
+import java.io.*;
|
|
|
|
+import java.math.BigInteger;
|
|
|
|
+import java.net.URL;
|
|
import java.util.Base64;
|
|
import java.util.Base64;
|
|
|
|
+import java.util.Date;
|
|
|
|
+import java.util.Iterator;
|
|
|
|
+import java.util.List;
|
|
|
|
|
|
/**
|
|
/**
|
|
* POIExcelToHtml 文件转换:
|
|
* POIExcelToHtml 文件转换:
|
|
@@ -79,7 +90,7 @@ public class POIWordToHtml {
|
|
String htmlData = "预览失败";
|
|
String htmlData = "预览失败";
|
|
try{
|
|
try{
|
|
XWPFDocument docxDocument = new XWPFDocument(inputStream);
|
|
XWPFDocument docxDocument = new XWPFDocument(inputStream);
|
|
- XHTMLOptions options = XHTMLOptions.create();
|
|
|
|
|
|
+ XHTMLOptions options = XHTMLOptions.create().indent(4);
|
|
//获取文档中的图片
|
|
//获取文档中的图片
|
|
//List<XWPFPictureData> allPictures = docxDocument.getAllPictures();
|
|
//List<XWPFPictureData> allPictures = docxDocument.getAllPictures();
|
|
//for (XWPFPictureData xwpfPictureData : allPictures) {
|
|
//for (XWPFPictureData xwpfPictureData : allPictures) {
|
|
@@ -113,6 +124,7 @@ public class POIWordToHtml {
|
|
return htmlData;
|
|
return htmlData;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* 图片处理
|
|
* 图片处理
|
|
*/
|
|
*/
|
|
@@ -132,4 +144,309 @@ public class POIWordToHtml {
|
|
currentBlock.appendChild(imgNode);
|
|
currentBlock.appendChild(imgNode);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 读取word中的文本内容(段落、表格、图片分开处理)转HTML docx后缀名的Word
|
|
|
|
+ * @param
|
|
|
|
+ * @throws IOException
|
|
|
|
+ */
|
|
|
|
+ public String readWordImgToHtml(InputStream inputStream) throws IOException{
|
|
|
|
+ String htmlData = "预览失败";
|
|
|
|
+ XWPFDocument document = new XWPFDocument(inputStream);
|
|
|
|
+ String htmlText="";
|
|
|
|
+ try {
|
|
|
|
+ // 获取word中的所有段落与表格
|
|
|
|
+ List<IBodyElement> elements = document.getBodyElements();
|
|
|
|
+ for (IBodyElement element : elements) {
|
|
|
|
+ // 段落
|
|
|
|
+ if (element instanceof XWPFParagraph) {
|
|
|
|
+ htmlText+=getParagraphHtmlText((XWPFParagraph) element);
|
|
|
|
+ }
|
|
|
|
+ // 表格
|
|
|
|
+ else if (element instanceof XWPFTable) {
|
|
|
|
+ htmlText+=getTabelHtmlText((XWPFTable) element);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ XHTMLOptions options = XHTMLOptions.create().indent(4);
|
|
|
|
+ options.setExtractor(null);
|
|
|
|
+ options.setIgnoreStylesIfUnused(false);
|
|
|
|
+ options.setFragment(true);
|
|
|
|
+ options.setImageManager(new Base64EmbedImgManager());
|
|
|
|
+ ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();
|
|
|
|
+
|
|
|
|
+ XHTMLConverter.getInstance().convert(document, htmlStream, options);
|
|
|
|
+ htmlData = htmlStream.toString();
|
|
|
|
+ htmlStream.close();
|
|
|
|
+ //获取word中的所有图片
|
|
|
|
+// List<XWPFPictureData> picLists= document.getAllPictures();
|
|
|
|
+// for(XWPFPictureData pic:picLists){
|
|
|
|
+// System.out.println("图片名称:\t" + pic.getFileName());
|
|
|
|
+// System.out.println("图片类型:\t" + pic.getPictureType());
|
|
|
|
+// byte[] data = pic.getData();
|
|
|
|
+// System.out.println(data);
|
|
|
|
+// //字节流图片上传,并返回服务器地址
|
|
|
|
+// String imgUrl = getImageUrl(data, pic.getFileName());
|
|
|
|
+// System.out.println("图片服务器地址:"+imgUrl);
|
|
|
|
+// //组装img
|
|
|
|
+// htmlText+="<p><img alt='' src='"+imgUrl+"'></p>";
|
|
|
|
+ }
|
|
|
|
+ catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ return htmlData;
|
|
|
|
+ }
|
|
|
|
+ /**
|
|
|
|
+ * 获取段落内容并组装段落HTML
|
|
|
|
+ * @param paragraph
|
|
|
|
+ */
|
|
|
|
+ private static String getParagraphHtmlText(XWPFParagraph paragraph) {
|
|
|
|
+ // 获取段落中所有内容
|
|
|
|
+ List<XWPFRun> runs = paragraph.getRuns();
|
|
|
|
+ if (runs.size() == 0) {
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
|
|
+ StringBuffer runText = new StringBuffer();
|
|
|
|
+ for (XWPFRun run : runs) {
|
|
|
|
+ runText.append(run.text());
|
|
|
|
+ }
|
|
|
|
+ return "<p style='margin:unset;text-align:"+paragraph.getAlignment().name()+"'>"+runText.toString()+"</p>";
|
|
|
|
+ }
|
|
|
|
+ /**
|
|
|
|
+ * 获取表格内容并组装表格HTML
|
|
|
|
+ * @param table
|
|
|
|
+ */
|
|
|
|
+ private static String getTabelHtmlText(XWPFTable table) {
|
|
|
|
+ String result="";
|
|
|
|
+ //获取表格数据行
|
|
|
|
+ List<XWPFTableRow> rows = table.getRows();
|
|
|
|
+
|
|
|
|
+ if(rows.size()>0){
|
|
|
|
+ result+="<table border='1' cellspacing=0 style='border-collapse: collapse;'>";
|
|
|
|
+ //遍历
|
|
|
|
+ for (int i=0;i<rows.size();i++) {
|
|
|
|
+ XWPFTableRow row = rows.get(i);
|
|
|
|
+ result+="<tr style='font-weight: bold;'>";
|
|
|
|
+ //获取每行的数据列
|
|
|
|
+ List<XWPFTableCell> cells = row.getTableCells();
|
|
|
|
+ for (XWPFTableCell cell : cells) {
|
|
|
|
+ //获取单元格跨列个数
|
|
|
|
+ BigInteger gridSpanNum = getCellGridSpanNum(cell);
|
|
|
|
+
|
|
|
|
+ result+="<td colspan="+gridSpanNum+" valign=center style='text-align: center;vertical-align: middle;'>";
|
|
|
|
+ String cellText="";
|
|
|
|
+ // 简单获取内容(简单方式是不能获取字体对齐方式的)
|
|
|
|
+ // System.out.println(cell.getText());
|
|
|
|
+ // 一个单元格可以理解为一个word文档,单元格里也可以加段落与表格
|
|
|
|
+ List<XWPFParagraph> paragraphs = cell.getParagraphs();
|
|
|
|
+ for (XWPFParagraph paragraph : paragraphs) {
|
|
|
|
+ cellText+="<p style='margin: unset;text-align:"+paragraph.getAlignment().name()+"'>"+getParagraphText(paragraph)+"</p>";
|
|
|
|
+ }
|
|
|
|
+ result+=cellText;
|
|
|
|
+ result+="</td>";
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ result+="</tr>";
|
|
|
|
+ }
|
|
|
|
+ result+="</table>";
|
|
|
|
+ }
|
|
|
|
+ return result;
|
|
|
|
+ }
|
|
|
|
+ /**
|
|
|
|
+ * 获取段落内容( docx后缀名的Word)
|
|
|
|
+ * @param paragraph
|
|
|
|
+ */
|
|
|
|
+ private static String getParagraphText(XWPFParagraph paragraph) {
|
|
|
|
+ // 获取段落中所有内容
|
|
|
|
+ List<XWPFRun> runs = paragraph.getRuns();
|
|
|
|
+ if (runs.size() == 0) {
|
|
|
|
+ //System.out.println("按了回车(新段落)");
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
|
|
+ StringBuffer runText = new StringBuffer();
|
|
|
|
+ for (XWPFRun run : runs) {
|
|
|
|
+ runText.append(run.text());
|
|
|
|
+ }
|
|
|
|
+// if (runText.length() > 0) {
|
|
|
|
+// runText.append(",对齐方式:").append(paragraph.getAlignment().name());
|
|
|
|
+// System.out.println(runText);
|
|
|
|
+// }
|
|
|
|
+ return runText.toString();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 字节流图片上传
|
|
|
|
+ * @param data:图片字节流
|
|
|
|
+ * @param fileName:图片名称
|
|
|
|
+ */
|
|
|
|
+ public static String getImageUrl(byte[] data,String fileName) throws Exception{
|
|
|
|
+ String imgUrl="";
|
|
|
|
+ Long res =new Date().getTime();
|
|
|
|
+ //设置文件存储路径,可以存放在你想要指定的路径里面
|
|
|
|
+ String rootPath="D:/mimi/"+File.separator+"upload/images/";
|
|
|
|
+ // 新文件名
|
|
|
|
+ String newFileName =res + fileName.substring(fileName.lastIndexOf("."));
|
|
|
|
+ //新文件
|
|
|
|
+ File newFile=new File(rootPath+File.separator+newFileName);
|
|
|
|
+ //判断文件目录是否存在
|
|
|
|
+ if(!newFile.getParentFile().exists()){
|
|
|
|
+ //如果目标文件所在的目录不存在,则创建父目录
|
|
|
|
+ newFile.getParentFile().mkdirs();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ //-------把图片文件写入磁盘 start ----------------
|
|
|
|
+ FileOutputStream fos = new FileOutputStream(newFile);
|
|
|
|
+ fos.write(data);
|
|
|
|
+ fos.close();
|
|
|
|
+ //-------把图片文件写入磁盘 end ----------------
|
|
|
|
+ //服务器图片地址
|
|
|
|
+ String baseURL = "http://192.168.0.76:8080/mimi/upload/images/";
|
|
|
|
+ imgUrl=baseURL+newFileName;
|
|
|
|
+
|
|
|
|
+ return imgUrl;
|
|
|
|
+ }
|
|
|
|
+ /**
|
|
|
|
+ * 获取单元格跨列个数
|
|
|
|
+ * @return
|
|
|
|
+ */
|
|
|
|
+ public static BigInteger getCellGridSpanNum(XWPFTableCell cell){
|
|
|
|
+ BigInteger gridSpanNum =null;
|
|
|
|
+ //获取单元格跨列
|
|
|
|
+ CTDecimalNumber gridSpanXml = cell.getCTTc().getTcPr().getGridSpan();
|
|
|
|
+ if(gridSpanXml!=null){
|
|
|
|
+ gridSpanNum = gridSpanXml.getVal();
|
|
|
|
+ System.out.println("gridSpanNum:"+gridSpanNum);
|
|
|
|
+ }
|
|
|
|
+ return gridSpanNum;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static byte[] docxToPdf(InputStream src) {
|
|
|
|
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
|
|
+ byte[] resBytes = null;
|
|
|
|
+ String result;
|
|
|
|
+ try {
|
|
|
|
+ // pdf文件的尺寸
|
|
|
|
+ com.itextpdf.text.Document pdfDocument = new com.itextpdf.text.Document(PageSize.A3, 72, 72, 72, 72);
|
|
|
|
+ PdfWriter pdfWriter = PdfWriter.getInstance(pdfDocument, baos);
|
|
|
|
+ XWPFDocument doc = new XWPFDocument(src);
|
|
|
|
+ pdfWriter.setInitialLeading(20);
|
|
|
|
+ java.util.List<XWPFParagraph> plist = doc.getParagraphs();
|
|
|
|
+ pdfWriter.open();
|
|
|
|
+ pdfDocument.open();
|
|
|
|
+ for (int i = 0; i < plist.size(); i++) {
|
|
|
|
+ XWPFParagraph pa = plist.get(i);
|
|
|
|
+ java.util.List<XWPFRun> runs = pa.getRuns();
|
|
|
|
+ for (int j = 0; j < runs.size(); j++) {
|
|
|
|
+ XWPFRun run = runs.get(j);
|
|
|
|
+ java.util.List<XWPFPicture> piclist = run.getEmbeddedPictures();
|
|
|
|
+ Iterator<XWPFPicture> iterator = piclist.iterator();
|
|
|
|
+ while (iterator.hasNext()) {
|
|
|
|
+ XWPFPicture pic = iterator.next();
|
|
|
|
+ XWPFPictureData picdata = pic.getPictureData();
|
|
|
|
+ byte[] bytepic = picdata.getData();
|
|
|
|
+ Image imag = Image.getInstance(bytepic);
|
|
|
|
+ pdfDocument.add(imag);
|
|
|
|
+ }
|
|
|
|
+ // 中文字体的解决
|
|
|
|
+ BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
|
|
|
|
+ Font font = new Font(bf, 11.0f, Font.NORMAL, BaseColor.BLACK);
|
|
|
|
+ String text = run.getText(-1);
|
|
|
|
+ byte[] bs;
|
|
|
|
+ if (text != null) {
|
|
|
|
+ bs = text.getBytes();
|
|
|
|
+ String str = new String(bs);
|
|
|
|
+ Chunk chObj1 = new Chunk(str, font);
|
|
|
|
+ pdfDocument.add(chObj1);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ pdfDocument.add(new Chunk(Chunk.NEWLINE));
|
|
|
|
+ }
|
|
|
|
+ //需要关闭,不然无法获取到输出流
|
|
|
|
+ pdfDocument.close();
|
|
|
|
+ pdfWriter.close();
|
|
|
|
+ resBytes = baos.toByteArray();
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ }finally {
|
|
|
|
+ try{
|
|
|
|
+ if(baos != null){
|
|
|
|
+ baos.close();
|
|
|
|
+ }
|
|
|
|
+ }catch (IOException e){
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return resBytes;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * PDF 转 HTML
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 把输入流里面的内容以UTF-8编码当文本取出。
|
|
|
|
+ * 不考虑异常,直接抛出
|
|
|
|
+ * @param ises
|
|
|
|
+ * @return
|
|
|
|
+ * @throws IOException
|
|
|
|
+ */
|
|
|
|
+ /**
|
|
|
|
+ * 调用pdf2htmlEX将pdf文件转换为html文件
|
|
|
|
+ *
|
|
|
|
+ * @param exeFilePath
|
|
|
|
+ * pdf2htmlEX.exe文件路径
|
|
|
|
+ * @param pdfFile
|
|
|
|
+ * pdf文件绝对路径
|
|
|
|
+ * @param [destDir] 生成的html文件存放路径
|
|
|
|
+ * @param htmlName
|
|
|
|
+ * 生成的html文件名称
|
|
|
|
+ * @return
|
|
|
|
+ */
|
|
|
|
+ public static boolean pdf2html(String exeFilePath, String pdfFile,
|
|
|
|
+ String destDir, String htmlFileName) {
|
|
|
|
+ if (!(exeFilePath != null && !"".equals(exeFilePath) && pdfFile != null
|
|
|
|
+ && !"".equals(pdfFile) && htmlFileName != null && !""
|
|
|
|
+ .equals(htmlFileName))) {
|
|
|
|
+ System.out.println("传递的参数有误!");
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+ Runtime rt = Runtime.getRuntime();
|
|
|
|
+ StringBuilder command = new StringBuilder();
|
|
|
|
+ command.append(exeFilePath).append(" ");
|
|
|
|
+ if (destDir != null && !"".equals(destDir.trim()))// 生成文件存放位置,需要替换文件路径中的空格
|
|
|
|
+ command.append("--dest-dir ").append(destDir.replace(" ", "\" \""))
|
|
|
|
+ .append(" ");
|
|
|
|
+ command.append("--optimize-text 1 ");// 尽量减少用于文本的HTML元素的数目 (default: 0)
|
|
|
|
+ command.append("--zoom 1.4 ");
|
|
|
|
+ command.append("--process-outline 0 ");// html中显示链接:0——false,1——true
|
|
|
|
+ command.append("--font-format woff ");// 嵌入html中的字体后缀(default ttf)
|
|
|
|
+ // ttf,otf,woff,svg
|
|
|
|
+ command.append(pdfFile.replace(" ", "\" \"")).append(" ");// 需要替换文件路径中的空格
|
|
|
|
+ if (htmlFileName != null && !"".equals(htmlFileName.trim())) {
|
|
|
|
+ command.append(htmlFileName);
|
|
|
|
+ if (htmlFileName.indexOf(".html") == -1)
|
|
|
|
+ command.append(".html");
|
|
|
|
+ }
|
|
|
|
+ try {
|
|
|
|
+ Process p = rt.exec(command.toString());
|
|
|
|
+ StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(),
|
|
|
|
+ "ERROR");
|
|
|
|
+ // 开启屏幕标准错误流
|
|
|
|
+ errorGobbler.start();
|
|
|
|
+ StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(),
|
|
|
|
+ "STDOUT");
|
|
|
|
+ // 开启屏幕标准输出流
|
|
|
|
+ outGobbler.start();
|
|
|
|
+ int w = p.waitFor();
|
|
|
|
+ int v = p.exitValue();
|
|
|
|
+ if (w == 0 && v == 0) {
|
|
|
|
+ return true;
|
|
|
|
+ }
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
}
|
|
}
|