Parcourir la source

1.对word预览格式进行了优化,可显示表格全貌

zhangym il y a 2 ans
Parent
commit
86a0f8e8de

+ 8 - 5
src/main/java/com/steerinfo/ftp/uploadfile/utils/FtpFileUtil.java

@@ -5,11 +5,9 @@ import com.steerinfo.framework.utils.text.Charsets;
 import org.apache.commons.net.ftp.*;
 import org.springframework.stereotype.Component;
 
+import javax.servlet.ServletOutputStream;
 import javax.servlet.http.HttpServletResponse;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
+import java.io.*;
 import java.util.Base64;
 
 /**
@@ -194,6 +192,8 @@ public class FtpFileUtil {
     public String downloadFile(String fileName, String filePath) throws IOException {
         InputStream inputStream;
         String data = fileName + "预览失败";
+        String workhtml = null;
+        File temp = File.createTempFile("temp",".temp");
         try {
             this.connectToServer();
             // 设置传输二进制文件
@@ -214,14 +214,17 @@ public class FtpFileUtil {
             String file = filePath.substring(filePath.lastIndexOf("/")+1);
             // 检验文件是否存在
             inputStream =ftpClient.retrieveFileStream(file);
+
             String suffixName;
             if(inputStream != null){
                 if (fileName != null && fileName.contains(SPOT)) {
                     POIWordToHtml poiWordToHtml = new POIWordToHtml();
                     suffixName = fileName.substring(fileName.indexOf(".")+1);
                     switch (suffixName) {
-                        case "docx": data = poiWordToHtml.docxToHtml(inputStream);break;
+                        case "docx": data = poiWordToHtml.readWordImgToHtml(inputStream);
+                        break;
                         case "doc" : data = poiWordToHtml.docToHtml(inputStream); break;
+
                         case "xlsx":
                         case "xls" :
                             data = POIExcelToHtml.excelToHtml(inputStream);break;

+ 322 - 5
src/main/java/com/steerinfo/ftp/uploadfile/utils/POIWordToHtml.java

@@ -1,26 +1,37 @@
 package com.steerinfo.ftp.uploadfile.utils;
 
+import com.itextpdf.text.*;
+import com.itextpdf.text.pdf.BaseFont;
+import com.itextpdf.text.pdf.PdfWriter;
 import fr.opensagres.poi.xwpf.converter.xhtml.Base64EmbedImgManager;
 import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
 import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
 import org.apache.poi.hwpf.HWPFDocumentCore;
 import org.apache.poi.hwpf.converter.WordToHtmlConverter;
 import org.apache.poi.hwpf.converter.WordToHtmlUtils;
+import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.hwpf.usermodel.Picture;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.*;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 
+import javax.servlet.ServletOutputStream;
+import javax.servlet.http.HttpServletResponse;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
+import java.math.BigInteger;
+import java.net.URL;
 import java.util.Base64;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.List;
 
 /**
  * POIWordToHtml 文件转换:
@@ -79,7 +90,7 @@ public class POIWordToHtml {
         String htmlData = "预览失败";
         try{
         XWPFDocument docxDocument = new XWPFDocument(inputStream);
-        XHTMLOptions options = XHTMLOptions.create();
+        XHTMLOptions options = XHTMLOptions.create().indent(4);
             //获取文档中的图片
         //List<XWPFPictureData> allPictures = docxDocument.getAllPictures();
         //for (XWPFPictureData xwpfPictureData : allPictures) {
@@ -113,6 +124,7 @@ public class POIWordToHtml {
         return htmlData;
     }
 
+
     /**
      * 图片处理
      */
@@ -132,4 +144,309 @@ public class POIWordToHtml {
             currentBlock.appendChild(imgNode);
         }
     }
+
+
+    /**
+     * 读取word中的文本内容(段落、表格、图片分开处理)转HTML docx后缀名的Word
+     * @param
+     * @throws IOException
+     */
+    public  String readWordImgToHtml(InputStream inputStream) throws IOException{
+        String htmlData = "预览失败";
+        XWPFDocument document = new XWPFDocument(inputStream);
+        String htmlText="";
+        try {
+            // 获取word中的所有段落与表格
+            List<IBodyElement> elements = document.getBodyElements();
+            for (IBodyElement element : elements) {
+                // 段落
+                if (element instanceof XWPFParagraph) {
+                    htmlText+=getParagraphHtmlText((XWPFParagraph) element);
+                }
+                // 表格
+                else if (element instanceof XWPFTable) {
+                    htmlText+=getTabelHtmlText((XWPFTable) element);
+                }
+            }
+
+
+            XHTMLOptions options = XHTMLOptions.create().indent(4);
+            options.setExtractor(null);
+            options.setIgnoreStylesIfUnused(false);
+            options.setFragment(true);
+            options.setImageManager(new Base64EmbedImgManager());
+            ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();
+
+            XHTMLConverter.getInstance().convert(document, htmlStream, options);
+            htmlData = htmlStream.toString();
+            htmlStream.close();
+            //获取word中的所有图片
+//            List<XWPFPictureData> picLists= document.getAllPictures();
+//            for(XWPFPictureData pic:picLists){
+//                System.out.println("图片名称:\t" + pic.getFileName());
+//                System.out.println("图片类型:\t" + pic.getPictureType());
+//                byte[] data = pic.getData();
+//                System.out.println(data);
+//                //字节流图片上传,并返回服务器地址
+//                String imgUrl = getImageUrl(data, pic.getFileName());
+//                System.out.println("图片服务器地址:"+imgUrl);
+//                //组装img
+//                htmlText+="<p><img alt='' src='"+imgUrl+"'></p>";
+            }
+        catch (Exception e) {
+            e.printStackTrace();
+        }
+        return  htmlData;
+    }
+    /**
+     * Wraps the concatenated text of a paragraph's runs in a {@code <p>} element.
+     *
+     * @param paragraph the paragraph to render
+     * @return the {@code <p>} markup, or an empty string when the paragraph has no runs
+     */
+    private static String getParagraphHtmlText(XWPFParagraph paragraph) {
+        List<XWPFRun> runList = paragraph.getRuns();
+        if (runList.isEmpty()) {
+            return "";
+        }
+        StringBuilder text = new StringBuilder();
+        for (XWPFRun piece : runList) {
+            text.append(piece.text());
+        }
+        // The alignment enum constant's name is written verbatim as the text-align value.
+        String alignment = paragraph.getAlignment().name();
+        return "<p style='margin:unset;text-align:" + alignment + "'>" + text + "</p>";
+    }
+    /**
+     * Renders a .docx table as an HTML {@code <table>}.
+     *
+     * <p>Each cell becomes a {@code <td>} containing one {@code <p>} per paragraph of
+     * the cell. The {@code colspan} attribute is written only when the cell declares
+     * a grid span; previously a missing span produced the literal {@code colspan=null}.
+     *
+     * @param table the table to render
+     * @return the table markup, or an empty string when the table has no rows
+     */
+    private static String getTabelHtmlText(XWPFTable table) {
+        List<XWPFTableRow> rows = table.getRows();
+        if (rows.isEmpty()) {
+            return "";
+        }
+        // A single builder avoids re-copying the whole result on every append.
+        StringBuilder html = new StringBuilder();
+        html.append("<table border='1' cellspacing=0 style='border-collapse: collapse;'>");
+        for (XWPFTableRow row : rows) {
+            html.append("<tr style='font-weight: bold;'>");
+            for (XWPFTableCell cell : row.getTableCells()) {
+                html.append("<td");
+                BigInteger gridSpanNum = getCellGridSpanNum(cell);
+                if (gridSpanNum != null) {
+                    html.append(" colspan=").append(gridSpanNum);
+                }
+                html.append(" valign=center style='text-align: center;vertical-align: middle;'>");
+                // Paragraphs are read individually so each keeps its own alignment.
+                for (XWPFParagraph paragraph : cell.getParagraphs()) {
+                    html.append("<p style='margin: unset;text-align:")
+                            .append(paragraph.getAlignment().name())
+                            .append("'>")
+                            .append(getParagraphText(paragraph))
+                            .append("</p>");
+                }
+                html.append("</td>");
+            }
+            html.append("</tr>");
+        }
+        html.append("</table>");
+        return html.toString();
+    }
+    /**
+     * Concatenates the text of every run in a .docx paragraph.
+     *
+     * @param paragraph the paragraph to read
+     * @return the joined run text, or an empty string when the paragraph has no runs
+     */
+    private static String getParagraphText(XWPFParagraph paragraph) {
+        List<XWPFRun> runList = paragraph.getRuns();
+        StringBuilder joined = new StringBuilder();
+        // A paragraph without runs skips the loop and yields "".
+        for (XWPFRun piece : runList) {
+            joined.append(piece.text());
+        }
+        return joined.toString();
+    }
+
+    /**
+     * 字节流图片上传
+     * @param data:图片字节流
+     * @param fileName:图片名称
+     */
+    public static String getImageUrl(byte[] data,String fileName) throws Exception{
+        String imgUrl="";
+        Long res =new Date().getTime();
+        //设置文件存储路径,可以存放在你想要指定的路径里面
+        String rootPath="D:/mimi/"+File.separator+"upload/images/";
+        // 新文件名
+        String newFileName =res + fileName.substring(fileName.lastIndexOf("."));
+        //新文件
+        File newFile=new File(rootPath+File.separator+newFileName);
+        //判断文件目录是否存在
+        if(!newFile.getParentFile().exists()){
+            //如果目标文件所在的目录不存在,则创建父目录
+            newFile.getParentFile().mkdirs();
+        }
+
+        //-------把图片文件写入磁盘 start ----------------
+        FileOutputStream fos = new FileOutputStream(newFile);
+        fos.write(data);
+        fos.close();
+        //-------把图片文件写入磁盘 end ----------------
+        //服务器图片地址
+        String baseURL = "http://192.168.0.76:8080/mimi/upload/images/";
+        imgUrl=baseURL+newFileName;
+
+        return imgUrl;
+    }
+    /**
+     * Returns the number of grid columns a table cell spans.
+     *
+     * @param cell the table cell to inspect
+     * @return the declared grid span, or {@code null} when the cell has no cell
+     *         properties or declares no grid span
+     */
+    public static BigInteger getCellGridSpanNum(XWPFTableCell cell){
+        // Cells without a tcPr element previously caused a NullPointerException here.
+        if (!cell.getCTTc().isSetTcPr()) {
+            return null;
+        }
+        CTDecimalNumber gridSpanXml = cell.getCTTc().getTcPr().getGridSpan();
+        return gridSpanXml != null ? gridSpanXml.getVal() : null;
+    }
+
+    /**
+     * Builds a PDF from the paragraphs of a .docx document.
+     *
+     * <p>For every run, embedded pictures are added first, followed by the run's
+     * text; a line break is added after each paragraph. Only the paragraphs returned
+     * by {@code XWPFDocument.getParagraphs()} are processed.
+     *
+     * @param src stream from which the .docx content is read
+     * @return the PDF bytes, or {@code null} when the conversion fails
+     */
+    public static byte[] docxToPdf(InputStream src) {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        byte[] resBytes = null;
+        try {
+            // A3 page with 72pt margins on every side.
+            com.itextpdf.text.Document pdfDocument = new com.itextpdf.text.Document(PageSize.A3, 72, 72, 72, 72);
+            PdfWriter pdfWriter = PdfWriter.getInstance(pdfDocument, baos);
+            XWPFDocument doc = new XWPFDocument(src);
+            try {
+                pdfWriter.setInitialLeading(20);
+                java.util.List<XWPFParagraph> plist = doc.getParagraphs();
+                pdfWriter.open();
+                pdfDocument.open();
+                // Created on the first run and reused, instead of rebuilt for every run.
+                Font font = null;
+                for (XWPFParagraph pa : plist) {
+                    for (XWPFRun run : pa.getRuns()) {
+                        for (XWPFPicture pic : run.getEmbeddedPictures()) {
+                            byte[] bytepic = pic.getPictureData().getData();
+                            pdfDocument.add(Image.getInstance(bytepic));
+                        }
+                        if (font == null) {
+                            // CJK-capable font so Chinese text can be rendered.
+                            BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
+                            font = new Font(bf, 11.0f, Font.NORMAL, BaseColor.BLACK);
+                        }
+                        // The text is used as-is; re-encoding it through the platform
+                        // charset could only lose characters.
+                        String text = run.getText(-1);
+                        if (text != null) {
+                            pdfDocument.add(new Chunk(text, font));
+                        }
+                    }
+                    pdfDocument.add(new Chunk(Chunk.NEWLINE));
+                }
+                // The document must be closed before the bytes are complete.
+                pdfDocument.close();
+                pdfWriter.close();
+                resBytes = baos.toByteArray();
+            } finally {
+                doc.close();
+            }
+        } catch (Exception e) {
+            // Callers receive null on failure; the cause is reported rather than hidden.
+            e.printStackTrace();
+        }
+        return resBytes;
+    }
+
+
+    /**
+     * Runs the pdf2htmlEX executable to convert a PDF file to an HTML file.
+     *
+     * <p>The command is passed to the process as an argument list, so paths that
+     * contain spaces need no quoting and cannot be split into extra arguments.
+     *
+     * @param exeFilePath  path of the pdf2htmlEX executable
+     * @param pdfFile      absolute path of the PDF file to convert
+     * @param destDir      directory for the generated HTML file; skipped when null or blank
+     * @param htmlFileName name of the generated HTML file; ".html" is appended when absent
+     * @return {@code true} when the process exits with code 0; {@code false} when a
+     *         required argument is missing, the process fails, or the wait is interrupted
+     */
+    public static boolean pdf2html(String exeFilePath, String pdfFile,
+                                   String destDir, String htmlFileName) {
+        if (exeFilePath == null || "".equals(exeFilePath)
+                || pdfFile == null || "".equals(pdfFile)
+                || htmlFileName == null || "".equals(htmlFileName)) {
+            System.out.println("传递的参数有误!");
+            return false;
+        }
+        List<String> command = new java.util.ArrayList<String>();
+        command.add(exeFilePath);
+        if (destDir != null && !"".equals(destDir.trim())) {
+            command.add("--dest-dir");
+            command.add(destDir);
+        }
+        // Use as few HTML elements for text as possible (tool default: 0).
+        command.add("--optimize-text");
+        command.add("1");
+        command.add("--zoom");
+        command.add("1.4");
+        // Outline handling in the generated HTML: 0 = off, 1 = on.
+        command.add("--process-outline");
+        command.add("0");
+        // Font format embedded in the HTML (tool default: ttf).
+        command.add("--font-format");
+        command.add("woff");
+        command.add(pdfFile);
+        if (!"".equals(htmlFileName.trim())) {
+            command.add(htmlFileName.contains(".html") ? htmlFileName : htmlFileName + ".html");
+        }
+        try {
+            Process p = new ProcessBuilder(command).start();
+            // StreamGobbler is defined elsewhere in the project; presumably each one
+            // consumes its stream so the child process cannot block on a full pipe.
+            StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(),
+                    "ERROR");
+            errorGobbler.start();
+            StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(),
+                    "STDOUT");
+            outGobbler.start();
+            return p.waitFor() == 0;
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers can observe the interruption.
+            Thread.currentThread().interrupt();
+            e.printStackTrace();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+        return false;
+    }
+
 }