doc或docx(word)或image类型文件批量转PDF脚本
1.实际生产环境中遇到文件展示只能适配PDF版本的文件,奈何一万个文件有七千个都是word或者image类型的,由此搞个脚本批量转换下上传至OSS,为前端提供数据支撑。
2.环境准备,这里使用的是aspose-words-18.6-jdk16-crack.jar工具包,资源包就不提供了,网上百度一下即可。
3.Java Maven项目,环境为 JDK 1.8、Maven 3.6。

4.使用aspose-words-18.6-jdk16-crack.jar工具包转换出的文件会带水印,需要在resources目录下添加去除水印的配置文件license.xml:
  - <?xml version="1.0" encoding="UTF-8" ?>
- <License>
- <Data>
- <Products>
- <Product>Aspose.Total for Java</Product>
- <Product>Aspose.Words for Java</Product>
- </Products>
- <EditionType>Enterprise</EditionType>
- <SubscriptionExpiry>20991231</SubscriptionExpiry>
- <LicenseExpiry>20991231</LicenseExpiry>
- <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>
- </Data>
- <Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature>
- </License>
license.xml
5.工具类编写:
  - package org.utiles.dongl.tools;
- import com.aspose.words.License;
- import com.aspose.words.SaveFormat;
- import com.itextpdf.text.*;
- import com.itextpdf.text.pdf.PdfWriter;
- import org.apache.log4j.Logger;
- import org.utiles.dongl.comment.WordTranPDF;
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.util.*;
- import java.util.List;
- /**
- * @ClassName: FileTranPDFTool
- * @Description TODO
- * @Author: 东霖
- * @Date: 2022/7/23 10:50
- * @Version 1.0
- **/
- public class FileTranPDFTool {
- private static Logger logger = Logger.getLogger(FileTranPDFTool.class);
- public static boolean getLicense() {
- boolean result = false;
- try {
- InputStream is = WordTranPDF.class.getClassLoader().getResourceAsStream("\\license.xml"); // license.xml应放在..\WebRoot\WEB-INF\classes路径下
- License aposeLic = new License();
- aposeLic.setLicense(is);
- result = true;
- } catch (Exception e) {
- e.printStackTrace();
- }
- return result;
- }
- /**
- * ImageToPDF
- * 支持类型:jpg/tif/..
- *
- * @param source
- * @param target
- */
- public static void ImageToPDF(String source, String target) {
- Document document = new Document();
- //设置文档页边距
- document.setMargins(0, 0, 0, 0);
- FileOutputStream fos = null;
- try {
- fos = new FileOutputStream(target);
- PdfWriter.getInstance(document, fos);
- //打开文档
- document.open();
- //获取图片的宽高
- Image image = Image.getInstance(source);
- float imageHeight = image.getScaledHeight();
- float imageWidth = image.getScaledWidth();
- //设置页面宽高与图片一致
- Rectangle rectangle = new Rectangle(imageWidth, imageHeight);
- document.setPageSize(rectangle);
- //图片居中
- image.setAlignment(Image.ALIGN_CENTER);
- //新建一页添加图片
- document.newPage();
- document.add(image);
- } catch (Exception ioe) {
- System.out.println(ioe.getMessage());
- } finally {
- //关闭文档
- document.close();
- try {
- fos.flush();
- fos.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- /**
- * word 文档类型转pdf
- *
- * @param inPath
- * @param outPath
- * @return
- */
- public static boolean doc2pdf(String inPath, String outPath) {
- if (!getLicense()) { // 验证License 若不验证则转化出的pdf文档会有水印产生
- return false;
- }
- FileOutputStream os = null;
- try {
- File file = new File(outPath); // 新建一个空白pdf文档
- os = new FileOutputStream(file);
- com.aspose.words.Document doc = new com.aspose.words.Document(inPath); // Address是将要被转化的word文档
- // doc.save(os, SaveFormat.PDF);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
- doc.save(os, SaveFormat.DOCX);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
- // EPUB, XPS, SWF 相互转换
- } catch (Exception e) {
- e.printStackTrace();
- return false;
- } finally {
- if (os != null) {
- try {
- os.flush();
- os.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- return true;
- }
- /**
- * 遍历指定目录取文件名称
- *
- * @param foldPath 文件目录绝对路径
- * @return
- */
- public static List<String> listFileName(String foldPath) {
- List<String> listFiles = new ArrayList<>();
- //创建文件对象
- File f = new File(foldPath);
- //列出文件名称存入数组
- File[] files = f.listFiles();
- for (int i = 0; i < Objects.requireNonNull(files).length; i++) {
- listFiles.add(files[i].getName());
- }
- return listFiles;
- }
- /**
- * 删除指定文件
- * @param filePath
- * @return
- */
- public static boolean deleteByFilePath(String filePath) {
- File file = new File(filePath);
- return file.delete();
- }
- /**
- * 遍历指定目录取文件名称并接入路径
- *
- * @param oldPath 遍历文件目录绝对路径,也是要删除的文件目录
- * @return
- */
- public static Map<String, String> listFileNameAndPath(String oldPath) {
- Map<String, String> listFiles = new HashMap();
- //创建文件对象
- File f = new File(oldPath);
- //列出文件名称存入数组
- File[] files = f.listFiles();
- for (int i = 0; i < Objects.requireNonNull(files).length; i++) {
- listFiles.put(files[i].getPath(), files[i].getName());
- }
- return listFiles;
- }
- /**
- * 获取指定文件目录文件大小为0Size的
- * @param foldPath
- * @return
- */
- public static Integer getFileSize(String foldPath,String newFoldPath) {
- int j=1;
- //创建文件对象
- File file = new File(foldPath);
- File[] files = file.listFiles();
- for (int i = 0; i < files.length; i++) {
- if (files[i].length()==0){
- Boolean aBoolean = WriteToFileExample.moveFileToTarget("D:\\OSS\\ghwb\\ghksj_1_copy\\《金东区卫生健康事业发展“十四五”规划》.pdf", newFoldPath+files[i].getName(),null);
- if (aBoolean==true){
- j++;
- logger.info("移动:"+files[i].getPath()+"到"+newFoldPath);
- }
- System.out.println(files[i].getPath());
- }
- }
- return j;
- }
- /**
- * 文件对比删除重复文件
- * @param oldFileNames
- * @param newPath 对比文件目录
- * @return
- */
- public static Integer deleteByFileName(Map<String, String> oldFileNames, String newPath) {
- int j = 0;
- List<String> newListNames = listFileName(newPath);
- for (Map.Entry<String, String> entry : oldFileNames.entrySet()) {
- for (int i = 0; i < newListNames.size(); i++) {
- String value = entry.getValue();
- String s = newListNames.get(i);
- if (value.substring(0,value.lastIndexOf(".")).equals(s.substring(0,s.lastIndexOf(".")))) {
- boolean b = deleteByFilePath(entry.getKey());
- if (b==true){
- logger.info("成功删除指定文件:"+entry.getKey()+",共计:"+j+"个");
- j++;
- }else{
- logger.error("指定文件不存在:"+entry.getKey());
- }
- }
- }
- }
- return j;
- }
- public static void main(String[] args) {
- //文件对比删除
- Map<String, String> map = listFileNameAndPath("D:\\OSS\\ghwb\\word");
- int b = deleteByFileName(map, "D:\\OSS\\ghwb\\ghksj - 副本");
- //word转pdf
- doc2pdf("D:\\OSS\\ghwb\\13c5ad939a0b2001.doc",
- "D:\\OSS\\ghwb\\doc2docx\\13c5ad939a0b2001.docx");
- //移动文件size为0的数据到指定文件夹
- // getFileSize("D:\\OSS\\ghwb\\ghksj_3_copy","D:\\OSS\\ghwb\\test");
- }
- }
WordORImageTranPDF
6.逻辑代码:
  - package org.utiles.dongl.comment;
- import org.apache.log4j.Logger;
- import org.utiles.dongl.tools.FileTranPDFTool;
- import org.utiles.dongl.tools.WriteToFileExample;
- import java.io.*;
- import java.util.HashMap;
- import java.util.Map;
- import static org.utiles.dongl.tools.FileTranPDFTool.doc2pdf;
- /**
- * @ClassName: WordTranPDF
- * @Description TODO
- * @Author: 东霖
- * @Date: 2022/7/22 8:55
- * @Version 1.0
- **/
- public class WordTranPDF {
- private static Logger logger = Logger.getLogger(WordTranPDF.class);
- /**
- * 获取指定文件路径下所有文件对象
- *
- * @param inFilePath
- * @return
- */
- public static Map<String, String> getFilePathName(String inFilePath,String replacePathOld
- ,String replacePathNew,String wjjl,String pdfToPath) {
- Map<String, String> fileList = new HashMap();
- //创建文件对象
- File f = new File(inFilePath);
- //列出文件名称存入数组
- File[] files = f.listFiles();
- for (int i = 0; i < files.length; i++) {
- if (files[i].getName().endsWith("docx") || files[i].getName().endsWith("doc")
- || files[i].getName().endsWith("wps") || files[i].getName().endsWith("rtf"))
- {
- // String str=files[i].getPath().substring(0,files[i].getPath().lastIndexOf(".")+1)+"pdf";
- String str=files[i].getPath().substring(0,files[i].getPath().lastIndexOf(".")+1)+"docx";
- fileList.put(files[i].getPath()+"&"+"word",str.replace(replacePathOld,replacePathNew));
- // logger.info("当前文件路径为:"+files[i].getPath());
- } else if (files[i].getName().endsWith(".png") || files[i].getName().endsWith(".jpg") || files[i].getName().endsWith(".gif")
- || files[i].getName().endsWith(".jpeg") || files[i].getName().endsWith(".tif"))
- {
- String str=files[i].getPath().substring(0,files[i].getPath().lastIndexOf(".")+1)+"pdf";
- fileList.put(files[i].getPath()+"&"+"image", str.replace(replacePathOld,replacePathNew));
- // logger.info("当前文件路径为:"+files[i].getPath());
- }else if(files[i].getName().endsWith(".pdf")) {
- WriteToFileExample.moveFileToTarget(files[i].getPath(),pdfToPath+files[i].getName(),"");
- logger.info("移动:"+files[i].getPath()+"到"+pdfToPath);
- }else{
- WriteToFileExample.writeFileSQL("当前文件无法转换:"+files[i].getPath(),wjjl);
- }
- }
- return fileList;
- }
- public static void start(Map<String, String> hashMap) throws InterruptedException {
- long old = System.currentTimeMillis();
- int j = 0;
- for (Map.Entry<String, String> entry : hashMap.entrySet()) {
- // doc2pdf(entry.getKey(),entry.getValue());
- String[] split = entry.getKey().split("&");
- if(split[1].equals("word")){
- System.out.println(entry.getValue());
- doc2pdf(split[0],entry.getValue());
- Thread.sleep(Long.parseLong("15"));
- }else if (split[1].equals("image")){
- FileTranPDFTool.ImageToPDF(split[0],entry.getValue());
- Thread.sleep(Long.parseLong("15"));
- }else {
- // break;
- }
- j++;
- logger.info("转换第:"+j+"个!"+"文件名称为:"+entry.getKey());
- }
- long now = System.currentTimeMillis();
- logger.info("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒");
- logger.info("共转换:" + j + "个文件!");
- }
- public static void main(String[] args) throws InterruptedException {
- /**
- * inFilePath: 需要转换的文件夹路径
- * replacePathOld: 抓换后的文件要写入新文件,直接替换文件的上级目录关键字即可
- * replacePathNew: 新的文件父路径
- * wjjl: 不能转换的文件记录位置及记录名称
- * pdfToPath:当文件中已有pdf不用抓换的需配置文件留存方向。会从原文件目录移动至新文件目录
- */
- Map<String, String> filePathName = getFilePathName("D:\\OSS\\ghwb\\doc11",
- "doc11","doc2docx",
- "D:\\OSS\\ghwb\"+System.currentTimeMillis()+".txt"
- ,"D:\\OSS\\yjbg\\gjxxzx\\ghksj_copy\");
- start(filePathName);
- }
- }
7.上述就是word或者image类型文件批量转PDF的脚本,可以先在工具类中完成单元测试,再使用批量逻辑代码。
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作! |