马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?立即注册
×
先说结论:市面上免费、简单且效果好的就是这种方式;在线(Online)方式测试下来不如命令行转换的效果好。Aspose.Words 和 Spire.Doc 的效果都不错,但如果只用 Word 转 PDF 这一个功能,花 3-5 万感觉不划算。
下载容器路径 https://docker.aityp.com/i/search?search=libreoffice
部署LibreOffice容器
利用Docker运行LibreOffice的无头模式(headless),提供文档转换服务:- #需要挂载输入输出路径和安装字体路径
docker run -d \
  --name libreoffice1 \
  -v /opt/libreoffice1/input:/app/input \
  -v /opt/libreoffice1/output:/app/output \
  -v /usr/share/fonts/:/usr/share/fonts/ \
  -p 3000:3000 \
  linuxserver/libreoffice:latest
# LibreOffice Online variant. Note: one setting in the container's configuration
# file has to be changed, otherwise the service is not reachable over plain HTTP.
docker run -t -d -p 9980:9980 -e "username=admin" -e "password=123456" --restart always --cap-add SYS_ADMIN libreofficeonline:telecom
复制代码 第一条命令启动一个LibreOffice容器并映射3000端口(第二条命令启动的Online版容器映射9980端口),同时将宿主机目录挂载到容器内以便文件交换。
Java调用REST API转换文档
若容器提供REST API(如libreserver/office-api),可通过Java的HTTP客户端发送请求:- package cn.zjtele.pubinfo.demo.api.controller;
- import org.apache.http.HttpEntity;
- import org.apache.http.client.config.RequestConfig;
- import org.apache.http.client.methods.CloseableHttpResponse;
- import org.apache.http.client.methods.HttpPost;
- import org.apache.http.conn.ssl.NoopHostnameVerifier;
- import org.apache.http.entity.ContentType;
- import org.apache.http.entity.mime.MultipartEntityBuilder;
- import org.apache.http.impl.client.CloseableHttpClient;
- import org.apache.http.impl.client.HttpClients;
- import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
- import org.apache.http.ssl.SSLContexts;
- import org.apache.http.util.EntityUtils;
- import org.slf4j.MDC;
- import javax.net.ssl.SSLContext;
- import java.io.File;
- import java.io.FileOutputStream;
- import java.nio.charset.StandardCharsets;
- import java.security.KeyManagementException;
- import java.security.KeyStoreException;
- import java.security.NoSuchAlgorithmException;
- import java.util.HashMap;
- import java.util.Map;
- import java.util.UUID;
- import java.util.concurrent.ConcurrentHashMap;
- import java.util.concurrent.Future;
- import java.util.concurrent.ThreadPoolExecutor;
- import java.util.concurrent.TimeUnit;
- import java.util.concurrent.TimeoutException;
- import static com.sun.javafx.runtime.async.BackgroundExecutor.getExecutor;
- public class LibreOfficeOnlineMasterConverter {
- // 正确的API端点路径(根据您的服务器配置可能需要调整)
- private static final String LOOL_CONVERT_URL = "http://localhost:9980/lool/convert-to/pdf";
- // 如果需要忽略SSL证书验证
- static SSLContext sslContext;
- static {
- try {
- sslContext = SSLContexts.custom()
- .loadTrustMaterial((chain, authType) -> true)
- .build();
- } catch (NoSuchAlgorithmException e) {
- throw new RuntimeException(e);
- } catch (KeyManagementException e) {
- throw new RuntimeException(e);
- } catch (KeyStoreException e) {
- throw new RuntimeException(e);
- }
- }
- // 在类初始化时创建共享的HttpClient
- private static final CloseableHttpClient sharedHttpClient = HttpClients.custom()
- .setSSLContext(sslContext)
- .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE)
- .setMaxConnTotal(100) // 最大连接数
- .setMaxConnPerRoute(20) // 每个路由最大连接数
- .build();
- public static void printPoolStatus() {
- ThreadPoolExecutor executor = (ThreadPoolExecutor) getExecutor();
- System.out.println("活跃线程: " + executor.getActiveCount() +
- " / 队列任务: " + executor.getQueue().size());
- }
- public static boolean convertToPdf(String inputFile, String outputFile) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
- MDC.put("traceId", UUID.randomUUID().toString().substring(0,8));
- System.out.println("开始处理文件: " + inputFile);
- // 如果需要忽略SSL证书验证
- // SSLContext sslContext = SSLContexts.custom()
- // .loadTrustMaterial((chain, authType) -> true)
- // .build();
- // 修改convertToPdf方法中的httpClient获取方式
- // CloseableHttpClient httpClient = sharedHttpClient;
- // 调整HttpClient配置,增加超时控制
- RequestConfig config = RequestConfig.custom()
- .setConnectTimeout(5000) // 连接超时5秒
- .setSocketTimeout(30000) // 数据传输超时30秒
- .build();
- CloseableHttpClient httpClient = HttpClients.custom()
- .setDefaultRequestConfig(config)
- .setConnectionManager(new PoolingHttpClientConnectionManager()) // 使用连接池
- .build();
- try {
- // 1. 创建POST请求
- HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);
- // 2. 构建Multipart请求体(尝试不同字段名)
- MultipartEntityBuilder builder = MultipartEntityBuilder.create();
- builder.addBinaryBody(
- "file", // 先尝试"file",如果失败再尝试"data"
- new File(inputFile),
- getContentType(inputFile),
- new File(inputFile).getName()
- );
- // 3. 设置必要的头信息(master分支特定头)
- httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");
- httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));
- httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));
- httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf"); // master分支特有
- httpPost.setHeader("Accept", "application/pdf");
- // 4. 添加其他可能的必要头
- httpPost.setHeader("User-Agent", "Java LibreOffice Converter");
- httpPost.setHeader("Cache-Control", "no-cache");
- httpPost.setEntity(builder.build());
- System.out.println("发送请求到: " + LOOL_CONVERT_URL);
- System.out.println("使用头信息: " + httpPost.getAllHeaders());
- // 5. 执行请求
- try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
- int statusCode = response.getStatusLine().getStatusCode();
- HttpEntity entity = response.getEntity();
- System.out.println("响应状态: " + response.getStatusLine());
- System.out.println("响应头: " + response.getAllHeaders());
- if (statusCode == 200 && entity != null) {
- try (FileOutputStream fos = new FileOutputStream(outputFile)) {
- entity.writeTo(fos);
- }
- return true;
- } else {
- String responseBody = entity != null ?
- EntityUtils.toString(entity, StandardCharsets.UTF_8) : "无响应体";
- System.err.println("转换失败. 状态码: " + statusCode);
- System.err.println("响应体: " + responseBody);
- // 如果400错误,尝试使用"data"作为字段名
- if (statusCode == 400) {
- System.out.println("尝试使用'data'作为字段名重试...");
- return retryWithDataField(inputFile, outputFile);
- }
- }
- }
- } catch (Exception e) {
- System.err.println("转换过程中发生错误: " + e.getMessage());
- e.printStackTrace();
- } finally {
- try {
- httpClient.close();
- } catch (Exception e) {
- System.err.println("关闭HTTP客户端时出错: " + e.getMessage());
- }
- }
- return false;
- }
- /**
- * 使用"data"作为字段名重试
- */
- private static boolean retryWithDataField(String inputFile, String outputFile) {
- CloseableHttpClient httpClient = HttpClients.createDefault();
- try {
- HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);
- MultipartEntityBuilder builder = MultipartEntityBuilder.create();
- builder.addBinaryBody(
- "data", // 使用"data"作为字段名
- new File(inputFile),
- getContentType(inputFile),
- new File(inputFile).getName()
- );
- // 设置相同的头信息
- httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");
- httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));
- httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));
- httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf");
- httpPost.setHeader("Accept", "application/pdf");
- httpPost.setEntity(builder.build());
- try (CloseableHttpResponse response = httpClient.execute(httpPost)) {
- if (response.getStatusLine().getStatusCode() == 200) {
- try (FileOutputStream fos = new FileOutputStream(outputFile)) {
- response.getEntity().writeTo(fos);
- }
- return true;
- }
- }
- } catch (Exception e) {
- System.err.println("重试失败: " + e.getMessage());
- }
- return false;
- }
- // 新增异步转换方法
- public static Future<Boolean> convertToPdfAsync(String inputFile, String outputFile) {
- return ConverterThreadPool.getExecutor().submit(() -> {
- try {
- return convertToPdf(inputFile, outputFile);
- } catch (Exception e) {
- System.err.println("异步任务执行异常: " + e.getMessage());
- return false;
- }
- });
- }
- // 新增批量处理方法
- public static Map<String, Future<Boolean>> batchConvert(Map<String, String> filePairs) {
- Map<String, Future<Boolean>> results = new ConcurrentHashMap<>();
- filePairs.forEach((input, output) ->
- results.put(input, convertToPdfAsync(input, output))
- );
- return results;
- }
- /**
- * 获取正确的内容类型
- */
- private static ContentType getContentType(String filePath) {
- String ext = getFileExtension(filePath).toLowerCase();
- switch (ext) {
- case "docx": return ContentType.create("application/vnd.openxmlformats-officedocument.wordprocessingml.document");
- case "doc": return ContentType.create("application/msword");
- case "odt": return ContentType.create("application/vnd.oasis.opendocument.text");
- default: return ContentType.APPLICATION_OCTET_STREAM;
- }
- }
- private static String getFileExtension(String filePath) {
- int lastDotIndex = filePath.lastIndexOf('.');
- return lastDotIndex > 0 ? filePath.substring(lastDotIndex + 1) : "";
- }
- private static String getOutputFilename(String filePath) {
- return new File(filePath).getName();
- }
- public static void main(String[] args) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
- String inputFile = "C:\\Users\\sheng\\Desktop\\chongqing.docx";
- String outputFile = "C:\\Users\\sheng\\Desktop\\chongqing.pdf";
- System.out.println("开始转换: " + inputFile + " → " + outputFile);
- boolean b = convertToPdf(inputFile, outputFile);
- System.out.println("转换结果: " + b);
- }
- }
-
复制代码 通过命令行调用容器内工具
若容器仅包含LibreOffice命令行工具,可通过Java执行Docker命令完成转换:- package cn.zjtele.pubinfo.demo.wordtopdf;
- import java.io.File;
- import java.io.IOException;
- import java.nio.file.Files;
- import java.nio.file.Path;
- import java.nio.file.Paths;
/**
 * Converts a Word document to PDF by running LibreOffice in headless mode inside
 * an already-running Docker container via {@code docker exec}.
 *
 * <p>The document is looked up in {@link #INPUT_DIR} on the local machine and is
 * addressed as {@code /app/input/<name>} inside the container; the container
 * writes to {@code /app/output}.
 * NOTE(review): this presumes the local directories are the ones mounted at those
 * container paths — confirm against the container's volume configuration.
 */
public class LibreOfficeConverter {
    private static final String INPUT_DIR = "D:/docker/input";   // local input directory
    private static final String OUTPUT_DIR = "D:/docker/output"; // local output directory

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the file name inside {@link #INPUT_DIR};
     *             defaults to {@code 11.docx} when absent
     */
    public static void main(String[] args) {
        long startMillis = System.currentTimeMillis();
        String fileName = args.length > 0 ? args[0] : "11.docx";
        Path inputFilePath = Paths.get(INPUT_DIR, fileName);
        File inputFile = inputFilePath.toFile();
        if (!inputFile.exists()) {
            System.out.println("文件不存在:" + inputFilePath);
            return;
        }
        try {
            // Make sure the output directory exists.
            Files.createDirectories(Paths.get(OUTPUT_DIR));
            Path outputFilePath = Paths.get(OUTPUT_DIR, toPdfFileName(fileName));
            convertFileUsingLibreOffice(inputFile.getAbsolutePath(), outputFilePath.toString());
            System.out.println("文件转换成功!PDF文件已保存到:" + outputFilePath);
            System.out.println("转换耗时:" + (System.currentTimeMillis() - startMillis) + "ms");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            System.out.println("文件转换失败!");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("文件转换失败!");
        }
    }

    /**
     * Derives the PDF file name for {@code fileName} by replacing only its final
     * extension with {@code .pdf}; a name without an extension gets {@code .pdf}
     * appended.
     *
     * @param fileName source file name, e.g. {@code report.docx}
     * @return the corresponding PDF name, e.g. {@code report.pdf}
     */
    static String toPdfFileName(String fileName) {
        int dot = fileName.lastIndexOf('.');
        String baseName = dot > 0 ? fileName.substring(0, dot) : fileName;
        return baseName + ".pdf";
    }

    /**
     * Runs the conversion inside the container and waits for it to finish.
     *
     * @param inputFilePath  local path of the source file; only its file name is
     *                       passed to the container
     * @param outputFilePath expected local path of the PDF; currently not used by
     *                       the command, which always writes to {@code /app/output}
     * @throws IOException          if the {@code docker} process cannot be started
     * @throws InterruptedException if interrupted while waiting for the process
     * @throws RuntimeException     if the process exits with a non-zero code
     */
    private static void convertFileUsingLibreOffice(String inputFilePath, String outputFilePath) throws IOException, InterruptedException {
        // Arguments are passed as a list, so a file name containing spaces stays one
        // argument instead of being split on whitespace.
        ProcessBuilder builder = new ProcessBuilder(
                "docker", "exec", "-i", "libreoffice767",
                "libreoffice", "--headless", "--convert-to", "pdf",
                "--outdir", "/app/output",
                "/app/input/" + new File(inputFilePath).getName()
        );
        // Forward the child's output to this process so it cannot block on a full pipe.
        builder.inheritIO();
        Process process = builder.start();
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            throw new RuntimeException("LibreOffice 转换失败,退出码:" + exitCode);
        }
    }
}
复制代码 文件路径处理注意事项
确保Java应用有权限访问宿主机和容器的挂载目录。
输入/输出路径需使用容器内的映射路径(如/app/input、/app/output)。
转换完成后从挂载目录提取PDF文件。
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息从访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。
|