一聚教程网:一个值得你收藏的教程网站

热门教程

java调用openoffice将office系列文档转换为PDF的示例方法

时间:2022-06-29 01:02:19 编辑:袖梨 来源:一聚教程网

前导:

开发过程中经常会使用java将office系列文档转换为PDF, 一般都使用开源的openoffice+jodconverter 实现文档转换(openoffice并非微软产品, 而是开源办公套件, 现由Apache基金会维护)。

openoffice既有windows版本也有linux版。不用担心生产环境是linux系统。

1、openoffice依赖jar,以maven为例:

 
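<dependency>
  <groupId>com.artofsolving</groupId>
  <artifactId>jodconverter</artifactId>
  <version>2.2.1</version>
</dependency>
<dependency>
  <groupId>org.openoffice</groupId>
  <artifactId>jurt</artifactId>
  <version>3.0.1</version>
</dependency>
<dependency>
  <groupId>org.openoffice</groupId>
  <artifactId>ridl</artifactId>
  <version>3.0.1</version>
</dependency>
<dependency>
  <groupId>org.openoffice</groupId>
  <artifactId>juh</artifactId>
  <version>3.0.1</version>
</dependency>
<dependency>
  <groupId>org.openoffice</groupId>
  <artifactId>unoil</artifactId>
  <version>3.0.1</version>
</dependency>

<!-- SLF4J 的 JDK14 日志绑定 -->
<dependency>
  <groupId>org.slf4j</groupId>
  <artifactId>slf4j-jdk14</artifactId>
  <version>1.4.3</version>
</dependency>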

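2、直接上转换代码。运行前需要先以服务方式启动openoffice并让它监听8100端口(例如:soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard), 代码连接该端口即可。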

/**
 * Converts {@code sourceFile} into {@code targetFile} through an OpenOffice
 * service listening on local port 8100. The input and output formats are
 * looked up from the two files' extensions.
 *
 * <p>A failure to connect to OpenOffice is logged and swallowed, so the
 * caller gets no exception in that case.
 *
 * @param sourceFile the document to convert
 * @param targetFile the file to write; its extension selects the output format
 */
public void convert(File sourceFile, File targetFile) {
  // 1: open the connection (declared outside the try so it can be released in finally)
  OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
  try {
    connection.connect();

    DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
    // 2: resolve the formats. DefaultDocumentFormatRegistry is used because a bare
    // BasicDocumentFormatRegistry has no formats registered, so every lookup on it
    // returns null.
    DocumentFormatRegistry factory = new DefaultDocumentFormatRegistry();
    DocumentFormat inputDocumentFormat = factory
        .getFormatByFileExtension(getExtensionName(sourceFile.getAbsolutePath()));
    DocumentFormat outputDocumentFormat = factory
        .getFormatByFileExtension(getExtensionName(targetFile.getAbsolutePath()));
    // 3: run the conversion
    converter.convert(sourceFile, inputDocumentFormat, targetFile, outputDocumentFormat);
  } catch (ConnectException e) {
    // Log at error level and keep the cause so the failure can be diagnosed.
    // NOTE(review): assumes `log` is an SLF4J/log4j-style logger with error(String, Throwable) - confirm.
    log.error("文档转换PDF失败", e);
  } finally {
    // Always release the socket; skip when connect() itself failed.
    if (connection.isConnected()) {
      connection.disconnect();
    }
  }
}

3、需注意:jodconverter 在转换2007版本以后的xxx.docx文档时会报错。原因大家都明白:03版的后缀名是xxx.doc, 07以后版本的后缀名是xxx.docx。

查看jodconverter源码发现, documentFormat不支持xxx.docx格式: BasicDocumentFormatRegistry中的public DocumentFormat getFormatByFileExtension(String extension)方法按后缀名精确匹配已注册的格式, 而默认注册的格式里只有doc, 没有docx。

BasicDocumentFormatRegistry类源码

// 
// JODConverter - Java OpenDocument Converter 
// Copyright (C) 2004-2007 - Mirko Nasato  
// 
// This library is free software; you can redistribute it and/or 
// modify it under the terms of the GNU Lesser General Public 
// License as published by the Free Software Foundation; either 
// version 2.1 of the License, or (at your option) any later version. 
// 
// This library is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// Lesser General Public License for more details. 
// http://www.gnu.org/copyleft/lesser.html 
// 
package com.artofsolving.jodconverter; 
 
import java.util.ArrayList; 
import java.util.Iterator; 
import java.util.List; 
 
/**
 * A {@link DocumentFormatRegistry} backed by a plain list. Formats are kept in
 * the order they were added, and lookups return the first match in that order.
 * A new instance starts with no formats registered.
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

  /** Registered formats, in registration order; elements are DocumentFormat. */
  private List/*<DocumentFormat>*/ documentFormats = new ArrayList();

  /**
   * Appends a format to the registry.
   *
   * @param documentFormat the format to register
   */
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }

  /**
   * @return the backing list itself (not a copy)
   */
  protected List/*<DocumentFormat>*/ getDocumentFormats() {
    return documentFormats;
  }

  /**
   * Finds a format by file extension. The argument is lower-cased before it is
   * compared with each registered format's extension.
   *
   * @param extension the file extension
   * @return the DocumentFormat for this extension, or null if the extension is
   *     null or not mapped
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    if (extension == null) {
      return null;
    }
    String wanted = extension.toLowerCase();
    DocumentFormat match = null;
    Iterator cursor = documentFormats.iterator();
    while (match == null && cursor.hasNext()) {
      DocumentFormat candidate = (DocumentFormat) cursor.next();
      if (candidate.getFileExtension().equals(wanted)) {
        match = candidate;
      }
    }
    return match;
  }

  /**
   * Finds a format by MIME type, using an exact string comparison.
   *
   * @param mimeType the MIME type to look for
   * @return the first registered format with that MIME type, or null if none matches
   */
  public DocumentFormat getFormatByMimeType(String mimeType) {
    DocumentFormat match = null;
    Iterator cursor = documentFormats.iterator();
    while (match == null && cursor.hasNext()) {
      DocumentFormat candidate = (DocumentFormat) cursor.next();
      if (candidate.getMimeType().equals(mimeType)) {
        match = candidate;
      }
    }
    return match;
  }
}

DefaultDocumentFormatRegistry是BasicDocumentFormatRegistry的子类(默认实现), 其中支持的文件格式如下:

// 
// JODConverter - Java OpenDocument Converter 
// Copyright (C) 2004-2007 - Mirko Nasato  
// 
// This library is free software; you can redistribute it and/or 
// modify it under the terms of the GNU Lesser General Public 
// License as published by the Free Software Foundation; either 
// version 2.1 of the License, or (at your option) any later version. 
// 
// This library is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// Lesser General Public License for more details. 
// http://www.gnu.org/copyleft/lesser.html 
// 
package com.artofsolving.jodconverter; 
 
/**
 * A {@link BasicDocumentFormatRegistry} that registers a fixed set of formats
 * in its constructor. The extensions registered are: pdf, swf, xhtml, html,
 * odt, sxw, doc, rtf, wpd, txt, wiki, ods, sxc, xls, csv, tsv, odp, sxi, ppt,
 * odg and svg.
 */
public class DefaultDocumentFormatRegistry extends BasicDocumentFormatRegistry {

  /** Registers every default format; the helpers run in the original registration order. */
  public DefaultDocumentFormatRegistry() {
    registerFamilyIndependentFormats();
    registerTextFormats();
    registerSpreadsheetFormats();
    registerPresentationFormats();
    registerDrawingFormats();
  }

  /**
   * Registers pdf, swf and xhtml. These are created without a DocumentFamily
   * and carry one export filter per source family.
   */
  private void registerFamilyIndependentFormats() {
    DocumentFormat pdf = new DocumentFormat("Portable Document Format", "application/pdf", "pdf");
    pdf.setExportFilter(DocumentFamily.DRAWING, "draw_pdf_Export");
    pdf.setExportFilter(DocumentFamily.PRESENTATION, "impress_pdf_Export");
    pdf.setExportFilter(DocumentFamily.SPREADSHEET, "calc_pdf_Export");
    pdf.setExportFilter(DocumentFamily.TEXT, "writer_pdf_Export");
    addDocumentFormat(pdf);

    DocumentFormat swf = new DocumentFormat("Macromedia Flash", "application/x-shockwave-flash", "swf");
    swf.setExportFilter(DocumentFamily.DRAWING, "draw_flash_Export");
    swf.setExportFilter(DocumentFamily.PRESENTATION, "impress_flash_Export");
    addDocumentFormat(swf);

    DocumentFormat xhtml = new DocumentFormat("XHTML", "application/xhtml+xml", "xhtml");
    xhtml.setExportFilter(DocumentFamily.PRESENTATION, "XHTML Impress File");
    xhtml.setExportFilter(DocumentFamily.SPREADSHEET, "XHTML Calc File");
    xhtml.setExportFilter(DocumentFamily.TEXT, "XHTML Writer File");
    addDocumentFormat(xhtml);
  }

  /** Registers html, odt, sxw, doc, rtf, wpd, txt and wiki. */
  private void registerTextFormats() {
    // HTML counts as Text when it is the input, but as an output it can also be
    // produced from Spreadsheet and Presentation documents.
    DocumentFormat html = new DocumentFormat("HTML", DocumentFamily.TEXT, "text/html", "html");
    html.setExportFilter(DocumentFamily.PRESENTATION, "impress_html_Export");
    html.setExportFilter(DocumentFamily.SPREADSHEET, "HTML (StarCalc)");
    html.setExportFilter(DocumentFamily.TEXT, "HTML (StarWriter)");
    addDocumentFormat(html);

    DocumentFormat odt = new DocumentFormat("OpenDocument Text", DocumentFamily.TEXT, "application/vnd.oasis.opendocument.text", "odt");
    odt.setExportFilter(DocumentFamily.TEXT, "writer8");
    addDocumentFormat(odt);

    DocumentFormat sxw = new DocumentFormat("OpenOffice.org 1.0 Text Document", DocumentFamily.TEXT, "application/vnd.sun.xml.writer", "sxw");
    sxw.setExportFilter(DocumentFamily.TEXT, "StarOffice XML (Writer)");
    addDocumentFormat(sxw);

    DocumentFormat doc = new DocumentFormat("Microsoft Word", DocumentFamily.TEXT, "application/msword", "doc");
    doc.setExportFilter(DocumentFamily.TEXT, "MS Word 97");
    addDocumentFormat(doc);

    DocumentFormat rtf = new DocumentFormat("Rich Text Format", DocumentFamily.TEXT, "text/rtf", "rtf");
    rtf.setExportFilter(DocumentFamily.TEXT, "Rich Text Format");
    addDocumentFormat(rtf);

    // WordPerfect is registered without any export filter.
    DocumentFormat wpd = new DocumentFormat("WordPerfect", DocumentFamily.TEXT, "application/wordperfect", "wpd");
    addDocumentFormat(wpd);

    DocumentFormat txt = new DocumentFormat("Plain Text", DocumentFamily.TEXT, "text/plain", "txt");
    // FilterName is set to "Text" to stop OOo from trying to show the
    // "ASCII Filter Options" dialog. Alternatively FilterName could be
    // "Text (encoded)", with FilterOptions used to set the encoding if needed.
    txt.setImportOption("FilterName", "Text");
    txt.setExportFilter(DocumentFamily.TEXT, "Text");
    addDocumentFormat(txt);

    // Created without a DocumentFamily; only a TEXT export filter is set.
    DocumentFormat wikitext = new DocumentFormat("MediaWiki wikitext", "text/x-wiki", "wiki");
    wikitext.setExportFilter(DocumentFamily.TEXT, "MediaWiki");
    addDocumentFormat(wikitext);
  }

  /** Registers ods, sxc, xls, csv and tsv. */
  private void registerSpreadsheetFormats() {
    DocumentFormat ods = new DocumentFormat("OpenDocument Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.oasis.opendocument.spreadsheet", "ods");
    ods.setExportFilter(DocumentFamily.SPREADSHEET, "calc8");
    addDocumentFormat(ods);

    DocumentFormat sxc = new DocumentFormat("OpenOffice.org 1.0 Spreadsheet", DocumentFamily.SPREADSHEET, "application/vnd.sun.xml.calc", "sxc");
    sxc.setExportFilter(DocumentFamily.SPREADSHEET, "StarOffice XML (Calc)");
    addDocumentFormat(sxc);

    DocumentFormat xls = new DocumentFormat("Microsoft Excel", DocumentFamily.SPREADSHEET, "application/vnd.ms-excel", "xls");
    xls.setExportFilter(DocumentFamily.SPREADSHEET, "MS Excel 97");
    addDocumentFormat(xls);

    // FilterOptions "44,34,0": field separator ',' (44), text delimiter '"' (34).
    DocumentFormat csv = new DocumentFormat("CSV", DocumentFamily.SPREADSHEET, "text/csv", "csv");
    csv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
    csv.setImportOption("FilterOptions", "44,34,0");
    csv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
    csv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "44,34,0");
    addDocumentFormat(csv);

    // FilterOptions "9,34,0": field separator is the tab character (9), text delimiter '"' (34).
    DocumentFormat tsv = new DocumentFormat("Tab-separated Values", DocumentFamily.SPREADSHEET, "text/tab-separated-values", "tsv");
    tsv.setImportOption("FilterName", "Text - txt - csv (StarCalc)");
    tsv.setImportOption("FilterOptions", "9,34,0");
    tsv.setExportFilter(DocumentFamily.SPREADSHEET, "Text - txt - csv (StarCalc)");
    tsv.setExportOption(DocumentFamily.SPREADSHEET, "FilterOptions", "9,34,0");
    addDocumentFormat(tsv);
  }

  /** Registers odp, sxi and ppt. */
  private void registerPresentationFormats() {
    DocumentFormat odp = new DocumentFormat("OpenDocument Presentation", DocumentFamily.PRESENTATION, "application/vnd.oasis.opendocument.presentation", "odp");
    odp.setExportFilter(DocumentFamily.PRESENTATION, "impress8");
    addDocumentFormat(odp);

    DocumentFormat sxi = new DocumentFormat("OpenOffice.org 1.0 Presentation", DocumentFamily.PRESENTATION, "application/vnd.sun.xml.impress", "sxi");
    sxi.setExportFilter(DocumentFamily.PRESENTATION, "StarOffice XML (Impress)");
    addDocumentFormat(sxi);

    DocumentFormat ppt = new DocumentFormat("Microsoft PowerPoint", DocumentFamily.PRESENTATION, "application/vnd.ms-powerpoint", "ppt");
    ppt.setExportFilter(DocumentFamily.PRESENTATION, "MS PowerPoint 97");
    addDocumentFormat(ppt);
  }

  /** Registers odg and svg. */
  private void registerDrawingFormats() {
    DocumentFormat odg = new DocumentFormat("OpenDocument Drawing", DocumentFamily.DRAWING, "application/vnd.oasis.opendocument.graphics", "odg");
    odg.setExportFilter(DocumentFamily.DRAWING, "draw8");
    addDocumentFormat(odg);

    // Created without a DocumentFamily; only a DRAWING export filter is set.
    DocumentFormat svg = new DocumentFormat("Scalable Vector Graphics", "image/svg+xml", "svg");
    svg.setExportFilter(DocumentFamily.DRAWING, "draw_svg_Export");
    addDocumentFormat(svg);
  }
}

 解决方法:重写BasicDocumentFormatRegistry类中public DocumentFormat getFormatByFileExtension(String extension)方法,只要是后缀名包含doc则使用doc的documentFormat文档格式

// 
// JODConverter - Java OpenDocument Converter 
// Copyright (C) 2004-2007 - Mirko Nasato  
// 
// This library is free software; you can redistribute it and/or 
// modify it under the terms of the GNU Lesser General Public 
// License as published by the Free Software Foundation; either 
// version 2.1 of the License, or (at your option) any later version. 
// 
// This library is distributed in the hope that it will be useful, 
// but WITHOUT ANY WARRANTY; without even the implied warranty of 
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 
// Lesser General Public License for more details. 
// http://www.gnu.org/copyleft/lesser.html 
// 
package com.artofsolving.jodconverter; 
 
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
 
/** 
 * 重写 BasicDocumentFormatRegistry 文档格式 
 * @author HuGuangJun 
 */ 
/**
 * Replacement for JODConverter's BasicDocumentFormatRegistry that maps the
 * newer Office extensions (docx, pptx, xlsx, ...) onto the doc / ppt / xls
 * formats, so that files with those extensions resolve to a registered format.
 *
 * <p>Formats are kept in registration order and lookups return the first match.
 *
 * @author HuGuangJun
 */
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {

  /** Registered formats, in registration order; elements are DocumentFormat. */
  private List/*<DocumentFormat>*/ documentFormats = new ArrayList();

  /**
   * Appends a format to the registry.
   *
   * @param documentFormat the format to register
   */
  public void addDocumentFormat(DocumentFormat documentFormat) {
    documentFormats.add(documentFormat);
  }

  /**
   * @return the backing list itself (not a copy)
   */
  protected List/*<DocumentFormat>*/ getDocumentFormats() {
    return documentFormats;
  }

  /**
   * Finds a format by file extension, ignoring case. Any extension containing
   * "doc", "ppt" or "xls" is looked up as "doc", "ppt" or "xls" respectively.
   *
   * @param extension
   *      the file extension
   * @return the DocumentFormat for this extension, or null if the extension
   *     is null or not mapped
   */
  public DocumentFormat getFormatByFileExtension(String extension) {
    if (extension == null) {
      return null;
    }
    String lowerExtension = normalizeExtension(extension);
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (format.getFileExtension().equals(lowerExtension)) {
        return format;
      }
    }
    return null;
  }

  /**
   * Finds a format by MIME type, using an exact string comparison.
   *
   * @param mimeType the MIME type to look for
   * @return the first registered format with that MIME type, or null if none matches
   */
  public DocumentFormat getFormatByMimeType(String mimeType) {
    for (Iterator it = documentFormats.iterator(); it.hasNext();) {
      DocumentFormat format = (DocumentFormat) it.next();
      if (format.getMimeType().equals(mimeType)) {
        return format;
      }
    }
    return null;
  }

  /**
   * Lower-cases the extension and collapses the Office variants onto their
   * base extension.
   *
   * <p>Lower-casing happens first so that "DOCX" is remapped just like "docx".
   * A fixed locale is used because the default-locale form of toLowerCase()
   * can map 'I' to a dotless i (e.g. under a Turkish locale), which would stop
   * extensions such as "WIKI" from matching.
   */
  private static String normalizeExtension(String extension) {
    String lower = extension.toLowerCase(Locale.ENGLISH);
    if (lower.indexOf("doc") >= 0) {
      return "doc";
    }
    if (lower.indexOf("ppt") >= 0) {
      return "ppt";
    }
    if (lower.indexOf("xls") >= 0) {
      return "xls";
    }
    return lower;
  }
}

热门栏目