pingpang

:: 管理

21 Posts :: 0 Stories :: 3 Comments :: 0 Trackbacks

网上有好几种方法可以将HTML文件转换成PDF文件，但是有些对HTML文件格式要求比较严格，稍微错了一些就不能生成我们所要的PDF文件，这里我推荐一个

PD4ML，它可以解决HTML文件格式不正确的问题，可以生成一个比较好的PDF文件，其处理速度快，而且对CSS文件兼容得非常好。下面是最基本的

PD4ML编程：

Java代码 
package samples;  
  
import java.awt.Insets;  
import java.io.File;  
import java.io.IOException;  
import java.net.MalformedURLException;  
import java.net.URL;  
import java.security.InvalidParameterException;  
  
import org.zefer.pd4ml.PD4Constants;  
import org.zefer.pd4ml.PD4ML;  
  
public class GettingStarted1 {  
    protected int topValue = 10;  
    protected int leftValue = 20;  
    protected int rightValue = 10;  
    protected int bottomValue = 10;  
    protected int userSpaceWidth = 1300;  
  
    public static void main(String[] args) {  
        try {  
            GettingStarted1 jt = new GettingStarted1();  
            jt.doConversion("http://pd4ml.com/sample.htm", "c:/pd4ml.pdf");  
        } catch (Exception e) {  
            e.printStackTrace();  
        }  
    }  
  
    public void doConversion( String url, String outputPath )   
                throws InvalidParameterException, MalformedURLException, IOException {  
        File output = new File(outputPath);  
        java.io.FileOutputStream fos = new java.io.FileOutputStream(output);  
  
        PD4ML pd4ml = new PD4ML();  
              
        pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser"   
              
        // choose target paper format and "rotate" it to landscape orientation  
        pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));   
              
        // define PDF page margins  
        pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));   
  
        // source HTML document also may have margins, could be suppressed this way   
        // (PD4ML *Pro* feature):  
        pd4ml.addStyle("BODY {margin: 0}", true);  
              
        // If built-in basic PDF fonts are not sufficient or   
        // if you need to output non-Latin texts,  
        // TTF embedding feature should help (PD4ML *Pro*)  
        pd4ml.useTTF("c:/windows/fonts", true);  
  
        pd4ml.render(new URL(url), fos); // actual document conversion from URL to file  
        fos.close();  
              
        System.out.println( outputPath + "\ndone." );  
    }  
}  

The following Java class slightly changes the above example. Now it pre-reads the source HTML into a string and passes it to the render() method wrapped in a StringReader. It first writes the PDF bytes to a ByteArrayOutputStream, which makes it possible to measure the size of the resulting document.

A disadvantage of this method is higher RAM usage.

Java代码 
package samples;  
  
import java.awt.Insets;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;

import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;
  
public class GettingStarted2 {  
    protected int topValue = 10;  
    protected int leftValue = 20;  
    protected int rightValue = 10;  
    protected int bottomValue = 10;  
    protected int userSpaceWidth = 1300;  
  
    public static void main(String[] args) {  
        try {  
            GettingStarted2 jt = new GettingStarted2();  
            String html = readFile("c:/sample.htm", "UTF-8");  
            jt.doConversion2(html, "c:/pd4ml.pdf");  
        } catch (Exception e) {  
            e.printStackTrace();  
        }  
    }  
  
    public void doConversion2( String htmlDocument, String outputPath )   
                throws InvalidParameterException, MalformedURLException, IOException {  
  
        PD4ML pd4ml = new PD4ML();  
              
        pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser"   
              
        // choose target paper format  
        pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));   
              
        // define PDF page margins  
        pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));   
  
        // source HTML document also may have margins, could be suppressed this way   
        // (PD4ML *Pro* feature):  
        pd4ml.addStyle("BODY {margin: 0}", true);  
              
        // If built-in basic PDF fonts are not sufficient or   
        // if you need to output non-Latin texts, TTF embedding feature should help   
        // (PD4ML *Pro*)  
        pd4ml.useTTF("c:/windows/fonts", true);  
  
        ByteArrayOutputStream baos = new ByteArrayOutputStream();  
        // actual document conversion from HTML string to byte array  
        pd4ml.render(new StringReader(htmlDocument), baos);   
        // if the HTML has relative references to images etc,   
        // use render() method with baseDirectory parameter instead  
        baos.close();  
          
        System.out.println( "resulting PDF size: " + baos.size() + " bytes" );  
        // in Web scenarios it is a good idea to send the size with   
        // "Content-length" HTTP header  
  
        File output = new File(outputPath);  
        java.io.FileOutputStream fos = new java.io.FileOutputStream(output);  
        fos.write( baos.toByteArray() );  
        fos.close();  
          
        System.out.println( outputPath + "\ndone." );  
    }  
      
    private final static String readFile( String path, String encoding ) throws IOException {  
  
        File f = new File( path );  
        FileInputStream is = new FileInputStream(f);  
        BufferedInputStream bis = new BufferedInputStream(is);  
          
        ByteArrayOutputStream fos = new ByteArrayOutputStream();  
        byte buffer[] = new byte[2048];  
  
        int read;  
        do {  
            read = is.read(buffer, 0, buffer.length);  
            if (read > 0) {   
                fos.write(buffer, 0, read);   
            }  
        } while (read > -1);  
  
        fos.close();  
        bis.close();  
        is.close();  
  
        return fos.toString(encoding);  
    }  
}  

posted on 2012-07-21 22:19 往事随风阅读(1448) 评论(0) 编辑收藏

新用户注册刷新评论列表


只有注册用户登录后才能发表评论。




网站导航: 博客园博客园最新博文博问管理

pingpang

常用链接

留言簿(1)

我参与的团队

随笔档案

搜索

最新评论

阅读排行榜

评论排行榜

PD4ML，它可以解决HTML文件格式不正确的问题，可以生成一个比较好的PDF文件，其处理速度快，而且对CSS文件兼容得非常好。下面是最基本的

PD4ML编程：