网上有好几种方法可以将HTML文件转换成PDF文件,但是有些方法对HTML文件格式要求比较严格,稍微错了一些就不能生成我们所要的PDF文件。这里我推荐一个PD4ML,它可以解决HTML文件格式不正确的问题,可以生成一个比较好的PDF文件,其处理速度快,而且对CSS文件兼容得非常好。下面是最基本的PD4ML编程:
- package samples;
-
- import java.awt.Insets;
- import java.io.File;
- import java.io.IOException;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.security.InvalidParameterException;
-
- import org.zefer.pd4ml.PD4Constants;
- import org.zefer.pd4ml.PD4ML;
-
- /**
-  * Minimal PD4ML sample: renders the HTML document found at a URL
-  * straight into a PDF file on disk.
-  */
- public class GettingStarted1 {
-     // values handed to setPageInsetsMM() as the PDF page margins
-     protected int topValue = 10;
-     protected int leftValue = 20;
-     protected int rightValue = 10;
-     protected int bottomValue = 10;
-     // value handed to setHtmlWidth(): frame width of the "virtual web browser"
-     protected int userSpaceWidth = 1300;
-
-     /**
-      * Converts a fixed sample URL to a fixed output path and prints the
-      * stack trace of any failure.
-      *
-      * @param args ignored
-      */
-     public static void main(String[] args) {
-         try {
-             GettingStarted1 jt = new GettingStarted1();
-             jt.doConversion("http://pd4ml.com/sample.htm", "c:/pd4ml.pdf");
-         } catch (Exception e) {
-             e.printStackTrace();
-         }
-     }
-
-     /**
-      * Renders the HTML document at {@code url} into a PDF written to
-      * {@code outputPath}.
-      *
-      * @param url        address of the source HTML document
-      * @param outputPath path of the PDF file to create
-      * @throws InvalidParameterException declared by the original sample;
-      *         presumably raised by PD4ML on bad settings (verify against the PD4ML API)
-      * @throws MalformedURLException if {@code url} cannot be parsed as a URL
-      * @throws IOException if the output file cannot be opened, written or closed
-      */
-     public void doConversion( String url, String outputPath )
-             throws InvalidParameterException, MalformedURLException, IOException {
-         File output = new File(outputPath);
-         java.io.FileOutputStream fos = new java.io.FileOutputStream(output);
-         try {
-             PD4ML pd4ml = new PD4ML();
-
-             pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser"
-
-             // choose target paper format and "rotate" it to landscape orientation
-             pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));
-
-             // define PDF page margins
-             pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));
-
-             // source HTML document also may have margins, could be suppressed this way
-             // (PD4ML *Pro* feature):
-             pd4ml.addStyle("BODY {margin: 0}", true);
-
-             // If built-in basic PDF fonts are not sufficient or
-             // if you need to output non-Latin texts,
-             // TTF embedding feature should help (PD4ML *Pro*)
-             pd4ml.useTTF("c:/windows/fonts", true);
-
-             pd4ml.render(new URL(url), fos); // actual document conversion from URL to file
-         } finally {
-             // release the file handle even when configuration or rendering fails
-             fos.close();
-         }
-
-         System.out.println( outputPath + "\ndone." );
-     }
- }
The following Java class slightly changes the above example. Now it pre-reads the source HTML into a string and passes it to the render() method wrapped in a StringReader. It first writes the PDF bytes to a ByteArrayOutputStream, which makes it possible to measure the size of the resulting document. A disadvantage of this method is higher RAM utilization.
- package samples;
-
- import java.awt.Insets;;
- import java.io.BufferedInputStream;
- import java.io.ByteArrayOutputStream;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.FileOutputStream;
- import java.io.IOException;
- import java.io.StringReader;
- import java.net.MalformedURLException;
- import java.net.URL;
- import java.security.InvalidParameterException;
-
- import org.zefer.pd4ml.PD4Constants;
- import org.zefer.pd4ml.PD4ML;
-
- /**
-  * PD4ML sample that reads the source HTML into a string, renders it to an
-  * in-memory buffer (so the PDF size is known), then writes the buffer to a file.
-  */
- public class GettingStarted2 {
-     // values handed to setPageInsetsMM() as the PDF page margins
-     protected int topValue = 10;
-     protected int leftValue = 20;
-     protected int rightValue = 10;
-     protected int bottomValue = 10;
-     // value handed to setHtmlWidth(): frame width of the "virtual web browser"
-     protected int userSpaceWidth = 1300;
-
-     /**
-      * Reads a fixed sample HTML file as UTF-8, converts it to a fixed output
-      * path and prints the stack trace of any failure.
-      *
-      * @param args ignored
-      */
-     public static void main(String[] args) {
-         try {
-             GettingStarted2 jt = new GettingStarted2();
-             String html = readFile("c:/sample.htm", "UTF-8");
-             jt.doConversion2(html, "c:/pd4ml.pdf");
-         } catch (Exception e) {
-             e.printStackTrace();
-         }
-     }
-
-     /**
-      * Renders an HTML string to PDF in memory, reports the PDF size on
-      * standard output and stores the result at {@code outputPath}.
-      *
-      * @param htmlDocument the HTML markup to convert
-      * @param outputPath   path of the PDF file to create
-      * @throws InvalidParameterException declared by the original sample;
-      *         presumably raised by PD4ML on bad settings (verify against the PD4ML API)
-      * @throws MalformedURLException declared by the original sample and kept
-      *         for interface compatibility
-      * @throws IOException if rendering or writing the output file fails
-      */
-     public void doConversion2( String htmlDocument, String outputPath )
-             throws InvalidParameterException, MalformedURLException, IOException {
-
-         PD4ML pd4ml = new PD4ML();
-
-         pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser"
-
-         // choose target paper format and switch its orientation
-         pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));
-
-         // define PDF page margins
-         pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));
-
-         // source HTML document also may have margins, could be suppressed this way
-         // (PD4ML *Pro* feature):
-         pd4ml.addStyle("BODY {margin: 0}", true);
-
-         // If built-in basic PDF fonts are not sufficient or
-         // if you need to output non-Latin texts, TTF embedding feature should help
-         // (PD4ML *Pro*)
-         pd4ml.useTTF("c:/windows/fonts", true);
-
-         ByteArrayOutputStream baos = new ByteArrayOutputStream();
-         // actual document conversion from HTML string to byte array
-         pd4ml.render(new StringReader(htmlDocument), baos);
-         // if the HTML has relative references to images etc,
-         // use render() method with baseDirectory parameter instead
-         baos.close();
-
-         System.out.println( "resulting PDF size: " + baos.size() + " bytes" );
-         // in Web scenarios it is a good idea to send the size with
-         // "Content-length" HTTP header
-
-         File output = new File(outputPath);
-         java.io.FileOutputStream fos = new java.io.FileOutputStream(output);
-         try {
-             // writeTo() streams the buffer directly, avoiding the extra full
-             // copy of the PDF that toByteArray() would allocate
-             baos.writeTo(fos);
-         } finally {
-             // release the file handle even when the write fails
-             fos.close();
-         }
-
-         System.out.println( outputPath + "\ndone." );
-     }
-
-     /**
-      * Reads a whole file and decodes its bytes with the given charset.
-      *
-      * @param path     path of the file to read
-      * @param encoding name of the charset used to decode the file content
-      * @return the decoded file content
-      * @throws IOException if the file cannot be opened or read, or if
-      *         {@code encoding} is not a supported charset name
-      */
-     private final static String readFile( String path, String encoding ) throws IOException {
-
-         ByteArrayOutputStream content = new ByteArrayOutputStream();
-         BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(path)));
-         try {
-             byte buffer[] = new byte[2048];
-             int read;
-             // read through the buffered wrapper (the original bypassed it);
-             // read() returns -1 at end of stream
-             while ((read = bis.read(buffer, 0, buffer.length)) > -1) {
-                 content.write(buffer, 0, read);
-             }
-         } finally {
-             // closing the wrapper also closes the underlying FileInputStream
-             bis.close();
-         }
-
-         return content.toString(encoding);
-     }
- }
|