posts - 3,  comments - 12,  trackbacks - 0
改写PDF在线浏览功能之前,先把以前写的从PDF文件中提取文本的代码贴在这里留个爪,以便今后查用。
package com.eimone.institution.service.impl;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Writer;
import java.util.Date;
import java.util.List;

import javax.annotation.Resource;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.struts2.ServletActionContext;
import org.hibernate.criterion.DetachedCriteria;
import org.springframework.stereotype.Component;

import com.eimone.institution.bean.Attachment;
import com.eimone.institution.dao.IInstitutionDao;
import com.eimone.institution.model.Institution;
import com.eimone.institution.service.IInstitutionService;

public class InstitutionServiceImpl  {
    
private IInstitutionDao institutionDao;

    
public Institution saveInstitution(Institution institution,
            Attachment attachment)
{
        
if (!(attachment == null || attachment.getFileFileName() == null || attachment
                .getFileFileName().length 
== 0)) {
            finishAttachment(attachment, institution);
        }


        Date now 
= new Date();
        institution.setPublishTime(now);
        
return institutionDao.save(institution);
    }


    
    
private String finishAttachment(Attachment attachment,
            Institution institution)
{
        String info 
= "";
        File[] files 
= attachment.getFile();
        String path 
= ServletActionContext.getServletContext().getRealPath(
                
"regulations");
        File desc 
= new File(path);
        
if (!desc.exists()) {
            desc.mkdir();
        }

        
for (int i = 0; i < files.length; i++{
            File file 
= files[i];        
                
long now = System.currentTimeMillis();
                String fileName 
= attachment.getFileFileName()[i];
                path 
+= "\\" + now + "_" + fileName;
                institution.setUrl(now 
+ "_" + fileName);
                saveFile(file, path);
                institution.setContent(getTextFromPdf(path));
                
        }

        
return info;
    }


    
public void saveFile(File file, String path) {
        BufferedOutputStream bof 
= null;
        BufferedInputStream bis 
= null;
        
try {
            FileOutputStream fos 
= new FileOutputStream(path);
            bof 
= new BufferedOutputStream(fos);
            bis 
= new BufferedInputStream(new FileInputStream(file));
            
byte[] buffer = new byte[1024];
            
try {
                
int len = bis.read(buffer);
                
while (len > 0{
                    bof.write(buffer, 
0, len);
                    len 
= bis.read(buffer);
                }

            }
 catch (IOException e) {
                e.printStackTrace();
            }

        }
 catch (FileNotFoundException e) {
            e.printStackTrace();
        }
 finally {
            
try {
                
if (bof != null{
                    bof.close();
                }

                
if (bis != null{
                    bis.close();
                }

            }
 catch (IOException e) {
                e.printStackTrace();
            }

        }

    }


    
/**
     * 
@param filePath
     *            文件路径
     * 
@return 读出的pdf的内容
     
*/

    
public String getTextFromPdf(String filePath) {
        String result 
= null;
        FileInputStream is 
= null;
        PDDocument document 
= null;
        
try {
            is 
= new FileInputStream(filePath);
            PDFParser parser 
= new PDFParser(is);
            parser.parse();
            document 
= parser.getPDDocument();
            PDFTextStripper stripper 
= new PDFTextStripper();
            result 
= stripper.getText(document);
        }
 catch (FileNotFoundException e) {
            e.printStackTrace();
        }
 catch (IOException e) {
            e.printStackTrace();
        }
 catch(NoClassDefFoundError e) {
            e.printStackTrace();
        }
 
        
finally {
            
if (is != null{
                
try {
                    is.close();
                }
 catch (IOException e) {
                    e.printStackTrace();
                }

            }

            
if (document != null{
                
try {
                    document.close();
                }
 catch (IOException e) {
                    e.printStackTrace();
                }

            }

        }

        
return result;
    }

    
    
public static void main(String[] args) {
        InstitutionServiceImpl impl 
= new InstitutionServiceImpl();
        System.out.println(impl.getTextFromPdf(
"d:\\OUTLINE_DESIGN.pdf"));
    }

}
posted on 2010-10-24 15:53 [ 王志伟 ] 阅读(832) 评论(0)  编辑  收藏

只有注册用户登录后才能发表评论。


网站导航:
 

<2024年12月>
24252627282930
1234567
891011121314
15161718192021
22232425262728
2930311234

常用链接

留言簿(1)

随笔档案(3)

文章档案(29)

搜索

  •  

最新评论

阅读排行榜

评论排行榜