随笔-65  评论-68  文章-4  trackbacks-0

///////////////////////////////////////////////////
//This program could be extended with further processing to filter out just the information that is needed.
//@author Xiaoshuang
//

import java.sql.*;
import java.net.*;
import java.io.*;

/**
 * Downloads a web page and reduces it to rough plain text.
 *
 * <p>The constructor fetches the page, joins its lines without separators and
 * lower-cases it. {@link #main(String[])} then removes {@code <style>} and
 * {@code <script>} elements, replaces images with line breaks, drops the
 * remaining tags, removes spaces and tabs, and replaces character entities
 * with line breaks.
 */
public class WeatherFilter{
  /** The page text; rewritten in place by each filtering step. */
  private String html;

  /** Address of the page to fetch; the default is used when no address is supplied. */
  private String target="http://weather.news.sohu.com/query.php?city=北京";

  /**
   * Fetches the default page.
   *
   * @throws IOException if the page cannot be read
   */
  public WeatherFilter()throws IOException{
    this(null);
  }

  /**
   * Fetches the given page, or the default page when {@code targetIn} is {@code null}.
   *
   * @param targetIn URL of the page to fetch, or {@code null} for the default
   * @throws IOException if the URL is malformed or the page cannot be read
   */
  public WeatherFilter(String targetIn)throws IOException{
    if(targetIn!=null){
      this.target=targetIn;
    }
    // No cast to HttpURLConnection: any URL scheme the JVM supports is accepted.
    URLConnection conn=new URL(target).openConnection();
    InputStream in=conn.getInputStream();
    try{
      BufferedReader reader=new BufferedReader(new InputStreamReader(in,charsetOf(conn)));
      StringBuilder page=new StringBuilder();
      String line;
      while((line=reader.readLine())!=null){
        // Lines are joined without a separator, so line breaks in the page are dropped.
        page.append(line);
      }
      // A fixed locale keeps the "<img"/"<style" matching independent of the
      // default locale (e.g. Turkish dotless i).
      html=page.toString().toLowerCase(java.util.Locale.ENGLISH);
    }finally{
      in.close();
    }
  }

  /**
   * Picks the charset named in the connection's Content-Type header, falling
   * back to the platform default when none is declared or it is not supported.
   */
  private static java.nio.charset.Charset charsetOf(URLConnection conn){
    final String key="charset=";
    String contentType=conn.getContentType();
    if(contentType!=null){
      for(int i=0;i+key.length()<=contentType.length();i++){
        if(!contentType.regionMatches(true,i,key,0,key.length())){
          continue;
        }
        String name=contentType.substring(i+key.length());
        int semicolon=name.indexOf(';');
        if(semicolon>=0){
          name=name.substring(0,semicolon);
        }
        name=name.trim();
        if(name.length()>=2&&name.startsWith("\"")&&name.endsWith("\"")){
          name=name.substring(1,name.length()-1);
        }
        try{
          return java.nio.charset.Charset.forName(name);
        }catch(IllegalArgumentException ignored){
          // Malformed or unsupported charset label: use the platform default below.
        }
        break;
      }
    }
    return java.nio.charset.Charset.defaultCharset();
  }

  /** Returns the page text in its current state of filtering. */
  @Override
  public String toString(){
    return html;
  }

  /** Removes every space and tab character from the page. */
  private void disposeSpaces(){
    html=html.replace(" ","").replace("\t","");
  }

  /**
   * Replaces every element of the given kind, from its opening tag through
   * its closing tag, with {@code replace}.
   *
   * <p>An element whose closing tag is missing is taken to run to the end of
   * the page.
   *
   * @param tag     lower-case element name, e.g. {@code "script"}
   * @param replace text to insert in place of each element, or {@code null} for nothing
   */
  private void replaceAllInTag(String tag,String replace){
    String open="<"+tag;
    String close="</"+tag+">";
    StringBuilder out=new StringBuilder(html.length());
    int pos=0;
    int start;
    while((start=html.indexOf(open,pos))>=0){
      out.append(html,pos,start);
      if(replace!=null){
        out.append(replace);
      }
      // Search from the opening tag, and skip the real length of the closing tag.
      int end=html.indexOf(close,start);
      if(end<0){
        pos=html.length();
        break;
      }
      pos=end+close.length();
    }
    out.append(html,pos,html.length());
    html=out.toString();
  }

  /**
   * Replaces every {@code <img ...>} tag with a line break. A page without
   * images is left unchanged; an image tag with no closing {@code >} is taken
   * to run to the end of the page.
   */
  private void markAllImages(){
    StringBuilder out=new StringBuilder(html.length());
    int pos=0;
    int start;
    while((start=html.indexOf("<img",pos))>=0){
      out.append(html,pos,start).append('\n');
      int end=html.indexOf('>',start);
      if(end<0){
        pos=html.length();
        break;
      }
      pos=end+1;
    }
    out.append(html,pos,html.length());
    html=out.toString();
  }

  /**
   * Removes everything between {@code <} and {@code >}, including the
   * brackets themselves. A {@code >} outside a tag is dropped as well.
   */
  private void disposeAllTags(){
    StringBuilder text=new StringBuilder(html.length());
    boolean insideTag=false;
    for(int i=0;i<html.length();i++){
      char c=html.charAt(i);
      if(c=='<'){
        insideTag=true;
      }else if(c=='>'){
        insideTag=false;
      }else if(!insideTag){
        text.append(c);
      }
    }
    html=text.toString();
  }

  /**
   * Replaces each {@code &...;} character entity with a line break.
   *
   * <p>NOTE(review): a {@code ;} outside an entity also becomes a line break,
   * and an {@code &} with no following {@code ;} discards the rest of the
   * text. Confirm whether that is intended before tightening it.
   */
  private void disposeAllKeyChars(){
    StringBuilder text=new StringBuilder(html.length());
    boolean insideEntity=false;
    for(int i=0;i<html.length();i++){
      char c=html.charAt(i);
      if(c=='&'){
        insideEntity=true;
      }else if(c==';'){
        insideEntity=false;
        text.append('\n');
      }else if(!insideEntity){
        text.append(c);
      }
    }
    html=text.toString();
  }

  /**
   * Fetches a page and prints its filtered text.
   *
   * @param args exactly one argument is used as the page URL; any other
   *             argument count selects the default page
   * @throws IOException if the page cannot be read
   */
  public static void main(String[] args)throws IOException{
    WeatherFilter filter;
    if(args.length==1){
      filter=new WeatherFilter(args[0]);
    }else{
      filter=new WeatherFilter();
    }
    filter.replaceAllInTag("style",null);
    filter.replaceAllInTag("script",null);
    filter.markAllImages();
    filter.disposeAllTags();
    filter.disposeSpaces();
    filter.disposeAllKeyChars();
    // NOTE(review): the result is written to standard error, not standard output; confirm this is intended.
    System.err.print(filter);
  }
}

posted on 2005-09-15 21:18 小爽 阅读(1209) 评论(0)  编辑  收藏 所属分类: 我的Java

只有注册用户登录后才能发表评论。


网站导航: