///////////////////////////////////////////////////
//In fact, this program could go one step further and filter the output down to just the information needed.
//@author Xiaoshuang
//
import java.sql.*;
import java.net.*;
import java.io.*;
/**
 * Downloads a page and reduces its HTML to rough plain text.
 *
 * <p>The constructor fetches the page, joins all of its lines (line breaks are
 * dropped, nothing is inserted in their place) and lower-cases the result. The
 * private filter steps then rewrite that text in place; {@link #main(String[])}
 * runs them in a fixed order and prints what is left.
 */
public class WeatherFilter {

    /** Page text; lower-cased on load and rewritten in place by every filter step. */
    private String html;

    /**
     * Address of the page to fetch. The escapes spell the same two characters the
     * literal originally contained; writing them as escapes keeps the runtime
     * string identical while making the source independent of file encoding.
     * NOTE(review): the query carries raw non-ASCII characters; whether the
     * server expects them percent-encoded (and in which charset) is not visible
     * here -- confirm before relying on the default.
     */
    private String target = "http://weather.news.sohu.com/query.php?city=\u5317\u4EAC";

    /**
     * Fetches the default target page.
     *
     * @throws Exception if the page cannot be opened or read
     */
    public WeatherFilter() throws Exception {
        this(null);
    }

    /**
     * Fetches the given page and stores its lower-cased text.
     *
     * @param targetIn URL of the page to fetch, or {@code null} to use the default
     *                 target; any protocol supported by {@link URL} is accepted
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public WeatherFilter(String targetIn) throws Exception {
        if (targetIn != null) {
            this.target = targetIn;
        }
        URLConnection conn = new URL(target).openConnection();
        // Locale.ROOT keeps the lower-casing identical on every machine, so the
        // later searches for "<img", "<style" and "<script" match reliably.
        html = readAll(conn).toLowerCase(java.util.Locale.ROOT);
    }

    /** Reads the whole response as text, joining lines without a separator, and closes the stream. */
    private static String readAll(URLConnection conn) throws IOException {
        java.nio.charset.Charset charset = charsetOf(conn);
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(conn.getInputStream(), charset));
        try {
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            return page.toString();
        } finally {
            // Closing the reader also closes the connection's input stream.
            reader.close();
        }
    }

    /** Returns the charset named in the Content-Type header, or the platform default if none is usable. */
    private static java.nio.charset.Charset charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            String lowered = contentType.toLowerCase(java.util.Locale.ROOT);
            String key = "charset=";
            int at = lowered.indexOf(key);
            if (at >= 0) {
                // Charset names are case-insensitive, so the lowered copy is safe to parse.
                String name = lowered.substring(at + key.length());
                int semicolon = name.indexOf(';');
                if (semicolon >= 0) {
                    name = name.substring(0, semicolon);
                }
                name = name.replace("\"", "").trim();
                try {
                    return java.nio.charset.Charset.forName(name);
                } catch (IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name: fall back to the default below.
                }
            }
        }
        return java.nio.charset.Charset.defaultCharset();
    }

    /**
     * Returns the current state of the page text.
     *
     * @return the text as left by the filter steps applied so far
     */
    @Override
    public String toString() {
        return html;
    }

    /** Removes every space and tab character from the text. */
    private void disposeSpaces() {
        html = html.replace(" ", "").replace("\t", "");
    }

    /**
     * Replaces every element of the given tag, from {@code <tag} through its
     * closing {@code </tag>}, with the replacement text. The closing tag is
     * located after the opening one and its real length is used. An element that
     * is never closed is cut through to the end of the text.
     *
     * @param tag     lower-case tag name, matched as the prefix {@code "<" + tag}
     * @param replace text to put in place of each element; {@code null} removes it
     */
    private void replaceAllInTag(String tag, String replace) {
        String replacement = (replace == null) ? "" : replace;
        html = replaceSpans(html, "<" + tag, "</" + tag + ">", replacement);
    }

    /**
     * Replaces every {@code <img ...>} tag with a single newline. Does nothing
     * when the text contains no image tag; an image tag without a closing
     * {@code >} is cut through to the end of the text.
     */
    private void markAllImages() {
        html = replaceSpans(html, "<img", ">", "\n");
    }

    /**
     * Single left-to-right pass replacing each span that starts at {@code open}
     * and ends with the next {@code close}. Searching resumes after the replaced
     * span, so the pass always terminates even if the replacement contains
     * {@code open}.
     */
    private static String replaceSpans(String text, String open, String close, String replacement) {
        StringBuilder out = new StringBuilder(text.length());
        int pos = 0;
        int start;
        while ((start = text.indexOf(open, pos)) >= 0) {
            out.append(text, pos, start).append(replacement);
            int end = text.indexOf(close, start);
            // No terminator: treat the span as running to the end of the text.
            pos = (end < 0) ? text.length() : end + close.length();
        }
        out.append(text, pos, text.length());
        return out.toString();
    }

    /**
     * Drops everything between {@code <} and {@code >}, delimiters included.
     * A stray {@code >} is dropped as well, and text after an unclosed {@code <}
     * is discarded.
     */
    private void disposeAllTags() {
        html = stripDelimited(html, '<', '>', "");
    }

    /**
     * Drops everything from {@code &} up to the next {@code ;} and writes a
     * newline in place of the {@code ;}. Every {@code ;} becomes a newline, even
     * one that does not end an entity, and a bare {@code &} discards text up to
     * the next {@code ;}.
     */
    private void disposeAllKeyChars() {
        html = stripDelimited(html, '&', ';', "\n");
    }

    /**
     * Copies {@code text}, skipping characters while inside an
     * {@code open}..{@code close} region. The delimiters are never copied;
     * {@code closeMark} is written each time {@code close} is seen.
     */
    private static String stripDelimited(String text, char open, char close, String closeMark) {
        StringBuilder kept = new StringBuilder(text.length());
        boolean skipping = false;
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == open) {
                skipping = true;
            } else if (c == close) {
                skipping = false;
                kept.append(closeMark);
            } else if (!skipping) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Fetches a page, strips style and script elements, image tags, remaining
     * tags, spaces/tabs and entities (in that order), then prints the result to
     * standard error.
     *
     * @param args exactly one argument is taken as the URL to fetch; any other
     *             argument count uses the default target
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        WeatherFilter filter;
        if (args.length == 1) {
            filter = new WeatherFilter(args[0]);
        } else {
            filter = new WeatherFilter();
        }
        filter.replaceAllInTag("style", null);
        filter.replaceAllInTag("script", null);
        filter.markAllImages();
        filter.disposeAllTags();
        filter.disposeSpaces();
        filter.disposeAllKeyChars();
        System.err.print(filter);
    }
}
// Posted on 2005-09-15 21:18
// by Xiaoshuang -- reads (1209)
// comments (0) | edit | bookmark | category:
// My Java