// A simple web page fetcher: when run, it downloads a web page and saves it to a local file.
package com.ducklyl;
import java.io.*;
import java.net.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Minimal HTTP page fetcher built directly on a TCP socket.
 *
 * <p>Running {@link #main(String[])} sends a {@code GET /} request to a fixed host,
 * prints the raw response to standard output and saves the same bytes to a fixed
 * file path. The saved data is the complete response exactly as received from the
 * socket (status line, headers and body); no HTTP decoding is performed.
 */
public class WebHttpClient {
    /** Path of the file the downloaded response is saved to. */
    private static final String TEXT_FILE_PATH = "D:\\Test\\index.html";

    /** Host to connect to; also sent as the value of the Host header. */
    private static final String HOST = "www.bnu.edu.cn";

    /** TCP port of the HTTP server. */
    private static final int PORT = 80;

    /**
     * Downloads the page, prints it, and writes it to {@code TEXT_FILE_PATH}.
     *
     * <p>On failure a short message is printed to standard error and the JVM exits
     * with status 1.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            byte[] response = fetch(HOST, PORT);

            // Console output is decoded with the platform default charset, which is
            // what the console itself expects.
            System.out.println(new String(response, Charset.defaultCharset()));

            // The file is opened only after a successful download, so a failed
            // request does not leave an empty file behind. Raw bytes are written
            // so the page is not corrupted by a decode/encode round trip.
            try (OutputStream fileOut = new FileOutputStream(new File(TEXT_FILE_PATH))) {
                fileOut.write(response);
            }
        } catch (UnknownHostException e) {
            System.err.println("don't get the host");
            System.exit(1);
        } catch (IOException e) {
            System.err.println("Download error,please check the URL.");
            System.exit(1);
        }
    }

    /**
     * Opens a connection, sends the request and returns everything the server sends back.
     *
     * @param host server host name
     * @param port server TCP port
     * @return the raw response bytes
     * @throws IOException if connecting, sending or receiving fails
     */
    private static byte[] fetch(String host, int port) throws IOException {
        // try-with-resources closes the socket even when reading fails.
        try (Socket socket = new Socket(host, port)) {
            OutputStream out = socket.getOutputStream();
            out.write(buildRequest(host).getBytes(StandardCharsets.US_ASCII));
            out.flush();
            return readAll(socket.getInputStream());
        }
    }

    /**
     * Builds the HTTP/1.1 request for the root path of {@code host}.
     *
     * <p>Lines are terminated with CRLF as HTTP requires, independent of the
     * platform line separator. {@code Connection: close} asks the server to close
     * the connection after the response, which is what lets {@link #readAll} detect
     * the end of the response.
     *
     * @param host value of the Host header
     * @return the complete request text, including the terminating empty line
     */
    static String buildRequest(String host) {
        return "GET / HTTP/1.1\r\n"
            + "Host: " + host + "\r\n"
            + "Connection: close\r\n"
            + "\r\n";
    }

    /**
     * Reads {@code in} until end of stream.
     *
     * <p>Blocks while waiting for data instead of polling, and never stores the
     * end-of-stream marker in the result.
     *
     * @param in stream to drain; not closed by this method
     * @return all bytes read, possibly empty
     * @throws IOException if reading fails
     */
    static byte[] readAll(InputStream in) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(8096);
        byte[] chunk = new byte[4096];
        int count;
        while ((count = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, count);
        }
        return buffer.toByteArray();
    }
}