import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
public class GoogleFinderNew {
private static String address = "https://www.google.com.hk/search?hl=en&q=";
private static String query = "";
private static String charset = "UTF-8";
private static List<String> useragentList = new ArrayList<String>();
private static void initUserAgentList(String filename) throws IOException {
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line = null;
while((line = reader.readLine()) != null){
useragentList.add(line.trim());
}
reader.close();
return;
}
private static List<String> getpages(URL url) throws IOException {
HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
BufferedReader reader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream()));
String line;
String ans = "";
while ((line = reader.readLine()) != null){
ans += line + "\n";
}
int st = -1, ed = 0;
List<String> pagesList = new ArrayList<String>();
while((st = ans.indexOf("<h3 class=\"r\"><a href=\"", ed)) != -1) {
ed = ans.indexOf("\"", st+23);
//System.out.println(ans.substring(st+23, ed));
pagesList.add(ans.substring(st+23, ed));
}
return pagesList;
}
public static void main(String[] args) throws MalformedURLException, IOException, InterruptedException {
Scanner in = new Scanner(System.in);
String askurl = in.nextLine();
query = in.nextLine();
initUserAgentList("D:\\useragent.txt");
//System.setProperty("http.agent", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.154.9 Safari/525.19");
int pages = 10;
for(int i=0;i<pages;i++) {
System.out.println((i+1) + " ..");
int index = (int)(useragentList.size()*Math.random());
if(index == useragentList.size()) index --;
String theUserAgent = useragentList.get(index);
System.setProperty("http.agent", theUserAgent);
String urlString = address + URLEncoder.encode(query, charset);
if(i != 0) urlString += "&start=" + i + "0";
//System.out.println(urlString);
List<String> list = getpages(new URL(urlString));
for(String page : list) {
if(page.contains(askurl)) {
// if(page.equals(askurl)) {
System.out.println(askurl + " found in the " + (i+1) + " th page.");
System.out.println(page);
return;
}
}
int extraTime = 0; //(int)(3000 * Math.random());
Thread.sleep(1000 + extraTime);
}
System.out.println("can't find " + askurl + " in the first " + pages + " pages.");
}
}
// Posted on 2015-07-29 16:41
// by marchalex; views (262),
// comments (0); edit; bookmark; category:
// Java mini-programs