1 /*
2 * Copyright (c) 2006 Your Corporation. All Rights Reserved.
3 */
4 package liuxuan;
5
6 /**
7 * Created by IntelliJ IDEA.
8 * User: Administrator
9 * Date: 2006-7-26
10 * Time: 15:33:49
11 * To change this template use File | Settings | File Templates.
12 */
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.http.ConnectionManager;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.ObjectFindingVisitor;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
24
25 public class LinkDemo
26 {
27 public static void main (String[] args) throws ParserException, SQLException
28 {
29 ConnectionManager cn = new ConnectionManager();
30 cn.setProxyHost("10.75.1.38");
31 cn.setProxyPort(80);
32 Parser.setConnectionManager(cn);
33 Parser parser;
34 //parser.s
35 String[] pyurl = new String[2] ;
36
37 pyurl[0]="http://www.google.cn/search?num=100&hl=zh-CN&newwindow=1&q=%E6%BF%AE%E9%98%B3&btnG=%E6%90%9C%E7%B4%A2&meta=cr%3DcountryCN";
38 pyurl[1]="http://www.google.cn/search?q=%E6%BF%AE%E9%98%B3&num=100&hl=zh-CN&lr=&cr=countryCN&newwindow=1&start=100&sa=N";
39 for (int j=0;j<pyurl.length;j++) {
40 parser = new Parser (pyurl[j]);
41 ObjectFindingVisitor visitor = new ObjectFindingVisitor (LinkTag.class);
42 parser.visitAllNodesWith (visitor);
43 Node[] links = visitor.getTags ();
44 String sql;
45 try {
46 Class.forName("com.mysql.jdbc.Driver");
47 } catch (ClassNotFoundException e) {
48 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
49 }
50 Connection conn = null;
51 try {
52 conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/crawdb", "root", "root");
53 } catch (SQLException e) {
54 e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
55 }
56 Statement stmt = conn.createStatement();
57 for (int i = 0; i < links.length; i++)
58 {
59 sql="";
60 LinkTag linkTag = (LinkTag)links[i];
61 if (linkTag.getLink().indexOf("cache")<0 && linkTag.getLink().indexOf("google")<0 &&linkTag.getLinkText().indexOf("濮阳")>0) {
62
63
64 sql="insert into urls(name,note,url) values('"+linkTag.getLinkText ()+"','"+"lixuan"+"','"+linkTag.getLink ()+"')";
65 stmt.executeUpdate(sql);
66 //rs.close();
67
68 System.out.print ("\"" + linkTag.getLinkText () + "\" => ");
69 System.out.println (linkTag.getLink ());
70 }
71 }
72 stmt.close();
73 conn.close();
74 }
75 }
76
77 }
78