解析网址提取天气信息

这是以前写过的一个从网上提取天气信息的类，参照了公司老前辈们的代码，可能不太规范，但基本功能已经实现。主要就是对页面源码进行解析并截取有用信息，取出来的都是有规律的字符串信息，可根据需要存进数据库，再加以应用。代码如下：

  1 package parsehtml;
  2
  3 import java.io.BufferedReader;
  4 import java.io.InputStreamReader;
  5 import java.net.HttpURLConnection;
  6 import java.net.URL;
  7 import java.util.ArrayList;
  8 import java.util.Iterator;
  9 import java.util.List;
10
11 public class ParseHtml extends Thread {
12     /*
13      * 解析网址运行
14      *
15      */public void run() {
16         try {
17             // 河北天气
18             String urlAddress = "http://www.weathercn.com/forecast/province.jsp?province=hebei";
19             startParse(urlAddress);
20         } catch (Exception e) {
21             e.printStackTrace();
22             System.out.println("网络错误，提取天气数据出错！");
23         }
24     }
25
26     /*
27      *
28      * 开始解析网址
29      *
30      *
31      */public void startParse(String urlAddress) throws Exception {
32         System.out.println("开始提取网址：" + urlAddress);
33         URL url = new URL(urlAddress);
34         HttpURLConnection httpConnection = (HttpURLConnection) url
35                 .openConnection();
36         httpConnection.setRequestProperty("User-Agent", "Mozilla");
37         httpConnection.setRequestProperty("Connection", "Keep-Alive");
38
39         int responseCode = 0;
40         try {
41             responseCode = httpConnection.getResponseCode();
42         } catch (Exception ex) {
43             System.out.println("读取网页失败，返回代码：" + responseCode);
44         }
45         System.out.println("读取网页反回代码：" + responseCode);
46
47         // 获得输入流
48         InputStreamReader ir = new InputStreamReader(httpConnection
49                 .getInputStream());
50         if (ir != null) {
51             BufferedReader reader = new BufferedReader(ir);
52             System.out.println(reader);
53             if (reader != null)
54                 // 调用从何处取数据
55                 isStartPoint(reader, "99", 1);
56             reader.close();
57             ir.close();
58         }
59
60     }
61
62     private void isStartPoint(BufferedReader reader, String tag, int number)
63             throws Exception {
64         String CurrentLine = "";
65
66         // 从流中读取一行字符串(html源文件)
67         while ((CurrentLine = reader.readLine()) != null) {
68
69             // 循环查询整个 CurrentLine 中的 tag,查到一个就将计数据器 number 减 1
70             int fromIndex = 0;
71             while ((number != 0)
72                     && (CurrentLine.toUpperCase().indexOf(tag.toUpperCase(),
73                             fromIndex) != -1)) {
74                 fromIndex = CurrentLine.toUpperCase().indexOf(
75                         tag.toUpperCase(), fromIndex) + 1;
76                 if (fromIndex > 0)
77                     number--;
78             }
79
80             // 如果到了起始点即 number == 0 时，开始执行取数据操作
81             List sb = new ArrayList();
82             if ((CurrentLine.indexOf("citydetail") > 0)
83                     && (CurrentLine.indexOf("99") > 0)) {
84                 sb.add(this.processBuffer(CurrentLine));
85                 // 截取天气信
86                 CurrentLine = reader.readLine();
87                 CurrentLine = reader.readLine();
88                 if (CurrentLine != null) {
89                     sb.add(this.processBuffer(CurrentLine));
90                 }
91                 // 截取最低气温
92                 CurrentLine = reader.readLine();
93                 if (CurrentLine != null) {
94                     sb.add(this.processBuffer(CurrentLine));
95                 }
96             }
97             StringBuffer s = new StringBuffer();
98             // 将所有的截取信息汇总进行处理，用‘，’间隔便于以后截取相应信息
99             for (Iterator it = sb.iterator(); it.hasNext();) {
100                 String i = it.next().toString();
101                 s.append(i);
102             }
103             String Tq = s.toString();
104             String[] Tqxx = Tq.split(",");
105             if (Tqxx.length >= 3) {
106                 System.out.println(Tq);
107             }
108         }
109     }
110
111     /*
112      * 判断并从网页上截取
113      *
114      * @param old
115      */
116     private String processBuffer(String strLine) throws Exception {
117         // 保存当前取得的城市
118         StringBuffer sb = new StringBuffer();
119         String Tqxx;
120         // 当当前行含有“城市”时，截取相应的城市名称
121         if (strLine.indexOf("citydetail") > 0) {
122             Tqxx = subString(strLine, "sta_id", "<");
123             Tqxx = Tqxx.substring(24);
124             sb = sb.append(Tqxx + ",");
125         }
126         if (strLine.indexOf("alt") > 0) {
127             Tqxx = subString(strLine, "alt", ">");
128             Tqxx = Tqxx.substring(1);
129             sb = sb.append(Tqxx + ",");
130         }
131         if (strLine.indexOf("strong") > 0) {
132             strLine = strLine.replaceAll(" ", "");
133             Tqxx = subString(strLine, "strong>", "<");
134             String Tqxx1 = subString(strLine, "-", "</");
135             Tqxx1 = Tqxx1.substring(8);
136             Tqxx = Tqxx + "~" + Tqxx1;
137             sb = sb.append(Tqxx + ",");
138         }
139         return sb.toString();
140
141     }
142
143     /*
144      * 返回在 strSourc 的 strStart ,strEnd 之间的字符串
145      *
146      */
147     private String subString(String strSource, String strStart, String strEnd) {
148         strSource = strSource.toUpperCase();
149         strStart = strStart.toUpperCase();
150         strEnd = strEnd.toUpperCase();
151         int intStart = strSource.indexOf(strStart);
152         int intEnd = strSource.indexOf(strEnd, intStart);
153         String strRetu = " ";
154         if (intStart == -1)
155             return strRetu;
156         if ((intEnd != -1) && (intEnd > intStart)) {
157             strRetu = strSource.substring(intStart + strStart.length(), intEnd);
158         } else {
159             strRetu = strSource.substring(intStart + strStart.length());
160         }
161         return strRetu.trim();
162     }
163
164     public ParseHtml() {
165
166     }
167
168     public static void main(String args[]) {
169         ParseHtml p = new ParseHtml();
170         p.run();
171     }
172 }
173

posted on 2007-04-30 09:47 reeve 阅读(1563) 评论(1) 编辑收藏

# re: 解析网址提取天气信息[未登录] 2007-04-30 09:59 samuel

呵呵！早就在3年前玩过了。。。你可以查看httpclient开源项目。。回复更多评论

新用户注册刷新评论列表


只有注册用户登录后才能发表评论。




网站导航: 博客园 IT新闻知识库 C++博客博问管理

# re: 解析网址提取天气信息[未登录] 2007-04-30 09:59 samuel

Life is like a boat

解析网址提取天气信息

评论

导航

统计

常用链接

留言簿(2)

随笔档案

搜索

最新评论

阅读排行榜

评论排行榜