Posted on 2008-08-09 16:21
left 阅读(403)
评论(0) 编辑 收藏 所属分类:
python
1 # for 金牌榜
2 import HTMLParser
3
class MyHTMLParser(HTMLParser.HTMLParser):
    """Print the cell texts of each table row as one space-separated line.

    Collection is switched on by a ``<td>`` start tag that directly follows
    a ``<tr>`` start tag, and switched off by a ``</tr>`` end tag that
    directly follows a ``</td>`` end tag.  When it is switched off, the
    collected texts are printed and the list is cleared.
    """

    def reset(self):
        """Initialise the row-tracking state, then reset the base parser.

        The base class documents that ``reset()`` is called implicitly at
        instantiation time, so these attributes exist on every new parser.
        """
        # Parenthesised single-argument print behaves the same whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("reset")  # trace output kept from the original listing
        self._xp = ''     # name of the most recently seen start tag
        self._xq = ''     # name of the most recently seen end tag
        self._xs = False  # True while text is being collected into tds
        self.tds = []     # stripped texts collected for the current row
        HTMLParser.HTMLParser.reset(self)

    def handle_starttag(self, tag, attrs):
        """Begin collecting when a ``<td>`` directly follows a ``<tr>``."""
        if tag == 'td' and self._xp == 'tr':
            self._xs = True
        # This must be an assignment (not a comparison): it records the
        # current tag so the next call can see what preceded it.
        self._xp = tag

    def handle_endtag(self, tag):
        """Print and clear the row when ``</tr>`` directly follows ``</td>``."""
        if tag == 'tr' and self._xq == 'td':
            self._xs = False
            print(" ".join(self.tds))
            self.tds = []
        # Remember this end tag for the next call's comparison.
        self._xq = tag

    def handle_data(self, data):
        """Collect stripped text while collection is switched on.

        Whitespace-only text (for example the line breaks between
        ``</td>`` and the next ``<td>``) is skipped so that it does not
        show up as extra spaces in the printed row.
        """
        if not self._xs:
            return
        text = data.strip()
        if text:
            self.tds.append(text)
41
42
43
class MedalPage(webapp.RequestHandler):
    """Request handler that fetches the medal-table page and parses it."""

    def get(self):
        """Fetch the medal-table page and feed its body to MyHTMLParser.

        Nothing further happens unless the fetch reports status code 200.
        """
        response = urlfetch.fetch('http://data2008.sports.tom.com/medaltop-1-1.shtml', method="GET")
        if response.status_code != 200:
            return

        # The parser prints each completed table row while it is being fed.
        html_parser = MyHTMLParser()
        html_parser.feed(response.content)
        html_parser.close()
21 行本应是 self._xp = tag，却写成了 self._xp == tag
33 行本应是 self._xq = tag，却写成了 self._xq == tag
好久不弄python了 5555555555555555555555555555555555555555555555555
浪费了一下午