页面上的 JS 链接点击不了,所以写了个 Python 脚本:先用 wget 把网页抓下来,再把其中的 ed2k 链接提取出来,整到 BT 上面去吧:
#!/usr/bin/env python
# encoding: utf-8
"""
get-algorithm.py
Created by <zhkzyth@gmail.com> on 6 22, 2013
"""
from BeautifulSoup import BeautifulSoup
import re
import codecs
def main(src='data.html', dst='tmp'):
    """Extract every ed2k link from a saved HTML page into a text file.

    Reads ``src`` as UTF-8, collects the ``href`` of each ``<a>`` tag whose
    href matches ``^ed2k.*``, and writes them to ``dst`` one per line.

    Args:
        src: path of the saved HTML page (default ``'data.html'``).
        dst: path of the output file; it is overwritten if it already
            exists (default ``'tmp'``).
    """
    # Plain read mode is sufficient; 'r+' would also require write
    # permission on a file that is never modified.
    with codecs.open(src, 'r', 'utf-8') as f:
        html = f.read()

    soup = BeautifulSoup(html)
    links = soup.findAll('a', href=re.compile("^ed2k.*"))

    # Keep one list entry per link. Using ``list += str`` here would extend
    # the list character by character.
    lines = [link['href'] + "\n" for link in links]

    with codecs.open(dst, 'w', 'utf-8') as f:
        f.write("".join(lines))
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
回复 更多评论