本文采用 HttpClient 来模拟站点的登录、发帖和回复,介绍 HttpClient 的用法和常见问题的解决方案。 HttpClient 是 Apache Jakarta Commons 下的子项目,是一个支持 HTTP 协议的客户端编程工具包,可以用来模拟浏览器的行为。虽然它的大部分功能也可以使用较底层的 java.net.HttpURLConnection 来实现,但它提供了很多方法来简化网络的访问。例如:
- 实现了所有 HTTP 的方法( GET、POST 等)
- 支持 HTTPS 协议
- 支持代理服务器
- 自动维护 Cookies 等
我们知道, HTTP 协议是无状态的,要维持会话,现在基本上都是采用基于 Cookies 的方式( Session 机制通常也是通过 Cookies 实现的),所以 HttpClient 自动维护 Cookies 的功能对我们的登录、发帖和回复非常有用(一般网站都需要先登录再发帖回复)。 下面的例子都是采用 commons-httpclient-3.1.jar 包来实现的(虽然 commons-httpclient-4.0 已经发布,但是代码发生了较大的重构,调用方式也发生了很大的改变)。 下载 jar 包的路径为: http://hc.apache.org/downloads.cgi 由于 httpclient 使用了 Apache Jakarta Commons 下的子项目 logging 和 codec ,所以也需要在 http://commons.apache.org/ 下载这两个包: commons-logging.jar commons-codec-1.3.jar 为了更好地理解代码,设计的 UML 类图如下: 方法调用的时序图如下: 其中, BrowserContext 类代表浏览器上下文对象,维护 HttpClient 链接和 Cookies 。 KaixinSitePost 是负责实现开心网的具体登录发帖回复逻辑的类。 BrowserContext 的代码如下:
/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 24, 2009 3:09:00 PM
 */

/**
 * Browser process context.
 *
 * Holds the HttpClient used to talk to one site, together with the cookies,
 * proxy and logged-in user that belong to that conversation.
 */
public class BrowserContext
{
    private HttpClient client; // NOTE: each site and each user gets its own BrowserContext
    private Cookie[] cookies = new Cookie[0]; // cookies carried from one request to the next
    private Proxyips proxyip = null; // proxy currently in use
    private Siteusers user = null; // user currently logged in

    public Cookie[] getCookies() {
        return cookies;
    }

    public void setCookies(Cookie[] cookies) {
        this.cookies = cookies;
    }

    /**
     * Appends one cookie to the cookie array kept by this context.
     *
     * @param c the cookie to append
     */
    public void addCookie(Cookie c) {
        int existing = (cookies == null) ? 0 : cookies.length;
        Cookie[] grown = new Cookie[existing + 1];
        if (existing > 0) {
            System.arraycopy(cookies, 0, grown, 0, existing);
        }
        grown[existing] = c;
        cookies = grown;
    }

    public Proxyips getProxyip() {
        return proxyip;
    }

    /**
     * Stores the proxy and, when it is not null, configures the client to use it.
     *
     * @param proxyip the proxy to use, or null to only clear the stored value
     */
    public void setProxyip(Proxyips proxyip) {
        this.proxyip = proxyip;
        if (this.proxyip != null) {
            client.getHostConfiguration().setProxy(proxyip.getIp(), proxyip.getPort());
            client.getParams().setAuthenticationPreemptive(true);
            // If the proxy requires authentication, set the user name and password here:
            // client.getState().setProxyCredentials(AuthScope.ANY, new UsernamePasswordCredentials("", ""));
        }
    }

    public HttpClient getClient() {
        return client;
    }

    public Siteusers getUser() {
        return user;
    }

    public void setUser(Siteusers user) {
        this.user = user;
    }

    /**
     * Creates the client, registers the custom https protocol and points the
     * client at the given site.
     *
     * @param site supplies the host, port and page charset of the target site
     */
    private BrowserContext(Site site) {
        Protocol myhttps = new Protocol("https", new MySecureProtocolSocketFactory(), 443);
        Protocol.registerProtocol("https", myhttps);
        client = new HttpClient();
        client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        // Optional timeouts, left disabled as in the original listing:
        // connection timeout in milliseconds
        // client.getHttpConnectionManager().getParams().setConnectionTimeout(50000);
        // read (socket) timeout in milliseconds
        // client.getHttpConnectionManager().getParams().setSoTimeout(120000);
        initForSiteVisit(site.getSite(), site.getPort(), site.getCharset());
    }

    /**
     * Creates a context for the given site and applies the given proxy.
     *
     * @param site    supplies the host, port and page charset of the target site
     * @param proxyip the proxy to use, or null for a direct connection
     */
    public BrowserContext(Site site, Proxyips proxyip) {
        this(site);
        this.setProxyip(proxyip);
    }

    private void initForSiteVisit(String siteurl, int port, String charset) {
        client.getHostConfiguration().setHost(siteurl, port, "http");
        // Avoid garbled Chinese text: use the same charset as the site's pages.
        client.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset);
    }

    /** Prints the name and value of every cookie held by this context. */
    public void printCookies()
    {
        System.out.println("---------------Cookie----------------");
        if (cookies != null) {
            for (Cookie c : cookies) {
                System.out.println(c.getName() + ":" + c.getValue());
            }
        } else {
            System.out.println("没有设置Cookies");
        }
        System.out.println("---------------Cookie----------------");
    }

    /**
     * Sets the request headers shared by every request issued through this context.
     *
     * @param method the request to decorate
     */
    public void setCommonMethodRequestHeaders(HttpMethodBase method)
    {
        method.setRequestHeader("Accept", "*/*");
        // method.setRequestHeader("Accept-Language", "zh-cn");
        // method.setRequestHeader("Accept-Encoding", "gzip,deflate");
        method.setRequestHeader("User-Agent", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;)");
        // The original author marks this header as very important.
        method.setRequestHeader("Connection", "Keep-Alive");
    }

    /**
     * Fetches the given redirect target with a GET request.
     *
     * This is best-effort: an I/O failure is reported on the console and
     * null is returned instead of propagating the exception.
     *
     * @param url the redirect target; may be null
     * @return the body of the target page, or null when url is null or the request failed
     * @throws IOException declared for compatibility with existing callers
     */
    public String redirectToURL(String url) throws IOException
    {
        if (url == null) {
            System.out.println("redirect url is null");
            return null;
        }
        try {
            System.out.println("页面重定向到:" + url);
            return this.doCommonVisitWithURL(url);
        } catch (IOException e) {
            System.out.println("重定向:" + url + "出错");
            e.printStackTrace(); // keep the cause visible instead of dropping it
            return null;
        }
    }

    /**
     * Issues a plain GET request for the given URL.
     *
     * @param url the URL to fetch
     * @return the response body as returned by {@link #doGet(GetMethod)}
     * @throws IOException if the request fails
     */
    public String doCommonVisitWithURL(String url) throws IOException {
        GetMethod get = new GetMethod(url);
        return this.doGet(get);
    }

    /**
     * Executes a POST-style request and follows a 301/302 redirect if one is returned.
     *
     * @param post the request to execute; may be null
     * @return null when post is null; otherwise the body of the redirected page
     *         when a redirect was followed successfully, else the body of this response
     * @throws IOException if executing the request or reading the body fails
     */
    public String doPost(ExpectContinueMethod post) throws IOException
    {
        if (post == null) {
            return null;
        }
        try {
            pushCookiesToState();
            setCommonMethodRequestHeaders(post);
            int statusCode = client.executeMethod(post);
            cookies = client.getState().getCookies(); // keep the cookies the server just set
            System.out.println(statusCode);
            String responseString = post.getResponseBodyAsString();
            System.out.println(responseString);
            printCookies();
            return finishWithRedirect(post, statusCode, responseString);
        } finally {
            post.releaseConnection();
        }
    }

    /**
     * Executes a GET request and follows a 301/302 redirect if one is returned.
     *
     * @param get the request to execute; may be null
     * @return null when get is null; otherwise the body of the redirected page
     *         when a redirect was followed successfully, else the body of this response
     * @throws IOException if executing the request or reading the body fails
     */
    public String doGet(GetMethod get) throws IOException
    {
        if (get == null) {
            return null;
        }
        try {
            pushCookiesToState();
            setCommonMethodRequestHeaders(get);
            int statusCode = client.executeMethod(get);
            cookies = client.getState().getCookies(); // save the cookies again
            printCookies();
            System.out.println(statusCode);
            String responseString = get.getResponseBodyAsString();
            return finishWithRedirect(get, statusCode, responseString);
        } finally {
            get.releaseConnection();
        }
    }

    /**
     * Copies the cookies held by this context into the client's state.
     *
     * No "Cookie" header is set by hand: the previous code passed
     * Cookie[].toString(), which is the array's identity string rather than
     * the cookie values.
     */
    private void pushCookiesToState() {
        if (cookies != null) {
            client.getState().addCookies(cookies);
        }
    }

    /**
     * Releases the finished request and, for a 301/302 status, fetches the
     * page named by its Location header.
     *
     * The Location value is read and the connection released before the
     * follow-up request is issued, so the two requests never overlap.
     *
     * @param method     the request that has just been executed
     * @param statusCode the status code it returned
     * @param body       the response body already read from method
     * @return the redirected page body when available, otherwise body
     */
    private String finishWithRedirect(HttpMethodBase method, int statusCode, String body) throws IOException {
        boolean redirected = statusCode == 301 || statusCode == 302;
        String location = null;
        if (redirected && method.getResponseHeader("Location") != null) {
            location = method.getResponseHeader("Location").getValue();
        }
        method.releaseConnection();
        if (!redirected) {
            return body;
        }
        // redirectToURL reports a missing (null) location itself.
        String redirectedBody = redirectToURL(location);
        return redirectedBody != null ? redirectedBody : body;
    }

    /**
     * Extracts the target of a JavaScript redirect of the form
     * window.location="..." from a page.
     *
     * @param content the page content; may be null
     * @return the quoted URL, or null when content is null, contains no such
     *         assignment, or the closing quote is missing
     */
    public String getRedirectURL(String content)
    {
        final String marker = "window.location=\"";
        if (content == null) {
            return null;
        }
        int begin = content.indexOf(marker);
        if (begin == -1) {
            return null;
        }
        int start = begin + marker.length();
        int end = content.indexOf("\"", start);
        if (end == -1) {
            return null; // unterminated string: nothing reliable to return
        }
        return content.substring(start, end);
    }
}

KaixinSitePost类的代码:

/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 14, 2009 11:16:32 AM
 */

/**
 * Simulated test site (no CAPTCHA required):
 * Kaixin (www.kaixin.com)
 */
public class KaixinSitePost implements ISitePost
{
    private static final String LOGON_SITE = "www.kaixin.com";
    private static final int LOGON_PORT = 80;
    private static final String CHARSET = "UTF-8";
    private BrowserContext context = null;

    /**
     * Creates an instance without a browser context.
     *
     * NOTE(review): the original listing never assigns the context, so login
     * and post cannot work on an instance created this way; use
     * {@link #KaixinSitePost(BrowserContext)} instead.
     */
    public KaixinSitePost() {
        this(null);
    }

    /**
     * Creates an instance that sends its requests through the given context.
     *
     * @param context the browser context to use
     */
    public KaixinSitePost(BrowserContext context) {
        this.context = context;
    }

    /** Returns the context, failing with a clear message instead of a NullPointerException. */
    private BrowserContext requireContext() {
        if (context == null) {
            throw new IllegalStateException(
                "BrowserContext has not been set; construct KaixinSitePost with a BrowserContext");
        }
        return context;
    }

    /**
     * Logs a single user in.
     *
     * @param userinfo the account to log in with; may be null
     * @param ip       the proxy to use, or null to keep the context's current setting
     * @return the result of SiteLogin.login, or null when userinfo is null
     * @throws IllegalStateException if no browser context has been supplied
     */
    public String login(Siteusers userinfo, Proxyips ip)
    {
        if (userinfo == null) {
            return null;
        }
        SiteLogin login = new SiteLogin(requireContext(), "http://login.kaixin.com/Login.do");
        if (ip != null) {
            login.getContext().setProxyip(ip);
        }
        Map<String,String> params = new HashMap<String,String>();
        params.put("ss", "10106");
        params.put("loginregFrom", "index");
        params.put("origURL", "http://www.kaixin.com/SysHome.do");
        params.put("email", userinfo.getUsername());
        params.put("password", userinfo.getUserpwd());
        login.addRequestParameters(params);
        return login.login(userinfo);
    }

    /** Not implemented in this example; always returns null. */
    public List<Siteboards> parseBoard(Siteboards data) {
        return null;
    }

    /**
     * Submits the post once for every board in the list.
     *
     * @param postinfo   the title and content to publish; may be null
     * @param siteboards the boards to publish to; may be null
     * @return always null
     * @throws IllegalStateException if no browser context has been supplied
     */
    public String post(Postinfos postinfo, List<Siteboards> siteboards)
    {
        if (postinfo != null && siteboards != null) {
            BrowserContext ctx = requireContext();
            SitePost sport = new SitePost(ctx);
            ctx.getClient().getHostConfiguration().setHost("blog.kaixin.com");
            Map<String,String> params = new HashMap<String,String>();
            params.put("categoryId", "0");
            params.put("blogControl", "1");
            params.put("title", postinfo.getTitle());
            params.put("body", postinfo.getContent());
            sport.addRequestParameters(params);

            for (Siteboards sb : siteboards) {
                sb.setPostUrl("http://blog.kaixin.com/NewEntry.do");
                try {
                    sport.post(postinfo, sb);
                } catch (IOException e) {
                    // A failure on one board does not stop the remaining boards.
                    e.printStackTrace();
                }
            }
        }
        return null;
    }

    /** Not implemented in this example; always returns null. */
    public String reply(Postinfos postinfo, List<Articleinfos> arts)
    {
        return null;
    }

    /**
     * Demo entry point: logs in through a proxy and submits one post to two boards.
     *
     * NOTE(review): this still uses the no-argument constructor, as the
     * original listing does, so the calls below report the missing browser
     * context until one is supplied.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        try {
            Siteusers userinfo = new Siteusers();
            userinfo.setUsername("xxxx");
            userinfo.setUserpwd("xxxx");
            Proxyips ips = new Proxyips();
            ips.setIp("218.56.64.210");
            ips.setPort(8080);
            KaixinSitePost sp = new KaixinSitePost();
            sp.login(userinfo, ips);
            Postinfos post = new Postinfos();
            post.setContent("<p>lllllllllllllllllllllll</p>");
            post.setTitle("中文测试");
            List<Siteboards> siteboards = new ArrayList<Siteboards>();
            siteboards.add(new Siteboards());
            siteboards.add(new Siteboards());
            sp.post(post, siteboards);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

封装登录的类SiteLogin:

/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 24, 2009 3:03:00 PM
 */

/**
 * Site login.
 */
public class SiteLogin extends AbstractMethodAdapter
{
    private HttpMethodBase method;
    private boolean ispost = true;
    protected BrowserContext context; // the current browser process context

    public BrowserContext getContext() {
        return context;
    }

    /**
     * Constructor.
     *
     * @param context the browser context the login request is sent through
     * @param url     the login URL
     * @param ispost  whether to submit with POST (true) or GET (false)
     */
    public SiteLogin(BrowserContext context, String url, boolean ispost) {
        super();
        this.context = context;
        this.ispost = ispost;
        method = this.ispost ? new PostMethod(url) : new GetMethod(url);
    }

    /**
     * Constructor that submits with POST.
     *
     * @param context the browser context the login request is sent through
     * @param url     the login URL
     */
    public SiteLogin(BrowserContext context, String url) {
        this(context, url, true);
    }

    /**
     * Sends the login request.
     *
     * @param user the user being logged in (not read by this method)
     * @return the response body for status 200; the redirected page for
     *         status 301/302; null for any other status or when an exception occurs
     */
    public String login(Siteusers user)
    {
        int statusCode = 0;
        if (this.ispost && this.hasRequestParameters()) {
            ((PostMethod) method).setRequestBody(this.getRequestParams());
        }

        if (this.hasExtraRequestHeaders()) {
            this.addExtraRequestHeaders(method, this.getExtraRequestHeaders());
        }
        context.setCommonMethodRequestHeaders(method);
        try {
            if (context.getCookies() != null) {
                // Cookies go into the client state only. The previous code also set a
                // "Cookie" header to Cookie[].toString(), which is the array's
                // identity string rather than the cookie values.
                context.getClient().getState().addCookies(context.getCookies());
            }
            statusCode = context.getClient().executeMethod(method);
            context.setCookies(context.getClient().getState().getCookies());
            String responseString = method.getResponseBodyAsString();
            // Read Location before releasing the connection; it may be absent.
            String location = null;
            if (method.getResponseHeader("Location") != null) {
                location = method.getResponseHeader("Location").getValue();
            }
            method.releaseConnection();
            if (statusCode == HttpStatus.SC_OK) {
                System.out.println("登录成功");
                return responseString;
            } else if (statusCode == 302 || statusCode == 301) {
                System.out.println("登录成功,页面重定向");
                return context.redirectToURL(location);
            } else {
                System.out.println("登录失败,状态码:" + statusCode);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (method != null) {
                method.releaseConnection();
            }
        }
        return null;
    }
}

封装站点发帖的类SitePost:

/**
 * Copyright (C): 2009
 * @author 陈新汉
 * Aug 24, 2009 5:05:55 PM
 */

/**
 * Submits a new post to a site.
 */
public class SitePost extends CommonSitePost
{

    public SitePost(BrowserContext context) {
        super();
        this.context = context;
    }

    /**
     * Posts to the board's submit URL using the request parameters and extra
     * headers previously added to this object.
     *
     * @param postinfo  the post being submitted; only checked for null here
     * @param siteboard the board whose post URL receives the request
     * @return the response returned by BrowserContext.doPost, or null when an
     *         argument is null or the board has no post URL
     * @throws IOException if the request fails
     */
    public String post(Postinfos postinfo, Siteboards siteboard) throws IOException
    {
        if (postinfo == null || siteboard == null) {
            System.out.println("帖子或者版面信息输入都不能为空");
            return null;
        }
        if (!StringUtils.isNotEmpty(siteboard.getPostUrl())) {
            System.out.println("版面的新帖提交地址不能为空!");
            return null;
        }
        PostMethod post = new PostMethod(siteboard.getPostUrl());
        if (this.hasRequestParameters()) {
            post.setRequestBody(this.getRequestParams());
        }
        if (this.hasExtraRequestHeaders()) {
            this.addExtraRequestHeaders(post, this.getExtraRequestHeaders());
        }
        // doPost applies the common request headers itself.
        return this.context.doPost(post);
    }
}

友情提醒:本博文章欢迎转载,但请注明出处: 陈新汉
|