package com.roadway.test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads a web page over HTTP and lists the absolute {@code http://} image URLs
 * (jpg, png, gif) referenced by {@code src} or {@code background} attributes in it.
 */
public class TeskSRC {

    /** Page scanned by {@link #main(String[])} when no URL is given on the command line. */
    private static final String DEFAULT_PAGE_URL = "http://www.163.com";

    /** Size, in bytes, of the chunks read from the response stream. */
    private static final int READ_BUFFER_SIZE = 512;

    // Matches src=/background= attributes (all-lowercase or all-uppercase name) whose quoted
    // value is an absolute http:// URL ending in .jpg/.png/.gif. Capture group 3 is the URL.
    // The leading (?x) turns on comments mode; the pattern contains no whitespace or '#',
    // so the flag does not change what is matched.
    //
    // Variant for site-relative paths, kept for reference:
    // "(?x)(src|SRC|background|BACKGROUND)=('|\")/?(([\\w-]+/)*([\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")"
    private static final Pattern IMAGE_URL_PATTERN = Pattern.compile(
            "(?x)(src|SRC|background|BACKGROUND)=('|\")(http://([\\w-]+\\.)+[\\w-]+(:[0-9]+)*(/[\\w-]+)*(/[\\w-]+\\.(jpg|JPG|png|PNG|gif|GIF)))('|\")");

    /** Index of the capture group in {@link #IMAGE_URL_PATTERN} that holds the image URL. */
    private static final int URL_GROUP = 3;

    /**
     * Downloads the body of the given HTTP URL and returns it as text.
     *
     * <p>This method is best-effort and never throws: when the URL is {@code null}, malformed,
     * not an HTTP(S) URL, or the transfer fails, the problem is reported on {@code System.err}
     * and whatever was read up to that point (possibly the empty string) is returned.
     *
     * <p>The bytes are decoded once, after the download, so multi-byte characters that span
     * two read chunks are not corrupted. The charset named in the response's
     * {@code Content-Type} header is used when present and supported; otherwise the
     * platform default charset is used.
     *
     * @param httpUrl the address of the page to download
     * @return the decoded response body, or {@code ""} if nothing could be read; never {@code null}
     */
    public String getHtmlCode(String httpUrl) {
        if (httpUrl == null) {
            return "";
        }

        ByteArrayOutputStream body = new ByteArrayOutputStream();
        Charset charset = Charset.defaultCharset();
        HttpURLConnection connection = null;
        try {
            URLConnection opened = new URL(httpUrl).openConnection();
            if (!(opened instanceof HttpURLConnection)) {
                // e.g. file: or jar: URLs — only HTTP(S) pages are supported here.
                return "";
            }
            connection = (HttpURLConnection) opened;
            connection.setRequestProperty("User-Agent", "Mozilla/4.0");
            connection.connect();
            charset = charsetOf(connection.getContentType(), charset);

            try (InputStream in = connection.getInputStream()) {
                byte[] buffer = new byte[READ_BUFFER_SIZE];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    body.write(buffer, 0, length);
                }
            }
        } catch (IOException | RuntimeException e) {
            // Callers rely on this method never throwing, so unchecked failures are handled
            // here too; report the problem instead of silently discarding it.
            System.err.println("Failed to download " + httpUrl + ": " + e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
        return new String(body.toByteArray(), charset);
    }

    /**
     * Extracts every image URL matched by {@link #IMAGE_URL_PATTERN} from the given HTML.
     *
     * @param html the markup to scan; {@code null} is treated as empty
     * @return the matched URLs in order of appearance (duplicates included); never {@code null}
     */
    public static List<String> findImageUrls(String html) {
        List<String> urls = new ArrayList<>();
        if (html == null) {
            return urls;
        }
        Matcher matcher = IMAGE_URL_PATTERN.matcher(html);
        while (matcher.find()) {
            urls.add(matcher.group(URL_GROUP));
        }
        return urls;
    }

    /**
     * Picks the charset named by the {@code charset=} parameter of a Content-Type value.
     *
     * @param contentType the raw Content-Type header value, possibly {@code null}
     * @param fallback the charset to use when none is declared or the declared one is unusable
     * @return the declared charset if it is present and supported, otherwise {@code fallback}
     */
    private static Charset charsetOf(String contentType, Charset fallback) {
        if (contentType == null) {
            return fallback;
        }
        final String key = "charset=";
        for (String parameter : contentType.split(";")) {
            String trimmed = parameter.trim();
            if (!trimmed.regionMatches(true, 0, key, 0, key.length())) {
                continue;
            }
            String name = trimmed.substring(key.length()).trim();
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                return Charset.forName(name);
            } catch (IllegalArgumentException e) {
                // Illegal or unsupported charset name: fall back rather than fail the download.
                return fallback;
            }
        }
        return fallback;
    }

    /**
     * Prints, one per line, the image URLs found on a page.
     *
     * @param args optional; {@code args[0]} is the page URL to scan, defaulting to
     *             {@value #DEFAULT_PAGE_URL} when absent
     */
    public static void main(String[] args) {
        String pageUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE_URL;
        String content = new TeskSRC().getHtmlCode(pageUrl);
        for (String imageUrl : findImageUrls(content)) {
            System.out.println(imageUrl);
        }
    }
}