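/**
This small program scans a web page for e-mail addresses using a regular expression.
Usage:
1. java MailMatch // Scans http://www.bnu.edu.cn (the Normal University home page), which is the default.
2. java MailMatch http://www.pku.edu.cn // Scans the page at the URL given as the argument. Note that the URL must be written out in full, including the http://www prefix.
@version 0.1
@author WangBNU helloquota@163.com WangBNU@gmail.com
http://bnulinux.yeah.net
*/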
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
/**
 * Downloads the content behind a URL and prints every e-mail address found in
 * it to standard output, one address per line.
 */
public class MailMatch {

    /** URL scanned when no command-line argument is supplied. */
    private static final String DEFAULT_URL = "http://www.bnu.edu.cn";

    /**
     * E-mail pattern, compiled once and shared by all calls.
     *
     * <p>Local part: one or more atoms of word characters, {@code +} or
     * {@code -}, separated by single dots. Domain: one or more labels of word
     * characters or {@code -}, followed by a dot and an alphabetic top-level
     * domain of at least two letters. This is a pragmatic scanner pattern, not
     * a full RFC 5322 validator.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w+-]+(?:\\.[\\w+-]+)*@[\\w-]+(?:\\.[\\w-]+)*\\.[A-Za-z]{2,}");

    /** Number of characters read from the stream per call. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Extracts every e-mail address from the given text.
     *
     * @param text the text to scan; {@code null} is treated as empty
     * @return the matched addresses in order of appearance (duplicates are
     *         kept); never {@code null}
     */
    public static List<String> findEmails(CharSequence text) {
        List<String> addresses = new ArrayList<>();
        if (text == null) {
            return addresses;
        }
        Matcher matcher = EMAIL_PATTERN.matcher(text);
        while (matcher.find()) {
            addresses.add(matcher.group());
        }
        return addresses;
    }

    /**
     * Reads the whole resource behind {@code url} and prints each e-mail
     * address it contains to standard output.
     *
     * <p>If the resource cannot be opened or read, a message is written to
     * standard error and nothing is printed to standard output.
     *
     * @param url  the location to scan
     * @param file currently unused; kept so existing callers keep compiling
     */
    public void mailMatch(URL url, File file) {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the stream even when reading fails.
        // The charset is fixed so results do not depend on the platform default.
        // NOTE(review): pages served in another encoding will have their
        // non-ASCII text decoded incorrectly - consider honoring the
        // Content-Type charset.
        try (Reader in = new InputStreamReader(url.openStream(), StandardCharsets.UTF_8)) {
            char[] buffer = new char[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
        } catch (IOException exception) {
            System.err.println("Could not read " + url + ": " + exception);
            return;
        }
        for (String address : findEmails(content)) {
            System.out.println(address);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the URL to scan. When absent,
     *             the default URL is scanned.
     */
    public static void main(String[] args) {
        String urlName = args.length > 0 ? args[0] : DEFAULT_URL;
        try {
            URL url = new URL(urlName);
            // The file argument is not used by mailMatch yet.
            File file = new File("test.txt");
            new MailMatch().mailMatch(url, file);
        } catch (MalformedURLException e) {
            System.err.println(e);
        }
    }
}