private interface Filter { void doFilter(Task fatherTask, Task newTask, String path, Filter chain); } private class FilterChain implements Filter { private List<Filter> list = new ArrayList<Filter>(); { addFilter(new TwoLevel()); addFilter(new OneLevel()); addFilter(new FullPath()); addFilter(new Root()); addFilter(new Default()); } private void addFilter(Filter filter) { list.add(filter); } private Iterator<Filter> it = list.iterator(); @Override public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) { if (it.hasNext()) { it.next().doFilter(fatherTask, newTask, path, chain); } } } private class TwoLevel implements Filter { @Override public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) { if (path.startsWith("../../")) { String prefix = getPrefix(fatherTask.getCurrentPath(), 3); newTask.init(fatherTask.getHost(), fatherTask.getPort(), path.replace("../../", prefix)); } else { chain.doFilter(fatherTask, newTask, path, chain); } } } private class OneLevel implements Filter { @Override public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) { if (path.startsWith("../")) { String prefix = getPrefix(fatherTask.getCurrentPath(), 2); newTask.init(fatherTask.getHost(), fatherTask.getPort(), path.replace("../", prefix)); } else { chain.doFilter(fatherTask, newTask, path, chain); } } } private class FullPath implements Filter { @Override public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) { if (path.startsWith("http://")) { Iterator<String> it = domainlist.iterator(); boolean flag = false; while (it.hasNext()) { String domain = it.next(); if (path.startsWith("http://" + domain + "/")) { newTask.init(domain, fatherTask.getPort(), path.replace("http://" + domain + "/", "/")); flag = true; break; } } if (!flag) { newTask = null; } } else { chain.doFilter(fatherTask, newTask, path, chain); } } } private class Root implements Filter { @Override public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) { if (path.startsWith("/")) { newTask.init(fatherTask.getHost(), fatherTask.getPort(), path); } else { chain.doFilter(fatherTask, newTask, path, chain); } } } private class Default implements Filter { @Override public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) { String prefix = getPrefix(fatherTask.getCurrentPath(), 1); newTask.init(fatherTask.getHost(), fatherTask.getPort(), prefix + "/" + path); } } public ParseHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist, BlockingQueue<Task> persistencelist, List<String> domainlist) { this.connectlist = connectlist; this.parselist = parselist; this.persistencelist = persistencelist; this.domainlist = domainlist; } private Pattern pattern = Pattern.compile("\"[^\"]+\\.htm[^\"]*\""); private void handler() { try { Task task = parselist.take(); parseTaskState(task); if (200 == task.getState()) { Matcher matcher = pattern.matcher(task.getContent()); while (matcher.find()) { String path = matcher.group(); if (!path.contains(" ") && !path.contains("\t") && !path.contains("(") && !path.contains(")") && !path.contains(":")) { path = path.substring(1, path.length() - 1); if (!SET.contains(path)) { SET.add(path); createNewTask(task, path); } } } } task.setContent(null); persistencelist.put(task); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } private void parseTaskState(Task task) { if (task.getContent().startsWith("HTTP/1.1")) { task.setState(Integer.parseInt(task.getContent().substring(9, 12))); } else { task.setState(Integer.parseInt(task.getContent().substring(19, 22))); } } /** * @param fatherTask * @param path * @throws Exception */ private void createNewTask(Task fatherTask, String path) throws Exception { Task newTask = new Task(); FilterChain filterchain = new FilterChain(); filterchain.doFilter(fatherTask, newTask, path, filterchain); if (newTask != null) { connectlist.put(newTask); } } private String getPrefix(String s, int count) { String prefix = s; while (count > 0) { prefix = prefix.substring(0, prefix.lastIndexOf("/")); count--; } return "".equals(prefix) ? "/" : prefix; } @Override public void run() { while (true) { this.handler(); COUNT.addAndGet(1); } } } class ConnectHandler implements Runnable { public static int GETCOUNT() { return COUNT.get(); } private static final AtomicInteger COUNT = new AtomicInteger(); private BlockingQueue<Task> connectlist; private BlockingQueue<Task> parselist; public ConnectHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist) { this.connectlist = connectlist; this.parselist = parselist; } private void handler() { try { Task task = connectlist.take(); long start = System.currentTimeMillis(); getHtml(task); long end = System.currentTimeMillis(); task.setTaskTime(end - start); parselist.put(task); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } private void getHtml(Task task) throws Exception { StringBuilder sb = new StringBuilder(2048); InetAddress addr = InetAddress.getByName(task.getHost()); // 建立一个Socket Socket socket = new Socket(addr, task.getPort()); // 发送命令,无非就是在Socket发送流的基础上加多一些握手信息,详情请了解HTTP协议 BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream(), "UTF-8")); wr.write("GET " + task.getCurrentPath() + " HTTP/1.0\r\n"); wr.write("HOST:" + task.getHost() + "\r\n"); wr.write("Accept:*/*\r\n"); wr.write("\r\n"); wr.flush(); // 接收Socket返回的结果,并打印出来 BufferedReader rd = new BufferedReader(new InputStreamReader(socket.getInputStream())); String line; while ((line = rd.readLine()) != null) { sb.append(line); } wr.close(); rd.close(); task.setContent(sb.toString()); socket.close(); } @Override public void run() { while (true) { this.handler(); COUNT.addAndGet(1); } } } class PersistenceHandler implements Runnable { static { try { Class.forName("oracle.jdbc.OracleDriver"); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static int GETCOUNT() { return COUNT.get(); } private static final AtomicInteger COUNT = new AtomicInteger(); private BlockingQueue<Task> persistencelist; public PersistenceHandler(BlockingQueue<Task> persistencelist) { this.persistencelist = persistencelist; try { conn = DriverManager.getConnection("jdbc:oracle:thin:127.0.0.1:1521:orcl", "edmond", "edmond"); ps = conn .prepareStatement("insert into probe(id,host,path,state,tasktime,type) values(seq_probe_id.nextval,?,?,?,?,?)"); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private Connection conn; private PreparedStatement ps; @Override public void run() { while (true) { this.handler(); COUNT.addAndGet(1); } } private void handler() { try { Task task = persistencelist.take(); ps.setString(1, task.getHost()); ps.setString(2, task.getCurrentPath()); ps.setInt(3, task.getState()); ps.setLong(4, task.getTaskTime()); ps.setString(5, task.getType()); ps.executeUpdate(); conn.commit(); } catch (InterruptedException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } } } |