import java.applet.Applet;
import java.awt.*;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.EmptyStackException;
import java.util.Hashtable;
import java.util.Stack;

/**
 * Applet (also runnable as a standalone {@link Frame} through {@link #main})
 * that walks the links reachable from a user-supplied starting URL.
 *
 * <p>Each page popped from the work stack is scanned line by line for an
 * {@code HREF} and a {@code SRC} attribute. A target on the same host whose
 * link text ends in "/" or contains ".htm" is pushed on the stack to be
 * scanned later; any other same-host target is fetched once and its content
 * discarded. Targets on a different host are only reported. Progress and
 * errors are written to the text area.
 *
 * <p>The class is written against the JDK 1.0/1.1 AWT API (old-style
 * {@code action} event handling, raw {@code Stack}/{@code Hashtable}), and the
 * code below deliberately stays at that language level.
 *
 * <p>NOTE(review): nothing here prevents a second "Start" from launching
 * another crawl thread while one is still running; both threads would share
 * {@code stack}, {@code table} and {@code u}.
 */
public class WebWalker extends Applet implements Runnable {
    /** Input field holding the starting URL. */
    TextField url;
    /** Output area receiving progress and error lines. */
    TextArea text;
    /** URLs of pages still waiting to be scanned. */
    Stack stack;
    /** URLs already visited, stored as key and value. */
    Hashtable table;
    /** {@code u} is the page currently being scanned; {@code u1} is the starting URL. */
    URL u, u1;
    /** The thread most recently started by {@link #checkStart}. */
    Thread thread;

    /**
     * Runs the applet inside a 500x300 frame titled "Web Walker".
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        WebWalker a = new WebWalker();
        Frame f = new Frame("Web Walker");
        f.add(a, BorderLayout.CENTER);
        f.setSize(500, 300);
        a.init();
        a.start();
        f.setVisible(true);
    }

    /**
     * Builds the user interface (a "Start" button and URL field on top, the
     * output area in the centre) and creates the empty work stack and
     * visited table.
     */
    public void init() {
        text = new TextArea();
        setLayout(new BorderLayout());
        add(text, BorderLayout.CENTER);

        Panel top = new Panel();
        top.setLayout(new FlowLayout(FlowLayout.LEFT));
        top.add(new Button("Start"));
        top.add(new Label("Starting URL:"));
        url = new TextField(40);
        top.add(url);
        add(top, BorderLayout.NORTH);

        stack = new Stack();
        table = new Hashtable();
    }

    /**
     * Old-style AWT action handler. An action from any button, or from the
     * URL field itself, clears the visited table and the output area and
     * starts a walk from the URL currently in the field.
     *
     * @param e the action event
     * @param o the event argument (unused)
     * @return {@code true} if the event started a walk, {@code false} otherwise
     */
    public boolean action(Event e, Object o) {
        if (e.target instanceof Button || e.target == url) {
            table.clear();
            text.setText("");
            checkStart(url.getText());
            return true;
        }
        return false;
    }

    /**
     * Parses {@code s} as the starting URL and, if it is valid, pushes it on
     * the work stack and starts a new crawl thread. An invalid URL is
     * reported in the output area and nothing is started.
     *
     * @param s the text typed by the user; surrounding whitespace is ignored
     */
    public void checkStart(String s) {
        // Parse the trimmed text: stray blanks around a pasted URL must not
        // make an otherwise valid URL fail.
        String t = s.trim();
        try {
            u1 = new URL(t);
        } catch (MalformedURLException e) {
            append("===Not a valid Java URL: " + s);
            return;
        }
        stack.push(u1);
        thread = new Thread(this);
        thread.start();
    }

    /**
     * Crawl loop: pops URLs until the stack is empty, skipping any URL that
     * is already in the visited table and scanning every other one with
     * {@link #readStream}.
     */
    public void run() {
        while (true) {
            try {
                // pop() is kept inside the try rather than guarded by empty():
                // it is a single synchronized call, so there is no gap
                // between the emptiness check and the removal.
                u = (URL) stack.pop();
            } catch (EmptyStackException e) {
                append("DONE: stack empty.");
                return;
            }
            if (table.containsKey(u)) {
                continue; // We have already visited here
            }
            table.put(u, u); // Else, add it to the hashed list
            append("Checking: " + u.toString());
            try {
                InputStream in = u.openStream();
                try {
                    readStream(in);
                } finally {
                    in.close(); // release the connection even if scanning fails
                }
            } catch (FileNotFoundException e) {
                append("===File Not Found: " + u.toString());
            } catch (IOException e) {
                append("===IOERROR: " + u.toString() + ": " + e.toString());
            }
        }
    }

    /**
     * Reads {@code is} line by line and hands every line to
     * {@link #parseLine}. The stream is not closed here; that is left to the
     * caller that opened it. An I/O error is reported in the output area.
     *
     * @param is the page content to scan
     */
    public void readStream(InputStream is) {
        try {
            // ISO-8859-1 maps every byte to the character with the same
            // value, so no input can fail to decode and the ASCII attribute
            // names searched for below are unaffected.
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(is, "ISO-8859-1"));
            String nextline;
            while ((nextline = reader.readLine()) != null) {
                parseLine(nextline);
            }
        } catch (IOException e) {
            append("===IOError: " + u.toString() + ": " + e.toString());
        }
    }

    /**
     * Looks for the first "href" and the first "src" (case-insensitively) in
     * a line and passes each hit to {@link #checkURL}. Only one occurrence of
     * each is examined per line.
     *
     * @param s one line of page text
     */
    void parseLine(String s) {
        String lower = s.toLowerCase();
        // >= 0, not > 0: an attribute may start in the first column when a
        // tag is wrapped across lines.
        int start = lower.indexOf("href");
        if (start >= 0) {
            checkURL(s, start);
        }
        start = lower.indexOf("src");
        if (start >= 0) {
            checkURL(s, start);
        }
    }

    /**
     * Extracts the double-quoted link text found at or after {@code start},
     * resolves it against the current page, and then either queues it for
     * scanning, fetches and discards it, or reports why it was skipped.
     *
     * <p>The first pair of double quotes after {@code start} is taken as the
     * link text; this is a plain text search, not an HTML parse, so for an
     * unquoted attribute value a later attribute's quotes can be picked up
     * instead.
     *
     * @param s     the line containing the attribute
     * @param start index in {@code s} at which the attribute name begins
     */
    void checkURL(String s, int start) {
        String name = quotedValue(s, start);
        if (name == null) {
            System.err.println("Quotes unbalanced or extends to next line.");
            return;
        }
        name = stripFragment(name); // Strip off trailing named anchors
        if (name.indexOf('#') == 0) {
            return; // Bare named anchor
        }

        URL u2;
        try {
            if (name.indexOf(":/") > 0) {
                u2 = new URL(name); // name contains protocol
            } else {
                u2 = new URL(u, name); // use context of current URL
            }
        } catch (MalformedURLException e) {
            append("---URL not recognized: " + name);
            return;
        }

        // Pass over this URL if the host has changed. Host names are
        // case-insensitive, so compare them that way.
        if (!u.getHost().equalsIgnoreCase(u2.getHost())) {
            append("+++Not local: " + u2.toString());
            return;
        }

        // If name ends in /, or contains .htm, push u2 on the stack to be
        // scanned later; if not, fetch it now.
        String lower = name.toLowerCase();
        if (name.endsWith("/") || lower.indexOf(".htm") > 0) {
            stack.push(u2);
            return;
        }

        if (table.containsKey(u2)) {
            return; // We have already visited here
        }
        table.put(u2, u2); // Else, add it to the hashed list
        try {
            InputStream in = u2.openStream();
            try {
                // Just read the file. End of stream is -1; a zero byte is
                // ordinary data and must not stop the read.
                byte[] buffer = new byte[1024];
                while (in.read(buffer) != -1) {
                    // content is discarded
                }
            } finally {
                in.close();
            }
            append("Read: " + u2.toString());
        } catch (FileNotFoundException e) {
            append("===File Not Found: " + u2.toString());
        } catch (IOException e) {
            append("===IOError: " + u2.toString() + ": " + e.toString());
        }
    }

    /**
     * Returns the text between the first two double quotes found at or after
     * {@code start}.
     *
     * @param s     the line to search
     * @param start index from which to look for the opening quote
     * @return the quoted text (possibly empty), or {@code null} when there is
     *         no opening quote at or after {@code start} or no closing quote
     *         after it
     */
    static String quotedValue(String s, int start) {
        int first = s.indexOf('"', start);
        if (first < 0) {
            // Without this check the search for the closing quote would
            // restart at index 0 and could match a quote before start.
            return null;
        }
        int last = s.indexOf('"', first + 1);
        if (last < 0) {
            return null;
        }
        return s.substring(first + 1, last);
    }

    /**
     * Removes a trailing "#fragment" from link text.
     *
     * @param name the link text
     * @return {@code name} cut at its last '#', or {@code name} unchanged when
     *         it has no '#' or its only '#' is the first character
     */
    static String stripFragment(String name) {
        int hash = name.lastIndexOf('#');
        return hash > 0 ? name.substring(0, hash) : name;
    }

    /** Appends {@code s} and a newline to the output area. */
    private void append(String s) {
        text.append(s + "\n");
    }
}
// Need help? Use our Contacts page.
// Last changed: May 22, 1997 jd