Check out the new USENIX Web site.
Using Java - USENIX

 
import java.applet.Applet;
import java.awt.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.io.*;
import java.util.Stack;
import java.util.Hashtable;
import java.util.EmptyStackException;

/**
 * A small same-host link checker with an AWT front end.
 *
 * <p>The user types a starting URL and presses Start (or Enter in the URL
 * field). A background thread then pops URLs off a work stack, reads each
 * page line by line and looks for HREF and SRC attributes. Links on the same
 * host whose name ends in "/" or contains ".htm" are pushed on the stack to be
 * parsed later; every other same-host link is fetched immediately just to
 * prove it can be read. Progress and errors are appended to the text area.
 *
 * <p>The class can run as an applet or stand-alone through {@link #main}.
 * It uses the AWT 1.0 event model ({@link #action}), so no listeners are
 * registered on the components.
 */
public class WebWalker extends Applet
    implements Runnable {

    /** Input field holding the starting URL. */
    TextField url;
    /** Output area for progress and error messages. */
    TextArea text;
    /** Pages still waiting to be parsed. */
    Stack stack;
    /** URLs already visited; used as a set (each key maps to itself). */
    Hashtable table;
    /** u is the page currently being parsed, u1 the last starting URL. */
    URL u, u1;
    /** Worker thread started by the most recent call to checkStart. */
    Thread thread;

    /**
     * Stand-alone entry point: hosts the applet in a 500x300 frame.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        WebWalker a = new WebWalker();
        Frame f = new Frame("Web Walker");
        f.add("Center", a);
        f.resize(500, 300);
        a.init();
        a.start();
        f.show();
    }

    /** Builds the user interface and creates the empty work stack and visited table. */
    public void init() {
        text = new TextArea();
        setLayout(new BorderLayout());
        add("Center", text);

        Panel top = new Panel();
        top.setLayout(new FlowLayout(FlowLayout.LEFT));
        top.add(new Button("Start"));
        top.add(new Label("Starting URL:"));
        url = new TextField(40);
        top.add(url);
        add("North", top);

        stack = new Stack();
        table = new Hashtable();
    }

    /**
     * AWT 1.0 action handler. Pressing any button or hitting Enter in the URL
     * field clears the previous results and starts a walk from the typed URL.
     *
     * @param e the event; only its target is examined
     * @param o event argument, unused
     * @return true if the event was handled here, false otherwise
     */
    public boolean action(Event e, Object o) {
        if (e.target instanceof Button || e.target == url) {
            table.clear();
            text.setText("");
            checkStart(url.getText());
            return true;
        }
        return false;
    }

    /**
     * Validates the starting URL and, if it parses, pushes it on the stack and
     * starts a worker thread running {@link #run}.
     *
     * <p>Each successful call starts a new thread. A walk that is already in
     * progress is not stopped, and it would share the stack, the visited table
     * and the current-URL field with the new one.
     *
     * @param s the URL text as typed; surrounding whitespace is ignored
     */
    public void checkStart(String s) {
        String t = s.trim();
        try {
            u1 = new URL(t); // use the trimmed text, not the raw input
        } catch (MalformedURLException e) {
            append("===Not a valid Java URL: " + s);
            return;
        }
        stack.push(u1);
        thread = new Thread(this);
        thread.start();
    }

    /**
     * Worker loop: pops pages off the stack until it is empty, skipping any
     * URL already in the visited table, and parses each remaining page.
     */
    public void run() {
        while (true) {
            try {
                u = (URL) stack.pop();
            } catch (EmptyStackException e) {
                append("DONE: stack empty.");
                return;
            }
            if (table.containsKey(u)) continue; // We have already visited here
            table.put(u, u);  // Else, add it to the hashed list
            append("Checking: " + u.toString());

            InputStream in = null;
            try {
                in = u.openStream();
                readStream(in);
            } catch (FileNotFoundException e) {
                append("===File Not Found: " + u.toString());
            } catch (IOException e) {
                append("===IOERROR: " +
                    u.toString() + ": " + e.toString());
            } finally {
                closeQuietly(in); // release the connection for every page
            }
        } // End while loop
    }

    /**
     * Reads the stream line by line and hands each line to parseLine. I/O
     * errors are reported in the text area against the current page. The
     * stream is not closed here; that is left to the caller.
     *
     * @param is stream positioned at the start of the page
     */
    public void readStream(InputStream is) {
        String nextline;
        try {
            // DataInputStream.readLine is deprecated but kept: this file
            // sticks to the Java 1.0 API throughout.
            DataInputStream dis = new DataInputStream(is);
            while ((nextline = dis.readLine()) != null) {
                parseLine(nextline);
            }
        } catch (IOException e) {
            append("===IOError: " + u.toString() +
                ": " + e.toString());
        }
    }

    /**
     * Looks for "href" and "src" (case-insensitively) in one line and checks
     * the link that follows. Only the first occurrence of each keyword on the
     * line is examined.
     *
     * @param s one line of the page
     */
    void parseLine(String s) {
        String l = s.toLowerCase();
        // >= 0: a keyword at the very start of the line still counts, which
        // happens when a tag is wrapped across lines.
        int start = l.indexOf("href");
        if (start >= 0) checkURL(s, start);
        start = l.indexOf("src");
        if (start >= 0) checkURL(s, start);
    }

    /**
     * Extracts the quoted link that follows position start, resolves it
     * against the current page, and then either queues it (same-host page),
     * fetches it once (same-host non-page), or reports it as not local.
     *
     * @param s     the line containing the attribute
     * @param start index in s of the "href" or "src" keyword
     */
    void checkURL(String s, int start) {
        String name = extractQuoted(s, start);
        if (name == null) {
            System.err.println("Quotes unbalanced or extends to next line.");
            return;
        }
        name = stripFragment(name);
        if (name == null) return; // Bare named anchor

        URL u2;
        try {
            if (name.indexOf(":/") > 0)  // name contains protocol
                u2 = new URL(name);
            else
                u2 = new URL(u, name); // use context of current URL
        } catch (MalformedURLException e) {
            append("---URL not recognized: " + name);
            return;
        }

        // Pass over this URL if the host has changed
        if (!u.getHost().equals(u2.getHost())) {
            append("+++Not local: " + u2.toString());
            return;
        }

        // Pages are parsed later by run(); anything else is fetched right now.
        if (looksLikePage(name)) {
            stack.push(u2);
            return;
        }

        if (table.containsKey(u2)) return; // We have already visited here
        table.put(u2, u2);  // Else, add it to the hashed list

        InputStream in = null;
        try {
            in = u2.openStream();
            drain(in); // just read the file
            append("Read: " + u2.toString());
        } catch (FileNotFoundException e) {
            append("===File Not Found: " + u2.toString());
        } catch (IOException e) {
            append("===IOError: " + u2.toString() + ": " +
                e.toString());
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Returns the text between the first pair of double quotes found at or
     * after start, or null when the opening or the closing quote is missing.
     * Quotes that appear before start are never considered.
     *
     * @param s     the line to search
     * @param start index at which the search for the opening quote begins
     * @return the quoted text (possibly empty), or null if the quotes are unbalanced
     */
    static String extractQuoted(String s, int start) {
        int first = s.indexOf('"', start);
        if (first < 0) return null;
        int last = s.indexOf('"', first + 1);
        if (last < 0) return null;
        return s.substring(first + 1, last);
    }

    /**
     * Removes a named-anchor suffix ("#...") from a link.
     *
     * @param name the raw link text
     * @return the link without its fragment, or null when the link is nothing
     *         but a fragment (it starts with '#')
     */
    static String stripFragment(String name) {
        int hash = name.indexOf('#');
        if (hash == 0) return null;
        if (hash > 0) return name.substring(0, hash);
        return name;
    }

    /**
     * Decides whether a link should be parsed for further links.
     *
     * @param name the link text with any fragment removed
     * @return true if name ends in "/" or contains ".htm" in any letter case
     */
    static boolean looksLikePage(String name) {
        return name.endsWith("/") || name.toLowerCase().indexOf(".htm") >= 0;
    }

    /**
     * Reads the stream to its end and discards the data.
     *
     * @param in the stream to consume; it is not closed
     * @return the number of bytes read
     * @throws IOException if reading fails
     */
    static long drain(InputStream in) throws IOException {
        BufferedInputStream bis = new BufferedInputStream(in, 1024);
        long count = 0;
        // read() returns -1 at end of stream; 0 is an ordinary data byte.
        while (bis.read() != -1) {
            count++;
        }
        return count;
    }

    /** Closes the stream if it was opened; a failure to close is ignored. */
    private static void closeQuietly(InputStream in) {
        if (in == null) return;
        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close here.
        }
    }

    /** Appends one line of output to the text area. */
    private void append(String s) {
        text.appendText(s + "\n");
    }
}

 

Need help? Use our Contacts page.
Last changed: May 22, 1997 jd