import java.applet.Applet;
import java.awt.*;
import java.net.URL;
import java.net.MalformedURLException;
import java.io.*;
import java.util.Stack;
import java.util.Hashtable;
import java.util.EmptyStackException;
/**
 * Same-host link walker with a small AWT front end.
 *
 * <p>The user enters a starting URL; a worker thread then pops URLs off a
 * stack, reads each page line by line, and looks for HREF and SRC attributes.
 * Links on the same host that look like HTML pages (name ends in "/" or
 * contains ".htm") are pushed on the stack for later scanning; other
 * same-host links are fetched immediately just to verify they can be read.
 * One status line per event is appended to the text area.
 *
 * <p>The class uses the JDK 1.0 event model ({@code action(Event, Object)})
 * and can run either as an applet or, through {@code main}, inside a Frame.
 */
public class WebWalker extends Applet
        implements Runnable {
    /** Input field holding the starting URL. */
    TextField url;
    /** Output area that receives the status messages. */
    TextArea text;
    /** URLs of pages still waiting to be fetched and scanned. */
    Stack stack;
    /** URLs already handled; each URL is stored as both key and value. */
    Hashtable table;
    /** u: page currently being scanned by run(); u1: URL last accepted by checkStart(). */
    URL u, u1;
    /** Most recently started worker thread. */
    Thread thread;

    /**
     * Runs the walker as a stand-alone application inside a 500x300 Frame.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        WebWalker a = new WebWalker();
        Frame f = new Frame("Web Walker");
        f.add("Center", a);
        f.resize(500, 300);
        a.init();
        a.start();
        f.show();
    }

    /**
     * Builds the user interface (output area in the center, a "Start" button
     * and the URL field on top) and creates the empty work stack and the
     * visited table.
     */
    public void init() {
        text = new TextArea();
        setLayout(new BorderLayout());
        add("Center", text);
        Panel top = new Panel();
        top.setLayout(new FlowLayout(FlowLayout.LEFT));
        top.add(new Button("Start"));
        top.add(new Label("Starting URL:"));
        url = new TextField(40);
        top.add(url);
        add("North", top);
        stack = new Stack();
        table = new Hashtable();
    }

    /**
     * Handles action events: a button press or an action on the URL field
     * clears the visited table and the output area and starts a new walk from
     * the URL currently typed into the field.
     *
     * @param e the event
     * @param o the event argument (unused)
     * @return true if the event was handled here, false otherwise
     */
    public boolean action(Event e, Object o) {
        if (e.target instanceof Button || e.target == url) {
            table.clear();
            text.setText("");
            checkStart(url.getText());
            return true;
        }
        return false;
    }

    /**
     * Parses the starting URL and, if it is valid, pushes it on the stack and
     * starts a new worker thread running this object. An invalid URL only
     * produces a message in the output area.
     *
     * @param s the text typed by the user; surrounding whitespace is ignored
     */
    public void checkStart(String s) {
        String t = s.trim();
        try {
            // Parse the trimmed text so stray blanks do not break a good URL.
            u1 = new URL(t);
        } catch (MalformedURLException e) {
            append("===Not a valid Java URL: " + s);
            return;
        }
        stack.push(u1);
        thread = new Thread(this);
        thread.start();
    }

    /**
     * Worker loop: pops URLs until the stack is empty, skips those already in
     * the visited table, and scans every other one with readStream. The
     * stream opened for a page is always closed before the next page starts.
     */
    public void run() {
        while (true) {
            try {
                u = (URL) stack.pop();
            } catch (EmptyStackException e) {
                append("DONE: stack empty.");
                return;
            }
            if (table.containsKey(u)) {
                continue; // We have already visited here
            }
            table.put(u, u); // Else, add it to the hashed list
            append("Checking: " + u.toString());
            InputStream in = null;
            try {
                in = u.openStream();
                readStream(in);
            } catch (FileNotFoundException e) {
                append("===File Not Found: " + u.toString());
            } catch (IOException e) {
                append("===IOERROR: " +
                        u.toString() + ": " + e.toString());
            } finally {
                closeQuietly(in);
            }
        } // End while loop
    }

    /**
     * Reads the stream line by line and hands every line to parseLine. An
     * I/O error is reported in the output area and ends the read. The stream
     * is not closed here; that is left to the caller.
     *
     * @param is the stream to read
     */
    public void readStream(InputStream is) {
        String nextline;
        try {
            DataInputStream dis = new DataInputStream(is);
            while ((nextline = dis.readLine()) != null) {
                parseLine(nextline);
            }
        } catch (IOException e) {
            append("===IOError: " + u.toString() +
                    ": " + e.toString());
        }
    }

    /**
     * Looks for "href" and "src" (case-insensitively) in one line and passes
     * the position of the first occurrence of each to checkURL. Only one
     * HREF and one SRC are examined per line.
     *
     * @param s one line of the page being scanned
     */
    void parseLine(String s) {
        String l = s.toLowerCase();
        int start;
        // indexOf returns -1 when absent; 0 is a valid hit at the line start.
        if ((start = l.indexOf("href")) >= 0) {
            checkURL(s, start);
        }
        if ((start = l.indexOf("src")) >= 0) {
            checkURL(s, start);
        }
    }

    /**
     * Extracts the quoted link that follows position start in line s,
     * resolves it against the page currently being scanned (field u), and
     * processes it: links to another host are only reported; links that end
     * in "/" or contain ".htm" are pushed on the stack; anything else is
     * fetched once, read to the end of the stream, and reported.
     *
     * @param s     the line containing the attribute
     * @param start index in s at which the attribute name was found
     */
    void checkURL(String s, int start) {
        URL base = u;
        String name = extractQuotedValue(s, start);
        if (name == null) {
            System.err.println("Quotes unbalanced or extends to next line.");
            return;
        }
        name = stripFragment(name);
        if (name.length() == 0) {
            return; // Bare named anchor (or empty link): same page
        }
        URL u2;
        try {
            if (name.indexOf(":/") > 0) { // name contains protocol
                u2 = new URL(name);
            } else {
                u2 = new URL(base, name); // use context of current URL
            }
        } catch (MalformedURLException e) {
            append("---URL not recognized: " + name);
            return;
        }
        // Pass over this URL if the host has changed (host names are
        // case-insensitive).
        if (!base.getHost().equalsIgnoreCase(u2.getHost())) {
            append("+++Not local: " + u2.toString());
            return;
        }
        // Now we have a URL; if name ends in /, or contains .htm, push
        // u2 on the stack; if not, fetch it now
        String l = name.toLowerCase();
        if (name.endsWith("/") || (l.indexOf(".htm") > 0)) {
            stack.push(u2);
            return;
        }
        if (table.containsKey(u2)) {
            return; // We have already visited here
        }
        table.put(u2, u2); // Else, add it to the hashed list
        InputStream in = null;
        try {
            in = u2.openStream();
            BufferedInputStream bis = new BufferedInputStream(in, 1024);
            // read() returns -1 at end of stream; a 0 is an ordinary data
            // byte (common in images) and must not end the loop.
            while (bis.read() != -1) {
                // just read the file
            }
            append("Read: " + u2.toString());
        } catch (FileNotFoundException e) {
            append("===File Not Found: " + u2.toString());
        } catch (IOException e) {
            append("===IOError: " + u2.toString() + ": " +
                    e.toString());
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Returns the text between the first pair of double quotes found at or
     * after position start.
     *
     * @param s     the line to search
     * @param start index at which the search for the opening quote begins
     * @return the quoted text (possibly empty), or null if there is no
     *         opening quote at or after start or no closing quote after it
     */
    static String extractQuotedValue(String s, int start) {
        int first = s.indexOf('"', start);
        if (first < 0) {
            return null;
        }
        int last = s.indexOf('"', first + 1);
        if (last < 0) {
            return null;
        }
        return s.substring(first + 1, last);
    }

    /**
     * Removes a named-anchor suffix from a link.
     *
     * @param name the link text
     * @return name up to, but not including, its first '#'; name itself if it
     *         contains no '#'; the empty string if name starts with '#'
     */
    static String stripFragment(String name) {
        int hash = name.indexOf('#');
        if (hash < 0) {
            return name;
        }
        return name.substring(0, hash);
    }

    /**
     * Closes the stream if it was opened. A failure to close is ignored
     * because the data has already been consumed and nothing can be done.
     *
     * @param in the stream to close, or null
     */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful to report for a failed close.
        }
    }

    /**
     * Appends one line to the output area.
     *
     * @param s the message, without line terminator
     */
    private void append(String s) {
        text.appendText(s + "\n");
    }
}
// Last changed: May 22, 1997 jd