package Tools;

import java.util.Iterator;
import java.util.ConcurrentModificationException;
import java.util.HashSet;

/**
 * Used as a (not drop-in) replacement for HashSet (or Hashtable with
 * meaningless values attached to keys); saves oodles of memory.
 * HashSet uses about 40 bytes per key -- 16 for the Entry structure
 * {hash, key, value, next}, 4 for its Class pointer (guessing),
 * probably 8 for its mallocing (length, next fields?)... the 40 value
 * was empirically measured, and includes the unused bucket head slots
 * in the table (25-62%).<p>
 * 
 * Anyway, so we beat that by trading time for space. Each occupied
 * entry takes only 4 bytes (Object pointer). However, we might spend
 * more time hopping down the array (HashSet only has to hop down the
 * linked list corresponding to one bucket, which probabilistically
 * will stay low due to the count/threshold mechanism), and we don't
 * save the hashCode() of the keys, so we might spend more time
 * recomputing those on rehashes. This SmallHashset uses (empirically)
 * about 8 bytes/entry, accounting for the 25-62% empty table slots.<p>
 * 
 * Increasing loadFactor (bad idea) or decreasing the factor of 2 used
 * when rehashing will reduce the space requirements further. However,
 * a higher loadFactor increases (rapidly) the average time spent
 * walking the table, and a lower growth rate increases the time spent
 * rehashing.<p>
 * 
 * Since my application (indexing) may often involve growing a
 * Hashtable and then using it read-only, it may pay to add a method to
 * rehash one final time to make a fairly tight fit.
 */

public class SmallHashset {

	// Implementation notes: keys are stored directly in an array and found by
	// probing linearly (with wrap-around) from indexFor(key). A null slot
	// means "empty" and terminates a probe, so null keys are not supported.
	// This class performs no synchronization of its own.

	protected Object table[];
	protected int count;			// number of keys currently stored
	protected int threshold;		// when to rehash
	protected float loadFactor;		// how threshold is determined
	protected int serialNum;		// so we can explode dead iterators.


	static final Object spacer = new Object();
		// Sentinel for a slot that holds no key but must not end a probe.
		// Nothing in this class ever stores it; internalPut() and the
		// iterator treat a slot holding it as unoccupied, while
		// containsKey() probes straight past it.

	/** Creates an empty set with an initial table of 101 slots. */
	public SmallHashset() {
		init(101);
	}

	/**
	 * Creates an empty set with the given initial table size.
	 *
	 * @param capacity initial number of table slots; 0 is rounded up to 1
	 * @throws IllegalArgumentException if capacity is negative
	 */
	public SmallHashset(int capacity) {
		init(capacity);
	}

	/** Shared constructor body: allocates the table and resets counters. */
	private void init(int capacity) {
		if (capacity < 0) {
			throw new IllegalArgumentException(
				"negative capacity: " + capacity);
		}
		// A zero-length table would make indexFor() divide by zero, so
		// always allocate at least one slot.
		table = new Object[Math.max(capacity, 1)];
		count = 0;
		loadFactor = (float) 0.75;
		threshold = (int) (table.length*loadFactor);
		serialNum = 0;
	}

	/**
	 * Tests whether a key equal to the argument is stored in this set.
	 *
	 * @param key the key to look for; must not be null
	 * @return true if some stored key reports equals(key)
	 * @throws NullPointerException if key is null
	 */
	public boolean containsKey(Object key) {
		return containsKey(key, indexFor(key));
	}

	/**
	 * Probes for key starting at the given slot, wrapping around the end of
	 * the table, until the key is found, an empty (null) slot is reached, or
	 * every slot has been examined.
	 *
	 * @param key the key to look for
	 * @param index slot at which to start probing, normally indexFor(key)
	 * @return true if a stored object reports equals(key)
	 */
	protected boolean containsKey(Object key, int index) {
		int slots = table.length;
		int i = index;
		for (int probed = 0; probed < slots; probed++) {
			Object occupant = table[i];
			if (occupant == null) {
				return false;	// end of this probe chain
			}
			if (occupant.equals(key)) {
				return true;
			}
			if (++i == slots) {
				i = 0;			// wrap around to the start of the table
			}
		}
		return false;	// but wow, table is full! That shouldn't happen.
	}

	/**
	 * Computes the slot at which probing for key begins, given the current
	 * table size. The result changes whenever the table is rehashed.
	 *
	 * @param key the key to hash; must not be null
	 * @return an index in the range 0 .. table.length-1
	 * @throws NullPointerException if key is null
	 */
	public int indexFor(Object key) {
		// Mask off the sign bit so the remainder is never negative.
		return (key.hashCode() & 0x7fffffff) % table.length;
	}

	/**
	 * Adds key to the set unless an equal key is already present. Adding a
	 * new key invalidates outstanding iterators and may grow the table.
	 *
	 * @param key the key to add; must not be null
	 * @throws NullPointerException if key is null
	 */
	public void put(Object key) {
		int index = indexFor(key);

		if (containsKey(key, index)) {
			return;	// don't put in a duplicate key
		}

		internalPut(key, index);

		count++;
		serialNum++;
		// Growing as soon as the threshold is reached guarantees the table
		// always keeps at least one empty slot for the next insertion.
		if (count>=threshold) {
			rehash();
		}
	}

	/**
	 * Stores key in the first unoccupied slot (null or spacer) found by
	 * probing from index with wrap-around. Does not check for duplicates
	 * and does not touch count, serialNum or threshold.
	 *
	 * @param key the key to store
	 * @param index slot at which to start probing
	 * @throws IllegalStateException if every slot is occupied
	 */
	protected void internalPut(Object key, int index) {
		for (int i=index; i<table.length; i++) {
			if (table[i] == null || table[i] == spacer) {
				table[i] = key;
				return;
			}
		}
		for (int i=0; i<index; i++) {
			if (table[i] == null || table[i] == spacer) {
				table[i] = key;
				return;
			}
		}
		// This routine "can't" be called when table is full, so this should
		// never happen.
		throw new IllegalStateException(
			"SmallHashset table is full (" + table.length + " slots)");
	}

	/**
	 * Replaces the table with one of 2*length+1 slots and re-inserts every
	 * stored key. Spacer slots are not carried over.
	 */
	private void rehash() {
		int oldSize = table.length;
		Object oldTable[] = table;

		int newSize = table.length*2+1;	// should look for a prime?
		Object newTable[] = new Object[newSize];

		threshold = (int) (newSize*loadFactor);
		serialNum++;	// alert iterators before installing new table
		table = newTable;

		for (int i = 0; i<oldSize; i++) {
			if (oldTable[i]!=null && oldTable[i]!=spacer) {
				internalPut(oldTable[i], indexFor(oldTable[i]));
					// should not call us back, since capacity is now enough.
					// this calls each object's hashCode() again,
					// which could be slow. But that saves the memory
					// we'd spend caching it.
			}
		}
	}

	/**
	 * Returns an iterator over the stored keys in table-slot order. The
	 * iterator does not support remove(), and its next() throws
	 * ConcurrentModificationException once a new key has been added to the
	 * set after the iterator was created.
	 *
	 * @return a new iterator positioned before the first key
	 */
	public Iterator getKeyIterator() {
		return new FHIterator();
	}


	/**
	 * Returns the number of keys stored in this set.
	 *
	 * @return the key count
	 */
	public int size() {
		return count;
	}

	/** Walks the table from slot 0 upward, skipping unoccupied slots. */
	class FHIterator
		implements Iterator {

		int index;		// first slot not yet examined by next()
		int version;	// hold this privately to compare to actual serialNum
						// to see if we've become out of date.

		FHIterator() {
			this.version = serialNum;
			this.index = 0;
		}
	
		public boolean hasNext() {
			// ensure there is another element after index
			return nextOccupiedIndex()!=-1;
		}

		public Object next() {
			// Validate before touching the table: after a rehash our index
			// refers to a slot in an array that has been replaced.
			checkVersion();
			int nextIndex = nextOccupiedIndex();
			if (nextIndex == -1) {
				throw new java.util.NoSuchElementException();
			}
			index = nextIndex+1;
			return table[nextIndex];
		}

		public void remove()
			throws UnsupportedOperationException {
			throw new UnsupportedOperationException();
		}

		/** Throws if the set has changed since this iterator was created. */
		private void checkVersion() {
			if (version!=serialNum) {
				throw new ConcurrentModificationException();
			}
		}

		/** Returns the first occupied slot at or after index, or -1. */
		private int nextOccupiedIndex() {
			for (int i=index; i<table.length; i++) {
				if (table[i]!=null && table[i]!=spacer) {
					return i;
				}
			}
			return -1;
		}
	}
}
