onsdag den 13. februar 2008

Iterable-mutable-hashtable

For a long time I have been trying to find a data structure for a set of problems that we kept running into at work. What we wanted was a container that was both fast for lookups (hashtable) good for iteration (linked list) - and with the special feature that you could add and remove from the list while iterating it.

The two first are true for most implementations of the java interface Map. But the last requirement is not met in these standard implementations - as they all throw an ConcurrentModificationException. The reason we wanted the ability to concurrently modify the container was that a lot of iterating over children of objects resulted in the children self-destructing, hence removing them-selves from the container we were currently iterating. Now, one could argue that we could just use the iterators "remove()" method to do this, but since it was not for the parent to decide child would have to somehow know about the current iterator... I guess you see the code cluttering up down that train of thoughts.

So, after many ideas that seemed to be working on the paper, but have some flaw once implemented I came up with the LinkedList-Hashtable-mix that this entry is all about.

Basic idea
The idea is really simple: create a class where insertions where made both to a linked list (for iterations) and a hashtable (for lookups). This did not work in practice since the LinkedList did not like concurrent modifications either - but the idea worked, so I just had to create my own implementation of a linked list.


Implementation
Actually implementing the thing took a few hours once the idea was in place. Mainly because the container had to handle serialization, and serialization of a linked list cannot be done in the standard serialization way (due to stack overflow).

Code



import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Set;
import java.util.Vector;
import java.util.Map.Entry;


/**
* The following is much like the {@link LinkedHashMap} except it allows concurrent
* modifications.
*
* The order is not guarantied after serialization.
*
* @author Emanuel Greisen
*/
public class ThreadSafeHashtable<K,T> implements Iterable<T>,Serializable
{
private static final long serialVersionUID = 2131757156391825454L;
protected Hashtable<K,ThreadSafeHashitem<T>> lookup_table =
new Hashtable<K,ThreadSafeHashitem<T>>();
transient ThreadSafeHashitem<T> first_item;
transient ThreadSafeHashitem<T> last_item;


private void writeObject(java.io.ObjectOutputStream out)
throws IOException
{
out.defaultWriteObject();
}

private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException
{
in.defaultReadObject();

// OPTIMIZE: postpone this untill we need it. Loads of unserializations
// on the server does not use all tables, hence
// the relinking everying for iteration is not nessesary.
if(lookup_table != null)
{
for(Entry<K,ThreadSafeHashitem<T>> item_entry : lookup_table.entrySet())
{
if(first_item == null)
{
first_item = last_item = item_entry.getValue();
}
else
{
last_item.next = item_entry.getValue();
item_entry.getValue().prev = last_item;
last_item = item_entry.getValue();
}
}
}
}

public Iterator<T> iterator()
{
return new ThreadSafeHashtableIterator<K,T>(first_item);
}

/**
* Put an object in the hashmap, note that if any iterators are open the addition
* will not be visible untill these iterators have closed
* or the object has been serialized/deserialized.
* @param key
* @param value
* @return
*/
public T putObject(final K key, final T value)
{
if(key == null)
throw new IllegalArgumentException("'key' may not be null.");

ThreadSafeHashitem<T> old = lookup_table.get(key);
if(old != null)
{
T old_val = old.value;
old.value = value;
return old_val;
}

ThreadSafeHashitem<T> new_item = new ThreadSafeHashitem<T>(value);
if(last_item != null)
{
last_item.next = new_item;
new_item.prev = last_item;
last_item = new_item;
}
else
{
first_item = new_item;
last_item = new_item;
}
lookup_table.put(key, new_item);
return null;
}

/**
* Remove all objects in the hashtable.
*/
public void clear()
{
// Nuke all "next"-refs
ThreadSafeHashitem<T> item = first_item;
while(item != null)
{
ThreadSafeHashitem<T> next_item = item.next;
item.next = null;
item.prev = null;
item = next_item;
}
lookup_table.clear();
}

/**
* Return true if the hashtable contains a certain key.
* @param key
* @return
*/
public boolean containsKey(K key)
{
return lookup_table.containsKey(key);
}

/**
* Get the object that is assosiated with the given key.
* @param key
* @return
*/
public T getObject(K key)
{
ThreadSafeHashitem<T> item = lookup_table.get(key);
if(item != null)
{
return item.value;
}
return null;
}

/**
* The number of objects in the hashtable.
* @return
*/
public int size()
{
return lookup_table.size();
}

public Collection<T> values()
{
Vector<T> vals = new Vector<T>();
for(Entry<K, ThreadSafeHashitem<T>> entry : lookup_table.entrySet())
{
vals.add(entry.getValue().value);
}
return vals;
}

public Enumeration<K> keys()
{
return lookup_table.keys();
}

public T removeObject(K key)
{
if(key == null)
throw new IllegalArgumentException("'key' may not be null.");

ThreadSafeHashitem<T> item = lookup_table.remove(key);
if(item != null)
{
// Link the previous to the next
if(item.prev != null)
{
item.prev.next = item.next;
}
else
{
// we were the first (update first_item)
first_item = item.next;
}

// Link the next to the prev
if(item.next != null)
{
item.next.prev = item.prev;
}
else
{
// We were the last, (update last_item)
last_item = item.prev;
}

item.removed = true;
return item.value;
}
return null;
}

/**
* Note that this method is NOT safe to modify in.
* TODO: make a copy.
* @return
*/
public Set<K> keySet()
{
return lookup_table.keySet();
}

public void addAll(ThreadSafeHashtable<K, T> table)
{
for(K k : table.keySet())
{
putObject(k, table.getObject(k));
}
}
}

class ThreadSafeHashitem<T> implements Serializable
{
private static final long serialVersionUID = 1906364262566366088L;
T value;
transient ThreadSafeHashitem<T> prev;
transient ThreadSafeHashitem<T> next;
transient boolean removed;
public ThreadSafeHashitem(T value)
{
this.value = value;
}
}

class ThreadSafeHashtableIterator<K,T> implements Iterator<T>
{
ThreadSafeHashitem<T> item;

public ThreadSafeHashtableIterator(ThreadSafeHashitem<T> item)
{
this.item = item;
}

public boolean hasNext()
{
// Skip all the removed items.
while(item != null && item.removed)
{
item = item.next;
}
return item != null;
}

public T next()
{
T val = item.value;
item = item.next;
return val;
}

public void remove()
{
//NOT SUPPORTED: iterator.remove();
}
}

1 kommentar:

Oak sagde ...

Arghh, I forgot to mention that the class-name indicates thread-safety. This is NOT the case for this implementation.