Tools/perfecthash/perfect_hash.py - platform/external/python/cpython3 - Gitiles

 #!/usr/bin/env python

 # perfect_hash.py
 #
 # Outputs C code for a minimal perfect hash.
 # The hash is produced using the algorithm described in
 # "Optimal algorithms for minimal perfect hashing",
 # G. Havas, B.S. Majewski.  Available as a technical report
 # from the CS department, University of Queensland
 # (ftp://ftp.cs.uq.oz.au/).
 #
 # This is a modified version of Andrew Kuchling's code
 # (http://starship.python.net/crew/amk/python/code/perfect-hash.html)
 # and generates C fragments suitable for compilation as a Python
 # extension module.
 #

 # Difference between this algorithm and gperf:
 # Gperf will complete in finite time, either with a successful function
 # or by giving up.
 # This algorithm may never complete, although completion is extremely
 # likely when c >= 2.

 # The algorithm works like this:
 #   0) You have K keys, that you want to perfectly hash to a bunch
 #      of hash values.
 #
 #   1) Choose a number N larger than K.  This is the number of
 #      vertices in a graph G, and also the size of the resulting table.
 #
 #   2) Pick two random hash functions f1, f2, that output values from
 #      0...N-1.
 #
 #   3) for key in keys:
 #          h1 = f1(key) ; h2 = f2(key)
 #          Draw an edge between vertices h1 and h2 of the graph.
 #          Associate the desired hash value with that edge.
 #
 #   4) Check if G is acyclic; if not, go back to step 1 and pick a bigger N.
 #
 #   5) Assign values to each vertex such that, for each edge, you can
 #      add the values for the two vertices and get the desired value
 #      for that edge -- which is the desired hash key.  This task is
 #      dead easy, because the graph is acyclic.  This is done by
 #      picking a vertex V, and assigning it a value of 0.  You then do a
 #      depth-first search, assigning values to new vertices so that
 #      they sum up properly.
 #
 #   6) f1, f2, and G now make up your perfect hash function.


 import sys, whrandom, string
 import pprint
 import perfhash
 import time

 class Hash:
     """Randomly seeded hash function mapping keys onto 0..N-1.

     Rather than implementing a byte-level hashing scheme here, a random
     10-character "junk" string is drawn, turned into a seed by
     perfhash.calcSeed(), and the actual hashing is delegated to
     perfhash.hash().  generate_code() emits the body of a C function
     that is meant to compute the same value."""

     def __init__(self, N, caseInsensitive=0):
         alphabet = string.letters + string.digits
         self.N = N
         self.caseInsensitive = caseInsensitive
         # Ten random alphanumeric characters; their only roles are to
         # derive the seed and to contribute their length to the hash.
         self.junk = "".join([whrandom.choice(alphabet) for _ in range(10)])
         self.seed = perfhash.calcSeed(self.junk)

     def __call__(self, key):
         """Hash 'key' (converted with str()) to a value in 0..N-1."""
         text = str(key)
         if self.caseInsensitive:
             text = string.upper(text)
         return perfhash.hash(self.seed, len(self.junk), text) % self.N

     def generate_code(self):
         """Return the C function body (braces included) for this hash.

         The emitted code reads the parameters 'key' and 'cch' and the
         macro k_cHashElements, all of which the surrounding generated
         C source is expected to provide."""
         params = {
             "lenJunk" : len(self.junk),
             "junkSeed" : self.seed,
         }

         prologue = """{
     register int len;
     register unsigned char *p;
     register long x;

     len = cch;
     p = (unsigned char *) key;
     x = %(junkSeed)d;
     while (--len >= 0)
         x = (1000003*x) ^ """ % params

         # Case-insensitive hashes fold every byte to upper case in C,
         # matching the string.upper() call in __call__.
         if self.caseInsensitive:
             step = "toupper(*(p++));"
         else:
             step = "*(p++);"

         epilogue = """
     x ^= cch + %(lenJunk)d;
     if (x == -1)
         x = -2;
     x %%= k_cHashElements;
     /* ensure the returned value is positive so we mimic Python's %% operator */
     if (x < 0)
       x += k_cHashElements;
     return x;
 }
 """ % params

         return prologue + step + epilogue


 # Vertex colours used by the depth-first traversals in Graph:
 # WHITE = not reached yet, GREY = being expanded, BLACK = finished.
 WHITE, GREY, BLACK = 0,1,2
 class Graph:
     """Undirected graph with a value on every edge and every vertex.

     This class isn't particularly efficient or general; it only has
     the features needed to implement the perfect hash algorithm.

     num_vertices -- number of vertices
     edges -- maps 2-tuples of vertex numbers to the value for this
              edge.  If there's an edge between v1 and v2 (v1<=v2),
              (v1,v2) is a key and the value is the edge's value.
     reachable_list -- maps a vertex V to the list of vertices
                       to which V is connected by edges.  A vertex
                       appears once per connect() call, so parallel
                       edges and self-loops show up as repeats.
     values -- numeric value for each vertex (-1 until assigned)
     """

     def __init__(self, num_vertices):
         self.num_vertices = num_vertices
         self.edges = {}
         self.reachable_list = {}
         self.values = [-1] * num_vertices

     def connect(self, vertex1, vertex2, value):
         """Connect 'vertex1' and 'vertex2' with an edge, with associated
         value 'value'.

         Connecting the same pair twice overwrites the stored edge value
         but still records the second edge in reachable_list, which is
         what lets is_acyclic() reject such a graph."""
         if vertex1 > vertex2:
             vertex1, vertex2 = vertex2, vertex1
         self.edges[(vertex1, vertex2)] = value

         # Add vertices to each other's reachable list
         self.reachable_list.setdefault(vertex1, []).append(vertex2)
         self.reachable_list.setdefault(vertex2, []).append(vertex1)

     def get_edge_value(self, vertex1, vertex2):
         """Retrieve the value corresponding to the edge between
         'vertex1' and 'vertex2'.  Raises KeyError if no such edge"""
         if vertex1 > vertex2:
             vertex1, vertex2 = vertex2, vertex1
         return self.edges[(vertex1, vertex2)]

     def is_acyclic(self):
         "Returns 1 if the graph is acyclic, otherwise 0"

         # Depth-first search from every still-white vertex.  Reaching a
         # vertex that is no longer white by any edge other than the one
         # we arrived on means there is a cycle.
         colour = [WHITE] * self.num_vertices

         for start in range(self.num_vertices):
             if colour[start] != WHITE:
                 continue

             # Stack of (parent, vertex) pairs still to expand.
             stack = [(None, start)]
             while stack:
                 parent, vertex = stack.pop()
                 colour[vertex] = GREY

                 # Copy the neighbour list and drop exactly ONE occurrence
                 # of the vertex we came from; a second occurrence is a
                 # parallel edge and must be reported as a cycle.
                 neighbours = self.reachable_list.get(vertex, [])[:]
                 if parent in neighbours:
                     neighbours.remove(parent)

                 for neighbour in neighbours:
                     if colour[neighbour] != WHITE:
                         # Already visited this node,
                         # so the graph isn't acyclic.
                         return 0
                     stack.append((vertex, neighbour))

                 colour[vertex] = BLACK

         # We got through, so the graph is acyclic.
         return 1

     def assign_values(self):
         """Compute values for each vertex, so that they sum up
         (modulo num_vertices) to the associated value for each edge.

         Intended to be called on a graph for which is_acyclic() is
         true.  Each traversal root, including every isolated vertex,
         gets the value 0.  Returns nothing."""
         colour = [WHITE] * self.num_vertices

         for start in range(self.num_vertices):
             if colour[start] != WHITE:
                 continue

             # Set this root's value, arbitrarily, to zero.
             self.set_vertex_value(start, 0)

             stack = [start]
             while stack:
                 vertex = stack.pop()
                 colour[vertex] = GREY

                 for neighbour in self.reachable_list.get(vertex, []):
                     if colour[neighbour] != WHITE:
                         # Already has a value (e.g. the vertex we
                         # arrived from).
                         continue

                     # New vertex's value is the desired edge value
                     # minus the value of the vertex we came from.
                     edge_value = self.get_edge_value(vertex, neighbour)
                     new_val = edge_value - self.get_vertex_value(vertex)
                     self.set_vertex_value(neighbour,
                                           new_val % self.num_vertices)
                     stack.append(neighbour)

                 colour[vertex] = BLACK

     def __getitem__(self, index):
         # Old-style sequence protocol: yields the vertex numbers
         # themselves and raises IndexError past the last vertex.
         if index < self.num_vertices: return index
         raise IndexError

     def get_vertex_value(self, vertex):
         "Get value for a vertex"
         return self.values[vertex]

     def set_vertex_value(self, vertex, value):
         "Set value for a vertex"
         self.values[vertex] = value

     def generate_code(self, out, width = 70):
         """Write the vertex values to 'out' as a C array initializer.

         'out' needs a write() method.  A line break is written once
         the running width estimate exceeds 'width'."""
         out.write("{ ")
         pos = 0
         for v in self.values:
             text = str(v) + ', '
             out.write(text)
             pos = pos + len(text) + 1
             if pos > width:
                 out.write('\n ')
                 pos = 0
         out.write('};\n')


 class PerfectHash:
     """Holds a finished perfect hash and renders it as C source.

     cchMax -- length of the longest key
     f1, f2 -- objects whose generate_code() returns a C function body
     G -- object whose generate_code(out) writes the vertex value table
     cHashElements -- number of entries in the table G
     cKeys -- number of keys
     type -- C integer type chosen for the entries of G
     """

     def __init__(self, cchMax, f1, f2, G, cHashElements, cKeys, maxHashValue):
         self.cchMax = cchMax
         self.f1 = f1
         self.f2 = f2
         self.G  = G
         self.cHashElements = cHashElements
         self.cKeys = cKeys
         # determine the necessary type for storing our hash function
         # helper table:
         self.type = self.determineType(maxHashValue)

     def generate_header(self, structName):
         "Return the C header text declaring the 'structName' API struct"
         header = """
 #include <Python.h>
 #include <stdlib.h>

 /* --- C API ----------------------------------------------------*/
 /* C API for usage by other Python modules */
 typedef struct %(structName)s
 {
     unsigned long cKeys;
     unsigned long cchMax;
     unsigned long (*hash)(const char *key, unsigned int cch);
     const void *(*getValue)(unsigned long iKey);
 } %(structName)s;
 """ % { "structName" : structName }
         return header

     def determineType(self, maxHashValue):
         "Return the narrowest of three C unsigned types for maxHashValue"
         if maxHashValue <= 255:
             return "unsigned char"
         elif maxHashValue <= 65535:
             return "unsigned short"
         else:
             # Take the cheesy way out...
             return "unsigned long"

     def generate_code(self, moduleName, dataArrayName, dataArrayType, structName):
         """Return the C source for the hash functions and module init.

         moduleName -- name used for the init function and the API key
         dataArrayName, dataArrayType -- name and element type of the
                 data array that is forward-declared here
         structName -- name of the API struct type (see generate_header)

         The table G is only forward-declared here, using self.type;
         generate_graph() writes its definition."""
         code = """
 /*
  * The hash is produced using the algorithm described in
  * "Optimal algorithms for minimal perfect hashing",
  * G. Havas, B.S. Majewski.  Available as a technical report
  * from the CS department, University of Queensland
  * (ftp://ftp.cs.uq.oz.au/).
  *
  * Generated using a heavily tweaked version of Andrew Kuchling's
  * perfect_hash.py:
  * http://starship.python.net/crew/amk/python/code/perfect-hash.html
  *
  * Generated on: %s
  */
 """ % time.ctime(time.time())
         # MSVC SP3 was complaining when I actually used a global constant
         code = code + """
 #define k_cHashElements %i
 #define k_cchMaxKey  %d
 #define k_cKeys  %i

 """ % (self.cHashElements, self.cchMax, self.cKeys)

         code = code + """
 static const %s G[k_cHashElements];
 static const %s %s[k_cKeys];
 """ % (self.type, dataArrayType, dataArrayName)

         code = code + """
 static long f1(const char *key, unsigned int cch)
 """
         code = code + self.f1.generate_code()
         code = code + """

 static long f2(const char *key, unsigned int cch)
 """
         code = code + self.f2.generate_code()
         # Only the literal below is %-formatted; the f1/f2 bodies added
         # above are inserted as-is.
         code = code + """

 static unsigned long hash(const char *key, unsigned int cch)
 {
     return ((unsigned long)(G[ f1(key, cch) ]) + (unsigned long)(G[ f2(key, cch) ]) ) %% k_cHashElements;
 }

 const void *getValue(unsigned long iKey)
 {
     return &%(dataArrayName)s[iKey];
 }

 /* Helper for adding objects to dictionaries. Check for errors with
    PyErr_Occurred() */
 static
 void insobj(PyObject *dict,
      char *name,
      PyObject *v)
 {
     PyDict_SetItemString(dict, name, v);
     Py_XDECREF(v);
 }

 static const %(structName)s hashAPI =
 {
     k_cKeys,
     k_cchMaxKey,
     &hash,
     &getValue,
 };

 static
 PyMethodDef Module_methods[] =
 {
     {NULL, NULL},
 };

 static char *Module_docstring = "%(moduleName)s hash function module";

 /* Error reporting for module init functions */

 #define Py_ReportModuleInitError(modname) {			\\
     PyObject *exc_type, *exc_value, *exc_tb;			\\
     PyObject *str_type, *str_value;				\\
 								\\
     /* Fetch error objects and convert them to strings */	\\
     PyErr_Fetch(&exc_type, &exc_value, &exc_tb);		\\
     if (exc_type && exc_value) {				\\
 	    str_type = PyObject_Str(exc_type);			\\
 	    str_value = PyObject_Str(exc_value);			\\
     }								\\
     else {							\\
 	   str_type = NULL;					\\
 	   str_value = NULL;					\\
     }								\\
     /* Try to format a more informative error message using the	\\
        original error */					\\
     if (str_type && str_value &&				\\
 	    PyString_Check(str_type) && PyString_Check(str_value))	\\
 	    PyErr_Format(						\\
    		    PyExc_ImportError,				\\
 		    "initialization of module "modname" failed "	\\
 		    "(%%s:%%s)",					\\
 		PyString_AS_STRING(str_type),			\\
 		PyString_AS_STRING(str_value));			\\
     else							\\
 	    PyErr_SetString(					\\
 		    PyExc_ImportError,				\\
 		    "initialization of module "modname" failed");	\\
     Py_XDECREF(str_type);					\\
     Py_XDECREF(str_value);					\\
     Py_XDECREF(exc_type);					\\
     Py_XDECREF(exc_value);					\\
     Py_XDECREF(exc_tb);						\\
 }


 /* Create PyMethodObjects and register them in the module\'s dict */
 DL_EXPORT(void)
 init%(moduleName)s(void)
 {
     PyObject *module, *moddict;
     /* Create module */
     module = Py_InitModule4("%(moduleName)s", /* Module name */
              Module_methods, /* Method list */
              Module_docstring, /* Module doc-string */
              (PyObject *)NULL, /* always pass this as *self */
              PYTHON_API_VERSION); /* API Version */
     if (module == NULL)
         goto onError;
     /* Add some constants to the module\'s dict */
     moddict = PyModule_GetDict(module);
     if (moddict == NULL)
         goto onError;

     /* Export C API */
     insobj(
         moddict,
         "%(moduleName)sAPI",
         PyCObject_FromVoidPtr((void *)&hashAPI, NULL));

 onError:
     /* Check for errors and report them */
     if (PyErr_Occurred())
         Py_ReportModuleInitError("%(moduleName)s");
     return;
 }
 """ % { "moduleName" : moduleName,
         "dataArrayName" : dataArrayName,
         "structName" : structName, }

         return code

     def generate_graph(self, out):
         """Write the C definition of the table G to 'out'.

         The element type is self.type so that the definition agrees
         with the forward declaration emitted by generate_code(); a
         fixed 'unsigned short' would conflict with it whenever
         determineType() picked a different type."""
         out.write("""
 static const %s G[] =
 """ % self.type)
         self.G.generate_code(out)


 def generate_hash(keys, caseInsensitive=0,
                   minC=None, initC=None,
                   f1Seed=None, f2Seed=None,
                   cIncrement=None, cTries=None):
     """Search for a minimal perfect hash and return it as a PerfectHash.

     keys -- list of (key, desired hash value) tuples
     caseInsensitive -- passed on to both Hash objects
     minC -- if given, the multiplier is reset to initC once it would
             exceed this value
     initC -- starting multiplier c (default 1.5); graphs have
              N = int(c * len(keys)) vertices
     f1Seed, f2Seed -- if given, seeds forced onto the first trial's
                       hash functions so an earlier result can be
                       regenerated
     cIncrement -- amount added to c after every cTries failed trials
                   (default 0.0025)
     cTries -- number of trials per multiplier value (default 50)

     Progress is reported on sys.stderr.  The search loops until an
     acyclic graph is found, so it may never return.  An AssertionError
     is raised if the finished function does not map a key to its
     desired value."""

     # K is the number of keys.
     K = len(keys)

     # We will be generating graphs of size N, where N = c * K.
     # The larger C is, the fewer trial graphs will need to be made, but
     # the resulting table is also larger.  Increase this starting value
     # if you're impatient.  After cTries failures (50 by default), c is
     # increased by cIncrement (0.0025 by default).
     if initC is None:
       initC = 1.5

     c = initC
     if cIncrement is None:
       cIncrement = 0.0025

     if cTries is None:
       cTries = 50

     # True only while the hash functions of the current trial carry
     # caller-supplied seeds.  It has to exist before the loop: the
     # failure branch below reads it even when no seeds were passed.
     fSpecifiedSeeds = 0

     # Number of trial graphs so far
     num_graphs = 0
     sys.stderr.write('Generating graphs... ')

     while 1:
         # N is the number of vertices in the graph G
         N = int(c*K)
         num_graphs = num_graphs + 1

         # Output a progress message
         sys.stderr.write( str(num_graphs) + ' ')
         sys.stderr.flush()

         # Create graph w/ N vertices
         G = Graph(N)

         # Create 2 random hash functions
         f1 = Hash(N, caseInsensitive)
         f2 = Hash(N, caseInsensitive)

         # Set the initial hash function seed values if passed in.
         # Doing this protects our hash functions from
         # changes to whrandom's behavior.
         if f1Seed is not None:
           f1.seed = f1Seed
           f1Seed = None
           fSpecifiedSeeds = 1
         if f2Seed is not None:
           f2.seed = f2Seed
           f2Seed = None
           fSpecifiedSeeds = 1

         # Connect vertices given by the values of the two hash functions
         # for each key.  Associate the desired hash value with each
         # edge.
         for k, v in keys:
             h1 = f1(k) ; h2 = f2(k)
             G.connect( h1,h2, v)

         # Check if the resulting graph is acyclic; if it is,
         # we're done with step 1.
         if G.is_acyclic():
           break

         if fSpecifiedSeeds:
           sys.stderr.write('\nThe initial f1/f2 seeds you specified didn\'t generate a perfect hash function: \n')
           sys.stderr.write('f1 seed: %s\n' % f1.seed)
           sys.stderr.write('f2 seed: %s\n' % f2.seed)
           sys.stderr.write('multipler: %s\n' % c)
           sys.stderr.write('Your data has likely changed, or you forgot what your initial multiplier should be.\n')
           sys.stderr.write('continuing the search for a perfect hash function......\n')
           fSpecifiedSeeds = 0

         # The multiplier is only touched after a failed trial, so on
         # success 'c' is still the value N was computed from and the
         # figure reported below really regenerates this table.
         if (num_graphs % cTries) == 0:
             # Enough failures at this multiplier,
             # increase the multiplier and keep trying....
             c = c + cIncrement

             # There is no point in searching on once we exceed the
             # size of a function we've generated in the past....
             if minC is not None and \
                c > minC:
               c = initC
               sys.stderr.write(' -- c > minC, resetting c to %0.4f\n' % c)
             else:
               sys.stderr.write(' -- increasing c to %0.4f\n' % c)
             sys.stderr.write('Generating graphs... ')

     # Now we have an acyclic graph, so we assign values to each vertex
     # such that, for each edge, you can add the values for the two vertices
     # involved and get the desired value for that edge -- which is the
     # desired hash key.  This task is dead easy, because the graph is acyclic.
     sys.stderr.write('\nAcyclic graph found; computing vertex values...\n')
     G.assign_values()

     sys.stderr.write('Checking uniqueness of hash values...\n')

     # Sanity check the result by actually verifying that all the keys
     # hash to the right value.  Along the way, record the longest key
     # and the largest table entry any key touches; the latter decides
     # the C type of the table.
     cchMaxKey = 0
     maxHashValue = 0

     for k, v in keys:
       hash1 = G.values[ f1(k) ]
       hash2 = G.values[ f2(k) ]
       if hash1 > maxHashValue:
         maxHashValue = hash1
       if hash2 > maxHashValue:
         maxHashValue = hash2
       perfecthash = (hash1 + hash2) % N
       assert perfecthash == v
       cch = len(k)
       if cch > cchMaxKey:
         cchMaxKey = cch

     sys.stderr.write('Found perfect hash function!\n')
     sys.stderr.write('\nIn order to regenerate this hash function, \n')
     sys.stderr.write('you need to pass these following values back in:\n')
     sys.stderr.write('f1 seed: %s\n' % repr(f1.seed))
     sys.stderr.write('f2 seed: %s\n' % repr(f2.seed))
     sys.stderr.write('initial multipler: %s\n' % c)

     return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue)

 # The string below is a bare expression statement: it is evaluated and
 # discarded when the module is loaded and nothing in this file reads it.
 # It holds a C fragment for a "_ucn" extension module (codec_tuple,
 # ucn_decode and init_ucn).
 """
 static
 PyObject *codec_tuple(PyObject *unicode,
               int len)
 {
     PyObject *v,*w;

     if (unicode == NULL)
     return NULL;
     v = PyTuple_New(2);
     if (v == NULL) {
     Py_DECREF(unicode);
     return NULL;
     }
     PyTuple_SET_ITEM(v,0,unicode);
     w = PyInt_FromLong(len);
     if (w == NULL) {
     Py_DECREF(v);
     return NULL;
     }
     PyTuple_SET_ITEM(v,1,w);
     return v;
 }

 static PyObject *
 ucn_decode(PyObject *self,
            PyObject *args)
 {
     const char *data;
     int size;
     const char *errors = NULL;
     PyObject *mapping = NULL;

     if (!PyArg_ParseTuple(args, "t#|z:ucn_decode",
               &data, &size, &errors))
         return NULL;
     if (mapping == Py_None)
         mapping = NULL;

     return codec_tuple(PyUnicode_DecodeNamedUnicodeEscape(data, size, errors),
                size);
 }


 static PyMethodDef _codecs_functions[] = {
     { "ucn_decode", ucn_decode, 1 },
 };

 DL_EXPORT(void)
 init_ucn()
 {
     Py_InitModule("_ucn", _codecs_functions);
 }

 """
	#!/usr/bin/env python

	# perfect_hash.py
	#
	# Outputs C code for a minimal perfect hash.
	# The hash is produced using the algorithm described in
	# "Optimal algorithms for minimal perfect hashing",
	# G. Havas, B.S. Majewski. Available as a technical report
	# from the CS department, University of Queensland
	# (ftp://ftp.cs.uq.oz.au/).
	#
	# This is a modified version of Andrew Kuchling's code
	# (http://starship.python.net/crew/amk/python/code/perfect-hash.html)
	# and generates C fragments suitable for compilation as a Python
	# extension module.
	#

	# Difference between this algorithm and gperf:
	# Gperf will complete in finite time with a successful function,
	# or by giving up.
	# This algorithm may never complete, although it is extremely likely
	# when c >= 2.

	# The algorithm works like this:
	# 0) You have K keys, that you want to perfectly hash to a bunch
	# of hash values.
	#
	# 1) Choose a number N larger than K. This is the number of
	# vertices in a graph G, and also the size of the resulting table.
	#
	# 2) Pick two random hash functions f1, f2, that output values from
	# 0...N-1.
	#
	# 3) for key in keys:
	# h1 = f1(key) ; h2 = f2(key)
	# Draw an edge between vertices h1 and h2 of the graph.
	# Associate the desired hash value with that edge.
	#
	# 4) Check if G is acyclic; if not, go back to step 1 and pick a bigger N.
	#
	# 5) Assign values to each vertex such that, for each edge, you can
	# add the values for the two vertices and get the desired value
	# for that edge -- which is the desired hash key. This task is
	# dead easy, because the graph is acyclic. This is done by
	# picking a vertex V, and assigning it a value of 0. You then do a
	# depth-first search, assigning values to new vertices so that
	# they sum up properly.
	#
	# 6) f1, f2, and G now make up your perfect hash function.


	import sys, whrandom, string
	import pprint
	import perfhash
	import time

	class Hash:
	    """Randomly seeded hash function mapping keys onto 0..N-1.

	    Rather than implementing a byte-level hashing scheme here, a
	    random 10-character "junk" string is drawn, turned into a seed by
	    perfhash.calcSeed(), and the actual hashing is delegated to
	    perfhash.hash().  generate_code() emits the body of a C function
	    that is meant to compute the same value."""

	    def __init__(self, N, caseInsensitive=0):
	        self.N = N
	        alphabet = string.letters + string.digits
	        # Ten random alphanumeric characters; their only roles are to
	        # derive the seed and to contribute their length to the hash.
	        self.junk = "".join([whrandom.choice(alphabet) for _ in range(10)])
	        self.caseInsensitive = caseInsensitive
	        self.seed = perfhash.calcSeed(self.junk)

	    def __call__(self, key):
	        """Hash 'key' (converted with str()) to a value in 0..N-1."""
	        key = str(key)
	        if self.caseInsensitive:
	            key = string.upper(key)
	        return perfhash.hash(self.seed, len(self.junk), key) % self.N

	    def generate_code(self):
	        """Return the C function body (braces included) for this hash.

	        The emitted code reads the parameters 'key' and 'cch' and the
	        macro k_cHashElements, all of which the surrounding generated
	        C source is expected to provide."""
	        params = {
	            "lenJunk" : len(self.junk),
	            "junkSeed" : self.seed,
	        }

	        s = """{
	    register int len;
	    register unsigned char *p;
	    register long x;

	    len = cch;
	    p = (unsigned char *) key;
	    x = %(junkSeed)d;
	    while (--len >= 0)
	        x = (1000003*x) ^ """ % params

	        # Case-insensitive hashes fold every byte to upper case in C,
	        # matching the string.upper() call in __call__.
	        if self.caseInsensitive:
	            s = s + "toupper(*(p++));"
	        else:
	            s = s + "*(p++);"

	        s = s + """
	    x ^= cch + %(lenJunk)d;
	    if (x == -1)
	        x = -2;
	    x %%= k_cHashElements;
	    /* ensure the returned value is positive so we mimic Python's %% operator */
	    if (x < 0)
	        x += k_cHashElements;
	    return x;
	}
	""" % params
	        return s


	# Vertex colours used by the depth-first traversals in Graph:
	# WHITE = not reached yet, GREY = being expanded, BLACK = finished.
	WHITE, GREY, BLACK = 0,1,2
	class Graph:
	    """Undirected graph with a value on every edge and every vertex.

	    This class isn't particularly efficient or general; it only has
	    the features needed to implement the perfect hash algorithm.

	    num_vertices -- number of vertices
	    edges -- maps 2-tuples of vertex numbers to the value for this
	             edge.  If there's an edge between v1 and v2 (v1<=v2),
	             (v1,v2) is a key and the value is the edge's value.
	    reachable_list -- maps a vertex V to the list of vertices
	                      to which V is connected by edges.  A vertex
	                      appears once per connect() call, so parallel
	                      edges and self-loops show up as repeats.
	    values -- numeric value for each vertex (-1 until assigned)
	    """

	    def __init__(self, num_vertices):
	        self.num_vertices = num_vertices
	        self.edges = {}
	        self.reachable_list = {}
	        self.values = [-1] * num_vertices

	    def connect(self, vertex1, vertex2, value):
	        """Connect 'vertex1' and 'vertex2' with an edge, with associated
	        value 'value'.

	        Connecting the same pair twice overwrites the stored edge value
	        but still records the second edge in reachable_list, which is
	        what lets is_acyclic() reject such a graph."""
	        if vertex1 > vertex2:
	            vertex1, vertex2 = vertex2, vertex1
	        self.edges[(vertex1, vertex2)] = value

	        # Add vertices to each other's reachable list
	        self.reachable_list.setdefault(vertex1, []).append(vertex2)
	        self.reachable_list.setdefault(vertex2, []).append(vertex1)

	    def get_edge_value(self, vertex1, vertex2):
	        """Retrieve the value corresponding to the edge between
	        'vertex1' and 'vertex2'.  Raises KeyError if no such edge"""
	        if vertex1 > vertex2:
	            vertex1, vertex2 = vertex2, vertex1
	        return self.edges[(vertex1, vertex2)]

	    def is_acyclic(self):
	        "Returns 1 if the graph is acyclic, otherwise 0"

	        # Depth-first search from every still-white vertex.  Reaching a
	        # vertex that is no longer white by any edge other than the one
	        # we arrived on means there is a cycle.
	        colour = [WHITE] * self.num_vertices

	        for start in range(self.num_vertices):
	            if colour[start] != WHITE:
	                continue

	            # Stack of (parent, vertex) pairs still to expand.
	            stack = [(None, start)]
	            while stack:
	                parent, vertex = stack.pop()
	                colour[vertex] = GREY

	                # Copy the neighbour list and drop exactly ONE
	                # occurrence of the vertex we came from; a second
	                # occurrence is a parallel edge and must be reported
	                # as a cycle.
	                neighbours = self.reachable_list.get(vertex, [])[:]
	                if parent in neighbours:
	                    neighbours.remove(parent)

	                for neighbour in neighbours:
	                    if colour[neighbour] != WHITE:
	                        # Already visited this node,
	                        # so the graph isn't acyclic.
	                        return 0
	                    stack.append((vertex, neighbour))

	                colour[vertex] = BLACK

	        # We got through, so the graph is acyclic.
	        return 1

	    def assign_values(self):
	        """Compute values for each vertex, so that they sum up
	        (modulo num_vertices) to the associated value for each edge.

	        Intended to be called on a graph for which is_acyclic() is
	        true.  Each traversal root, including every isolated vertex,
	        gets the value 0.  Returns nothing."""
	        colour = [WHITE] * self.num_vertices

	        for start in range(self.num_vertices):
	            if colour[start] != WHITE:
	                continue

	            # Set this root's value, arbitrarily, to zero.
	            self.set_vertex_value(start, 0)

	            stack = [start]
	            while stack:
	                vertex = stack.pop()
	                colour[vertex] = GREY

	                for neighbour in self.reachable_list.get(vertex, []):
	                    if colour[neighbour] != WHITE:
	                        # Already has a value (e.g. the vertex we
	                        # arrived from).
	                        continue

	                    # New vertex's value is the desired edge value
	                    # minus the value of the vertex we came from.
	                    edge_value = self.get_edge_value(vertex, neighbour)
	                    new_val = edge_value - self.get_vertex_value(vertex)
	                    self.set_vertex_value(neighbour,
	                                          new_val % self.num_vertices)
	                    stack.append(neighbour)

	                colour[vertex] = BLACK

	    def __getitem__(self, index):
	        # Old-style sequence protocol: yields the vertex numbers
	        # themselves and raises IndexError past the last vertex.
	        if index < self.num_vertices: return index
	        raise IndexError

	    def get_vertex_value(self, vertex):
	        "Get value for a vertex"
	        return self.values[vertex]

	    def set_vertex_value(self, vertex, value):
	        "Set value for a vertex"
	        self.values[vertex] = value

	    def generate_code(self, out, width = 70):
	        """Write the vertex values to 'out' as a C array initializer.

	        'out' needs a write() method.  A line break is written once
	        the running width estimate exceeds 'width'."""
	        out.write("{ ")
	        pos = 0
	        for v in self.values:
	            text = str(v) + ', '
	            out.write(text)
	            pos = pos + len(text) + 1
	            if pos > width:
	                out.write('\n ')
	                pos = 0
	        out.write('};\n')


	class PerfectHash:
	def __init__(self, cchMax, f1, f2, G, cHashElements, cKeys, maxHashValue):
	self.cchMax = cchMax
	self.f1 = f1
	self.f2 = f2
	self.G = G
	self.cHashElements = cHashElements
	self.cKeys = cKeys
	# determine the necessary type for storing our hash function
	# helper table:
	self.type = self.determineType(maxHashValue)

	def generate_header(self, structName):
	header = """
	#include <Python.h>
	#include <stdlib.h>

	/* --- C API ----------------------------------------------------*/
	/* C API for usage by other Python modules */
	typedef struct %(structName)s
	{
	unsigned long cKeys;
	unsigned long cchMax;
	unsigned long (hash)(const char key, unsigned int cch);
	const void (getValue)(unsigned long iKey);
	} %(structName)s;
	""" % { "structName" : structName }
	return header

	def determineType(self, maxHashValue):
	if maxHashValue <= 255:
	return "unsigned char"
	elif maxHashValue <= 65535:
	return "unsigned short"
	else:
	# Take the cheesy way out...
	return "unsigned long"

	def generate_code(self, moduleName, dataArrayName, dataArrayType, structName):
	    """Return the C source text for the generated hash extension module.

	    The text is assembled from C templates in this order: a banner
	    comment stamped with the generation time, the k_cHashElements /
	    k_cchMaxKey / k_cKeys defines (from self.cHashElements, self.cchMax
	    and self.cKeys), forward declarations of G (element type self.type)
	    and of the data array, the f1 and f2 functions (their bodies are
	    whatever self.f1.generate_code() and self.f2.generate_code()
	    return), hash(), getValue(), the exported API struct and the
	    module init function.

	    moduleName    -- substituted into the init function name, the module
	                     docstring, the Py_InitModule4() name, the
	                     "<moduleName>API" dictionary key and the error
	                     macro invocation
	    dataArrayName -- C identifier of the data array getValue() indexes
	    dataArrayType -- C element type used to forward-declare that array
	    structName    -- C type name of the exported hashAPI struct

	    Returns the generated C code as one string.
	    """
	    # Output C code for the hash functions and tables
	    pieces = []

	    pieces.append("""
	/*
	* The hash is produced using the algorithm described in
	* "Optimal algorithms for minimal perfect hashing",
	* G. Havas, B.S. Majewski. Available as a technical report
	* from the CS department, University of Queensland
	* (ftp://ftp.cs.uq.oz.au/).
	*
	* Generated using a heavily tweaked version of Andrew Kuchling's
	* perfect_hash.py:
	* http://starship.python.net/crew/amk/python/code/perfect-hash.html
	*
	* Generated on: %s
	*/
	""" % time.ctime(time.time()))

	    # MSVC SP3 was complaining when I actually used a global constant
	    pieces.append("""
	#define k_cHashElements %i
	#define k_cchMaxKey %d
	#define k_cKeys %i

	""" % (self.cHashElements, self.cchMax, self.cKeys))

	    # Forward declarations; the definitions are emitted separately.
	    pieces.append("""
	static const %s G[k_cHashElements];
	static const %s %s[k_cKeys];
	""" % (self.type, dataArrayType, dataArrayName))

	    pieces.append("""
	static long f1(const char *key, unsigned int cch)
	""")
	    pieces.append(self.f1.generate_code())
	    pieces.append("""

	static long f2(const char *key, unsigned int cch)
	""")
	    pieces.append(self.f2.generate_code())

	    # In this template "%%" yields a literal "%" and "\\" a literal "\"
	    # (the C line continuations of the error-reporting macro).
	    moduleTemplate = """

	static unsigned long hash(const char *key, unsigned int cch)
	{
	return ((unsigned long)(G[ f1(key, cch) ]) + (unsigned long)(G[ f2(key, cch) ]) ) %% k_cHashElements;
	}

	const void *getValue(unsigned long iKey)
	{
	return &%(dataArrayName)s[iKey];
	}

	/* Helper for adding objects to dictionaries. Check for errors with
	PyErr_Occurred() */
	static
	void insobj(PyObject *dict,
	char *name,
	PyObject *v)
	{
	PyDict_SetItemString(dict, name, v);
	Py_XDECREF(v);
	}

	static const %(structName)s hashAPI =
	{
	k_cKeys,
	k_cchMaxKey,
	&hash,
	&getValue,
	};

	static
	PyMethodDef Module_methods[] =
	{
	{NULL, NULL},
	};

	static char *Module_docstring = "%(moduleName)s hash function module";

	/* Error reporting for module init functions */

	#define Py_ReportModuleInitError(modname) { \\
	PyObject *exc_type, *exc_value, *exc_tb; \\
	PyObject *str_type, *str_value; \\
	\\
	/* Fetch error objects and convert them to strings */ \\
	PyErr_Fetch(&exc_type, &exc_value, &exc_tb); \\
	if (exc_type && exc_value) { \\
	str_type = PyObject_Str(exc_type); \\
	str_value = PyObject_Str(exc_value); \\
	} \\
	else { \\
	str_type = NULL; \\
	str_value = NULL; \\
	} \\
	/* Try to format a more informative error message using the \\
	original error */ \\
	if (str_type && str_value && \\
	PyString_Check(str_type) && PyString_Check(str_value)) \\
	PyErr_Format( \\
	PyExc_ImportError, \\
	"initialization of module "modname" failed " \\
	"(%%s:%%s)", \\
	PyString_AS_STRING(str_type), \\
	PyString_AS_STRING(str_value)); \\
	else \\
	PyErr_SetString( \\
	PyExc_ImportError, \\
	"initialization of module "modname" failed"); \\
	Py_XDECREF(str_type); \\
	Py_XDECREF(str_value); \\
	Py_XDECREF(exc_type); \\
	Py_XDECREF(exc_value); \\
	Py_XDECREF(exc_tb); \\
	}


	/* Create PyMethodObjects and register them in the module\'s dict */
	DL_EXPORT(void)
	init%(moduleName)s(void)
	{
	PyObject *module, *moddict;
	/* Create module */
	module = Py_InitModule4("%(moduleName)s", /* Module name */
	Module_methods, /* Method list */
	Module_docstring, /* Module doc-string */
	(PyObject *)NULL, /* always pass this as self */
	PYTHON_API_VERSION); /* API Version */
	if (module == NULL)
	goto onError;
	/* Add some constants to the module\'s dict */
	moddict = PyModule_GetDict(module);
	if (moddict == NULL)
	goto onError;

	/* Export C API */
	insobj(
	moddict,
	"%(moduleName)sAPI",
	PyCObject_FromVoidPtr((void *)&hashAPI, NULL));

	onError:
	/* Check for errors and report them */
	if (PyErr_Occurred())
	Py_ReportModuleInitError("%(moduleName)s");
	return;
	}
	"""
	    substitutions = {
	        "moduleName": moduleName,
	        "dataArrayName": dataArrayName,
	        "structName": structName,
	    }
	    pieces.append(moduleTemplate % substitutions)

	    return "".join(pieces)

	def generate_graph(self, out):
	    """Write the C definition of the G helper table to out.

	    out -- object with a write() method; it receives the declaration
	           line and is then handed to self.G.generate_code(), which
	           writes the initialiser.

	    The element type is self.type, the same type generate_code() uses
	    in its forward declaration of G, so the declaration and the
	    definition always agree.
	    """
	    out.write("""
	static const %s G[] =
	""" % self.type)
	    self.G.generate_code(out)


	def generate_hash(keys, caseInsensitive=0,
	                  minC=None, initC=None,
	                  f1Seed=None, f2Seed=None,
	                  cIncrement=None, cTries=None):
	    """Search for a minimal perfect hash over keys and return it.

	    keys            -- sequence of (key, desired hash value) tuples
	    caseInsensitive -- passed through to both Hash() constructors
	    minC            -- optional upper bound for the multiplier c; when c
	                       grows past it, c is reset to initC
	    initC           -- starting multiplier (default 1.5)
	    f1Seed, f2Seed  -- optional seeds assigned to f1.seed / f2.seed on
	                       the first trial only
	    cIncrement      -- amount added to c after every cTries trials
	                       (default 0.0025)
	    cTries          -- number of trials between increments (default 50)

	    Progress and the values needed to regenerate the function are
	    written to sys.stderr.  The loop only ends once an acyclic graph
	    has been found.

	    Returns PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue).
	    Raises AssertionError if a key does not hash to its desired value.
	    """

	    # K is the number of keys.
	    K = len(keys)

	    # We will be generating graphs of size N, where N = c * K.
	    # The larger C is, the fewer trial graphs will need to be made, but
	    # the resulting table is also larger. Increase this starting value
	    # if you're impatient. By default, after every 50 failures c is
	    # increased by 0.0025.
	    if initC is None:
	        initC = 1.5

	    c = initC
	    if cIncrement is None:
	        cIncrement = 0.0025

	    if cTries is None:
	        cTries = 50

	    # Number of trial graphs so far
	    num_graphs = 0
	    # Set while the current trial uses caller-supplied seeds.  It must be
	    # bound before the loop: without seeds, the first cyclic graph would
	    # otherwise hit an unbound local in the "elif" below.
	    fSpecifiedSeeds = 0
	    sys.stderr.write('Generating graphs... ')

	    while 1:
	        # N is the number of vertices in the graph G
	        N = int(c*K)
	        num_graphs = num_graphs + 1
	        if (num_graphs % cTries) == 0:
	            # Enough failures at this multiplier,
	            # increase the multiplier and keep trying....
	            c = c + cIncrement

	            # There is no point searching for a better
	            # hash function if we exceed the size
	            # of a function we've generated in the past....
	            if minC is not None and c > minC:
	                c = initC
	                sys.stderr.write(' -- c > minC, resetting c to %0.4f\n' % c)
	            else:
	                sys.stderr.write(' -- increasing c to %0.4f\n' % c)
	            sys.stderr.write('Generating graphs... ')

	        # Output a progress message
	        sys.stderr.write( str(num_graphs) + ' ')
	        sys.stderr.flush()

	        # Create graph w/ N vertices
	        G = Graph(N)

	        # Create 2 random hash functions
	        f1 = Hash(N, caseInsensitive)
	        f2 = Hash(N, caseInsensitive)

	        # Set the initial hash function seed values if passed in.
	        # Doing this protects our hash functions from
	        # changes to whrandom's behavior.  Each seed is consumed here,
	        # so later trials fall back to the seeds Hash() picked itself.
	        if f1Seed is not None:
	            f1.seed = f1Seed
	            f1Seed = None
	            fSpecifiedSeeds = 1
	        if f2Seed is not None:
	            f2.seed = f2Seed
	            f2Seed = None
	            fSpecifiedSeeds = 1

	        # Connect vertices given by the values of the two hash functions
	        # for each key. Associate the desired hash value with each
	        # edge.
	        for k, v in keys:
	            h1 = f1(k)
	            h2 = f2(k)
	            G.connect(h1, h2, v)

	        # Check if the resulting graph is acyclic; if it is,
	        # we're done with step 1.
	        if G.is_acyclic():
	            break
	        elif fSpecifiedSeeds:
	            sys.stderr.write('\nThe initial f1/f2 seeds you specified didn\'t generate a perfect hash function: \n')
	            sys.stderr.write('f1 seed: %s\n' % f1.seed)
	            sys.stderr.write('f2 seed: %s\n' % f2.seed)
	            sys.stderr.write('multipler: %s\n' % c)
	            sys.stderr.write('Your data has likely changed, or you forgot what your initial multiplier should be.\n')
	            sys.stderr.write('continuing the search for a perfect hash function......\n')
	            fSpecifiedSeeds = 0

	    # Now we have an acyclic graph, so we assign values to each vertex
	    # such that, for each edge, you can add the values for the two vertices
	    # involved and get the desired value for that edge -- which is the
	    # desired hash key. This task is dead easy, because the graph is acyclic.
	    sys.stderr.write('\nAcyclic graph found; computing vertex values...\n')
	    G.assign_values()

	    sys.stderr.write('Checking uniqueness of hash values...\n')

	    # Sanity check the result by actually verifying that all the keys
	    # hash to the right value.  Along the way record the longest key and
	    # the largest vertex value any key refers to.
	    cchMaxKey = 0
	    maxHashValue = 0

	    for k, v in keys:
	        hash1 = G.values[ f1(k) ]
	        hash2 = G.values[ f2(k) ]
	        if hash1 > maxHashValue:
	            maxHashValue = hash1
	        if hash2 > maxHashValue:
	            maxHashValue = hash2
	        perfecthash = (hash1 + hash2) % N
	        # Raised explicitly (rather than via "assert") so the check is
	        # not compiled away when Python runs with -O.
	        if perfecthash != v:
	            raise AssertionError(
	                'key %r hashes to %r, expected %r' % (k, perfecthash, v))
	        cch = len(k)
	        if cch > cchMaxKey:
	            cchMaxKey = cch

	    sys.stderr.write('Found perfect hash function!\n')
	    sys.stderr.write('\nIn order to regenerate this hash function, \n')
	    sys.stderr.write('you need to pass these following values back in:\n')
	    sys.stderr.write('f1 seed: %s\n' % repr(f1.seed))
	    sys.stderr.write('f2 seed: %s\n' % repr(f2.seed))
	    sys.stderr.write('initial multipler: %s\n' % c)

	    return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue)

	"""
	static
	PyObject *codec_tuple(PyObject *unicode,
	int len)
	{
	PyObject *v,*w;

	if (unicode == NULL)
	return NULL;
	v = PyTuple_New(2);
	if (v == NULL) {
	Py_DECREF(unicode);
	return NULL;
	}
	PyTuple_SET_ITEM(v,0,unicode);
	w = PyInt_FromLong(len);
	if (w == NULL) {
	Py_DECREF(v);
	return NULL;
	}
	PyTuple_SET_ITEM(v,1,w);
	return v;
	}

	static PyObject *
	ucn_decode(PyObject *self,
	PyObject *args)
	{
	const char *data;
	int size;
	const char *errors = NULL;
	PyObject *mapping = NULL;

	if (!PyArg_ParseTuple(args, "t#\|z:ucn_decode",
	&data, &size, &errors))
	return NULL;
	if (mapping == Py_None)
	mapping = NULL;

	return codec_tuple(PyUnicode_DecodeNamedUnicodeEscape(data, size, errors),
	size);
	}


	static PyMethodDef _codecs_functions[] = {
	{ "ucn_decode", ucn_decode, 1 },
	};

	DL_EXPORT(void)
	init_ucn()
	{
	Py_InitModule("_ucn", _codecs_functions);
	}

	"""