git.cworth.org Git - tar/blob - lib/hash.c

   1 /* hash - hashing table processing.
   2
   3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free
   4    Software Foundation, Inc.
   5
   6    Written by Jim Meyering, 1992.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 /* A generic hash table package.  */
  22
  23 /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
  24    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
  25
  26 #include <config.h>
  27
  28 #include "hash.h"
  29 #include "xalloc.h"
  30
  31 #include <limits.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34
  35 #if USE_OBSTACK
  36 # include "obstack.h"
  37 # ifndef obstack_chunk_alloc
  38 #  define obstack_chunk_alloc malloc
  39 # endif
  40 # ifndef obstack_chunk_free
  41 #  define obstack_chunk_free free
  42 # endif
  43 #endif
  44
  45 #ifndef SIZE_MAX
  46 # define SIZE_MAX ((size_t) -1)
  47 #endif
  48
  49 struct hash_table
  50   {
  51     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
  52        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
  53        are not empty, there are N_ENTRIES active entries in the table.  */
  54     struct hash_entry *bucket;
  55     struct hash_entry const *bucket_limit;
  56     size_t n_buckets;
  57     size_t n_buckets_used;
  58     size_t n_entries;
  59
  60     /* Tuning arguments, kept in a physicaly separate structure.  */
  61     const Hash_tuning *tuning;
  62
  63     /* Three functions are given to `hash_initialize', see the documentation
  64        block for this function.  In a word, HASHER randomizes a user entry
  65        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
  66        true if two user entries compare equally; and DATA_FREER is the cleanup
  67        function for a user entry.  */
  68     Hash_hasher hasher;
  69     Hash_comparator comparator;
  70     Hash_data_freer data_freer;
  71
  72     /* A linked list of freed struct hash_entry structs.  */
  73     struct hash_entry *free_entry_list;
  74
  75 #if USE_OBSTACK
  76     /* Whenever obstacks are used, it is possible to allocate all overflowed
  77        entries into a single stack, so they all can be freed in a single
  78        operation.  It is not clear if the speedup is worth the trouble.  */
  79     struct obstack entry_stack;
  80 #endif
  81   };
  82
  83 /* A hash table contains many internal entries, each holding a pointer to
  84    some user provided data (also called a user entry).  An entry indistinctly
  85    refers to both the internal entry and its associated user entry.  A user
  86    entry contents may be hashed by a randomization function (the hashing
  87    function, or just `hasher' for short) into a number (or `slot') between 0
  88    and the current table size.  At each slot position in the hash table,
  89    starts a linked chain of entries for which the user data all hash to this
  90    slot.  A bucket is the collection of all entries hashing to the same slot.
  91
  92    A good `hasher' function will distribute entries rather evenly in buckets.
  93    In the ideal case, the length of each bucket is roughly the number of
  94    entries divided by the table size.  Finding the slot for a data is usually
  95    done in constant time by the `hasher', and the later finding of a precise
  96    entry is linear in time with the size of the bucket.  Consequently, a
  97    larger hash table size (that is, a larger number of buckets) is prone to
  98    yielding shorter chains, *given* the `hasher' function behaves properly.
  99
 100    Long buckets slow down the lookup algorithm.  One might use big hash table
 101    sizes in hope to reduce the average length of buckets, but this might
 102    become inordinate, as unused slots in the hash table take some space.  The
 103    best bet is to make sure you are using a good `hasher' function (beware
 104    that those are not that easy to write! :-), and to use a table size
 105    larger than the actual number of entries.  */
 106
 107 /* If an insertion makes the ratio of nonempty buckets to table size larger
 108    than the growth threshold (a number between 0.0 and 1.0), then increase
 109    the table size by multiplying by the growth factor (a number greater than
 110    1.0).  The growth threshold defaults to 0.8, and the growth factor
 111    defaults to 1.414, meaning that the table will have doubled its size
 112    every second time 80% of the buckets get used.  */
 113 #define DEFAULT_GROWTH_THRESHOLD 0.8
 114 #define DEFAULT_GROWTH_FACTOR 1.414
 115
 116 /* If a deletion empties a bucket and causes the ratio of used buckets to
 117    table size to become smaller than the shrink threshold (a number between
 118    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
 119    number greater than the shrink threshold but smaller than 1.0).  The shrink
 120    threshold and factor default to 0.0 and 1.0, meaning that the table never
 121    shrinks.  */
 122 #define DEFAULT_SHRINK_THRESHOLD 0.0
 123 #define DEFAULT_SHRINK_FACTOR 1.0
 124
 125 /* Use this to initialize or reset a TUNING structure to
 126    some sensible values. */
 127 static const Hash_tuning default_tuning =
 128   {
 129     DEFAULT_SHRINK_THRESHOLD,
 130     DEFAULT_SHRINK_FACTOR,
 131     DEFAULT_GROWTH_THRESHOLD,
 132     DEFAULT_GROWTH_FACTOR,
 133     false
 134   };
 135
 136 /* Information and lookup.  */
 137
 138 /* The following few functions provide information about the overall hash
 139    table organization: the number of entries, number of buckets and maximum
 140    length of buckets.  */
 141
 142 /* Return the number of buckets in the hash table.  The table size, the total
 143    number of buckets (used plus unused), or the maximum number of slots, are
 144    the same quantity.  */
 145
 146 size_t
 147 hash_get_n_buckets (const Hash_table *table)
 148 {
 149   return table->n_buckets;
 150 }
 151
 152 /* Return the number of slots in use (non-empty buckets).  */
 153
 154 size_t
 155 hash_get_n_buckets_used (const Hash_table *table)
 156 {
 157   return table->n_buckets_used;
 158 }
 159
 160 /* Return the number of active entries.  */
 161
 162 size_t
 163 hash_get_n_entries (const Hash_table *table)
 164 {
 165   return table->n_entries;
 166 }
 167
 168 /* Return the length of the longest chain (bucket).  */
 169
 170 size_t
 171 hash_get_max_bucket_length (const Hash_table *table)
 172 {
 173   struct hash_entry const *bucket;
 174   size_t max_bucket_length = 0;
 175
 176   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 177     {
 178       if (bucket->data)
 179         {
 180           struct hash_entry const *cursor = bucket;
 181           size_t bucket_length = 1;
 182
 183           while (cursor = cursor->next, cursor)
 184             bucket_length++;
 185
 186           if (bucket_length > max_bucket_length)
 187             max_bucket_length = bucket_length;
 188         }
 189     }
 190
 191   return max_bucket_length;
 192 }
 193
 194 /* Do a mild validation of a hash table, by traversing it and checking two
 195    statistics.  */
 196
 197 bool
 198 hash_table_ok (const Hash_table *table)
 199 {
 200   struct hash_entry const *bucket;
 201   size_t n_buckets_used = 0;
 202   size_t n_entries = 0;
 203
 204   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 205     {
 206       if (bucket->data)
 207         {
 208           struct hash_entry const *cursor = bucket;
 209
 210           /* Count bucket head.  */
 211           n_buckets_used++;
 212           n_entries++;
 213
 214           /* Count bucket overflow.  */
 215           while (cursor = cursor->next, cursor)
 216             n_entries++;
 217         }
 218     }
 219
 220   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
 221     return true;
 222
 223   return false;
 224 }
 225
 226 void
 227 hash_print_statistics (const Hash_table *table, FILE *stream)
 228 {
 229   size_t n_entries = hash_get_n_entries (table);
 230   size_t n_buckets = hash_get_n_buckets (table);
 231   size_t n_buckets_used = hash_get_n_buckets_used (table);
 232   size_t max_bucket_length = hash_get_max_bucket_length (table);
 233
 234   fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
 235   fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
 236   fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
 237            (unsigned long int) n_buckets_used,
 238            (100.0 * n_buckets_used) / n_buckets);
 239   fprintf (stream, "max bucket length: %lu\n",
 240            (unsigned long int) max_bucket_length);
 241 }
 242
 243 /* If ENTRY matches an entry already in the hash table, return the
 244    entry from the table.  Otherwise, return NULL.  */
 245
 246 void *
 247 hash_lookup (const Hash_table *table, const void *entry)
 248 {
 249   struct hash_entry const *bucket
 250     = table->bucket + table->hasher (entry, table->n_buckets);
 251   struct hash_entry const *cursor;
 252
 253   if (! (bucket < table->bucket_limit))
 254     abort ();
 255
 256   if (bucket->data == NULL)
 257     return NULL;
 258
 259   for (cursor = bucket; cursor; cursor = cursor->next)
 260     if (table->comparator (entry, cursor->data))
 261       return cursor->data;
 262
 263   return NULL;
 264 }
 265
 266 /* Walking.  */
 267
 268 /* The functions in this page traverse the hash table and process the
 269    contained entries.  For the traversal to work properly, the hash table
 270    should not be resized nor modified while any particular entry is being
 271    processed.  In particular, entries should not be added or removed.  */
 272
 273 /* Return the first data in the table, or NULL if the table is empty.  */
 274
 275 void *
 276 hash_get_first (const Hash_table *table)
 277 {
 278   struct hash_entry const *bucket;
 279
 280   if (table->n_entries == 0)
 281     return NULL;
 282
 283   for (bucket = table->bucket; ; bucket++)
 284     if (! (bucket < table->bucket_limit))
 285       abort ();
 286     else if (bucket->data)
 287       return bucket->data;
 288 }
 289
 290 /* Return the user data for the entry following ENTRY, where ENTRY has been
 291    returned by a previous call to either `hash_get_first' or `hash_get_next'.
 292    Return NULL if there are no more entries.  */
 293
 294 void *
 295 hash_get_next (const Hash_table *table, const void *entry)
 296 {
 297   struct hash_entry const *bucket
 298     = table->bucket + table->hasher (entry, table->n_buckets);
 299   struct hash_entry const *cursor;
 300
 301   if (! (bucket < table->bucket_limit))
 302     abort ();
 303
 304   /* Find next entry in the same bucket.  */
 305   for (cursor = bucket; cursor; cursor = cursor->next)
 306     if (cursor->data == entry && cursor->next)
 307       return cursor->next->data;
 308
 309   /* Find first entry in any subsequent bucket.  */
 310   while (++bucket < table->bucket_limit)
 311     if (bucket->data)
 312       return bucket->data;
 313
 314   /* None found.  */
 315   return NULL;
 316 }
 317
 318 /* Fill BUFFER with pointers to active user entries in the hash table, then
 319    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
 320    pointers.  */
 321
 322 size_t
 323 hash_get_entries (const Hash_table *table, void **buffer,
 324                   size_t buffer_size)
 325 {
 326   size_t counter = 0;
 327   struct hash_entry const *bucket;
 328   struct hash_entry const *cursor;
 329
 330   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 331     {
 332       if (bucket->data)
 333         {
 334           for (cursor = bucket; cursor; cursor = cursor->next)
 335             {
 336               if (counter >= buffer_size)
 337                 return counter;
 338               buffer[counter++] = cursor->data;
 339             }
 340         }
 341     }
 342
 343   return counter;
 344 }
 345
 346 /* Call a PROCESSOR function for each entry of a hash table, and return the
 347    number of entries for which the processor function returned success.  A
 348    pointer to some PROCESSOR_DATA which will be made available to each call to
 349    the processor function.  The PROCESSOR accepts two arguments: the first is
 350    the user entry being walked into, the second is the value of PROCESSOR_DATA
 351    as received.  The walking continue for as long as the PROCESSOR function
 352    returns nonzero.  When it returns zero, the walking is interrupted.  */
 353
 354 size_t
 355 hash_do_for_each (const Hash_table *table, Hash_processor processor,
 356                   void *processor_data)
 357 {
 358   size_t counter = 0;
 359   struct hash_entry const *bucket;
 360   struct hash_entry const *cursor;
 361
 362   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 363     {
 364       if (bucket->data)
 365         {
 366           for (cursor = bucket; cursor; cursor = cursor->next)
 367             {
 368               if (!(*processor) (cursor->data, processor_data))
 369                 return counter;
 370               counter++;
 371             }
 372         }
 373     }
 374
 375   return counter;
 376 }
 377
 378 /* Allocation and clean-up.  */
 379
 380 /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
 381    This is a convenience routine for constructing other hashing functions.  */
 382
 383 #if USE_DIFF_HASH
 384
 385 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
 386    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
 387    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
 388    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
 389    may not be good for your application."  */
 390
 391 size_t
 392 hash_string (const char *string, size_t n_buckets)
 393 {
 394 # define ROTATE_LEFT(Value, Shift) \
 395   ((Value) << (Shift) | (Value) >> ((sizeof (size_t) * CHAR_BIT) - (Shift)))
 396 # define HASH_ONE_CHAR(Value, Byte) \
 397   ((Byte) + ROTATE_LEFT (Value, 7))
 398
 399   size_t value = 0;
 400   unsigned char ch;
 401
 402   for (; (ch = *string); string++)
 403     value = HASH_ONE_CHAR (value, ch);
 404   return value % n_buckets;
 405
 406 # undef ROTATE_LEFT
 407 # undef HASH_ONE_CHAR
 408 }
 409
 410 #else /* not USE_DIFF_HASH */
 411
 412 /* This one comes from `recode', and performs a bit better than the above as
 413    per a few experiments.  It is inspired from a hashing routine found in the
 414    very old Cyber `snoop', itself written in typical Greg Mansfield style.
 415    (By the way, what happened to this excellent man?  Is he still alive?)  */
 416
 417 size_t
 418 hash_string (const char *string, size_t n_buckets)
 419 {
 420   size_t value = 0;
 421   unsigned char ch;
 422
 423   for (; (ch = *string); string++)
 424     value = (value * 31 + ch) % n_buckets;
 425   return value;
 426 }
 427
 428 #endif /* not USE_DIFF_HASH */
 429
 430 /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
 431    number at least equal to 11.  */
 432
 433 static bool
 434 is_prime (size_t candidate)
 435 {
 436   size_t divisor = 3;
 437   size_t square = divisor * divisor;
 438
 439   while (square < candidate && (candidate % divisor))
 440     {
 441       divisor++;
 442       square += 4 * divisor;
 443       divisor++;
 444     }
 445
 446   return (candidate % divisor ? true : false);
 447 }
 448
 449 /* Round a given CANDIDATE number up to the nearest prime, and return that
 450    prime.  Primes lower than 10 are merely skipped.  */
 451
 452 static size_t
 453 next_prime (size_t candidate)
 454 {
 455   /* Skip small primes.  */
 456   if (candidate < 10)
 457     candidate = 10;
 458
 459   /* Make it definitely odd.  */
 460   candidate |= 1;
 461
 462   while (!is_prime (candidate))
 463     candidate += 2;
 464
 465   return candidate;
 466 }
 467
 468 void
 469 hash_reset_tuning (Hash_tuning *tuning)
 470 {
 471   *tuning = default_tuning;
 472 }
 473
 474 /* For the given hash TABLE, check the user supplied tuning structure for
 475    reasonable values, and return true if there is no gross error with it.
 476    Otherwise, definitively reset the TUNING field to some acceptable default
 477    in the hash table (that is, the user loses the right of further modifying
 478    tuning arguments), and return false.  */
 479
 480 static bool
 481 check_tuning (Hash_table *table)
 482 {
 483   const Hash_tuning *tuning = table->tuning;
 484
 485   /* Be a bit stricter than mathematics would require, so that
 486      rounding errors in size calculations do not cause allocations to
 487      fail to grow or shrink as they should.  The smallest allocation
 488      is 11 (due to next_prime's algorithm), so an epsilon of 0.1
 489      should be good enough.  */
 490   float epsilon = 0.1f;
 491
 492   if (epsilon < tuning->growth_threshold
 493       && tuning->growth_threshold < 1 - epsilon
 494       && 1 + epsilon < tuning->growth_factor
 495       && 0 <= tuning->shrink_threshold
 496       && tuning->shrink_threshold + epsilon < tuning->shrink_factor
 497       && tuning->shrink_factor <= 1
 498       && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
 499     return true;
 500
 501   table->tuning = &default_tuning;
 502   return false;
 503 }
 504
 505 /* Allocate and return a new hash table, or NULL upon failure.  The initial
 506    number of buckets is automatically selected so as to _guarantee_ that you
 507    may insert at least CANDIDATE different user entries before any growth of
 508    the hash table size occurs.  So, if have a reasonably tight a-priori upper
 509    bound on the number of entries you intend to insert in the hash table, you
 510    may save some table memory and insertion time, by specifying it here.  If
 511    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
 512    argument has its meaning changed to the wanted number of buckets.
 513
 514    TUNING points to a structure of user-supplied values, in case some fine
 515    tuning is wanted over the default behavior of the hasher.  If TUNING is
 516    NULL, the default tuning parameters are used instead.
 517
 518    The user-supplied HASHER function should be provided.  It accepts two
 519    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
 520    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
 521    This slot number is then returned.
 522
 523    The user-supplied COMPARATOR function should be provided.  It accepts two
 524    arguments pointing to user data, it then returns true for a pair of entries
 525    that compare equal, or false otherwise.  This function is internally called
 526    on entries which are already known to hash to the same bucket index.
 527
 528    The user-supplied DATA_FREER function, when not NULL, may be later called
 529    with the user data as an argument, just before the entry containing the
 530    data gets freed.  This happens from within `hash_free' or `hash_clear'.
 531    You should specify this function only if you want these functions to free
 532    all of your `data' data.  This is typically the case when your data is
 533    simply an auxiliary struct that you have malloc'd to aggregate several
 534    values.  */
 535
 536 Hash_table *
 537 hash_initialize (size_t candidate, const Hash_tuning *tuning,
 538                  Hash_hasher hasher, Hash_comparator comparator,
 539                  Hash_data_freer data_freer)
 540 {
 541   Hash_table *table;
 542
 543   if (hasher == NULL || comparator == NULL)
 544     return NULL;
 545
 546   table = malloc (sizeof *table);
 547   if (table == NULL)
 548     return NULL;
 549
 550   if (!tuning)
 551     tuning = &default_tuning;
 552   table->tuning = tuning;
 553   if (!check_tuning (table))
 554     {
 555       /* Fail if the tuning options are invalid.  This is the only occasion
 556          when the user gets some feedback about it.  Once the table is created,
 557          if the user provides invalid tuning options, we silently revert to
 558          using the defaults, and ignore further request to change the tuning
 559          options.  */
 560       goto fail;
 561     }
 562
 563   if (!tuning->is_n_buckets)
 564     {
 565       float new_candidate = candidate / tuning->growth_threshold;
 566       if (SIZE_MAX <= new_candidate)
 567         goto fail;
 568       candidate = new_candidate;
 569     }
 570
 571   if (xalloc_oversized (candidate, sizeof *table->bucket))
 572     goto fail;
 573   table->n_buckets = next_prime (candidate);
 574   if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
 575     goto fail;
 576
 577   table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
 578   if (table->bucket == NULL)
 579     goto fail;
 580   table->bucket_limit = table->bucket + table->n_buckets;
 581   table->n_buckets_used = 0;
 582   table->n_entries = 0;
 583
 584   table->hasher = hasher;
 585   table->comparator = comparator;
 586   table->data_freer = data_freer;
 587
 588   table->free_entry_list = NULL;
 589 #if USE_OBSTACK
 590   obstack_init (&table->entry_stack);
 591 #endif
 592   return table;
 593
 594  fail:
 595   free (table);
 596   return NULL;
 597 }
 598
 599 /* Make all buckets empty, placing any chained entries on the free list.
 600    Apply the user-specified function data_freer (if any) to the datas of any
 601    affected entries.  */
 602
 603 void
 604 hash_clear (Hash_table *table)
 605 {
 606   struct hash_entry *bucket;
 607
 608   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 609     {
 610       if (bucket->data)
 611         {
 612           struct hash_entry *cursor;
 613           struct hash_entry *next;
 614
 615           /* Free the bucket overflow.  */
 616           for (cursor = bucket->next; cursor; cursor = next)
 617             {
 618               if (table->data_freer)
 619                 (*table->data_freer) (cursor->data);
 620               cursor->data = NULL;
 621
 622               next = cursor->next;
 623               /* Relinking is done one entry at a time, as it is to be expected
 624                  that overflows are either rare or short.  */
 625               cursor->next = table->free_entry_list;
 626               table->free_entry_list = cursor;
 627             }
 628
 629           /* Free the bucket head.  */
 630           if (table->data_freer)
 631             (*table->data_freer) (bucket->data);
 632           bucket->data = NULL;
 633           bucket->next = NULL;
 634         }
 635     }
 636
 637   table->n_buckets_used = 0;
 638   table->n_entries = 0;
 639 }
 640
 641 /* Reclaim all storage associated with a hash table.  If a data_freer
 642    function has been supplied by the user when the hash table was created,
 643    this function applies it to the data of each entry before freeing that
 644    entry.  */
 645
 646 void
 647 hash_free (Hash_table *table)
 648 {
 649   struct hash_entry *bucket;
 650   struct hash_entry *cursor;
 651   struct hash_entry *next;
 652
 653   /* Call the user data_freer function.  */
 654   if (table->data_freer && table->n_entries)
 655     {
 656       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 657         {
 658           if (bucket->data)
 659             {
 660               for (cursor = bucket; cursor; cursor = cursor->next)
 661                 {
 662                   (*table->data_freer) (cursor->data);
 663                 }
 664             }
 665         }
 666     }
 667
 668 #if USE_OBSTACK
 669
 670   obstack_free (&table->entry_stack, NULL);
 671
 672 #else
 673
 674   /* Free all bucket overflowed entries.  */
 675   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 676     {
 677       for (cursor = bucket->next; cursor; cursor = next)
 678         {
 679           next = cursor->next;
 680           free (cursor);
 681         }
 682     }
 683
 684   /* Also reclaim the internal list of previously freed entries.  */
 685   for (cursor = table->free_entry_list; cursor; cursor = next)
 686     {
 687       next = cursor->next;
 688       free (cursor);
 689     }
 690
 691 #endif
 692
 693   /* Free the remainder of the hash table structure.  */
 694   free (table->bucket);
 695   free (table);
 696 }
 697
 698 /* Insertion and deletion.  */
 699
 700 /* Get a new hash entry for a bucket overflow, possibly by reclying a
 701    previously freed one.  If this is not possible, allocate a new one.  */
 702
 703 static struct hash_entry *
 704 allocate_entry (Hash_table *table)
 705 {
 706   struct hash_entry *new;
 707
 708   if (table->free_entry_list)
 709     {
 710       new = table->free_entry_list;
 711       table->free_entry_list = new->next;
 712     }
 713   else
 714     {
 715 #if USE_OBSTACK
 716       new = obstack_alloc (&table->entry_stack, sizeof *new);
 717 #else
 718       new = malloc (sizeof *new);
 719 #endif
 720     }
 721
 722   return new;
 723 }
 724
 725 /* Free a hash entry which was part of some bucket overflow,
 726    saving it for later recycling.  */
 727
 728 static void
 729 free_entry (Hash_table *table, struct hash_entry *entry)
 730 {
 731   entry->data = NULL;
 732   entry->next = table->free_entry_list;
 733   table->free_entry_list = entry;
 734 }
 735
 736 /* This private function is used to help with insertion and deletion.  When
 737    ENTRY matches an entry in the table, return a pointer to the corresponding
 738    user data and set *BUCKET_HEAD to the head of the selected bucket.
 739    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
 740    the table, unlink the matching entry.  */
 741
 742 static void *
 743 hash_find_entry (Hash_table *table, const void *entry,
 744                  struct hash_entry **bucket_head, bool delete)
 745 {
 746   struct hash_entry *bucket
 747     = table->bucket + table->hasher (entry, table->n_buckets);
 748   struct hash_entry *cursor;
 749
 750   if (! (bucket < table->bucket_limit))
 751     abort ();
 752
 753   *bucket_head = bucket;
 754
 755   /* Test for empty bucket.  */
 756   if (bucket->data == NULL)
 757     return NULL;
 758
 759   /* See if the entry is the first in the bucket.  */
 760   if ((*table->comparator) (entry, bucket->data))
 761     {
 762       void *data = bucket->data;
 763
 764       if (delete)
 765         {
 766           if (bucket->next)
 767             {
 768               struct hash_entry *next = bucket->next;
 769
 770               /* Bump the first overflow entry into the bucket head, then save
 771                  the previous first overflow entry for later recycling.  */
 772               *bucket = *next;
 773               free_entry (table, next);
 774             }
 775           else
 776             {
 777               bucket->data = NULL;
 778             }
 779         }
 780
 781       return data;
 782     }
 783
 784   /* Scan the bucket overflow.  */
 785   for (cursor = bucket; cursor->next; cursor = cursor->next)
 786     {
 787       if ((*table->comparator) (entry, cursor->next->data))
 788         {
 789           void *data = cursor->next->data;
 790
 791           if (delete)
 792             {
 793               struct hash_entry *next = cursor->next;
 794
 795               /* Unlink the entry to delete, then save the freed entry for later
 796                  recycling.  */
 797               cursor->next = next->next;
 798               free_entry (table, next);
 799             }
 800
 801           return data;
 802         }
 803     }
 804
 805   /* No entry found.  */
 806   return NULL;
 807 }
 808
 809 /* For an already existing hash table, change the number of buckets through
 810    specifying CANDIDATE.  The contents of the hash table are preserved.  The
 811    new number of buckets is automatically selected so as to _guarantee_ that
 812    the table may receive at least CANDIDATE different user entries, including
 813    those already in the table, before any other growth of the hash table size
 814    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
 815    exact number of buckets desired.  */
 816
 817 bool
 818 hash_rehash (Hash_table *table, size_t candidate)
 819 {
 820   Hash_table *new_table;
 821   struct hash_entry *bucket;
 822   struct hash_entry *cursor;
 823   struct hash_entry *next;
 824
 825   new_table = hash_initialize (candidate, table->tuning, table->hasher,
 826                                table->comparator, table->data_freer);
 827   if (new_table == NULL)
 828     return false;
 829
 830   /* Merely reuse the extra old space into the new table.  */
 831 #if USE_OBSTACK
 832   obstack_free (&new_table->entry_stack, NULL);
 833   new_table->entry_stack = table->entry_stack;
 834 #endif
 835   new_table->free_entry_list = table->free_entry_list;
 836
 837   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
 838     if (bucket->data)
 839       for (cursor = bucket; cursor; cursor = next)
 840         {
 841           void *data = cursor->data;
 842           struct hash_entry *new_bucket
 843             = (new_table->bucket
 844                + new_table->hasher (data, new_table->n_buckets));
 845
 846           if (! (new_bucket < new_table->bucket_limit))
 847             abort ();
 848
 849           next = cursor->next;
 850
 851           if (new_bucket->data)
 852             {
 853               if (cursor == bucket)
 854                 {
 855                   /* Allocate or recycle an entry, when moving from a bucket
 856                      header into a bucket overflow.  */
 857                   struct hash_entry *new_entry = allocate_entry (new_table);
 858
 859                   if (new_entry == NULL)
 860                     return false;
 861
 862                   new_entry->data = data;
 863                   new_entry->next = new_bucket->next;
 864                   new_bucket->next = new_entry;
 865                 }
 866               else
 867                 {
 868                   /* Merely relink an existing entry, when moving from a
 869                      bucket overflow into a bucket overflow.  */
 870                   cursor->next = new_bucket->next;
 871                   new_bucket->next = cursor;
 872                 }
 873             }
 874           else
 875             {
 876               /* Free an existing entry, when moving from a bucket
 877                  overflow into a bucket header.  Also take care of the
 878                  simple case of moving from a bucket header into a bucket
 879                  header.  */
 880               new_bucket->data = data;
 881               new_table->n_buckets_used++;
 882               if (cursor != bucket)
 883                 free_entry (new_table, cursor);
 884             }
 885         }
 886
 887   free (table->bucket);
 888   table->bucket = new_table->bucket;
 889   table->bucket_limit = new_table->bucket_limit;
 890   table->n_buckets = new_table->n_buckets;
 891   table->n_buckets_used = new_table->n_buckets_used;
 892   table->free_entry_list = new_table->free_entry_list;
 893   /* table->n_entries already holds its value.  */
 894 #if USE_OBSTACK
 895   table->entry_stack = new_table->entry_stack;
 896 #endif
 897   free (new_table);
 898
 899   return true;
 900 }
 901
 902 /* If ENTRY matches an entry already in the hash table, return the pointer
 903    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
 904    Return NULL if the storage required for insertion cannot be allocated.  */
 905
 906 void *
 907 hash_insert (Hash_table *table, const void *entry)
 908 {
 909   void *data;
 910   struct hash_entry *bucket;
 911
 912   /* The caller cannot insert a NULL entry.  */
 913   if (! entry)
 914     abort ();
 915
 916   /* If there's a matching entry already in the table, return that.  */
 917   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
 918     return data;
 919
 920   /* ENTRY is not matched, it should be inserted.  */
 921
 922   if (bucket->data)
 923     {
 924       struct hash_entry *new_entry = allocate_entry (table);
 925
 926       if (new_entry == NULL)
 927         return NULL;
 928
 929       /* Add ENTRY in the overflow of the bucket.  */
 930
 931       new_entry->data = (void *) entry;
 932       new_entry->next = bucket->next;
 933       bucket->next = new_entry;
 934       table->n_entries++;
 935       return (void *) entry;
 936     }
 937
 938   /* Add ENTRY right in the bucket head.  */
 939
 940   bucket->data = (void *) entry;
 941   table->n_entries++;
 942   table->n_buckets_used++;
 943
 944   /* If the growth threshold of the buckets in use has been reached, increase
 945      the table size and rehash.  There's no point in checking the number of
 946      entries:  if the hashing function is ill-conditioned, rehashing is not
 947      likely to improve it.  */
 948
 949   if (table->n_buckets_used
 950       > table->tuning->growth_threshold * table->n_buckets)
 951     {
 952       /* Check more fully, before starting real work.  If tuning arguments
 953          became invalid, the second check will rely on proper defaults.  */
 954       check_tuning (table);
 955       if (table->n_buckets_used
 956           > table->tuning->growth_threshold * table->n_buckets)
 957         {
 958           const Hash_tuning *tuning = table->tuning;
 959           float candidate =
 960             (tuning->is_n_buckets
 961              ? (table->n_buckets * tuning->growth_factor)
 962              : (table->n_buckets * tuning->growth_factor
 963                 * tuning->growth_threshold));
 964
 965           if (SIZE_MAX <= candidate)
 966             return NULL;
 967
 968           /* If the rehash fails, arrange to return NULL.  */
 969           if (!hash_rehash (table, candidate))
 970             entry = NULL;
 971         }
 972     }
 973
 974   return (void *) entry;
 975 }
 976
 977 /* If ENTRY is already in the table, remove it and return the just-deleted
 978    data (the user may want to deallocate its storage).  If ENTRY is not in the
 979    table, don't modify the table and return NULL.  */
 980
 981 void *
 982 hash_delete (Hash_table *table, const void *entry)
 983 {
 984   void *data;
 985   struct hash_entry *bucket;
 986
 987   data = hash_find_entry (table, entry, &bucket, true);
 988   if (!data)
 989     return NULL;
 990
 991   table->n_entries--;
 992   if (!bucket->data)
 993     {
 994       table->n_buckets_used--;
 995
 996       /* If the shrink threshold of the buckets in use has been reached,
 997          rehash into a smaller table.  */
 998
 999       if (table->n_buckets_used
1000           < table->tuning->shrink_threshold * table->n_buckets)
1001         {
1002           /* Check more fully, before starting real work.  If tuning arguments
1003              became invalid, the second check will rely on proper defaults.  */
1004           check_tuning (table);
1005           if (table->n_buckets_used
1006               < table->tuning->shrink_threshold * table->n_buckets)
1007             {
1008               const Hash_tuning *tuning = table->tuning;
1009               size_t candidate =
1010                 (tuning->is_n_buckets
1011                  ? table->n_buckets * tuning->shrink_factor
1012                  : (table->n_buckets * tuning->shrink_factor
1013                     * tuning->growth_threshold));
1014
1015               hash_rehash (table, candidate);
1016             }
1017         }
1018     }
1019
1020   return data;
1021 }
1022
1023 /* Testing.  */
1024
1025 #if TESTING
1026
1027 void
1028 hash_print (const Hash_table *table)
1029 {
1030   struct hash_entry const *bucket;
1031
1032   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1033     {
1034       struct hash_entry *cursor;
1035
1036       if (bucket)
1037         printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1038
1039       for (cursor = bucket; cursor; cursor = cursor->next)
1040         {
1041           char const *s = cursor->data;
1042           /* FIXME */
1043           if (s)
1044             printf ("  %s\n", s);
1045         }
1046     }
1047 }
1048
1049 #endif /* TESTING */