/* * Copyright (c) 2016-2018 David Leeds * * Hashmap is free software; you can redistribute it and/or modify * it under the terms of the MIT license. See LICENSE for details. */ #include #include #include #include #include #include #include "hashmap.h" #ifndef HASHMAP_NOASSERT #include #define HASHMAP_ASSERT(expr) assert(expr) #else #define HASHMAP_ASSERT(expr) #endif /* Table sizes must be powers of 2 */ #define HASHMAP_SIZE_MIN (1 << 5) /* 32 */ #define HASHMAP_SIZE_DEFAULT (1 << 8) /* 256 */ #define HASHMAP_SIZE_MOD(map, val) ((val) & ((map)->table_size - 1)) /* Limit for probing is 1/2 of table_size */ #define HASHMAP_PROBE_LEN(map) ((map)->table_size >> 1) /* Return the next linear probe index */ #define HASHMAP_PROBE_NEXT(map, index) HASHMAP_SIZE_MOD(map, (index) + 1) /* Check if index b is less than or equal to index a */ #define HASHMAP_INDEX_LE(map, a, b) \ ((a) == (b) || (((b) - (a)) & ((map)->table_size >> 1)) != 0) struct hashmap_entry { void *key; void *data; #ifdef HASHMAP_METRICS size_t num_collisions; #endif }; /* * Enforce a maximum 0.75 load factor. */ static inline size_t hashmap_table_min_size_calc(size_t num_entries) { return num_entries + (num_entries / 3); } /* * Calculate the optimal table size, given the specified max number * of elements. */ static size_t hashmap_table_size_calc(size_t num_entries) { size_t table_size; size_t min_size; table_size = hashmap_table_min_size_calc(num_entries); /* Table size is always a power of 2 */ min_size = HASHMAP_SIZE_MIN; while (min_size < table_size) { min_size <<= 1; } return min_size; } /* * Get a valid hash table index from a key. */ static inline size_t hashmap_calc_index(const struct hashmap *map, const void *key) { return HASHMAP_SIZE_MOD(map, map->hash(key)); } /* * Return the next populated entry, starting with the specified one. * Returns NULL if there are no more valid entries. */ static struct hashmap_entry *hashmap_entry_get_populated( const struct hashmap *map, struct hashmap_entry *entry) { for (; entry < &map->table[map->table_size]; ++entry) { if (entry->key) { return entry; } } return NULL; } /* * Find the hashmap entry with the specified key, or an empty slot. * Returns NULL if the entire table has been searched without finding a match. */ static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map, const void *key, bool find_empty) { size_t i; size_t index; size_t probe_len = HASHMAP_PROBE_LEN(map); struct hashmap_entry *entry; index = hashmap_calc_index(map, key); /* Linear probing */ for (i = 0; i < probe_len; ++i) { entry = &map->table[index]; if (!entry->key) { if (find_empty) { #ifdef HASHMAP_METRICS entry->num_collisions = i; #endif return entry; } return NULL; } if (map->key_compare(key, entry->key) == 0) { return entry; } index = HASHMAP_PROBE_NEXT(map, index); } return NULL; } /* * Removes the specified entry and processes the proceeding entries to reduce * the load factor and keep the chain continuous. This is a required * step for hash maps using linear probing. */ static void hashmap_entry_remove(struct hashmap *map, struct hashmap_entry *removed_entry) { size_t i; #ifdef HASHMAP_METRICS size_t removed_i = 0; #endif size_t index; size_t entry_index; size_t removed_index = (removed_entry - map->table); struct hashmap_entry *entry; /* Free the key */ if (map->key_free) { map->key_free(removed_entry->key); } --map->num_entries; /* Fill the free slot in the chain */ index = HASHMAP_PROBE_NEXT(map, removed_index); for (i = 1; i < map->table_size; ++i) { entry = &map->table[index]; if (!entry->key) { /* Reached end of chain */ break; } entry_index = hashmap_calc_index(map, entry->key); /* Shift in entries with an index <= to the removed slot */ if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) { #ifdef HASHMAP_METRICS entry->num_collisions -= (i - removed_i); removed_i = i; #endif memcpy(removed_entry, entry, sizeof(*removed_entry)); removed_index = index; removed_entry = entry; } index = HASHMAP_PROBE_NEXT(map, index); } /* Clear the last removed entry */ memset(removed_entry, 0, sizeof(*removed_entry)); } /* * Reallocates the hash table to the new size and rehashes all entries. * new_size MUST be a power of 2. * Returns 0 on success and -errno on allocation or hash function failure. */ static int hashmap_rehash(struct hashmap *map, size_t new_size) { size_t old_size; struct hashmap_entry *old_table; struct hashmap_entry *new_table; struct hashmap_entry *entry; struct hashmap_entry *new_entry; HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN); HASHMAP_ASSERT((new_size & (new_size - 1)) == 0); new_table = (struct hashmap_entry *)calloc(new_size, sizeof(struct hashmap_entry)); if (!new_table) { return -ENOMEM; } /* Backup old elements in case of rehash failure */ old_size = map->table_size; old_table = map->table; map->table_size = new_size; map->table = new_table; /* Rehash */ for (entry = old_table; entry < &old_table[old_size]; ++entry) { if (!entry->data) { /* Only copy entries with data */ continue; } new_entry = hashmap_entry_find(map, entry->key, true); if (!new_entry) { /* * The load factor is too high with the new table * size, or a poor hash function was used. */ goto revert; } /* Shallow copy (intentionally omits num_collisions) */ new_entry->key = entry->key; new_entry->data = entry->data; } free(old_table); return 0; revert: map->table_size = old_size; map->table = old_table; free(new_table); return -EINVAL; } /* * Iterate through all entries and free all keys. */ static void hashmap_free_keys(struct hashmap *map) { struct hashmap_iter *iter; if (!map->key_free) { return; } for (iter = hashmap_iter(map); iter; iter = hashmap_iter_next(map, iter)) { map->key_free((void *)hashmap_iter_get_key(iter)); } } /* * Initialize an empty hashmap. * * hash_func should return an even distribution of numbers between 0 * and SIZE_MAX varying on the key provided. If set to NULL, the default * case-sensitive string hash function is used: hashmap_hash_string * * key_compare_func should return 0 if the keys match, and non-zero otherwise. * If set to NULL, the default case-sensitive string comparator function is * used: hashmap_compare_string * * initial_size is optional, and may be set to the max number of entries * expected to be put in the hash table. This is used as a hint to * pre-allocate the hash table to the minimum size needed to avoid * gratuitous rehashes. If initial_size is 0, a default size will be used. * * Returns 0 on success and -errno on failure. */ int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *), int (*key_compare_func)(const void *, const void *), size_t initial_size) { HASHMAP_ASSERT(map != NULL); if (!initial_size) { initial_size = HASHMAP_SIZE_DEFAULT; } else { /* Convert init size to valid table size */ initial_size = hashmap_table_size_calc(initial_size); } map->table_size_init = initial_size; map->table_size = initial_size; map->num_entries = 0; map->table = (struct hashmap_entry *)calloc(initial_size, sizeof(struct hashmap_entry)); if (!map->table) { return -ENOMEM; } map->hash = hash_func ? hash_func : hashmap_hash_string; map->key_compare = key_compare_func ? key_compare_func : hashmap_compare_string; map->key_alloc = NULL; map->key_free = NULL; return 0; } /* * Free the hashmap and all associated memory. */ void hashmap_destroy(struct hashmap *map) { if (!map) { return; } hashmap_free_keys(map); free(map->table); memset(map, 0, sizeof(*map)); } /* * Enable internal memory management of hash keys. */ void hashmap_set_key_alloc_funcs(struct hashmap *map, void *(*key_alloc_func)(const void *), void (*key_free_func)(void *)) { HASHMAP_ASSERT(map != NULL); map->key_alloc = key_alloc_func; map->key_free = key_free_func; } /* * Add an entry to the hashmap. If an entry with a matching key already * exists and has a data pointer associated with it, the existing data * pointer is returned, instead of assigning the new value. Compare * the return value with the data passed in to determine if a new entry was * created. Returns NULL if memory allocation failed. */ void *hashmap_put(struct hashmap *map, const void *key, void *data) { struct hashmap_entry *entry; HASHMAP_ASSERT(map != NULL); HASHMAP_ASSERT(key != NULL); /* Rehash with 2x capacity if load factor is approaching 0.75 */ if (map->table_size <= hashmap_table_min_size_calc(map->num_entries)) { hashmap_rehash(map, map->table_size << 1); } entry = hashmap_entry_find(map, key, true); if (!entry) { /* * Cannot find an empty slot. Either out of memory, or using * a poor hash function. Attempt to rehash once to reduce * chain length. */ if (hashmap_rehash(map, map->table_size << 1) < 0) { return NULL; } entry = hashmap_entry_find(map, key, true); if (!entry) { return NULL; } } if (!entry->key) { /* Allocate copy of key to simplify memory management */ if (map->key_alloc) { entry->key = map->key_alloc(key); if (!entry->key) { return NULL; } } else { entry->key = (void *)key; } ++map->num_entries; } else if (entry->data) { /* Do not overwrite existing data */ return entry->data; } entry->data = data; return data; } /* * Return the data pointer, or NULL if no entry exists. */ void *hashmap_get(const struct hashmap *map, const void *key) { struct hashmap_entry *entry; HASHMAP_ASSERT(map != NULL); HASHMAP_ASSERT(key != NULL); entry = hashmap_entry_find(map, key, false); if (!entry) { return NULL; } return entry->data; } /* * Remove an entry with the specified key from the map. * Returns the data pointer, or NULL, if no entry was found. */ void *hashmap_remove(struct hashmap *map, const void *key) { struct hashmap_entry *entry; void *data; HASHMAP_ASSERT(map != NULL); HASHMAP_ASSERT(key != NULL); entry = hashmap_entry_find(map, key, false); if (!entry) { return NULL; } data = entry->data; /* Clear the entry and make the chain contiguous */ hashmap_entry_remove(map, entry); return data; } /* * Remove all entries. */ void hashmap_clear(struct hashmap *map) { HASHMAP_ASSERT(map != NULL); hashmap_free_keys(map); map->num_entries = 0; memset(map->table, 0, sizeof(struct hashmap_entry) * map->table_size); } /* * Remove all entries and reset the hash table to its initial size. */ void hashmap_reset(struct hashmap *map) { struct hashmap_entry *new_table; HASHMAP_ASSERT(map != NULL); hashmap_clear(map); if (map->table_size == map->table_size_init) { return; } new_table = (struct hashmap_entry *)realloc(map->table, sizeof(struct hashmap_entry) * map->table_size_init); if (!new_table) { return; } map->table = new_table; map->table_size = map->table_size_init; } /* * Return the number of entries in the hash map. */ size_t hashmap_size(const struct hashmap *map) { HASHMAP_ASSERT(map != NULL); return map->num_entries; } /* * Get a new hashmap iterator. The iterator is an opaque * pointer that may be used with hashmap_iter_*() functions. * Hashmap iterators are INVALID after a put or remove operation is performed. * hashmap_iter_remove() allows safe removal during iteration. */ struct hashmap_iter *hashmap_iter(const struct hashmap *map) { HASHMAP_ASSERT(map != NULL); if (!map->num_entries) { return NULL; } return (struct hashmap_iter *)hashmap_entry_get_populated(map, map->table); } /* * Return an iterator to the next hashmap entry. Returns NULL if there are * no more entries. */ struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, const struct hashmap_iter *iter) { struct hashmap_entry *entry = (struct hashmap_entry *)iter; HASHMAP_ASSERT(map != NULL); if (!iter) { return NULL; } return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry + 1); } /* * Remove the hashmap entry pointed to by this iterator and return an * iterator to the next entry. Returns NULL if there are no more entries. */ struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, const struct hashmap_iter *iter) { struct hashmap_entry *entry = (struct hashmap_entry *)iter; HASHMAP_ASSERT(map != NULL); if (!iter) { return NULL; } if (!entry->key) { /* Iterator is invalid, so just return the next valid entry */ return hashmap_iter_next(map, iter); } hashmap_entry_remove(map, entry); return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry); } /* * Return the key of the entry pointed to by the iterator. */ const void *hashmap_iter_get_key(const struct hashmap_iter *iter) { if (!iter) { return NULL; } return (const void *)((struct hashmap_entry *)iter)->key; } /* * Return the data of the entry pointed to by the iterator. */ void *hashmap_iter_get_data(const struct hashmap_iter *iter) { if (!iter) { return NULL; } return ((struct hashmap_entry *)iter)->data; } /* * Set the data pointer of the entry pointed to by the iterator. */ void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data) { if (!iter) { return; } ((struct hashmap_entry *)iter)->data = data; } /* * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() * interface, this function supports calls to hashmap_remove() during iteration. * However, it is an error to put or remove an entry other than the current one, * and doing so will immediately halt iteration and return an error. * Iteration is stopped if func returns non-zero. Returns func's return * value if it is < 0, otherwise, 0. */ int hashmap_foreach(const struct hashmap *map, int (*func)(const void *, void *, void *), void *arg) { struct hashmap_entry *entry; size_t num_entries; const void *key; int rc; HASHMAP_ASSERT(map != NULL); HASHMAP_ASSERT(func != NULL); entry = map->table; for (entry = map->table; entry < &map->table[map->table_size]; ++entry) { if (!entry->key) { continue; } num_entries = map->num_entries; key = entry->key; rc = func(entry->key, entry->data, arg); if (rc < 0) { return rc; } if (rc > 0) { return 0; } /* Run this entry again if func() deleted it */ if (entry->key != key) { --entry; } else if (num_entries != map->num_entries) { /* Stop immediately if func put/removed another entry */ return -1; } } return 0; } /* * Default hash function for string keys. * This is an implementation of the well-documented Jenkins one-at-a-time * hash function. */ size_t hashmap_hash_string(const void *key) { const char *key_str = (const char *)key; size_t hash = 0; for (; *key_str; ++key_str) { hash += *key_str; hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } /* * Default key comparator function for string keys. */ int hashmap_compare_string(const void *a, const void *b) { return strcmp((const char *)a, (const char *)b); } /* * Default key allocation function for string keys. Use free() for the * key_free_func. */ void *hashmap_alloc_key_string(const void *key) { return (void *)strdup((const char *)key); } /* * Case insensitive hash function for string keys. */ size_t hashmap_hash_string_i(const void *key) { const char *key_str = (const char *)key; size_t hash = 0; for (; *key_str; ++key_str) { hash += tolower(*key_str); hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } /* * Case insensitive key comparator function for string keys. */ int hashmap_compare_string_i(const void *a, const void *b) { return strcasecmp((const char *)a, (const char *)b); } #ifdef HASHMAP_METRICS /* * Return the load factor. */ double hashmap_load_factor(const struct hashmap *map) { HASHMAP_ASSERT(map != NULL); if (!map->table_size) { return 0; } return (double)map->num_entries / map->table_size; } /* * Return the average number of collisions per entry. */ double hashmap_collisions_mean(const struct hashmap *map) { struct hashmap_entry *entry; size_t total_collisions = 0; HASHMAP_ASSERT(map != NULL); if (!map->num_entries) { return 0; } for (entry = map->table; entry < &map->table[map->table_size]; ++entry) { if (!entry->key) { continue; } total_collisions += entry->num_collisions; } return (double)total_collisions / map->num_entries; } /* * Return the variance between entry collisions. The higher the variance, * the more likely the hash function is poor and is resulting in clustering. */ double hashmap_collisions_variance(const struct hashmap *map) { struct hashmap_entry *entry; double mean_collisions; double variance; double total_variance = 0; HASHMAP_ASSERT(map != NULL); if (!map->num_entries) { return 0; } mean_collisions = hashmap_collisions_mean(map); for (entry = map->table; entry < &map->table[map->table_size]; ++entry) { if (!entry->key) { continue; } variance = (double)entry->num_collisions - mean_collisions; total_variance += variance * variance; } return total_variance / map->num_entries; } #endif