From 33db97b72473124697170394efbc993870276c55 Mon Sep 17 00:00:00 2001 From: Simon Rettberg Date: Fri, 9 Nov 2018 15:36:21 +0100 Subject: Started work on proxy-side uid generation/tracking --- hashmap.c | 723 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 723 insertions(+) create mode 100644 hashmap.c (limited to 'hashmap.c') diff --git a/hashmap.c b/hashmap.c new file mode 100644 index 0000000..eceeacc --- /dev/null +++ b/hashmap.c @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2016-2018 David Leeds + * + * Hashmap is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include +#include +#include +#include +#include +#include + +#include "hashmap.h" + +#ifndef HASHMAP_NOASSERT +#include +#define HASHMAP_ASSERT(expr) assert(expr) +#else +#define HASHMAP_ASSERT(expr) +#endif + +/* Table sizes must be powers of 2 */ +#define HASHMAP_SIZE_MIN (1 << 5) /* 32 */ +#define HASHMAP_SIZE_DEFAULT (1 << 8) /* 256 */ +#define HASHMAP_SIZE_MOD(map, val) ((val) & ((map)->table_size - 1)) + +/* Limit for probing is 1/2 of table_size */ +#define HASHMAP_PROBE_LEN(map) ((map)->table_size >> 1) +/* Return the next linear probe index */ +#define HASHMAP_PROBE_NEXT(map, index) HASHMAP_SIZE_MOD(map, (index) + 1) + +/* Check if index b is less than or equal to index a */ +#define HASHMAP_INDEX_LE(map, a, b) \ + ((a) == (b) || (((b) - (a)) & ((map)->table_size >> 1)) != 0) + + +struct hashmap_entry { + void *key; + void *data; +#ifdef HASHMAP_METRICS + size_t num_collisions; +#endif +}; + + +/* + * Enforce a maximum 0.75 load factor. + */ +static inline size_t hashmap_table_min_size_calc(size_t num_entries) +{ + return num_entries + (num_entries / 3); +} + +/* + * Calculate the optimal table size, given the specified max number + * of elements. + */ +static size_t hashmap_table_size_calc(size_t num_entries) +{ + size_t table_size; + size_t min_size; + + table_size = hashmap_table_min_size_calc(num_entries); + + /* Table size is always a power of 2 */ + min_size = HASHMAP_SIZE_MIN; + while (min_size < table_size) { + min_size <<= 1; + } + return min_size; +} + +/* + * Get a valid hash table index from a key. + */ +static inline size_t hashmap_calc_index(const struct hashmap *map, + const void *key) +{ + return HASHMAP_SIZE_MOD(map, map->hash(key)); +} + +/* + * Return the next populated entry, starting with the specified one. + * Returns NULL if there are no more valid entries. + */ +static struct hashmap_entry *hashmap_entry_get_populated( + const struct hashmap *map, struct hashmap_entry *entry) +{ + for (; entry < &map->table[map->table_size]; ++entry) { + if (entry->key) { + return entry; + } + } + return NULL; +} + +/* + * Find the hashmap entry with the specified key, or an empty slot. + * Returns NULL if the entire table has been searched without finding a match. + */ +static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map, + const void *key, bool find_empty) +{ + size_t i; + size_t index; + size_t probe_len = HASHMAP_PROBE_LEN(map); + struct hashmap_entry *entry; + + index = hashmap_calc_index(map, key); + + /* Linear probing */ + for (i = 0; i < probe_len; ++i) { + entry = &map->table[index]; + if (!entry->key) { + if (find_empty) { +#ifdef HASHMAP_METRICS + entry->num_collisions = i; +#endif + return entry; + } + return NULL; + } + if (map->key_compare(key, entry->key) == 0) { + return entry; + } + index = HASHMAP_PROBE_NEXT(map, index); + } + return NULL; +} + +/* + * Removes the specified entry and processes the proceeding entries to reduce + * the load factor and keep the chain continuous. This is a required + * step for hash maps using linear probing. + */ +static void hashmap_entry_remove(struct hashmap *map, + struct hashmap_entry *removed_entry) +{ + size_t i; +#ifdef HASHMAP_METRICS + size_t removed_i = 0; +#endif + size_t index; + size_t entry_index; + size_t removed_index = (removed_entry - map->table); + struct hashmap_entry *entry; + + /* Free the key */ + if (map->key_free) { + map->key_free(removed_entry->key); + } + --map->num_entries; + + /* Fill the free slot in the chain */ + index = HASHMAP_PROBE_NEXT(map, removed_index); + for (i = 1; i < map->table_size; ++i) { + entry = &map->table[index]; + if (!entry->key) { + /* Reached end of chain */ + break; + } + entry_index = hashmap_calc_index(map, entry->key); + /* Shift in entries with an index <= to the removed slot */ + if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) { +#ifdef HASHMAP_METRICS + entry->num_collisions -= (i - removed_i); + removed_i = i; +#endif + memcpy(removed_entry, entry, sizeof(*removed_entry)); + removed_index = index; + removed_entry = entry; + } + index = HASHMAP_PROBE_NEXT(map, index); + } + /* Clear the last removed entry */ + memset(removed_entry, 0, sizeof(*removed_entry)); +} + +/* + * Reallocates the hash table to the new size and rehashes all entries. + * new_size MUST be a power of 2. + * Returns 0 on success and -errno on allocation or hash function failure. + */ +static int hashmap_rehash(struct hashmap *map, size_t new_size) +{ + size_t old_size; + struct hashmap_entry *old_table; + struct hashmap_entry *new_table; + struct hashmap_entry *entry; + struct hashmap_entry *new_entry; + + HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN); + HASHMAP_ASSERT((new_size & (new_size - 1)) == 0); + + new_table = (struct hashmap_entry *)calloc(new_size, + sizeof(struct hashmap_entry)); + if (!new_table) { + return -ENOMEM; + } + /* Backup old elements in case of rehash failure */ + old_size = map->table_size; + old_table = map->table; + map->table_size = new_size; + map->table = new_table; + /* Rehash */ + for (entry = old_table; entry < &old_table[old_size]; ++entry) { + if (!entry->data) { + /* Only copy entries with data */ + continue; + } + new_entry = hashmap_entry_find(map, entry->key, true); + if (!new_entry) { + /* + * The load factor is too high with the new table + * size, or a poor hash function was used. + */ + goto revert; + } + /* Shallow copy (intentionally omits num_collisions) */ + new_entry->key = entry->key; + new_entry->data = entry->data; + } + free(old_table); + return 0; +revert: + map->table_size = old_size; + map->table = old_table; + free(new_table); + return -EINVAL; +} + +/* + * Iterate through all entries and free all keys. + */ +static void hashmap_free_keys(struct hashmap *map) +{ + struct hashmap_iter *iter; + + if (!map->key_free) { + return; + } + for (iter = hashmap_iter(map); iter; + iter = hashmap_iter_next(map, iter)) { + map->key_free((void *)hashmap_iter_get_key(iter)); + } +} + +/* + * Initialize an empty hashmap. + * + * hash_func should return an even distribution of numbers between 0 + * and SIZE_MAX varying on the key provided. If set to NULL, the default + * case-sensitive string hash function is used: hashmap_hash_string + * + * key_compare_func should return 0 if the keys match, and non-zero otherwise. + * If set to NULL, the default case-sensitive string comparator function is + * used: hashmap_compare_string + * + * initial_size is optional, and may be set to the max number of entries + * expected to be put in the hash table. This is used as a hint to + * pre-allocate the hash table to the minimum size needed to avoid + * gratuitous rehashes. If initial_size is 0, a default size will be used. + * + * Returns 0 on success and -errno on failure. + */ +int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *), + int (*key_compare_func)(const void *, const void *), + size_t initial_size) +{ + HASHMAP_ASSERT(map != NULL); + + if (!initial_size) { + initial_size = HASHMAP_SIZE_DEFAULT; + } else { + /* Convert init size to valid table size */ + initial_size = hashmap_table_size_calc(initial_size); + } + map->table_size_init = initial_size; + map->table_size = initial_size; + map->num_entries = 0; + map->table = (struct hashmap_entry *)calloc(initial_size, + sizeof(struct hashmap_entry)); + if (!map->table) { + return -ENOMEM; + } + map->hash = hash_func ? + hash_func : hashmap_hash_string; + map->key_compare = key_compare_func ? + key_compare_func : hashmap_compare_string; + map->key_alloc = NULL; + map->key_free = NULL; + return 0; +} + +/* + * Free the hashmap and all associated memory. + */ +void hashmap_destroy(struct hashmap *map) +{ + if (!map) { + return; + } + hashmap_free_keys(map); + free(map->table); + memset(map, 0, sizeof(*map)); +} + +/* + * Enable internal memory management of hash keys. + */ +void hashmap_set_key_alloc_funcs(struct hashmap *map, + void *(*key_alloc_func)(const void *), + void (*key_free_func)(void *)) +{ + HASHMAP_ASSERT(map != NULL); + + map->key_alloc = key_alloc_func; + map->key_free = key_free_func; +} + +/* + * Add an entry to the hashmap. If an entry with a matching key already + * exists and has a data pointer associated with it, the existing data + * pointer is returned, instead of assigning the new value. Compare + * the return value with the data passed in to determine if a new entry was + * created. Returns NULL if memory allocation failed. + */ +void *hashmap_put(struct hashmap *map, const void *key, void *data) +{ + struct hashmap_entry *entry; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(key != NULL); + + /* Rehash with 2x capacity if load factor is approaching 0.75 */ + if (map->table_size <= hashmap_table_min_size_calc(map->num_entries)) { + hashmap_rehash(map, map->table_size << 1); + } + entry = hashmap_entry_find(map, key, true); + if (!entry) { + /* + * Cannot find an empty slot. Either out of memory, or using + * a poor hash function. Attempt to rehash once to reduce + * chain length. + */ + if (hashmap_rehash(map, map->table_size << 1) < 0) { + return NULL; + } + entry = hashmap_entry_find(map, key, true); + if (!entry) { + return NULL; + } + } + if (!entry->key) { + /* Allocate copy of key to simplify memory management */ + if (map->key_alloc) { + entry->key = map->key_alloc(key); + if (!entry->key) { + return NULL; + } + } else { + entry->key = (void *)key; + } + ++map->num_entries; + } else if (entry->data) { + /* Do not overwrite existing data */ + return entry->data; + } + entry->data = data; + return data; +} + +/* + * Return the data pointer, or NULL if no entry exists. + */ +void *hashmap_get(const struct hashmap *map, const void *key) +{ + struct hashmap_entry *entry; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(key != NULL); + + entry = hashmap_entry_find(map, key, false); + if (!entry) { + return NULL; + } + return entry->data; +} + +/* + * Remove an entry with the specified key from the map. + * Returns the data pointer, or NULL, if no entry was found. + */ +void *hashmap_remove(struct hashmap *map, const void *key) +{ + struct hashmap_entry *entry; + void *data; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(key != NULL); + + entry = hashmap_entry_find(map, key, false); + if (!entry) { + return NULL; + } + data = entry->data; + /* Clear the entry and make the chain contiguous */ + hashmap_entry_remove(map, entry); + return data; +} + +/* + * Remove all entries. + */ +void hashmap_clear(struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + hashmap_free_keys(map); + map->num_entries = 0; + memset(map->table, 0, sizeof(struct hashmap_entry) * map->table_size); +} + +/* + * Remove all entries and reset the hash table to its initial size. + */ +void hashmap_reset(struct hashmap *map) +{ + struct hashmap_entry *new_table; + + HASHMAP_ASSERT(map != NULL); + + hashmap_clear(map); + if (map->table_size == map->table_size_init) { + return; + } + new_table = (struct hashmap_entry *)realloc(map->table, + sizeof(struct hashmap_entry) * map->table_size_init); + if (!new_table) { + return; + } + map->table = new_table; + map->table_size = map->table_size_init; +} + +/* + * Return the number of entries in the hash map. + */ +size_t hashmap_size(const struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + return map->num_entries; +} + +/* + * Get a new hashmap iterator. The iterator is an opaque + * pointer that may be used with hashmap_iter_*() functions. + * Hashmap iterators are INVALID after a put or remove operation is performed. + * hashmap_iter_remove() allows safe removal during iteration. + */ +struct hashmap_iter *hashmap_iter(const struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + if (!map->num_entries) { + return NULL; + } + return (struct hashmap_iter *)hashmap_entry_get_populated(map, + map->table); +} + +/* + * Return an iterator to the next hashmap entry. Returns NULL if there are + * no more entries. + */ +struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, + const struct hashmap_iter *iter) +{ + struct hashmap_entry *entry = (struct hashmap_entry *)iter; + + HASHMAP_ASSERT(map != NULL); + + if (!iter) { + return NULL; + } + return (struct hashmap_iter *)hashmap_entry_get_populated(map, + entry + 1); +} + +/* + * Remove the hashmap entry pointed to by this iterator and return an + * iterator to the next entry. Returns NULL if there are no more entries. + */ +struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, + const struct hashmap_iter *iter) +{ + struct hashmap_entry *entry = (struct hashmap_entry *)iter; + + HASHMAP_ASSERT(map != NULL); + + if (!iter) { + return NULL; + } + if (!entry->key) { + /* Iterator is invalid, so just return the next valid entry */ + return hashmap_iter_next(map, iter); + } + hashmap_entry_remove(map, entry); + return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry); +} + +/* + * Return the key of the entry pointed to by the iterator. + */ +const void *hashmap_iter_get_key(const struct hashmap_iter *iter) +{ + if (!iter) { + return NULL; + } + return (const void *)((struct hashmap_entry *)iter)->key; +} + +/* + * Return the data of the entry pointed to by the iterator. + */ +void *hashmap_iter_get_data(const struct hashmap_iter *iter) +{ + if (!iter) { + return NULL; + } + return ((struct hashmap_entry *)iter)->data; +} + +/* + * Set the data pointer of the entry pointed to by the iterator. + */ +void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data) +{ + if (!iter) { + return; + } + ((struct hashmap_entry *)iter)->data = data; +} + +/* + * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() + * interface, this function supports calls to hashmap_remove() during iteration. + * However, it is an error to put or remove an entry other than the current one, + * and doing so will immediately halt iteration and return an error. + * Iteration is stopped if func returns non-zero. Returns func's return + * value if it is < 0, otherwise, 0. + */ +int hashmap_foreach(const struct hashmap *map, + int (*func)(const void *, void *, void *), void *arg) +{ + struct hashmap_entry *entry; + size_t num_entries; + const void *key; + int rc; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(func != NULL); + + entry = map->table; + for (entry = map->table; entry < &map->table[map->table_size]; + ++entry) { + if (!entry->key) { + continue; + } + num_entries = map->num_entries; + key = entry->key; + rc = func(entry->key, entry->data, arg); + if (rc < 0) { + return rc; + } + if (rc > 0) { + return 0; + } + /* Run this entry again if func() deleted it */ + if (entry->key != key) { + --entry; + } else if (num_entries != map->num_entries) { + /* Stop immediately if func put/removed another entry */ + return -1; + } + } + return 0; +} + +/* + * Default hash function for string keys. + * This is an implementation of the well-documented Jenkins one-at-a-time + * hash function. + */ +size_t hashmap_hash_string(const void *key) +{ + const char *key_str = (const char *)key; + size_t hash = 0; + + for (; *key_str; ++key_str) { + hash += *key_str; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/* + * Default key comparator function for string keys. + */ +int hashmap_compare_string(const void *a, const void *b) +{ + return strcmp((const char *)a, (const char *)b); +} + +/* + * Default key allocation function for string keys. Use free() for the + * key_free_func. + */ +void *hashmap_alloc_key_string(const void *key) +{ + return (void *)strdup((const char *)key); +} + +/* + * Case insensitive hash function for string keys. + */ +size_t hashmap_hash_string_i(const void *key) +{ + const char *key_str = (const char *)key; + size_t hash = 0; + + for (; *key_str; ++key_str) { + hash += tolower(*key_str); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/* + * Case insensitive key comparator function for string keys. + */ +int hashmap_compare_string_i(const void *a, const void *b) +{ + return strcasecmp((const char *)a, (const char *)b); +} + + +#ifdef HASHMAP_METRICS +/* + * Return the load factor. + */ +double hashmap_load_factor(const struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + if (!map->table_size) { + return 0; + } + return (double)map->num_entries / map->table_size; +} + +/* + * Return the average number of collisions per entry. + */ +double hashmap_collisions_mean(const struct hashmap *map) +{ + struct hashmap_entry *entry; + size_t total_collisions = 0; + + HASHMAP_ASSERT(map != NULL); + + if (!map->num_entries) { + return 0; + } + for (entry = map->table; entry < &map->table[map->table_size]; + ++entry) { + if (!entry->key) { + continue; + } + total_collisions += entry->num_collisions; + } + return (double)total_collisions / map->num_entries; +} + +/* + * Return the variance between entry collisions. The higher the variance, + * the more likely the hash function is poor and is resulting in clustering. + */ +double hashmap_collisions_variance(const struct hashmap *map) +{ + struct hashmap_entry *entry; + double mean_collisions; + double variance; + double total_variance = 0; + + HASHMAP_ASSERT(map != NULL); + + if (!map->num_entries) { + return 0; + } + mean_collisions = hashmap_collisions_mean(map); + for (entry = map->table; entry < &map->table[map->table_size]; + ++entry) { + if (!entry->key) { + continue; + } + variance = (double)entry->num_collisions - mean_collisions; + total_variance += variance * variance; + } + return total_variance / map->num_entries; +} +#endif -- cgit v1.2.3-55-g7522