diff options
author | Simon Rettberg | 2018-11-09 15:36:21 +0100 |
---|---|---|
committer | Simon Rettberg | 2018-11-09 15:36:21 +0100 |
commit | 33db97b72473124697170394efbc993870276c55 (patch) | |
tree | b1f27c1634c07652808af243f245da2809a6e4a3 | |
parent | Move string functions to lstring.[hc] (diff) | |
download | ldadp-33db97b72473124697170394efbc993870276c55.tar.gz ldadp-33db97b72473124697170394efbc993870276c55.tar.xz ldadp-33db97b72473124697170394efbc993870276c55.zip |
Started work on proxy-side uid generation/tracking
-rw-r--r-- | hashmap.c | 723 | ||||
-rw-r--r-- | hashmap.h | 281 | ||||
-rw-r--r-- | uidmap.c | 262 | ||||
-rw-r--r-- | uidmap.h | 16 |
4 files changed, 1282 insertions, 0 deletions
diff --git a/hashmap.c b/hashmap.c new file mode 100644 index 0000000..eceeacc --- /dev/null +++ b/hashmap.c @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2016-2018 David Leeds <davidesleeds@gmail.com> + * + * Hashmap is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + +#include "hashmap.h" + +#ifndef HASHMAP_NOASSERT +#include <assert.h> +#define HASHMAP_ASSERT(expr) assert(expr) +#else +#define HASHMAP_ASSERT(expr) +#endif + +/* Table sizes must be powers of 2 */ +#define HASHMAP_SIZE_MIN (1 << 5) /* 32 */ +#define HASHMAP_SIZE_DEFAULT (1 << 8) /* 256 */ +#define HASHMAP_SIZE_MOD(map, val) ((val) & ((map)->table_size - 1)) + +/* Limit for probing is 1/2 of table_size */ +#define HASHMAP_PROBE_LEN(map) ((map)->table_size >> 1) +/* Return the next linear probe index */ +#define HASHMAP_PROBE_NEXT(map, index) HASHMAP_SIZE_MOD(map, (index) + 1) + +/* Check if index b is less than or equal to index a */ +#define HASHMAP_INDEX_LE(map, a, b) \ + ((a) == (b) || (((b) - (a)) & ((map)->table_size >> 1)) != 0) + + +struct hashmap_entry { + void *key; + void *data; +#ifdef HASHMAP_METRICS + size_t num_collisions; +#endif +}; + + +/* + * Enforce a maximum 0.75 load factor. + */ +static inline size_t hashmap_table_min_size_calc(size_t num_entries) +{ + return num_entries + (num_entries / 3); +} + +/* + * Calculate the optimal table size, given the specified max number + * of elements. + */ +static size_t hashmap_table_size_calc(size_t num_entries) +{ + size_t table_size; + size_t min_size; + + table_size = hashmap_table_min_size_calc(num_entries); + + /* Table size is always a power of 2 */ + min_size = HASHMAP_SIZE_MIN; + while (min_size < table_size) { + min_size <<= 1; + } + return min_size; +} + +/* + * Get a valid hash table index from a key. + */ +static inline size_t hashmap_calc_index(const struct hashmap *map, + const void *key) +{ + return HASHMAP_SIZE_MOD(map, map->hash(key)); +} + +/* + * Return the next populated entry, starting with the specified one. + * Returns NULL if there are no more valid entries. + */ +static struct hashmap_entry *hashmap_entry_get_populated( + const struct hashmap *map, struct hashmap_entry *entry) +{ + for (; entry < &map->table[map->table_size]; ++entry) { + if (entry->key) { + return entry; + } + } + return NULL; +} + +/* + * Find the hashmap entry with the specified key, or an empty slot. + * Returns NULL if the entire table has been searched without finding a match. + */ +static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map, + const void *key, bool find_empty) +{ + size_t i; + size_t index; + size_t probe_len = HASHMAP_PROBE_LEN(map); + struct hashmap_entry *entry; + + index = hashmap_calc_index(map, key); + + /* Linear probing */ + for (i = 0; i < probe_len; ++i) { + entry = &map->table[index]; + if (!entry->key) { + if (find_empty) { +#ifdef HASHMAP_METRICS + entry->num_collisions = i; +#endif + return entry; + } + return NULL; + } + if (map->key_compare(key, entry->key) == 0) { + return entry; + } + index = HASHMAP_PROBE_NEXT(map, index); + } + return NULL; +} + +/* + * Removes the specified entry and processes the proceeding entries to reduce + * the load factor and keep the chain continuous. This is a required + * step for hash maps using linear probing. + */ +static void hashmap_entry_remove(struct hashmap *map, + struct hashmap_entry *removed_entry) +{ + size_t i; +#ifdef HASHMAP_METRICS + size_t removed_i = 0; +#endif + size_t index; + size_t entry_index; + size_t removed_index = (removed_entry - map->table); + struct hashmap_entry *entry; + + /* Free the key */ + if (map->key_free) { + map->key_free(removed_entry->key); + } + --map->num_entries; + + /* Fill the free slot in the chain */ + index = HASHMAP_PROBE_NEXT(map, removed_index); + for (i = 1; i < map->table_size; ++i) { + entry = &map->table[index]; + if (!entry->key) { + /* Reached end of chain */ + break; + } + entry_index = hashmap_calc_index(map, entry->key); + /* Shift in entries with an index <= to the removed slot */ + if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) { +#ifdef HASHMAP_METRICS + entry->num_collisions -= (i - removed_i); + removed_i = i; +#endif + memcpy(removed_entry, entry, sizeof(*removed_entry)); + removed_index = index; + removed_entry = entry; + } + index = HASHMAP_PROBE_NEXT(map, index); + } + /* Clear the last removed entry */ + memset(removed_entry, 0, sizeof(*removed_entry)); +} + +/* + * Reallocates the hash table to the new size and rehashes all entries. + * new_size MUST be a power of 2. + * Returns 0 on success and -errno on allocation or hash function failure. + */ +static int hashmap_rehash(struct hashmap *map, size_t new_size) +{ + size_t old_size; + struct hashmap_entry *old_table; + struct hashmap_entry *new_table; + struct hashmap_entry *entry; + struct hashmap_entry *new_entry; + + HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN); + HASHMAP_ASSERT((new_size & (new_size - 1)) == 0); + + new_table = (struct hashmap_entry *)calloc(new_size, + sizeof(struct hashmap_entry)); + if (!new_table) { + return -ENOMEM; + } + /* Backup old elements in case of rehash failure */ + old_size = map->table_size; + old_table = map->table; + map->table_size = new_size; + map->table = new_table; + /* Rehash */ + for (entry = old_table; entry < &old_table[old_size]; ++entry) { + if (!entry->data) { + /* Only copy entries with data */ + continue; + } + new_entry = hashmap_entry_find(map, entry->key, true); + if (!new_entry) { + /* + * The load factor is too high with the new table + * size, or a poor hash function was used. + */ + goto revert; + } + /* Shallow copy (intentionally omits num_collisions) */ + new_entry->key = entry->key; + new_entry->data = entry->data; + } + free(old_table); + return 0; +revert: + map->table_size = old_size; + map->table = old_table; + free(new_table); + return -EINVAL; +} + +/* + * Iterate through all entries and free all keys. + */ +static void hashmap_free_keys(struct hashmap *map) +{ + struct hashmap_iter *iter; + + if (!map->key_free) { + return; + } + for (iter = hashmap_iter(map); iter; + iter = hashmap_iter_next(map, iter)) { + map->key_free((void *)hashmap_iter_get_key(iter)); + } +} + +/* + * Initialize an empty hashmap. + * + * hash_func should return an even distribution of numbers between 0 + * and SIZE_MAX varying on the key provided. If set to NULL, the default + * case-sensitive string hash function is used: hashmap_hash_string + * + * key_compare_func should return 0 if the keys match, and non-zero otherwise. + * If set to NULL, the default case-sensitive string comparator function is + * used: hashmap_compare_string + * + * initial_size is optional, and may be set to the max number of entries + * expected to be put in the hash table. This is used as a hint to + * pre-allocate the hash table to the minimum size needed to avoid + * gratuitous rehashes. If initial_size is 0, a default size will be used. + * + * Returns 0 on success and -errno on failure. + */ +int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *), + int (*key_compare_func)(const void *, const void *), + size_t initial_size) +{ + HASHMAP_ASSERT(map != NULL); + + if (!initial_size) { + initial_size = HASHMAP_SIZE_DEFAULT; + } else { + /* Convert init size to valid table size */ + initial_size = hashmap_table_size_calc(initial_size); + } + map->table_size_init = initial_size; + map->table_size = initial_size; + map->num_entries = 0; + map->table = (struct hashmap_entry *)calloc(initial_size, + sizeof(struct hashmap_entry)); + if (!map->table) { + return -ENOMEM; + } + map->hash = hash_func ? + hash_func : hashmap_hash_string; + map->key_compare = key_compare_func ? + key_compare_func : hashmap_compare_string; + map->key_alloc = NULL; + map->key_free = NULL; + return 0; +} + +/* + * Free the hashmap and all associated memory. + */ +void hashmap_destroy(struct hashmap *map) +{ + if (!map) { + return; + } + hashmap_free_keys(map); + free(map->table); + memset(map, 0, sizeof(*map)); +} + +/* + * Enable internal memory management of hash keys. + */ +void hashmap_set_key_alloc_funcs(struct hashmap *map, + void *(*key_alloc_func)(const void *), + void (*key_free_func)(void *)) +{ + HASHMAP_ASSERT(map != NULL); + + map->key_alloc = key_alloc_func; + map->key_free = key_free_func; +} + +/* + * Add an entry to the hashmap. If an entry with a matching key already + * exists and has a data pointer associated with it, the existing data + * pointer is returned, instead of assigning the new value. Compare + * the return value with the data passed in to determine if a new entry was + * created. Returns NULL if memory allocation failed. + */ +void *hashmap_put(struct hashmap *map, const void *key, void *data) +{ + struct hashmap_entry *entry; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(key != NULL); + + /* Rehash with 2x capacity if load factor is approaching 0.75 */ + if (map->table_size <= hashmap_table_min_size_calc(map->num_entries)) { + hashmap_rehash(map, map->table_size << 1); + } + entry = hashmap_entry_find(map, key, true); + if (!entry) { + /* + * Cannot find an empty slot. Either out of memory, or using + * a poor hash function. Attempt to rehash once to reduce + * chain length. + */ + if (hashmap_rehash(map, map->table_size << 1) < 0) { + return NULL; + } + entry = hashmap_entry_find(map, key, true); + if (!entry) { + return NULL; + } + } + if (!entry->key) { + /* Allocate copy of key to simplify memory management */ + if (map->key_alloc) { + entry->key = map->key_alloc(key); + if (!entry->key) { + return NULL; + } + } else { + entry->key = (void *)key; + } + ++map->num_entries; + } else if (entry->data) { + /* Do not overwrite existing data */ + return entry->data; + } + entry->data = data; + return data; +} + +/* + * Return the data pointer, or NULL if no entry exists. + */ +void *hashmap_get(const struct hashmap *map, const void *key) +{ + struct hashmap_entry *entry; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(key != NULL); + + entry = hashmap_entry_find(map, key, false); + if (!entry) { + return NULL; + } + return entry->data; +} + +/* + * Remove an entry with the specified key from the map. + * Returns the data pointer, or NULL, if no entry was found. + */ +void *hashmap_remove(struct hashmap *map, const void *key) +{ + struct hashmap_entry *entry; + void *data; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(key != NULL); + + entry = hashmap_entry_find(map, key, false); + if (!entry) { + return NULL; + } + data = entry->data; + /* Clear the entry and make the chain contiguous */ + hashmap_entry_remove(map, entry); + return data; +} + +/* + * Remove all entries. + */ +void hashmap_clear(struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + hashmap_free_keys(map); + map->num_entries = 0; + memset(map->table, 0, sizeof(struct hashmap_entry) * map->table_size); +} + +/* + * Remove all entries and reset the hash table to its initial size. + */ +void hashmap_reset(struct hashmap *map) +{ + struct hashmap_entry *new_table; + + HASHMAP_ASSERT(map != NULL); + + hashmap_clear(map); + if (map->table_size == map->table_size_init) { + return; + } + new_table = (struct hashmap_entry *)realloc(map->table, + sizeof(struct hashmap_entry) * map->table_size_init); + if (!new_table) { + return; + } + map->table = new_table; + map->table_size = map->table_size_init; +} + +/* + * Return the number of entries in the hash map. + */ +size_t hashmap_size(const struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + return map->num_entries; +} + +/* + * Get a new hashmap iterator. The iterator is an opaque + * pointer that may be used with hashmap_iter_*() functions. + * Hashmap iterators are INVALID after a put or remove operation is performed. + * hashmap_iter_remove() allows safe removal during iteration. + */ +struct hashmap_iter *hashmap_iter(const struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + if (!map->num_entries) { + return NULL; + } + return (struct hashmap_iter *)hashmap_entry_get_populated(map, + map->table); +} + +/* + * Return an iterator to the next hashmap entry. Returns NULL if there are + * no more entries. + */ +struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, + const struct hashmap_iter *iter) +{ + struct hashmap_entry *entry = (struct hashmap_entry *)iter; + + HASHMAP_ASSERT(map != NULL); + + if (!iter) { + return NULL; + } + return (struct hashmap_iter *)hashmap_entry_get_populated(map, + entry + 1); +} + +/* + * Remove the hashmap entry pointed to by this iterator and return an + * iterator to the next entry. Returns NULL if there are no more entries. + */ +struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, + const struct hashmap_iter *iter) +{ + struct hashmap_entry *entry = (struct hashmap_entry *)iter; + + HASHMAP_ASSERT(map != NULL); + + if (!iter) { + return NULL; + } + if (!entry->key) { + /* Iterator is invalid, so just return the next valid entry */ + return hashmap_iter_next(map, iter); + } + hashmap_entry_remove(map, entry); + return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry); +} + +/* + * Return the key of the entry pointed to by the iterator. + */ +const void *hashmap_iter_get_key(const struct hashmap_iter *iter) +{ + if (!iter) { + return NULL; + } + return (const void *)((struct hashmap_entry *)iter)->key; +} + +/* + * Return the data of the entry pointed to by the iterator. + */ +void *hashmap_iter_get_data(const struct hashmap_iter *iter) +{ + if (!iter) { + return NULL; + } + return ((struct hashmap_entry *)iter)->data; +} + +/* + * Set the data pointer of the entry pointed to by the iterator. + */ +void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data) +{ + if (!iter) { + return; + } + ((struct hashmap_entry *)iter)->data = data; +} + +/* + * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() + * interface, this function supports calls to hashmap_remove() during iteration. + * However, it is an error to put or remove an entry other than the current one, + * and doing so will immediately halt iteration and return an error. + * Iteration is stopped if func returns non-zero. Returns func's return + * value if it is < 0, otherwise, 0. + */ +int hashmap_foreach(const struct hashmap *map, + int (*func)(const void *, void *, void *), void *arg) +{ + struct hashmap_entry *entry; + size_t num_entries; + const void *key; + int rc; + + HASHMAP_ASSERT(map != NULL); + HASHMAP_ASSERT(func != NULL); + + entry = map->table; + for (entry = map->table; entry < &map->table[map->table_size]; + ++entry) { + if (!entry->key) { + continue; + } + num_entries = map->num_entries; + key = entry->key; + rc = func(entry->key, entry->data, arg); + if (rc < 0) { + return rc; + } + if (rc > 0) { + return 0; + } + /* Run this entry again if func() deleted it */ + if (entry->key != key) { + --entry; + } else if (num_entries != map->num_entries) { + /* Stop immediately if func put/removed another entry */ + return -1; + } + } + return 0; +} + +/* + * Default hash function for string keys. + * This is an implementation of the well-documented Jenkins one-at-a-time + * hash function. + */ +size_t hashmap_hash_string(const void *key) +{ + const char *key_str = (const char *)key; + size_t hash = 0; + + for (; *key_str; ++key_str) { + hash += *key_str; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/* + * Default key comparator function for string keys. + */ +int hashmap_compare_string(const void *a, const void *b) +{ + return strcmp((const char *)a, (const char *)b); +} + +/* + * Default key allocation function for string keys. Use free() for the + * key_free_func. + */ +void *hashmap_alloc_key_string(const void *key) +{ + return (void *)strdup((const char *)key); +} + +/* + * Case insensitive hash function for string keys. + */ +size_t hashmap_hash_string_i(const void *key) +{ + const char *key_str = (const char *)key; + size_t hash = 0; + + for (; *key_str; ++key_str) { + hash += tolower(*key_str); + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +/* + * Case insensitive key comparator function for string keys. + */ +int hashmap_compare_string_i(const void *a, const void *b) +{ + return strcasecmp((const char *)a, (const char *)b); +} + + +#ifdef HASHMAP_METRICS +/* + * Return the load factor. + */ +double hashmap_load_factor(const struct hashmap *map) +{ + HASHMAP_ASSERT(map != NULL); + + if (!map->table_size) { + return 0; + } + return (double)map->num_entries / map->table_size; +} + +/* + * Return the average number of collisions per entry. + */ +double hashmap_collisions_mean(const struct hashmap *map) +{ + struct hashmap_entry *entry; + size_t total_collisions = 0; + + HASHMAP_ASSERT(map != NULL); + + if (!map->num_entries) { + return 0; + } + for (entry = map->table; entry < &map->table[map->table_size]; + ++entry) { + if (!entry->key) { + continue; + } + total_collisions += entry->num_collisions; + } + return (double)total_collisions / map->num_entries; +} + +/* + * Return the variance between entry collisions. The higher the variance, + * the more likely the hash function is poor and is resulting in clustering. + */ +double hashmap_collisions_variance(const struct hashmap *map) +{ + struct hashmap_entry *entry; + double mean_collisions; + double variance; + double total_variance = 0; + + HASHMAP_ASSERT(map != NULL); + + if (!map->num_entries) { + return 0; + } + mean_collisions = hashmap_collisions_mean(map); + for (entry = map->table; entry < &map->table[map->table_size]; + ++entry) { + if (!entry->key) { + continue; + } + variance = (double)entry->num_collisions - mean_collisions; + total_variance += variance * variance; + } + return total_variance / map->num_entries; +} +#endif diff --git a/hashmap.h b/hashmap.h new file mode 100644 index 0000000..c344c4c --- /dev/null +++ b/hashmap.h @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2016-2018 David Leeds <davidesleeds@gmail.com> + * + * Hashmap is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + +#ifndef __HASHMAP_H__ +#define __HASHMAP_H__ + +#include <stddef.h> + +/* + * Define HASHMAP_METRICS to compile in performance analysis + * functions for use in assessing hash function performance. + */ +/* #define HASHMAP_METRICS */ + +/* + * Define HASHMAP_NOASSERT to compile out all assertions used internally. + */ +/* #define HASHMAP_NOASSERT */ + +/* + * Macros to declare type-specific versions of hashmap_*() functions to + * allow compile-time type checking and avoid the need for type casting. + */ +#define HASHMAP_FUNCS_DECLARE(name, key_type, data_type) \ + data_type *name##_hashmap_put(struct hashmap *map, \ + const key_type *key, data_type *data); \ + data_type *name##_hashmap_get(const struct hashmap *map, \ + const key_type *key); \ + data_type *name##_hashmap_remove(struct hashmap *map, \ + const key_type *key); \ + const key_type *name##_hashmap_iter_get_key( \ + const struct hashmap_iter *iter); \ + data_type *name##_hashmap_iter_get_data( \ + const struct hashmap_iter *iter); \ + void name##_hashmap_iter_set_data(const struct hashmap_iter *iter, \ + data_type *data); \ + int name##_hashmap_foreach(const struct hashmap *map, \ + int (*func)(const key_type *, data_type *, void *), void *arg); + +#define HASHMAP_FUNCS_CREATE(name, key_type, data_type) \ + data_type *name##_hashmap_put(struct hashmap *map, \ + const key_type *key, data_type *data) \ + { \ + return (data_type *)hashmap_put(map, (const void *)key, \ + (void *)data); \ + } \ + data_type *name##_hashmap_get(const struct hashmap *map, \ + const key_type *key) \ + { \ + return (data_type *)hashmap_get(map, (const void *)key); \ + } \ + data_type *name##_hashmap_remove(struct hashmap *map, \ + const key_type *key) \ + { \ + return (data_type *)hashmap_remove(map, (const void *)key); \ + } \ + const key_type *name##_hashmap_iter_get_key( \ + const struct hashmap_iter *iter) \ + { \ + return (const key_type *)hashmap_iter_get_key(iter); \ + } \ + data_type *name##_hashmap_iter_get_data( \ + const struct hashmap_iter *iter) \ + { \ + return (data_type *)hashmap_iter_get_data(iter); \ + } \ + void name##_hashmap_iter_set_data(const struct hashmap_iter *iter, \ + data_type *data) \ + { \ + hashmap_iter_set_data(iter, (void *)data); \ + } \ + struct __##name##_hashmap_foreach_state { \ + int (*func)(const key_type *, data_type *, void *); \ + void *arg; \ + }; \ + static inline int __##name##_hashmap_foreach_callback( \ + const void *key, void *data, void *arg) \ + { \ + struct __##name##_hashmap_foreach_state *s = \ + (struct __##name##_hashmap_foreach_state *)arg; \ + return s->func((const key_type *)key, \ + (data_type *)data, s->arg); \ + } \ + int name##_hashmap_foreach(const struct hashmap *map, \ + int (*func)(const key_type *, data_type *, void *), \ + void *arg) \ + { \ + struct __##name##_hashmap_foreach_state s = { func, arg }; \ + return hashmap_foreach(map, \ + __##name##_hashmap_foreach_callback, &s); \ + } + + +struct hashmap_iter; +struct hashmap_entry; + +/* + * The hashmap state structure. + */ +struct hashmap { + size_t table_size_init; + size_t table_size; + size_t num_entries; + struct hashmap_entry *table; + size_t (*hash)(const void *); + int (*key_compare)(const void *, const void *); + void *(*key_alloc)(const void *); + void (*key_free)(void *); +}; + +/* + * Initialize an empty hashmap. + * + * hash_func should return an even distribution of numbers between 0 + * and SIZE_MAX varying on the key provided. If set to NULL, the default + * case-sensitive string hash function is used: hashmap_hash_string + * + * key_compare_func should return 0 if the keys match, and non-zero otherwise. + * If set to NULL, the default case-sensitive string comparator function is + * used: hashmap_compare_string + * + * initial_size is optional, and may be set to the max number of entries + * expected to be put in the hash table. This is used as a hint to + * pre-allocate the hash table to the minimum size needed to avoid + * gratuitous rehashes. If initial_size is 0, a default size will be used. + * + * Returns 0 on success and -errno on failure. + */ +int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *), + int (*key_compare_func)(const void *, const void *), + size_t initial_size); + +/* + * Free the hashmap and all associated memory. + */ +void hashmap_destroy(struct hashmap *map); + +/* + * Enable internal memory allocation and management of hash keys. + */ +void hashmap_set_key_alloc_funcs(struct hashmap *map, + void *(*key_alloc_func)(const void *), + void (*key_free_func)(void *)); + +/* + * Add an entry to the hashmap. If an entry with a matching key already + * exists and has a data pointer associated with it, the existing data + * pointer is returned, instead of assigning the new value. Compare + * the return value with the data passed in to determine if a new entry was + * created. Returns NULL if memory allocation failed. + */ +void *hashmap_put(struct hashmap *map, const void *key, void *data); + +/* + * Return the data pointer, or NULL if no entry exists. + */ +void *hashmap_get(const struct hashmap *map, const void *key); + +/* + * Remove an entry with the specified key from the map. + * Returns the data pointer, or NULL, if no entry was found. + */ +void *hashmap_remove(struct hashmap *map, const void *key); + +/* + * Remove all entries. + */ +void hashmap_clear(struct hashmap *map); + +/* + * Remove all entries and reset the hash table to its initial size. + */ +void hashmap_reset(struct hashmap *map); + +/* + * Return the number of entries in the hash map. + */ +size_t hashmap_size(const struct hashmap *map); + +/* + * Get a new hashmap iterator. The iterator is an opaque + * pointer that may be used with hashmap_iter_*() functions. + * Hashmap iterators are INVALID after a put or remove operation is performed. + * hashmap_iter_remove() allows safe removal during iteration. + */ +struct hashmap_iter *hashmap_iter(const struct hashmap *map); + +/* + * Return an iterator to the next hashmap entry. Returns NULL if there are + * no more entries. + */ +struct hashmap_iter *hashmap_iter_next(const struct hashmap *map, + const struct hashmap_iter *iter); + +/* + * Remove the hashmap entry pointed to by this iterator and returns an + * iterator to the next entry. Returns NULL if there are no more entries. + */ +struct hashmap_iter *hashmap_iter_remove(struct hashmap *map, + const struct hashmap_iter *iter); + +/* + * Return the key of the entry pointed to by the iterator. + */ +const void *hashmap_iter_get_key(const struct hashmap_iter *iter); + +/* + * Return the data of the entry pointed to by the iterator. + */ +void *hashmap_iter_get_data(const struct hashmap_iter *iter); + +/* + * Set the data pointer of the entry pointed to by the iterator. + */ +void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data); + +/* + * Invoke func for each entry in the hashmap. Unlike the hashmap_iter_*() + * interface, this function supports calls to hashmap_remove() during iteration. + * However, it is an error to put or remove an entry other than the current one, + * and doing so will immediately halt iteration and return an error. + * Iteration is stopped if func returns non-zero. Returns func's return + * value if it is < 0, otherwise, 0. + */ +int hashmap_foreach(const struct hashmap *map, + int (*func)(const void *, void *, void *), void *arg); + +/* + * Default hash function for string keys. + * This is an implementation of the well-documented Jenkins one-at-a-time + * hash function. + */ +size_t hashmap_hash_string(const void *key); + +/* + * Default key comparator function for string keys. + */ +int hashmap_compare_string(const void *a, const void *b); + +/* + * Default key allocation function for string keys. Use free() for the + * key_free_func. + */ +void *hashmap_alloc_key_string(const void *key); + +/* + * Case insensitive hash function for string keys. + */ +size_t hashmap_hash_string_i(const void *key); + +/* + * Case insensitive key comparator function for string keys. + */ +int hashmap_compare_string_i(const void *a, const void *b); + + +#ifdef HASHMAP_METRICS +/* + * Return the load factor. + */ +double hashmap_load_factor(const struct hashmap *map); + +/* + * Return the average number of collisions per entry. + */ +double hashmap_collisions_mean(const struct hashmap *map); + +/* + * Return the variance between entry collisions. The higher the variance, + * the more likely the hash function is poor and is resulting in clustering. + */ +double hashmap_collisions_variance(const struct hashmap *map); +#endif + + +#endif /* __HASHMAP_H__ */ + diff --git a/uidmap.c b/uidmap.c new file mode 100644 index 0000000..8315be2 --- /dev/null +++ b/uidmap.c @@ -0,0 +1,262 @@ +#include "hashmap.h" +#include "lstring.h" +#include "types.h" +#include "helper.h" +#include <stdint.h> +#include <inttypes.h> +#include <math.h> +#include <time.h> +#include <stdio.h> +#include <unistd.h> + +static struct hashmap _numToName, _nameToNum; +static BOOL _initDone = FALSE; +static const char *_saveFileName = NULL; + +static const uint16_t MAGIC = 1234; +static const int64_t MAX_AGE = 86400 * 2; // 2 days +#define MAX_NAME_LEN 1024 + +HASHMAP_FUNCS_CREATE(name, struct string, struct user); +HASHMAP_FUNCS_CREATE(number, uint32_t, struct user); + +struct user +{ + uint32_t number; + const struct string *name; + int64_t lastUse; +}; + +static void insertTime(const struct string *name, const uint32_t number, const int64_t lastUse) +{ + uint8_t *buffer = malloc(sizeof(struct user) + sizeof(struct string) + name->l); + struct user *u = (struct user*)buffer; + struct string *s = (struct string*)(buffer + sizeof(struct user)); + char *str = (char*)(buffer + sizeof(struct user) + sizeof(struct string)); + memcpy(str, name->s, name->l); + s->s = str; + s->l = name->l; + u->name = s; + u->number = number; + u->lastUse = lastUse; + if (name_hashmap_put(&_nameToNum, u->name, u) != u) { + plog(DEBUG_WARNING, "[uidmap] Collision when inserting %.*s by name (%"PRIu32")", (int)s->l, s->s, number); + } + if (number_hashmap_put(&_numToName, &u->number, u) != u) { + plog(DEBUG_WARNING, "[uidmap] Collision when inserting %.*s by number (%"PRIu32")", (int)s->l, s->s, number); + } + // TODO: Leak +} + +static void insert(const struct string *name, const uint32_t number) +{ + insertTime(name, number, time(NULL)); +} + +static int uint32_compare(const void *a, const void *b) +{ + return (*(const uint32_t*)a) != (*(const uint32_t*)b); +} + +static int string_compare(const void *a, const void *b) +{ + return !equals((const struct string*)a, (const struct string*)b); +} + +static size_t uint32_hash(const void *key) +{ + return *(const uint32_t*)key; +} + +static size_t string_hash(const void *key) +{ + const struct string *key_str = (const struct string*)key; + size_t hash = 0; + + for (size_t pos = 0; pos < key_str->l; ++pos) { + hash += key_str->s[pos]; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + return hash; +} + +uint32_t uidmap_getNumberForName(const struct string *name) +{ + // Try string matching + char buf[name->l]; + struct string lower = { .s = buf, .l = name->l }; + for (size_t i = 0; i < name->l; ++i) { + buf[i] = tolower(name->s[i]); + } + // Get + struct user *u = name_hashmap_get(&_nameToNum, &lower); + if (u != NULL) { + u->lastUse = time(NULL); + return u->number; + } + // Not known yet - try numeric + uint32_t asNumber = parseUInt32(name); + if (asNumber >= 2000) { + u = number_hashmap_get(&_numToName, &asNumber); + if (u == NULL) { + // No collision, use name as number + insert(&lower, asNumber); + return asNumber; + } + plog(DEBUG_WARNING, "%.*s is numeric (%u) but collides", (int)name->l, name->s, asNumber); + } + // Default case - generate number + asNumber = (uint32_t)string_hash(&lower); + while (asNumber < 2000) { + asNumber = rand() + 2000; + } + for (;;) { + //plog(DEBUG_WARNING, "Using number %u for %.*s", asNumber, (int)name->l, name->s); + u = number_hashmap_get(&_numToName, &asNumber); + if (u == NULL) + break; + do { + asNumber = rand() + 2000; + } while (asNumber < 2000); + } + insert(&lower, asNumber); + return asNumber; +} + +const struct string *uidmap_getNameForNumber(const uint32_t number) +{ + struct user *u = number_hashmap_get(&_numToName, &number); + if (u == NULL) + return NULL; + u->lastUse = time(NULL); + return u->name; +} + +void uidmap_init(const char *fileName) +{ + if (_initDone) + return; + _initDone = TRUE; + hashmap_init(&_numToName, uint32_hash, uint32_compare, 0); + hashmap_init(&_nameToNum, string_hash, string_compare, 0); + if (fileName == NULL) + return; + // We have a filename, remember it and try to load DB + _saveFileName = strdup(fileName); + FILE *fh = fopen(_saveFileName, "rb"); + if (fh == NULL) + return; + fseek(fh, 0, SEEK_END); + const int fsize = ftell(fh); + fseek(fh, 0, SEEK_SET); + if (fsize > 0) { + const size_t entries = (size_t)fsize / 25; + hashmap_destroy(&_numToName); + hashmap_destroy(&_nameToNum); + hashmap_init(&_numToName, uint32_hash, uint32_compare, entries); + hashmap_init(&_nameToNum, string_hash, string_compare, entries); + } + // Not endian safe, do not copy file around... + uint16_t len = 0; + int64_t lastUse; + uint32_t uidNumber; + char nbuffer[MAX_NAME_LEN]; + struct string name; + name.s = nbuffer; + if (fread(&len, sizeof(len), 1, fh) != 1 || len != MAGIC) { + plog(DEBUG_WARNING, "uid map file missing header magic (%"PRIu16")", len); + fclose(fh); + return; + } + int loaded = 0; + const int64_t now = time(NULL); + for (;;) { + if (fread(&len, sizeof(len), 1, fh) != 1) + break; + if (len > MAX_NAME_LEN) { + plog(DEBUG_WARNING, "DB has entry of size %"PRIu16", aborting import"); + break; + } + if (fread(&lastUse, sizeof(lastUse), 1, fh) != 1 || fread(&uidNumber, sizeof(uidNumber), 1, fh) != 1) { + plog(DEBUG_WARNING, "DB has truncated entry (no lastUse/uidNumber)"); + break; + } + if (fread(nbuffer, len, 1, fh) != 1) { + plog(DEBUG_WARNING, "DB has truncated entry (no name)"); + break; + } + // Check age + if (now - lastUse > MAX_AGE) + continue; + if (lastUse > now) + lastUse = now; + // Add + name.l = len; + insertTime(&name, uidNumber, lastUse); + loaded++; + } + fclose(fh); + plog(DEBUG_INFO, "Loaded %d entries from %s", loaded, _saveFileName); +} + +void uidmap_cleanupSave() +{ + if (!_initDone) + return; + FILE *fh = NULL; + if (_saveFileName != NULL) { + fh = fopen(_saveFileName, "wb"); + if (fh != NULL) { + if (fwrite(&MAGIC, sizeof(MAGIC), 1, fh) != 1) { + plog(DEBUG_WARNING, "Cannot write magic to %s", _saveFileName); + fclose(fh); + fh = NULL; + } + } + } + const int64_t deadline = time(NULL) - MAX_AGE; + struct hashmap_iter *it = hashmap_iter(&_nameToNum); + struct user *u = NULL; + int saved = 0, removed = 0; + uint16_t len; + while (it != NULL) { + u = name_hashmap_iter_get_data(it); + if (u->lastUse < deadline) { + it = hashmap_iter_remove(&_nameToNum, it); + if (number_hashmap_remove(&_numToName, &u->number) != u) { + plog(DEBUG_WARNING, "Could not remove user from numToName that was in nameToNum"); + } + free(u); + removed++; + continue; + } + // Save if file + if (fh != NULL && u->name->l < MAX_NAME_LEN) { + len = (uint16_t)u->name->l; + if (fwrite(&len, sizeof(len), 1, fh) != 1 + || fwrite(&u->lastUse, sizeof(u->lastUse), 1, fh) != 1 + || fwrite(&u->number, sizeof(u->number), 1, fh) != 1 + || fwrite(u->name->s, len, 1, fh) != 1 + ) { + plog(DEBUG_WARNING, "Could not write record to %s", _saveFileName); + fclose(fh); + fh = NULL; + } else { + saved++; + } + } + it = hashmap_iter_next(&_nameToNum, it); + } + if (fh != NULL) { + fsync(fileno(fh)); + fclose(fh); + } + if (saved > 0 || removed > 0) { + plog(DEBUG_VERBOSE, "Purged %d old entries, saved %d entries", removed, saved); + } +} + diff --git a/uidmap.h b/uidmap.h new file mode 100644 index 0000000..a126e39 --- /dev/null +++ b/uidmap.h @@ -0,0 +1,16 @@ +#ifndef _UIDMAP_H_ +#define _UIDMAP_H_ + +#include <stdint.h> + +struct string; + +uint32_t uidmap_getNumberForName(const struct string *name); + +const struct string *uidmap_getNameForNumber(const uint32_t number); + +void uidmap_init(const char *fileName); + +void uidmap_cleanupSave(); + +#endif |