Started work on proxy-side uid generation/tracking

author: Simon Rettberg 2018-11-09 15:36:21 +0100
committer: Simon Rettberg 2018-11-09 15:36:21 +0100
commit: 33db97b72473124697170394efbc993870276c55 (patch)
tree: b1f27c1634c07652808af243f245da2809a6e4a3
parent: Move string functions to lstring.[hc] (diff)
download: ldadp-33db97b72473124697170394efbc993870276c55.tar.gz
ldadp-33db97b72473124697170394efbc993870276c55.tar.xz
ldadp-33db97b72473124697170394efbc993870276c55.zip
4 files changed, 1282 insertions, 0 deletions
diff --git a/hashmap.c b/hashmap.c
new file mode 100644
index 0000000..eceeacc
--- /dev/null
+++ b/hashmap.c
@@ -0,0 +1,723 @@
+/*
+ * Copyright (c) 2016-2018 David Leeds <davidesleeds@gmail.com>
+ *
+ * Hashmap is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "hashmap.h"
+
+#ifndef HASHMAP_NOASSERT
+#include <assert.h>
+#define HASHMAP_ASSERT(expr)            assert(expr)
+#else
+#define HASHMAP_ASSERT(expr)
+#endif
+
+/* Table sizes must be powers of 2 */
+#define HASHMAP_SIZE_MIN                (1 << 5)    /* 32 */
+#define HASHMAP_SIZE_DEFAULT            (1 << 8)    /* 256 */
+#define HASHMAP_SIZE_MOD(map, val)      ((val) & ((map)->table_size - 1))
+
+/* Limit for probing is 1/2 of table_size */
+#define HASHMAP_PROBE_LEN(map)          ((map)->table_size >> 1)
+/* Return the next linear probe index */
+#define HASHMAP_PROBE_NEXT(map, index)  HASHMAP_SIZE_MOD(map, (index) + 1)
+
+/* Check if index b is less than or equal to index a */
+#define HASHMAP_INDEX_LE(map, a, b)     \
+    ((a) == (b) || (((b) - (a)) & ((map)->table_size >> 1)) != 0)
+
+
+struct hashmap_entry {
+    void *key;
+    void *data;
+#ifdef HASHMAP_METRICS
+    size_t num_collisions;
+#endif
+};
+
+
+/*
+ * Enforce a maximum 0.75 load factor.
+ */
+static inline size_t hashmap_table_min_size_calc(size_t num_entries)
+{
+    return num_entries + (num_entries / 3);
+}
+
+/*
+ * Calculate the optimal table size, given the specified max number
+ * of elements.
+ */
+static size_t hashmap_table_size_calc(size_t num_entries)
+{
+    size_t table_size;
+    size_t min_size;
+
+    table_size = hashmap_table_min_size_calc(num_entries);
+
+    /* Table size is always a power of 2 */
+    min_size = HASHMAP_SIZE_MIN;
+    while (min_size < table_size) {
+        min_size <<= 1;
+    }
+    return min_size;
+}
+
+/*
+ * Get a valid hash table index from a key.
+ */
+static inline size_t hashmap_calc_index(const struct hashmap *map,
+    const void *key)
+{
+    return HASHMAP_SIZE_MOD(map, map->hash(key));
+}
+
+/*
+ * Return the next populated entry, starting with the specified one.
+ * Returns NULL if there are no more valid entries.
+ */
+static struct hashmap_entry *hashmap_entry_get_populated(
+    const struct hashmap *map, struct hashmap_entry *entry)
+{
+    for (; entry < &map->table[map->table_size]; ++entry) {
+        if (entry->key) {
+            return entry;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Find the hashmap entry with the specified key, or an empty slot.
+ * Returns NULL if the entire table has been searched without finding a match.
+ */
+static struct hashmap_entry *hashmap_entry_find(const struct hashmap *map,
+    const void *key, bool find_empty)
+{
+    size_t i;
+    size_t index;
+    size_t probe_len = HASHMAP_PROBE_LEN(map);
+    struct hashmap_entry *entry;
+
+    index = hashmap_calc_index(map, key);
+
+    /* Linear probing */
+    for (i = 0; i < probe_len; ++i) {
+        entry = &map->table[index];
+        if (!entry->key) {
+            if (find_empty) {
+#ifdef HASHMAP_METRICS
+                entry->num_collisions = i;
+#endif
+                return entry;
+            }
+            return NULL;
+        }
+        if (map->key_compare(key, entry->key) == 0) {
+            return entry;
+        }
+        index = HASHMAP_PROBE_NEXT(map, index);
+    }
+    return NULL;
+}
+
+/*
+ * Removes the specified entry and processes the proceeding entries to reduce
+ * the load factor and keep the chain continuous.  This is a required
+ * step for hash maps using linear probing.
+ */
+static void hashmap_entry_remove(struct hashmap *map,
+    struct hashmap_entry *removed_entry)
+{
+    size_t i;
+#ifdef HASHMAP_METRICS
+    size_t removed_i = 0;
+#endif
+    size_t index;
+    size_t entry_index;
+    size_t removed_index = (removed_entry - map->table);
+    struct hashmap_entry *entry;
+
+    /* Free the key */
+    if (map->key_free) {
+        map->key_free(removed_entry->key);
+    }
+    --map->num_entries;
+
+    /* Fill the free slot in the chain */
+    index = HASHMAP_PROBE_NEXT(map, removed_index);
+    for (i = 1; i < map->table_size; ++i) {
+        entry = &map->table[index];
+        if (!entry->key) {
+            /* Reached end of chain */
+            break;
+        }
+        entry_index = hashmap_calc_index(map, entry->key);
+        /* Shift in entries with an index <= to the removed slot */
+        if (HASHMAP_INDEX_LE(map, removed_index, entry_index)) {
+#ifdef HASHMAP_METRICS
+            entry->num_collisions -= (i - removed_i);
+            removed_i = i;
+#endif
+            memcpy(removed_entry, entry, sizeof(*removed_entry));
+            removed_index = index;
+            removed_entry = entry;
+        }
+        index = HASHMAP_PROBE_NEXT(map, index);
+    }
+    /* Clear the last removed entry */
+    memset(removed_entry, 0, sizeof(*removed_entry));
+}
+
+/*
+ * Reallocates the hash table to the new size and rehashes all entries.
+ * new_size MUST be a power of 2.
+ * Returns 0 on success and -errno on allocation or hash function failure.
+ */
+static int hashmap_rehash(struct hashmap *map, size_t new_size)
+{
+    size_t old_size;
+    struct hashmap_entry *old_table;
+    struct hashmap_entry *new_table;
+    struct hashmap_entry *entry;
+    struct hashmap_entry *new_entry;
+
+    HASHMAP_ASSERT(new_size >= HASHMAP_SIZE_MIN);
+    HASHMAP_ASSERT((new_size & (new_size - 1)) == 0);
+
+    new_table = (struct hashmap_entry *)calloc(new_size,
+        sizeof(struct hashmap_entry));
+    if (!new_table) {
+        return -ENOMEM;
+    }
+    /* Backup old elements in case of rehash failure */
+    old_size = map->table_size;
+    old_table = map->table;
+    map->table_size = new_size;
+    map->table = new_table;
+    /* Rehash */
+    for (entry = old_table; entry < &old_table[old_size]; ++entry) {
+        if (!entry->data) {
+            /* Only copy entries with data */
+            continue;
+        }
+        new_entry = hashmap_entry_find(map, entry->key, true);
+        if (!new_entry) {
+            /*
+             * The load factor is too high with the new table
+             * size, or a poor hash function was used.
+             */
+            goto revert;
+        }
+        /* Shallow copy (intentionally omits num_collisions) */
+        new_entry->key = entry->key;
+        new_entry->data = entry->data;
+    }
+    free(old_table);
+    return 0;
+revert:
+    map->table_size = old_size;
+    map->table = old_table;
+    free(new_table);
+    return -EINVAL;
+}
+
+/*
+ * Iterate through all entries and free all keys.
+ */
+static void hashmap_free_keys(struct hashmap *map)
+{
+    struct hashmap_iter *iter;
+
+    if (!map->key_free) {
+        return;
+    }
+    for (iter = hashmap_iter(map); iter;
+        iter = hashmap_iter_next(map, iter)) {
+        map->key_free((void *)hashmap_iter_get_key(iter));
+    }
+}
+
+/*
+ * Initialize an empty hashmap.
+ *
+ * hash_func should return an even distribution of numbers between 0
+ * and SIZE_MAX varying on the key provided.  If set to NULL, the default
+ * case-sensitive string hash function is used: hashmap_hash_string
+ *
+ * key_compare_func should return 0 if the keys match, and non-zero otherwise.
+ * If set to NULL, the default case-sensitive string comparator function is
+ * used: hashmap_compare_string
+ *
+ * initial_size is optional, and may be set to the max number of entries
+ * expected to be put in the hash table.  This is used as a hint to
+ * pre-allocate the hash table to the minimum size needed to avoid
+ * gratuitous rehashes.  If initial_size is 0, a default size will be used.
+ *
+ * Returns 0 on success and -errno on failure.
+ */
+int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *),
+    int (*key_compare_func)(const void *, const void *),
+    size_t initial_size)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!initial_size) {
+        initial_size = HASHMAP_SIZE_DEFAULT;
+    } else {
+        /* Convert init size to valid table size */
+        initial_size = hashmap_table_size_calc(initial_size);
+    }
+    map->table_size_init = initial_size;
+    map->table_size = initial_size;
+    map->num_entries = 0;
+    map->table = (struct hashmap_entry *)calloc(initial_size,
+        sizeof(struct hashmap_entry));
+    if (!map->table) {
+        return -ENOMEM;
+    }
+    map->hash = hash_func ?
+            hash_func : hashmap_hash_string;
+    map->key_compare = key_compare_func ?
+            key_compare_func : hashmap_compare_string;
+    map->key_alloc = NULL;
+    map->key_free = NULL;
+    return 0;
+}
+
+/*
+ * Free the hashmap and all associated memory.
+ */
+void hashmap_destroy(struct hashmap *map)
+{
+    if (!map) {
+        return;
+    }
+    hashmap_free_keys(map);
+    free(map->table);
+    memset(map, 0, sizeof(*map));
+}
+
+/*
+ * Enable internal memory management of hash keys.
+ */
+void hashmap_set_key_alloc_funcs(struct hashmap *map,
+    void *(*key_alloc_func)(const void *),
+    void (*key_free_func)(void *))
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    map->key_alloc = key_alloc_func;
+    map->key_free = key_free_func;
+}
+
+/*
+ * Add an entry to the hashmap.  If an entry with a matching key already
+ * exists and has a data pointer associated with it, the existing data
+ * pointer is returned, instead of assigning the new value.  Compare
+ * the return value with the data passed in to determine if a new entry was
+ * created.  Returns NULL if memory allocation failed.
+ */
+void *hashmap_put(struct hashmap *map, const void *key, void *data)
+{
+    struct hashmap_entry *entry;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(key != NULL);
+
+    /* Rehash with 2x capacity if load factor is approaching 0.75 */
+    if (map->table_size <= hashmap_table_min_size_calc(map->num_entries)) {
+        hashmap_rehash(map, map->table_size << 1);
+    }
+    entry = hashmap_entry_find(map, key, true);
+    if (!entry) {
+        /*
+         * Cannot find an empty slot.  Either out of memory, or using
+         * a poor hash function.  Attempt to rehash once to reduce
+         * chain length.
+         */
+        if (hashmap_rehash(map, map->table_size << 1) < 0) {
+            return NULL;
+        }
+        entry = hashmap_entry_find(map, key, true);
+        if (!entry) {
+            return NULL;
+        }
+    }
+    if (!entry->key) {
+        /* Allocate copy of key to simplify memory management */
+        if (map->key_alloc) {
+            entry->key = map->key_alloc(key);
+            if (!entry->key) {
+                return NULL;
+            }
+        } else {
+            entry->key = (void *)key;
+        }
+        ++map->num_entries;
+    } else if (entry->data) {
+        /* Do not overwrite existing data */
+        return entry->data;
+    }
+    entry->data = data;
+    return data;
+}
+
+/*
+ * Return the data pointer, or NULL if no entry exists.
+ */
+void *hashmap_get(const struct hashmap *map, const void *key)
+{
+    struct hashmap_entry *entry;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(key != NULL);
+
+    entry = hashmap_entry_find(map, key, false);
+    if (!entry) {
+        return NULL;
+    }
+    return entry->data;
+}
+
+/*
+ * Remove an entry with the specified key from the map.
+ * Returns the data pointer, or NULL, if no entry was found.
+ */
+void *hashmap_remove(struct hashmap *map, const void *key)
+{
+    struct hashmap_entry *entry;
+    void *data;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(key != NULL);
+
+    entry = hashmap_entry_find(map, key, false);
+    if (!entry) {
+        return NULL;
+    }
+    data = entry->data;
+    /* Clear the entry and make the chain contiguous */
+    hashmap_entry_remove(map, entry);
+    return data;
+}
+
+/*
+ * Remove all entries.
+ */
+void hashmap_clear(struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    hashmap_free_keys(map);
+    map->num_entries = 0;
+    memset(map->table, 0, sizeof(struct hashmap_entry) * map->table_size);
+}
+
+/*
+ * Remove all entries and reset the hash table to its initial size.
+ */
+void hashmap_reset(struct hashmap *map)
+{
+    struct hashmap_entry *new_table;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    hashmap_clear(map);
+    if (map->table_size == map->table_size_init) {
+        return;
+    }
+    new_table = (struct hashmap_entry *)realloc(map->table,
+        sizeof(struct hashmap_entry) * map->table_size_init);
+    if (!new_table) {
+        return;
+    }
+    map->table = new_table;
+    map->table_size = map->table_size_init;
+}
+
+/*
+ * Return the number of entries in the hash map.
+ */
+size_t hashmap_size(const struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    return map->num_entries;
+}
+
+/*
+ * Get a new hashmap iterator.  The iterator is an opaque
+ * pointer that may be used with hashmap_iter_*() functions.
+ * Hashmap iterators are INVALID after a put or remove operation is performed.
+ * hashmap_iter_remove() allows safe removal during iteration.
+ */
+struct hashmap_iter *hashmap_iter(const struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!map->num_entries) {
+        return NULL;
+    }
+    return (struct hashmap_iter *)hashmap_entry_get_populated(map,
+        map->table);
+}
+
+/*
+ * Return an iterator to the next hashmap entry.  Returns NULL if there are
+ * no more entries.
+ */
+struct hashmap_iter *hashmap_iter_next(const struct hashmap *map,
+    const struct hashmap_iter *iter)
+{
+    struct hashmap_entry *entry = (struct hashmap_entry *)iter;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!iter) {
+        return NULL;
+    }
+    return (struct hashmap_iter *)hashmap_entry_get_populated(map,
+        entry + 1);
+}
+
+/*
+ * Remove the hashmap entry pointed to by this iterator and return an
+ * iterator to the next entry.  Returns NULL if there are no more entries.
+ */
+struct hashmap_iter *hashmap_iter_remove(struct hashmap *map,
+    const struct hashmap_iter *iter)
+{
+    struct hashmap_entry *entry = (struct hashmap_entry *)iter;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!iter) {
+        return NULL;
+    }
+    if (!entry->key) {
+        /* Iterator is invalid, so just return the next valid entry */
+        return hashmap_iter_next(map, iter);
+    }
+    hashmap_entry_remove(map, entry);
+    return (struct hashmap_iter *)hashmap_entry_get_populated(map, entry);
+}
+
+/*
+ * Return the key of the entry pointed to by the iterator.
+ */
+const void *hashmap_iter_get_key(const struct hashmap_iter *iter)
+{
+    if (!iter) {
+        return NULL;
+    }
+    return (const void *)((struct hashmap_entry *)iter)->key;
+}
+
+/*
+ * Return the data of the entry pointed to by the iterator.
+ */
+void *hashmap_iter_get_data(const struct hashmap_iter *iter)
+{
+    if (!iter) {
+        return NULL;
+    }
+    return ((struct hashmap_entry *)iter)->data;
+}
+
+/*
+ * Set the data pointer of the entry pointed to by the iterator.
+ */
+void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data)
+{
+    if (!iter) {
+        return;
+    }
+    ((struct hashmap_entry *)iter)->data = data;
+}
+
+/*
+ * Invoke func for each entry in the hashmap.  Unlike the hashmap_iter_*()
+ * interface, this function supports calls to hashmap_remove() during iteration.
+ * However, it is an error to put or remove an entry other than the current one,
+ * and doing so will immediately halt iteration and return an error.
+ * Iteration is stopped if func returns non-zero.  Returns func's return
+ * value if it is < 0, otherwise, 0.
+ */
+int hashmap_foreach(const struct hashmap *map,
+    int (*func)(const void *, void *, void *), void *arg)
+{
+    struct hashmap_entry *entry;
+    size_t num_entries;
+    const void *key;
+    int rc;
+
+    HASHMAP_ASSERT(map != NULL);
+    HASHMAP_ASSERT(func != NULL);
+
+    entry = map->table;
+    for (entry = map->table; entry < &map->table[map->table_size];
+        ++entry) {
+        if (!entry->key) {
+            continue;
+        }
+        num_entries = map->num_entries;
+        key = entry->key;
+        rc = func(entry->key, entry->data, arg);
+        if (rc < 0) {
+            return rc;
+        }
+        if (rc > 0) {
+            return 0;
+        }
+        /* Run this entry again if func() deleted it */
+        if (entry->key != key) {
+            --entry;
+        } else if (num_entries != map->num_entries) {
+            /* Stop immediately if func put/removed another entry */
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Default hash function for string keys.
+ * This is an implementation of the well-documented Jenkins one-at-a-time
+ * hash function.
+ */
+size_t hashmap_hash_string(const void *key)
+{
+    const char *key_str = (const char *)key;
+    size_t hash = 0;
+
+    for (; *key_str; ++key_str) {
+        hash += *key_str;
+        hash += (hash << 10);
+        hash ^= (hash >> 6);
+    }
+    hash += (hash << 3);
+    hash ^= (hash >> 11);
+    hash += (hash << 15);
+    return hash;
+}
+
+/*
+ * Default key comparator function for string keys.
+ */
+int hashmap_compare_string(const void *a, const void *b)
+{
+    return strcmp((const char *)a, (const char *)b);
+}
+
+/*
+ * Default key allocation function for string keys.  Use free() for the
+ * key_free_func.
+ */
+void *hashmap_alloc_key_string(const void *key)
+{
+    return (void *)strdup((const char *)key);
+}
+
+/*
+ * Case insensitive hash function for string keys.
+ */
+size_t hashmap_hash_string_i(const void *key)
+{
+    const char *key_str = (const char *)key;
+    size_t hash = 0;
+
+    for (; *key_str; ++key_str) {
+        hash += tolower(*key_str);
+        hash += (hash << 10);
+        hash ^= (hash >> 6);
+    }
+    hash += (hash << 3);
+    hash ^= (hash >> 11);
+    hash += (hash << 15);
+    return hash;
+}
+
+/*
+ * Case insensitive key comparator function for string keys.
+ */
+int hashmap_compare_string_i(const void *a, const void *b)
+{
+    return strcasecmp((const char *)a, (const char *)b);
+}
+
+
+#ifdef HASHMAP_METRICS
+/*
+ * Return the load factor.
+ */
+double hashmap_load_factor(const struct hashmap *map)
+{
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!map->table_size) {
+        return 0;
+    }
+    return (double)map->num_entries / map->table_size;
+}
+
+/*
+ * Return the average number of collisions per entry.
+ */
+double hashmap_collisions_mean(const struct hashmap *map)
+{
+    struct hashmap_entry *entry;
+    size_t total_collisions = 0;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!map->num_entries) {
+        return 0;
+    }
+    for (entry = map->table; entry < &map->table[map->table_size];
+        ++entry) {
+        if (!entry->key) {
+            continue;
+        }
+        total_collisions += entry->num_collisions;
+    }
+    return (double)total_collisions / map->num_entries;
+}
+
+/*
+ * Return the variance between entry collisions.  The higher the variance,
+ * the more likely the hash function is poor and is resulting in clustering.
+ */
+double hashmap_collisions_variance(const struct hashmap *map)
+{
+    struct hashmap_entry *entry;
+    double mean_collisions;
+    double variance;
+    double total_variance = 0;
+
+    HASHMAP_ASSERT(map != NULL);
+
+    if (!map->num_entries) {
+        return 0;
+    }
+    mean_collisions = hashmap_collisions_mean(map);
+    for (entry = map->table; entry < &map->table[map->table_size];
+        ++entry) {
+        if (!entry->key) {
+            continue;
+        }
+        variance = (double)entry->num_collisions - mean_collisions;
+        total_variance += variance * variance;
+    }
+    return total_variance / map->num_entries;
+}
+#endif
diff --git a/hashmap.h b/hashmap.h
new file mode 100644
index 0000000..c344c4c
--- /dev/null
+++ b/hashmap.h
@@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2016-2018 David Leeds <davidesleeds@gmail.com>
+ *
+ * Hashmap is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See LICENSE for details.
+ */
+
+#ifndef __HASHMAP_H__
+#define __HASHMAP_H__
+
+#include <stddef.h>
+
+/*
+ * Define HASHMAP_METRICS to compile in performance analysis
+ * functions for use in assessing hash function performance.
+ */
+/* #define HASHMAP_METRICS */
+
+/*
+ * Define HASHMAP_NOASSERT to compile out all assertions used internally.
+ */
+/* #define HASHMAP_NOASSERT */
+
+/*
+ * Macros to declare type-specific versions of hashmap_*() functions to
+ * allow compile-time type checking and avoid the need for type casting.
+ */
+#define HASHMAP_FUNCS_DECLARE(name, key_type, data_type)                \
+    data_type *name##_hashmap_put(struct hashmap *map,                  \
+            const key_type *key, data_type *data);                      \
+    data_type *name##_hashmap_get(const struct hashmap *map,            \
+            const key_type *key);                                       \
+    data_type *name##_hashmap_remove(struct hashmap *map,               \
+            const key_type *key);                                       \
+    const key_type *name##_hashmap_iter_get_key(                        \
+            const struct hashmap_iter *iter);                           \
+    data_type *name##_hashmap_iter_get_data(                            \
+            const struct hashmap_iter *iter);                           \
+    void name##_hashmap_iter_set_data(const struct hashmap_iter *iter,  \
+            data_type *data);                                           \
+    int name##_hashmap_foreach(const struct hashmap *map,               \
+            int (*func)(const key_type *, data_type *, void *), void *arg);
+
+#define HASHMAP_FUNCS_CREATE(name, key_type, data_type)                 \
+    data_type *name##_hashmap_put(struct hashmap *map,                  \
+            const key_type *key, data_type *data)                       \
+    {                                                                   \
+        return (data_type *)hashmap_put(map, (const void *)key,         \
+                (void *)data);                                          \
+    }                                                                   \
+    data_type *name##_hashmap_get(const struct hashmap *map,            \
+            const key_type *key)                                        \
+    {                                                                   \
+        return (data_type *)hashmap_get(map, (const void *)key);        \
+    }                                                                   \
+    data_type *name##_hashmap_remove(struct hashmap *map,               \
+            const key_type *key)                                        \
+    {                                                                   \
+        return (data_type *)hashmap_remove(map, (const void *)key);     \
+    }                                                                   \
+    const key_type *name##_hashmap_iter_get_key(                        \
+            const struct hashmap_iter *iter)                            \
+    {                                                                   \
+        return (const key_type *)hashmap_iter_get_key(iter);            \
+    }                                                                   \
+    data_type *name##_hashmap_iter_get_data(                            \
+            const struct hashmap_iter *iter)                            \
+    {                                                                   \
+        return (data_type *)hashmap_iter_get_data(iter);                \
+    }                                                                   \
+    void name##_hashmap_iter_set_data(const struct hashmap_iter *iter,  \
+            data_type *data)                                            \
+    {                                                                   \
+        hashmap_iter_set_data(iter, (void *)data);                      \
+    }                                                                   \
+    struct __##name##_hashmap_foreach_state {                           \
+        int (*func)(const key_type *, data_type *, void *);             \
+        void *arg;                                                      \
+    };                                                                  \
+    static inline int __##name##_hashmap_foreach_callback(              \
+            const void *key, void *data, void *arg)                     \
+    {                                                                   \
+        struct __##name##_hashmap_foreach_state *s =                    \
+            (struct __##name##_hashmap_foreach_state *)arg;             \
+        return s->func((const key_type *)key,                           \
+                (data_type *)data, s->arg);                             \
+    }                                                                   \
+    int name##_hashmap_foreach(const struct hashmap *map,               \
+            int (*func)(const key_type *, data_type *, void *),         \
+            void *arg)                                                  \
+    {                                                                   \
+        struct __##name##_hashmap_foreach_state s = { func, arg };      \
+        return hashmap_foreach(map,                                     \
+            __##name##_hashmap_foreach_callback, &s);                   \
+    }
+
+
+struct hashmap_iter;
+struct hashmap_entry;
+
+/*
+ * The hashmap state structure.
+ */
+struct hashmap {
+    size_t table_size_init;
+    size_t table_size;
+    size_t num_entries;
+    struct hashmap_entry *table;
+    size_t (*hash)(const void *);
+    int (*key_compare)(const void *, const void *);
+    void *(*key_alloc)(const void *);
+    void (*key_free)(void *);
+};
+
+/*
+ * Initialize an empty hashmap.
+ *
+ * hash_func should return an even distribution of numbers between 0
+ * and SIZE_MAX varying on the key provided.  If set to NULL, the default
+ * case-sensitive string hash function is used: hashmap_hash_string
+ *
+ * key_compare_func should return 0 if the keys match, and non-zero otherwise.
+ * If set to NULL, the default case-sensitive string comparator function is
+ * used: hashmap_compare_string
+ *
+ * initial_size is optional, and may be set to the max number of entries
+ * expected to be put in the hash table.  This is used as a hint to
+ * pre-allocate the hash table to the minimum size needed to avoid
+ * gratuitous rehashes.  If initial_size is 0, a default size will be used.
+ *
+ * Returns 0 on success and -errno on failure.
+ */
+int hashmap_init(struct hashmap *map, size_t (*hash_func)(const void *),
+    int (*key_compare_func)(const void *, const void *),
+    size_t initial_size);
+
+/*
+ * Free the hashmap and all associated memory.
+ */
+void hashmap_destroy(struct hashmap *map);
+
+/*
+ * Enable internal memory allocation and management of hash keys.
+ */
+void hashmap_set_key_alloc_funcs(struct hashmap *map,
+    void *(*key_alloc_func)(const void *),
+    void (*key_free_func)(void *));
+
+/*
+ * Add an entry to the hashmap.  If an entry with a matching key already
+ * exists and has a data pointer associated with it, the existing data
+ * pointer is returned, instead of assigning the new value.  Compare
+ * the return value with the data passed in to determine if a new entry was
+ * created.  Returns NULL if memory allocation failed.
+ */
+void *hashmap_put(struct hashmap *map, const void *key, void *data);
+
+/*
+ * Return the data pointer, or NULL if no entry exists.
+ */
+void *hashmap_get(const struct hashmap *map, const void *key);
+
+/*
+ * Remove an entry with the specified key from the map.
+ * Returns the data pointer, or NULL, if no entry was found.
+ */
+void *hashmap_remove(struct hashmap *map, const void *key);
+
+/*
+ * Remove all entries.
+ */
+void hashmap_clear(struct hashmap *map);
+
+/*
+ * Remove all entries and reset the hash table to its initial size.
+ */
+void hashmap_reset(struct hashmap *map);
+
+/*
+ * Return the number of entries in the hash map.
+ */
+size_t hashmap_size(const struct hashmap *map);
+
+/*
+ * Get a new hashmap iterator.  The iterator is an opaque
+ * pointer that may be used with hashmap_iter_*() functions.
+ * Hashmap iterators are INVALID after a put or remove operation is performed.
+ * hashmap_iter_remove() allows safe removal during iteration.
+ */
+struct hashmap_iter *hashmap_iter(const struct hashmap *map);
+
+/*
+ * Return an iterator to the next hashmap entry.  Returns NULL if there are
+ * no more entries.
+ */
+struct hashmap_iter *hashmap_iter_next(const struct hashmap *map,
+    const struct hashmap_iter *iter);
+
+/*
+ * Remove the hashmap entry pointed to by this iterator and returns an
+ * iterator to the next entry.  Returns NULL if there are no more entries.
+ */
+struct hashmap_iter *hashmap_iter_remove(struct hashmap *map,
+    const struct hashmap_iter *iter);
+
+/*
+ * Return the key of the entry pointed to by the iterator.
+ */
+const void *hashmap_iter_get_key(const struct hashmap_iter *iter);
+
+/*
+ * Return the data of the entry pointed to by the iterator.
+ */
+void *hashmap_iter_get_data(const struct hashmap_iter *iter);
+
+/*
+ * Set the data pointer of the entry pointed to by the iterator.
+ */
+void hashmap_iter_set_data(const struct hashmap_iter *iter, void *data);
+
+/*
+ * Invoke func for each entry in the hashmap.  Unlike the hashmap_iter_*()
+ * interface, this function supports calls to hashmap_remove() during iteration.
+ * However, it is an error to put or remove an entry other than the current one,
+ * and doing so will immediately halt iteration and return an error.
+ * Iteration is stopped if func returns non-zero.  Returns func's return
+ * value if it is < 0, otherwise, 0.
+ */
+int hashmap_foreach(const struct hashmap *map,
+    int (*func)(const void *, void *, void *), void *arg);
+
+/*
+ * Default hash function for string keys.
+ * This is an implementation of the well-documented Jenkins one-at-a-time
+ * hash function.
+ */
+size_t hashmap_hash_string(const void *key);
+
+/*
+ * Default key comparator function for string keys.
+ */
+int hashmap_compare_string(const void *a, const void *b);
+
+/*
+ * Default key allocation function for string keys.  Use free() for the
+ * key_free_func.
+ */
+void *hashmap_alloc_key_string(const void *key);
+
+/*
+ * Case insensitive hash function for string keys.
+ */
+size_t hashmap_hash_string_i(const void *key);
+
+/*
+ * Case insensitive key comparator function for string keys.
+ */
+int hashmap_compare_string_i(const void *a, const void *b);
+
+
+#ifdef HASHMAP_METRICS
+/*
+ * Return the load factor.
+ */
+double hashmap_load_factor(const struct hashmap *map);
+
+/*
+ * Return the average number of collisions per entry.
+ */
+double hashmap_collisions_mean(const struct hashmap *map);
+
+/*
+ * Return the variance between entry collisions.  The higher the variance,
+ * the more likely the hash function is poor and is resulting in clustering.
+ */
+double hashmap_collisions_variance(const struct hashmap *map);
+#endif
+
+
+#endif /* __HASHMAP_H__ */
+
diff --git a/uidmap.c b/uidmap.c
new file mode 100644
index 0000000..8315be2
--- /dev/null
+++ b/uidmap.c
@@ -0,0 +1,262 @@
+#include "hashmap.h"
+#include "lstring.h"
+#include "types.h"
+#include "helper.h"
+#include <stdint.h>
+#include <inttypes.h>
+#include <math.h>
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static struct hashmap _numToName, _nameToNum;
+static BOOL _initDone = FALSE;
+static const char *_saveFileName = NULL;
+
+static const uint16_t MAGIC = 1234;
+static const int64_t MAX_AGE = 86400 * 2; // 2 days
+#define MAX_NAME_LEN 1024
+
+HASHMAP_FUNCS_CREATE(name, struct string, struct user);
+HASHMAP_FUNCS_CREATE(number, uint32_t, struct user);
+
+struct user
+{
+	uint32_t number;
+	const struct string *name;
+	int64_t lastUse;
+};
+
+static void insertTime(const struct string *name, const uint32_t number, const int64_t lastUse)
+{
+	uint8_t *buffer = malloc(sizeof(struct user) + sizeof(struct string) + name->l);
+	struct user *u = (struct user*)buffer;
+	struct string *s = (struct string*)(buffer + sizeof(struct user));
+	char *str = (char*)(buffer + sizeof(struct user) + sizeof(struct string));
+	memcpy(str, name->s, name->l);
+	s->s = str;
+	s->l = name->l;
+	u->name = s;
+	u->number = number;
+	u->lastUse = lastUse;
+	if (name_hashmap_put(&_nameToNum, u->name, u) != u) {
+		plog(DEBUG_WARNING, "[uidmap] Collision when inserting %.*s by name (%"PRIu32")", (int)s->l, s->s, number);
+	}
+	if (number_hashmap_put(&_numToName, &u->number, u) != u) {
+		plog(DEBUG_WARNING, "[uidmap] Collision when inserting %.*s by number (%"PRIu32")", (int)s->l, s->s, number);
+	}
+	// TODO: Leak
+}
+
+static void insert(const struct string *name, const uint32_t number)
+{
+	insertTime(name, number, time(NULL));
+}
+
+static int uint32_compare(const void *a, const void *b)
+{
+	return (*(const uint32_t*)a) != (*(const uint32_t*)b);
+}
+
+static int string_compare(const void *a, const void *b)
+{
+	return !equals((const struct string*)a, (const struct string*)b);
+}
+
+static size_t uint32_hash(const void *key)
+{
+	return *(const uint32_t*)key;
+}
+
+static size_t string_hash(const void *key)
+{
+	const struct string *key_str = (const struct string*)key;
+	size_t hash = 0;
+
+	for (size_t pos = 0; pos < key_str->l; ++pos) {
+		hash += key_str->s[pos];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+	return hash;
+}
+
+uint32_t uidmap_getNumberForName(const struct string *name)
+{
+	// Try string matching
+	char buf[name->l];
+	struct string lower = { .s = buf, .l = name->l };
+	for (size_t i = 0; i < name->l; ++i) {
+		buf[i] = tolower(name->s[i]);
+	}
+	// Get
+	struct user *u = name_hashmap_get(&_nameToNum, &lower);
+	if (u != NULL) {
+		u->lastUse = time(NULL);
+		return u->number;
+	}
+	// Not known yet - try numeric
+	uint32_t asNumber = parseUInt32(name);
+	if (asNumber >= 2000) {
+		u = number_hashmap_get(&_numToName, &asNumber);
+		if (u == NULL) {
+			// No collision, use name as number
+			insert(&lower, asNumber);
+			return asNumber;
+		}
+		plog(DEBUG_WARNING, "%.*s is numeric (%u) but collides", (int)name->l, name->s, asNumber);
+	}
+	// Default case - generate number
+	asNumber = (uint32_t)string_hash(&lower);
+	while (asNumber < 2000) {
+		asNumber = rand() + 2000;
+	}
+	for (;;) {
+		//plog(DEBUG_WARNING, "Using number %u for %.*s", asNumber, (int)name->l, name->s);
+		u = number_hashmap_get(&_numToName, &asNumber);
+		if (u == NULL)
+			break;
+		do {
+			asNumber = rand() + 2000;
+		} while (asNumber < 2000);
+	}
+	insert(&lower, asNumber);
+	return asNumber;
+}
+
+const struct string *uidmap_getNameForNumber(const uint32_t number)
+{
+	struct user *u = number_hashmap_get(&_numToName, &number);
+	if (u == NULL)
+		return NULL;
+	u->lastUse = time(NULL);
+	return u->name;
+}
+
+void uidmap_init(const char *fileName)
+{
+	if (_initDone)
+		return;
+	_initDone = TRUE;
+	hashmap_init(&_numToName, uint32_hash, uint32_compare, 0);
+	hashmap_init(&_nameToNum, string_hash, string_compare, 0);
+	if (fileName == NULL)
+		return;
+	// We have a filename, remember it and try to load DB
+	_saveFileName = strdup(fileName);
+	FILE *fh = fopen(_saveFileName, "rb");
+	if (fh == NULL)
+		return;
+	fseek(fh, 0, SEEK_END);
+	const int fsize = ftell(fh);
+	fseek(fh, 0, SEEK_SET);
+	if (fsize > 0) {
+		const size_t entries = (size_t)fsize / 25;
+		hashmap_destroy(&_numToName);
+		hashmap_destroy(&_nameToNum);
+		hashmap_init(&_numToName, uint32_hash, uint32_compare, entries);
+		hashmap_init(&_nameToNum, string_hash, string_compare, entries);
+	}
+	// Not endian safe, do not copy file around...
+	uint16_t len = 0;
+	int64_t lastUse;
+	uint32_t uidNumber;
+	char nbuffer[MAX_NAME_LEN];
+	struct string name;
+	name.s = nbuffer;
+	if (fread(&len, sizeof(len), 1, fh) != 1 || len != MAGIC) {
+		plog(DEBUG_WARNING, "uid map file missing header magic (%"PRIu16")", len);
+		fclose(fh);
+		return;
+	}
+	int loaded = 0;
+	const int64_t now = time(NULL);
+	for (;;) {
+		if (fread(&len, sizeof(len), 1, fh) != 1)
+			break;
+		if (len > MAX_NAME_LEN) {
+			plog(DEBUG_WARNING, "DB has entry of size %"PRIu16", aborting import");
+			break;
+		}
+		if (fread(&lastUse, sizeof(lastUse), 1, fh) != 1 || fread(&uidNumber, sizeof(uidNumber), 1, fh) != 1) {
+			plog(DEBUG_WARNING, "DB has truncated entry (no lastUse/uidNumber)");
+			break;
+		}
+		if (fread(nbuffer, len, 1, fh) != 1) {
+			plog(DEBUG_WARNING, "DB has truncated entry (no name)");
+			break;
+		}
+		// Check age
+		if (now - lastUse >  MAX_AGE)
+			continue;
+		if (lastUse > now)
+			lastUse = now;
+		// Add
+		name.l = len;
+		insertTime(&name, uidNumber, lastUse);
+		loaded++;
+	}
+	fclose(fh);
+	plog(DEBUG_INFO, "Loaded %d entries from %s", loaded, _saveFileName);
+}
+
+void uidmap_cleanupSave()
+{
+	if (!_initDone)
+		return;
+	FILE *fh = NULL;
+	if (_saveFileName != NULL) {
+		fh = fopen(_saveFileName, "wb");
+		if (fh != NULL) {
+			if (fwrite(&MAGIC, sizeof(MAGIC), 1, fh) != 1) {
+				plog(DEBUG_WARNING, "Cannot write magic to %s", _saveFileName);
+				fclose(fh);
+				fh = NULL;
+			}
+		}
+	}
+	const int64_t deadline = time(NULL) - MAX_AGE;
+	struct hashmap_iter *it = hashmap_iter(&_nameToNum);
+	struct user *u = NULL;
+	int saved = 0, removed = 0;
+	uint16_t len;
+	while (it != NULL) {
+		u = name_hashmap_iter_get_data(it);
+		if (u->lastUse < deadline) {
+			it = hashmap_iter_remove(&_nameToNum, it);
+			if (number_hashmap_remove(&_numToName, &u->number) != u) {
+				plog(DEBUG_WARNING, "Could not remove user from numToName that was in nameToNum");
+			}
+			free(u);
+			removed++;
+			continue;
+		}
+		// Save if file
+		if (fh != NULL && u->name->l < MAX_NAME_LEN) {
+			len = (uint16_t)u->name->l;
+			if (fwrite(&len, sizeof(len), 1, fh) != 1
+					|| fwrite(&u->lastUse, sizeof(u->lastUse), 1, fh) != 1
+					|| fwrite(&u->number, sizeof(u->number), 1, fh) != 1
+					|| fwrite(u->name->s, len, 1, fh) != 1
+				) {
+				plog(DEBUG_WARNING, "Could not write record to %s", _saveFileName);
+				fclose(fh);
+				fh = NULL;
+			} else {
+				saved++;
+			}
+		}
+		it = hashmap_iter_next(&_nameToNum, it);
+	}
+	if (fh != NULL) {
+		fsync(fileno(fh));
+		fclose(fh);
+	}
+	if (saved > 0 || removed > 0) {
+		plog(DEBUG_VERBOSE, "Purged %d old entries, saved %d entries", removed, saved);
+	}
+}
+
diff --git a/uidmap.h b/uidmap.h
new file mode 100644
index 0000000..a126e39
--- /dev/null
+++ b/uidmap.h
@@ -0,0 +1,16 @@
+#ifndef _UIDMAP_H_
+#define _UIDMAP_H_
+
+#include <stdint.h>
+
+struct string;
+
+uint32_t uidmap_getNumberForName(const struct string *name);
+
+const struct string *uidmap_getNameForNumber(const uint32_t number);
+
+void uidmap_init(const char *fileName);
+
+void uidmap_cleanupSave();
+
+#endif
author	Simon Rettberg	2018-11-09 15:36:21 +0100
committer	Simon Rettberg	2018-11-09 15:36:21 +0100
commit	33db97b72473124697170394efbc993870276c55 (patch)
tree	b1f27c1634c07652808af243f245da2809a6e4a3
parent	Move string functions to lstring.[hc] (diff)
download	ldadp-33db97b72473124697170394efbc993870276c55.tar.gz ldadp-33db97b72473124697170394efbc993870276c55.tar.xz ldadp-33db97b72473124697170394efbc993870276c55.zip