/* SPDX-License-Identifier: GPL-3.0-only */

/*
 * Simple linear hashing implementation
 *
 * Linear hashing is a hash table algorithm by Witold Litwin (1980)
 * which grows the table on demand without rehashing existing objects.
 *
 * This implementation employs a conventional hash function C whose
 * output length is assumed to be greater than the order of the table,
 * i.e. the base-2 logarithm of the number of slots. Hash collisions
 * are resolved by chaining.
 *
 * The hash table consists in fact of two tables which are consecutive
 * in memory. Each table represents N = 1 << n many slots of the
 * table. Example (n = 2, N = 4):
 *
 * 	[....][....]
 * 	 0123  4567
 *
 * In what follows, we refer to the two tables as the lower table and
 * the higher table.
 *
 * C gives rise to a sequence of hash functions C_k via C_k(key) :=
 * lower k bits of C(key). In terms of C_k the lower and higher hash
 * functions, h and H, are defined by h = C_n and H = C_{n+1}. Hence
 * h(key) always corresponds to a slot in the lower table while H(key)
 * corresponds to a slot in either table. Both functions are trivial to
 * compute given C(key).
 *
 * The algorithm also maintains a split position s which corresponds to
 * a slot of the lower table and starts out at zero.
 *
 * 	[s...][....]
 * 	 0123  4567
 *
 * The linear hash function L is defined by L(key) = H(key) if h(key)
 * < s, and L(key) = h(key) otherwise. In other words, L(key) is either
 * the lower or the higher hash value, depending on the relation between
 * the lower hash value and s. The higher hash function applies for all
 * keys whose lower hash value is smaller than s. Initially we have L = h.
 *
 * On insertion, if the table load is large, the chain of entries which
 * hash to s is split by transitioning from h to H.
 *
 * For an existing key, we have either H(key) = h(key) or H(key) =
 * h(key) + (1 << n). That is, the most significant bit of H(key) tells
 * whether this entry must be moved from its slot in the low table to the
 * corresponding slot in the high table. Approximately half the entries
 * will be moved this way. To reflect the fact that from now on L(key)
 * = H(key) for all keys with h(key) = s, s is increased by one.
 *
 * Each time s reaches 1 << n, the upper bound of the lower table, we
 * have L = H. In this case the hash table size is doubled, and the split
 * position is reset to zero. Moreover, h and H are redefined as h :=
 * C_{n+1} (i.e., the new h becomes the former H), and H := C_{n+2}. This
 * means the table order n has increased by one, and a new round begins.
 *
 * This implementation includes an API for iterating over all entries
 * in the hash table which avoids callback functions. Instead an opaque
 * iterator structure is provided together with a set of functions that
 * create or operate on such a structure.
 *
 * While shrinking a linear hash table can be performed essentially
 * by reversing the steps for growing described above, this is not
 * implemented since it is not needed for tfortune.
 */

#include "tf.h"

/* One entry of a collision chain. Each table slot heads such a chain. */
struct lh_slot {
	/* By-value copy of the item that was passed to linhash_insert(). */
	struct linhash_item item;
	/*
	 * Full hash of the key as computed by lh_hash(). It is cached so that
	 * splitting a slot does not need to hash the key again.
	 */
	uint32_t hash;
	/* Next entry of the same chain, NULL at the end of the chain. */
	struct lh_slot *next;
};

/* The hash table proper: the lower and the higher table plus bookkeeping. */
struct linhash_table {
	/*
	 * Array of 2 << order chain heads. The first half is the lower table,
	 * the second half the higher table. Empty slots are NULL.
	 */
	struct lh_slot **slots;
	/* The table order n: the lower table has 1 << order slots. */
	unsigned order;
	/*
	 * Number of items currently stored. Incremented on insertion and
	 * decremented on removal.
	 */
	uint32_t num_items;
	/* Index of the slot of the lower table which is split next. */
	uint32_t split_position;
};

/*
 * State of one iteration over the items of a table.
 *
 * An iterator works in one of two modes, selected by comp: in hash order it
 * walks the chains slot by slot, in sorted order it walks a sorted array of
 * item pointers which is built when the iterator is created.
 */
struct linhash_iterator {
	/* these are used regardless of whether comp is NULL */
	struct linhash_table *t;
	/*
	 * Hash order: index of the slot whose chain is being traversed.
	 * Sorted order: index of the current element of the items array.
	 */
	uint32_t idx;
	/* comp == NULL means: iterate in hash order */
	linhash_comparator *comp;
	/*
	 * only used if comp == NULL: the current chain entry, its predecessor
	 * in the chain (NULL if there is none) and its successor.
	 */
	struct lh_slot *head, *prev, *next;
	/* only used if comp != NULL */
	struct linhash_item **items; /* pointers to all items, sorted with comp */
	bool reverse; /* walk the items array from the last element down */
};

/* Number of slots of the lower table, i.e. two to the power of the order. */
static uint32_t lh_num_slots_low(const struct linhash_table *t)
{
	const uint32_t one = 1;

	return one << t->order;
}

/* Total number of slots: the lower and the higher table taken together. */
static uint32_t lh_num_slots_high(const struct linhash_table *t)
{
	const uint32_t low = lh_num_slots_low(t);

	return 2 * low;
}

/* The lower hash function h: reduce the hash to a slot of the lower table. */
static uint32_t lh_index_low(uint32_t hash, const struct linhash_table *t)
{
	const uint32_t modulus = lh_num_slots_low(t);

	return hash % modulus;
}

/* The higher hash function H: reduce the hash to a slot of either table. */
static uint32_t lh_index_high(uint32_t hash, const struct linhash_table *t)
{
	const uint32_t modulus = lh_num_slots_high(t);

	return hash % modulus;
}

/*
 * Tell whether the load is large enough to warrant splitting a slot. This is
 * the case when the table holds at least half as many items as it has slots
 * in total.
 */
static bool lh_must_grow(const struct linhash_table *t)
{
	const uint32_t threshold = lh_num_slots_high(t) / 2;

	return t->num_items >= threshold;
}

/*
 * The simple DJB (Daniel J Bernstein) hash is good enough here.
 *
 * Starting from 5381, each byte of the NUL-terminated string is folded into
 * the hash via h = h * 33 + byte. Bytes are read as unsigned char so that the
 * result does not depend on the signedness of plain char. The hash of the
 * empty string is 5381.
 */
static uint32_t lh_hash(const char *data)
{
	const unsigned char *c = (const unsigned char *)data;
	uint32_t h = 5381;

	/*
	 * Fold in the current byte first and advance afterwards. Advancing
	 * before reading would skip the first byte of the key and mix in the
	 * terminating NUL instead, so that all keys which differ only in
	 * their first character would end up in the same chain.
	 */
	for (; *c; c++)
		h = h * 33 + *c;
	return h;
}

/*
 * Create a new table for linear hashing.
 *
 * The order argument determines the initial number of slots, which is
 * 2 << order: the lower and the higher table are allocated together. Since
 * the hash table grows on demand, and the point of linear hashing is the
 * ability to grow the table cheaply, specifying anything greater than zero is
 * probably not necessary.
 *
 * The function returns an opaque pointer which serves as the handle to the
 * newly created hash table. Most functions of the linhash API take such a
 * pointer to know the table to operate on. This function either succeeds or
 * terminates, it never returns NULL.
 */
struct linhash_table *linhash_new(uint32_t order)
{
	struct linhash_table *table;

	DEBUG_LOG("creating order %u hash table\n", order);
	table = xmalloc(sizeof(*table));
	table->split_position = 0;
	table->num_items = 0;
	/* The order must be set before the slot count can be computed. */
	table->order = order;
	table->slots = xcalloc(lh_num_slots_high(table) * sizeof(*table->slots));
	return table;
}

/*
 * The linear hash function L: slots of the lower table below the split
 * position have been split already, so the higher hash function applies to
 * them. For all other slots the lower hash function is used.
 */
static uint32_t lh_index(uint32_t hash, const struct linhash_table *t)
{
	const uint32_t lower = lh_index_low(hash, t);

	return lower < t->split_position? lh_index_high(hash, t) : lower;
}

/*
 * Split the chain at the split position.
 *
 * Each entry of the chain is re-indexed with the higher hash function.
 * Entries which still map to the split position stay where they are, all
 * others are unlinked and prepended to the chain of their new slot. The split
 * position itself is not modified, this is left to the caller.
 */
static void lh_split_slot(struct linhash_table *t)
{
	const uint32_t sp = t->split_position;
	/* Address of the pointer which links to the entry under inspection. */
	struct lh_slot **link = &t->slots[sp];

	DEBUG_LOG("splitting slot %u\n", sp);
	while (*link) {
		struct lh_slot *cur = *link;
		uint32_t idx = lh_index_high(cur->hash, t);

		if (idx == sp) { /* entry stays, step over it */
			link = &cur->next;
			continue;
		}
		DEBUG_LOG("moving %s up from slot %u to slot %u\n",
			cur->item.key, sp, idx);
		/* Unlink. Afterwards *link is the successor of cur. */
		*link = cur->next;
		cur->next = t->slots[idx];
		t->slots[idx] = cur;
	}
	DEBUG_LOG("slot %u has been split\n", sp);
	if (!t->slots[sp])
		DEBUG_LOG("slot #%u has become empty\n", sp);
}

/*
 * Increase the table order by one and double the size of the slot array.
 *
 * After the order has been incremented, the existing slots make up the new
 * lower table. The slots of the new higher table are initialized to NULL
 * because the memory added by the reallocation is not initialized here
 * otherwise.
 */
static void lh_grow_table(struct linhash_table *t)
{
	uint32_t idx, ns;

	/*
	 * Increment outside of the log statement: the table must grow
	 * regardless of how DEBUG_LOG treats its arguments.
	 */
	t->order++;
	DEBUG_LOG("growing hash table to order %u\n", t->order);
	ns = lh_num_slots_high(t);
	t->slots = xrealloc(t->slots, ns * sizeof(*t->slots));
	idx = lh_num_slots_low(t); /* first slot of the new higher table */
	memset(t->slots + idx, 0, (ns - idx) * sizeof(*t->slots));
}

/*
 * Find the chain entry for a key.
 *
 * If t is NULL, the function returns NULL without touching any of the result
 * pointers. Otherwise the hash of the key and the index of its slot are
 * stored in *hashp and *idxp, even if the key is not found. On success the
 * predecessor of the found entry in its chain (NULL for the first entry) is
 * stored in *prevp, on failure *prevp is set to NULL. Each of the three
 * result pointers may be NULL, in which case it is ignored.
 *
 * Returns the entry whose key equals the given key, or NULL if there is none.
 */
static struct lh_slot *lh_lookup(const char *key,
		const struct linhash_table *t, uint32_t *hashp, uint32_t *idxp,
		struct lh_slot **prevp)
{
	struct lh_slot *cur, *pred = NULL;
	uint32_t hash, idx;

	if (!t)
		return NULL;
	hash = lh_hash(key);
	idx = lh_index(hash, t);
	if (hashp)
		*hashp = hash;
	if (idxp)
		*idxp = idx;
	/* Walk the chain until the key matches or the chain ends. */
	cur = t->slots[idx];
	while (cur && strcmp(cur->item.key, key) != 0) {
		pred = cur;
		cur = cur->next;
	}
	if (prevp)
		*prevp = cur? pred : NULL;
	return cur;
}

/**
 * Find the entry identified by a key.
 *
 * \param key The key to look up.
 * \param t Where to look up the key. May be NULL, in which case nothing
 * is found.
 *
 * \return A pointer to the item stored in the table, or NULL if the table
 * contains no item with the given key.
 */
struct linhash_item *linhash_lookup(const char *key,
		const struct linhash_table *t)
{
	struct lh_slot *found = lh_lookup(key, t, NULL, NULL, NULL);

	return found? &found->item : NULL;
}

/*
 * Unlink and free a chain entry.
 *
 * s is the entry to remove, prev its predecessor in the chain of slot idx, or
 * NULL if s is the first entry of that chain. If s is NULL, nothing happens
 * and NULL is returned. Otherwise the item count of the table is decremented
 * and the object pointer of the removed item is returned. Only the chain
 * entry itself is freed here.
 */
static void *lh_remove(struct lh_slot *s, struct lh_slot *prev, uint32_t idx, struct linhash_table *t)
{
	struct lh_slot **link;
	void *payload;

	if (!s)
		return NULL;
	/* The pointer which refers to s: either prev->next or the chain head. */
	link = prev? &prev->next : &t->slots[idx];
	*link = s->next;
	payload = s->item.object;
	t->num_items--;
	free(s);
	return payload;
}

/*
 * Remove the item identified by a key from the table.
 *
 * Returns the object pointer of the removed item, or NULL if the table is
 * NULL or contains no item with the given key.
 *
 * Note: This renders all existing iterators stale.
 */
void *linhash_remove(const char *key, struct linhash_table *t)
{
	/*
	 * lh_lookup() returns early without storing anything if t is NULL, so
	 * give these defined values before they are passed on by value.
	 */
	uint32_t idx = 0;
	struct lh_slot *prev = NULL, *s;

	s = lh_lookup(key, t, NULL, &idx, &prev);
	return lh_remove(s, prev, idx, t);
}

/*
 * Remove the entry the iterator currently points to.
 *
 * Only valid for iterators in hash order (comp == NULL). Returns the object
 * pointer of the removed item, or NULL if the iterator has no current entry.
 * Afterwards iter->head refers to the predecessor of the removed entry (NULL
 * if there was none) while iter->next is left alone, so that the next call to
 * linhash_iterator_next() proceeds with the successor of the removed entry.
 */
static void *lh_iterator_remove_current(struct linhash_iterator *iter)
{
	struct lh_slot *victim;
	void *obj;

	assert(!iter->comp);
	victim = iter->head;
	if (!victim)
		return NULL;
	obj = lh_remove(victim, iter->prev, iter->idx, iter->t);
	iter->head = iter->prev;
	return obj;
}

/*
 * Starting at slot *idxp, search for a slot with a non-empty chain.
 *
 * On success *idxp is the index of that slot and the first entry of its chain
 * is returned. If all remaining slots are empty, *idxp is advanced to the
 * total number of slots (unless it was larger to begin with) and NULL is
 * returned.
 */
static struct lh_slot *lh_first_nonempty_slot(uint32_t *idxp,
		const struct linhash_table *t)
{
	const uint32_t limit = lh_num_slots_high(t);

	while (*idxp < limit) {
		struct lh_slot *chain = t->slots[*idxp];

		if (chain)
			return chain;
		++*idxp;
	}
	return NULL;
}

/*
 * Position a hash order iterator at the first entry found in slot idx or any
 * later slot.
 *
 * If there is no such entry, the iterator is exhausted: head and next are
 * both NULL. next is set in this case as well, since the iterator structure
 * may come straight from the allocator and linhash_iterator_next() examines
 * next unconditionally.
 */
static void lh_iter_init(struct linhash_iterator *iter, uint32_t idx)
{
	iter->idx = idx;
	iter->prev = NULL;
	iter->head = lh_first_nonempty_slot(&iter->idx, iter->t);
	iter->next = iter->head? iter->head->next : NULL;
}

/*
 * Normally iter->head points to the current head. However, if this head was
 * removed with lh_iterator_remove_current(), iter->head points to its
 * predecessor in the list.
 */
void linhash_iterator_next(struct linhash_iterator *iter)
{
	if (iter->comp) {
		if (iter->reverse)
			iter->idx--;
		else
			iter->idx++;
		return;
	}
	if (iter->next) {
		iter->prev = iter->head;
		iter->head = iter->next;
		iter->next = iter->next->next;
		return;
	}
	lh_iter_init(iter, iter->idx + 1);
}

/*
 * Get the item the iterator currently points to.
 *
 * Returns NULL if there is no current item. In sorted order this is the case
 * when the index is not smaller than the number of items of the table, in
 * hash order when the iterator has no current chain entry.
 */
struct linhash_item *linhash_iterator_item(const struct linhash_iterator *iter)
{
	if (!iter->comp)
		return iter->head? &iter->head->item : NULL;
	return iter->idx < iter->t->num_items? iter->items[iter->idx] : NULL;
}

/*
 * Deallocate an iterator. Passing NULL is allowed and does nothing. The table
 * and its items are not affected.
 */
void linhash_iterator_free(struct linhash_iterator *iter)
{
	if (iter) {
		/* The items array exists only for iterators in sorted order. */
		if (iter->comp)
			free(iter->items);
		free(iter);
	}
}

/*
 * Create an iterator for the given table.
 *
 * If comp is NULL, the iterator traverses the items in hash order and
 * reverse_sort_order is ignored. Otherwise pointers to all items are
 * collected into an array which is sorted with comp. The comparator is handed
 * to qsort(), hence it is called with pointers to elements of that array,
 * i.e., pointers to item pointers. With reverse_sort_order the iteration
 * starts at the last element of the sorted array and proceeds downwards.
 *
 * Always succeeds.
 */
struct linhash_iterator *linhash_iterator_new(struct linhash_table *t,
		linhash_comparator *comp, bool reverse_sort_order)
{
	struct linhash_iterator *iter = xmalloc(sizeof(*iter)), *iter2;
	struct linhash_item *item;
	unsigned n;

	iter->t = t;
	iter->comp = comp;
	if (!comp) {
		lh_iter_init(iter, 0);
		return iter;
	}
	iter->reverse = reverse_sort_order;
	/* Use a temporary hash order iterator to collect the item pointers. */
	iter2 = linhash_iterator_new(t, NULL, false);
	iter->items = xmalloc(t->num_items * sizeof(*iter->items));
	for (
		n = 0;
		(item = linhash_iterator_item(iter2));
		linhash_iterator_next(iter2)
	)
		iter->items[n++] = item;
	linhash_iterator_free(iter2);
	/* The element size is derived from the array itself. */
	qsort(iter->items, t->num_items, sizeof(*iter->items),
		(int (*)(const void *, const void *))comp);
	/*
	 * For an empty table in reverse order the index wraps around, which
	 * linhash_iterator_item() treats as "no item".
	 */
	iter->idx = reverse_sort_order? t->num_items - 1 : 0;
	return iter;
}

/*
 * Deallocate the resources occupied by the given hash table.
 *
 * All chain entries, the slot array and the table structure are freed. The
 * object pointers of the removed items are discarded without being freed.
 * Passing NULL is allowed and does nothing.
 */
void linhash_free(struct linhash_table *t)
{
	struct linhash_iterator *iter;

	if (!t)
		return;
	/* Walk the table in hash order, removing each entry as we go. */
	iter = linhash_iterator_new(t, NULL, false);
	while (linhash_iterator_item(iter)) {
		lh_iterator_remove_current(iter);
		linhash_iterator_next(iter);
	}
	linhash_iterator_free(iter);
	assert(t->num_items == 0);
	free(t->slots);
	free(t);
}

/*
 * Insert an item into the table.
 *
 * If the table already contains an item with the same key, nothing is
 * inserted and -E_LH_EXIST is returned. In this case, if object is not NULL,
 * the address of the object pointer of the existing item is stored in
 * *object.
 *
 * Otherwise the item structure is copied by value into a new chain entry
 * which is prepended to the chain of its slot. The return value tells what
 * else happened: 0 means the load was small enough and nothing was split, 1
 * means the slot at the split position was split, and 2 means that this split
 * completed the round, so the table was grown to the next order.
 */
int linhash_insert(struct linhash_item *item, struct linhash_table *t,
		void ***object)
{
	uint32_t hash, idx;
	struct lh_slot *entry = lh_lookup(item->key, t, &hash, &idx, NULL);

	if (entry) {
		if (object)
			*object = &entry->item.object;
		return -E_LH_EXIST;
	}
	entry = xmalloc(sizeof(*entry));
	entry->hash = hash;
	entry->item = *item;
	DEBUG_LOG("inserting item #%u, key: %s, hash: %u, idx: %u\n",
		t->num_items, item->key, hash, idx);
	entry->next = t->slots[idx];
	t->slots[idx] = entry;
	t->num_items++;

	if (!lh_must_grow(t)) {
		DEBUG_LOG("no need to grow\n");
		return 0;
	}
	/* Split first: lh_split_slot() relies on the old split position. */
	lh_split_slot(t);
	if (++t->split_position >= lh_num_slots_low(t)) {
		/* All slots of the lower table have been split. */
		t->split_position = 0;
		lh_grow_table(t);
		return 2;
	}
	return 1;
}

/* Return the number of items which are currently stored in the table. */
uint32_t linhash_num_items(const struct linhash_table *t)
{
	const uint32_t count = t->num_items;

	return count;
}

char *linhash_statistics(const struct linhash_table *t)
{
	uint32_t min_fill = -1, max_fill = 0, n, idx, ns;
	uint32_t fill_count[11] = {0};
	char *result;

	ns = lh_num_slots_low(t) + t->split_position;
	for (idx = 0; idx < ns; idx++) {
		struct lh_slot *s;
		for (n = 0, s = t->slots[idx]; s; s = s->next, n++)
			; /* nothing */
		min_fill = MIN(min_fill, n);
		max_fill = MAX(max_fill, n);
		fill_count[n < 10? n : 10]++;
	}
	xasprintf(&result,
		"order............... %2u\n"
		"num slots........... %u\n"
		"num items (table)... %u\n"
		"load factor........ %3u%%\n"
		"min fill............ %2u\n"
		"max fill............ %2u\n"
		"max count[0]....... %3u%% (%u)\n"
		"max count[1]....... %3u%% (%u)\n"
		"max count[2]....... %3u%% (%u)\n"
		"max count[3]....... %3u%% (%u)\n"
		"max count[4]....... %3u%% (%u)\n"
		"max count[5]....... %3u%% (%u)\n"
		"max count[6]....... %3u%% (%u)\n"
		"max count[7]....... %3u%% (%u)\n"
		"max count[8]....... %3u%% (%u)\n"
		"max count[9]....... %3u%% (%u)\n"
		"max count[10+]..... %3u%% (%u)\n"
		,
		t->order,
		ns,
		t->num_items,
		(t->num_items * 100 + (ns / 2)) / ns,
		min_fill,
		max_fill,
		100 * fill_count[0] / ns, fill_count[0],
		100 * fill_count[1] / ns, fill_count[1],
		100 * fill_count[2] / ns, fill_count[2],
		100 * fill_count[3] / ns, fill_count[3],
		100 * fill_count[4] / ns, fill_count[4],
		100 * fill_count[5] / ns, fill_count[5],
		100 * fill_count[6] / ns, fill_count[6],
		100 * fill_count[7] / ns, fill_count[7],
		100 * fill_count[8] / ns, fill_count[8],
		100 * fill_count[9] / ns, fill_count[9],
		100 * fill_count[10] / ns, fill_count[10]
	);
	return result;
}
