[BACK]Return to cdbw.c CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / lib / libc / cdb

File: [cvs.NetBSD.org] / src / lib / libc / cdb / cdbw.c (download)

Revision 1.1, Sun Apr 25 00:54:46 2010 UTC (13 years, 11 months ago) by joerg
Branch: MAIN
CVS Tags: yamt-pagecache-base3, yamt-pagecache-base2, yamt-pagecache-base, netbsd-6-base, matt-mips64-premerge-20101231, cherry-xenmp-base, cherry-xenmp, bouyer-quota2-nbase, bouyer-quota2-base, bouyer-quota2
Branch point for: yamt-pagecache, netbsd-6

Add the constant database reader (cdbr(3)) and writer ((cdbw(3)).
They implement a space efficent write-once database with fast access
path. Switch the services(5) database to use cdb. The size of the
database file decreases from 2.1MB disk space to 307KB. Access
performance is about the same if setservent(0) is used and about an
order of magnitude faster otherwise. services_mkdb defaults to the new
format, but can optionally create the old db(3) format as well for
statically linked legacy applications.

/*	$NetBSD: cdbw.c,v 1.1 2010/04/25 00:54:46 joerg Exp $	*/
/*-
 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Joerg Sonnenberger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if HAVE_NBTOOL_CONFIG_H
#include "nbtool_config.h"
#endif

#include <sys/cdefs.h>
__RCSID("$NetBSD: cdbw.c,v 1.1 2010/04/25 00:54:46 joerg Exp $");

#include "namespace.h"

#include <sys/endian.h>
#include <sys/queue.h>
#include <cdbw.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef __weak_alias
__weak_alias(cdbw_close,_cdbw_close)
__weak_alias(cdbw_open,_cdbw_open)
__weak_alias(cdbw_output,_cdbw_output)
__weak_alias(cdbw_put,_cdbw_put)
__weak_alias(cdbw_put_data,_cdbw_put_data)
__weak_alias(cdbw_put_key,_cdbw_put_key)
#endif

struct key_hash {
	SLIST_ENTRY(key_hash) link;
	uint32_t hashes[3];
	uint32_t idx;
	void *key;
	size_t keylen;
};

SLIST_HEAD(key_hash_head, key_hash);

struct cdbw {
	size_t data_counter;
	size_t data_allocated;
	size_t data_size;
	size_t *data_len;
	void **data_ptr;

	size_t hash_size;
	struct key_hash_head *hash;
	size_t key_counter;
};

 /* Max. data counter that allows the index size to be 32bit. */
static const uint32_t max_data_counter = 0xccccccccU;

struct cdbw *
cdbw_open(void)
{
	struct cdbw *cdbw;
	size_t i;

	cdbw = calloc(sizeof(*cdbw), 1);
	if (cdbw == NULL)
		return NULL;

	cdbw->hash_size = 1024;
	cdbw->hash = calloc(cdbw->hash_size, sizeof(*cdbw->hash));
	if (cdbw->hash == NULL) {
		free(cdbw);
		return NULL;
	}

	for (i = 0; i < cdbw->hash_size; ++i)
		SLIST_INIT(cdbw->hash + i);

	return cdbw;
}

int
cdbw_put(struct cdbw *cdbw, const void *key, size_t keylen,
    const void *data, size_t datalen)
{
	uint32_t idx;
	int rv;

	rv = cdbw_put_data(cdbw, data, datalen, &idx);
	if (rv)
		return rv;
	rv = cdbw_put_key(cdbw, key, keylen, idx);
	if (rv) {
		--cdbw->data_counter;
		free(cdbw->data_ptr[cdbw->data_counter]);
		cdbw->data_size -= datalen;
		return rv;
	}
	return 0;
}

int
cdbw_put_data(struct cdbw *cdbw, const void *data, size_t datalen,
    uint32_t *idx)
{

	if (cdbw->data_counter == max_data_counter)
		return -1;

	if (cdbw->data_size + datalen < cdbw->data_size ||
	    cdbw->data_size + datalen > 0xffffffffU)
		return -1; /* Overflow */

	if (cdbw->data_allocated == cdbw->data_counter) {
		void **new_data_ptr;
		size_t *new_data_len;
		size_t new_allocated;

		if (cdbw->data_allocated == 0)
			new_allocated = 256;
		else
			new_allocated = cdbw->data_allocated * 2;

		new_data_ptr = realloc(cdbw->data_ptr,
		    sizeof(*cdbw->data_ptr) * new_allocated);
		if (new_data_ptr == NULL)
			return -1;
		cdbw->data_ptr = new_data_ptr;

		new_data_len = realloc(cdbw->data_len,
		    sizeof(*cdbw->data_len) * new_allocated);
		if (new_data_len == NULL)
			return -1;
		cdbw->data_len = new_data_len;

		cdbw->data_allocated = new_allocated;
	}

	cdbw->data_ptr[cdbw->data_counter] = malloc(datalen);
	if (cdbw->data_ptr[cdbw->data_counter] == NULL)
		return -1;
	memcpy(cdbw->data_ptr[cdbw->data_counter], data, datalen);
	cdbw->data_len[cdbw->data_counter] = datalen;
	cdbw->data_size += datalen;
	*idx = cdbw->data_counter++;
	return 0;
}

int
cdbw_put_key(struct cdbw *cdbw, const void *key, size_t keylen, uint32_t idx)
{
	uint32_t hashes[3];
	struct key_hash_head *head, *head2, *new_head;
	struct key_hash *key_hash;
	size_t new_hash_size, i;

	if (idx >= cdbw->data_counter ||
	    cdbw->key_counter == max_data_counter)
		return -1;

	mi_vector_hash(key, keylen, 0, hashes);

	head = cdbw->hash + (hashes[0] & (cdbw->hash_size - 1));
	SLIST_FOREACH(key_hash, head, link) {
		if (key_hash->keylen != keylen)
			continue;
		if (key_hash->hashes[0] != hashes[0])
			continue;
		if (key_hash->hashes[1] != hashes[1])
			continue;
		if (key_hash->hashes[2] != hashes[2])
			continue;
		if (memcmp(key, key_hash->key, keylen))
			continue;
		return -1;
	}
	key_hash = malloc(sizeof(*key_hash));
	if (key_hash == NULL)
		return -1;
	key_hash->key = malloc(keylen);
	if (key_hash->key == NULL) {
		free(key_hash);
		return -1;
	}
	memcpy(key_hash->key, key, keylen);
	key_hash->hashes[0] = hashes[0];
	key_hash->hashes[1] = hashes[1];
	key_hash->hashes[2] = hashes[2];
	key_hash->keylen = keylen;
	key_hash->idx = idx;
	SLIST_INSERT_HEAD(head, key_hash, link);
	++cdbw->key_counter;

	if (cdbw->key_counter <= cdbw->hash_size)
		return 0;

	/* Try to resize the hash table, but ignore errors. */
	new_hash_size = cdbw->hash_size * 2;
	new_head = calloc(sizeof(*new_head), new_hash_size);
	if (new_head == NULL)
		return 0;

	head = &cdbw->hash[hashes[0] & (cdbw->hash_size - 1)];
	for (i = 0; i < new_hash_size; ++i)
		SLIST_INIT(new_head + i);

	for (i = 0; i < cdbw->hash_size; ++i) {
		head = cdbw->hash + i;

		while ((key_hash = SLIST_FIRST(head)) != NULL) {
			SLIST_REMOVE_HEAD(head, link);
			head2 = new_head +
			    (key_hash->hashes[0] & (new_hash_size - 1));
			SLIST_INSERT_HEAD(head2, key_hash, link);
		}
	}
	free(cdbw->hash);
	cdbw->hash_size = new_hash_size;
	cdbw->hash = new_head;

	return 0;
}

void
cdbw_close(struct cdbw *cdbw)
{
	struct key_hash_head *head;
	struct key_hash *key_hash;
	size_t i;

	for (i = 0; i < cdbw->hash_size; ++i) {
		head = cdbw->hash + i;
		while ((key_hash = SLIST_FIRST(head)) != NULL) {
			SLIST_REMOVE_HEAD(head, link);
			free(key_hash->key);
			free(key_hash);
		}
	}

	for (i = 0; i < cdbw->data_counter; ++i)
		free(cdbw->data_ptr[i]);
	free(cdbw->data_ptr);
	free(cdbw->data_len);
	free(cdbw->hash);
	free(cdbw);
}

#define unused 0xffffffffU

struct vertex {
	uint32_t l_edge, m_edge, r_edge;
};

struct edge {
	uint32_t idx;

	uint32_t left, middle, right;
	uint32_t l_prev, m_prev, l_next;
	uint32_t r_prev, m_next, r_next;
};

struct state {
	uint32_t data_entries;
	uint32_t entries;
	uint32_t keys;
	uint32_t seed;

	uint32_t *g;
	char *visited;

	struct vertex *verts;
	struct edge *edges;
	uint32_t output_index;
	uint32_t *output_order;
};

static void
remove_vertex(struct state *state, struct vertex *v)
{
	struct edge *e;
	struct vertex *vl, *vm, *vr;

	if (v->l_edge != unused && v->m_edge != unused)
		return;
	if (v->l_edge != unused && v->r_edge != unused)
		return;
	if (v->m_edge != unused && v->r_edge != unused)
		return;
	if (v->l_edge == unused && v->m_edge == unused && v->r_edge == unused)
		return;

	if (v->l_edge != unused) {
		e = &state->edges[v->l_edge];
		if (e->l_next != unused)
			return;
	} else if (v->m_edge != unused) {
		e = &state->edges[v->m_edge];
		if (e->m_next != unused)
			return;
	} else {
		if (v->r_edge == unused)
			abort();
		e = &state->edges[v->r_edge];
		if (e->r_next != unused)
			return;
	}

	state->output_order[--state->output_index] = e - state->edges;

	vl = &state->verts[e->left];
	vm = &state->verts[e->middle];
	vr = &state->verts[e->right];

	if (e->l_prev == unused)
		vl->l_edge = e->l_next;
	else
		state->edges[e->l_prev].l_next = e->l_next;
	if (e->l_next != unused)
		state->edges[e->l_next].l_prev = e->l_prev;

	if (e->m_prev == unused)
		vm->m_edge = e->m_next;
	else
		state->edges[e->m_prev].m_next = e->m_next;
	if (e->m_next != unused)
		state->edges[e->m_next].m_prev = e->m_prev;

	if (e->r_prev == unused)
		vr->r_edge = e->r_next;
	else
		state->edges[e->r_prev].r_next = e->r_next;
	if (e->r_next != unused)
		state->edges[e->r_next].r_prev = e->r_prev;
}

static int
build_graph(struct cdbw *cdbw, struct state *state)
{
	struct key_hash_head *head;
	struct key_hash *key_hash;
	struct vertex *v;
	struct edge *e;
	uint32_t hashes[3];
	size_t i;

	e = state->edges;
	for (i = 0; i < cdbw->hash_size; ++i) {
		head = &cdbw->hash[i];
		SLIST_FOREACH(key_hash, head, link) {
			e->idx = key_hash->idx;
			mi_vector_hash(key_hash->key, key_hash->keylen,
			    state->seed, hashes);
			e->left = hashes[0] % state->entries;
			e->middle = hashes[1] % state->entries;
			e->right = hashes[2] % state->entries;

			++e;
		}
	}

	for (i = 0; i < state->entries; ++i) {
		v = state->verts + i;
		v->l_edge = unused;
		v->m_edge = unused;
		v->r_edge = unused;
	}

	for (i = 0; i < state->keys; ++i) {
		e = state->edges + i;
		v = state->verts + e->left;
		if (v->l_edge != unused)
			state->edges[v->l_edge].l_prev = i;
		e->l_next = v->l_edge;
		e->l_prev = unused;
		v->l_edge = i;

		v = &state->verts[e->middle];
		if (v->m_edge != unused)
			state->edges[v->m_edge].m_prev = i;
		e->m_next = v->m_edge;
		e->m_prev = unused;
		v->m_edge = i;

		v = &state->verts[e->right];
		if (v->r_edge != unused)
			state->edges[v->r_edge].r_prev = i;
		e->r_next = v->r_edge;
		e->r_prev = unused;
		v->r_edge = i;
	}

	state->output_index = state->keys;
	for (i = 0; i < state->entries; ++i)
		remove_vertex(state, state->verts + i);

	i = state->keys;
	while (i > 0 && i > state->output_index) {
		--i;
		e = state->edges + state->output_order[i];
		remove_vertex(state, state->verts + e->left);
		remove_vertex(state, state->verts + e->middle);
		remove_vertex(state, state->verts + e->right);
	}

	return state->output_index == 0 ? 0 : -1;
}

static void
assign_nodes(struct state *state)
{
	struct edge *e;
	size_t i;

	for (i = 0; i < state->keys; ++i) {
		e = state->edges + state->output_order[i];

		if (!state->visited[e->left]) {
			state->g[e->left] =
			    (2 * state->data_entries + e->idx
			    - state->g[e->middle] - state->g[e->right])
			    % state->data_entries;
		} else if (!state->visited[e->middle]) {
			state->g[e->middle] =
			    (2 * state->data_entries + e->idx
			    - state->g[e->left] - state->g[e->right])
			    % state->data_entries;
		} else {
			state->g[e->right] =
			    (2 * state->data_entries + e->idx
			    - state->g[e->left] - state->g[e->middle])
			    % state->data_entries;
		}
		state->visited[e->left] = 1;
		state->visited[e->middle] = 1;
		state->visited[e->right] = 1;
	}
}

static size_t
compute_size(uint32_t size)
{
	if (size < 0x100)
		return 1;
	else if (size < 0x10000)
		return 2;
	else
		return 4;
}

#define COND_FLUSH_BUFFER(n) do { 				\
	if (__predict_false(cur_pos + (n) >= sizeof(buf))) {	\
		ret = write(fd, buf, cur_pos);			\
		if (ret == -1 || (size_t)ret != cur_pos)	\
			return -1;				\
		cur_pos = 0;					\
	}							\
} while (/* CONSTCOND */ 0)

static int
print_hash(struct cdbw *cdbw, struct state *state, int fd, const char *descr)
{
	uint32_t data_size;
	uint8_t buf[90000];
	size_t i, size, size2, cur_pos;
	ssize_t ret;

	memcpy(buf, "NBCDB\n\0", 7);
	buf[7] = 1;
	strncpy((char *)buf + 8, descr, 16);
	le32enc(buf + 24, cdbw->data_size);
	le32enc(buf + 28, cdbw->data_counter);
	le32enc(buf + 32, state->entries);
	le32enc(buf + 36, state->seed);
	cur_pos = 40;

	size = compute_size(state->entries);
	for (i = 0; i < state->entries; ++i) {
		COND_FLUSH_BUFFER(4);
		le32enc(buf + cur_pos, state->g[i]);
		cur_pos += size;
	}
	size2 = compute_size(cdbw->data_size);
	size = size * state->entries % size2;
	if (size != 0) {
		size = size2 - size;
		COND_FLUSH_BUFFER(4);
		le32enc(buf + cur_pos, 0);
		cur_pos += size;
	}
	for (data_size = 0, i = 0; i < cdbw->data_counter; ++i) {
		COND_FLUSH_BUFFER(4);
		le32enc(buf + cur_pos, data_size);
		cur_pos += size2;
		data_size += cdbw->data_len[i];
	}
	COND_FLUSH_BUFFER(4);
	le32enc(buf + cur_pos, data_size);
	cur_pos += size2;

	for (i = 0; i < cdbw->data_counter; ++i) {
		COND_FLUSH_BUFFER(cdbw->data_len[i]);
		if (cdbw->data_len[i] < sizeof(buf)) {
			memcpy(buf + cur_pos, cdbw->data_ptr[i],
			    cdbw->data_len[i]);
			cur_pos += cdbw->data_len[i];
		} else {
			ret = write(fd, cdbw->data_ptr[i], cdbw->data_len[i]);
			if (ret == -1 || (size_t)ret != cdbw->data_len[i])
				return -1;
		}
	}
	if (cur_pos != 0) {
		ret = write(fd, buf, cur_pos);
		if (ret == -1 || (size_t)ret != cur_pos)
			return -1;
	}
	return 0;
}

int
cdbw_output(struct cdbw *cdbw, int fd, const char descr[16],
    uint32_t (*seedgen)(void))
{
	struct state state;
	int rv;

	if (cdbw->data_counter == 0 || cdbw->key_counter == 0) {
		state.entries = 0;
		state.seed = 0;
		print_hash(cdbw, &state, fd, descr);
		return 0;
	}

	if (seedgen == NULL)
		seedgen = arc4random;

	rv = 0;

	state.keys = cdbw->key_counter;
	state.data_entries = cdbw->data_counter;
	state.entries = state.keys + (state.keys + 3) / 4;
	if (state.entries < 10)
		state.entries = 10;

#define	NALLOC(var, n)	var = calloc(sizeof(*var), n)
	NALLOC(state.g, state.entries);
	NALLOC(state.visited, state.entries);
	NALLOC(state.verts, state.entries);
	NALLOC(state.edges, state.entries);
	NALLOC(state.output_order, state.keys);
#undef NALLOC

	if (state.g == NULL || state.visited == NULL || state.verts == NULL ||
	    state.edges == NULL || state.output_order == NULL) {
		rv = -1;
		goto release;
	}

	do {
		state.seed = (*seedgen)();
	} while (build_graph(cdbw, &state));

	assign_nodes(&state);
	rv = print_hash(cdbw, &state, fd, descr);

release:
	free(state.g);
	free(state.visited);
	free(state.verts);
	free(state.edges);
	free(state.output_order);

	return rv;
}