2 * uhtbl - Generic coalesced hash table implementation
3 * Copyright (C) 2010 Steven Barth <steven@midlink.org>
4 * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
22 * uhtbl is a coalesced cellared generic hash table implementation with the aim
23 * to be both small in code size and heap memory requirements. This hash table
24 * uses a hybrid approach called coalesced addressing which means that pointers
25 * to other buckets will be used in the case of a collisions. In this case no
26 * linked lists have to be used and less allocation calls have to be done.
27 * To improve performance this hash table carries a cellar for collision
28 * handling which is an additional address area that lies behind the
29 * hash-addressable space.
31 * Overhead (on x86 32bit):
32 * Bookkeeping: 32 Bytes (per hash table)
33 * Metadata: 12 Bytes including pointer to key and keysize (per bucket)
41 /* compile-time configurables */
43 #ifndef UHTBL_PAYLOADFACTOR
44 #define UHTBL_PAYLOADFACTOR 0.86
47 #ifndef UHTBL_GROWFACTOR
48 #define UHTBL_GROWFACTOR 2
51 #ifndef UHTBL_MINIMUMSIZE
52 #define UHTBL_MINIMUMSIZE 16
58 /* Internal flags and values */
59 #define UHTBL_FLAG_OCCUPIED 0x01
60 #define UHTBL_FLAG_STRANGER 0x02
61 #define UHTBL_FLAG_WITHNEXT 0x04
62 #define UHTBL_FLAG_LOCALKEY 0x08
63 #define UHTBL_MAXIMUMSIZE 2147483648
67 #define UHTBL_EINVAL -1
68 #define UHTBL_ENOMEM -2
69 #define UHTBL_ENOENT -3
76 #define UHTBL_INLINE static inline __attribute__((always_inline))
81 #define UHTBL_INLINE static inline
85 typedef union uhtbl_key uhtbl_key_t
;
86 typedef struct uhtbl_head uhtbl_head_t
;
87 typedef struct uhtbl_bucket uhtbl_bucket_t
;
88 typedef struct uhtbl_config uhtbl_config_t
;
89 typedef struct uhtbl uhtbl_t
;
90 typedef uint32_t uhtbl_size_t
;
92 typedef uhtbl_size_t(uhtbl_hash_t
)(const void*, int len
);
93 typedef void(uhtbl_gc_t
)(void *bucket
);
108 struct uhtbl_bucket
{
116 uhtbl_size_t payload
;
117 uhtbl_size_t nextfree
;
118 uhtbl_hash_t
*fct_hash
;
124 * uhtbl_init() - Initialize a hash table.
126 * @bucketsize: size of a bucket
127 * @sizehint: estimated maximum of needed buckets (optional)
128 * @fct_hash: hash function
129 * @fct_gc: bucket garbage collector (optional)
131 * Initializes a new hash table and preallocates memory.
133 * bucketsize is the size in Bytes each bucket will use but note the following:
134 * Each bucket needs to begin with a struct uhtbl_head_t that keeps its metadata
135 * in addition to the payload you want it to carry. You are advised to define a
136 * bucket struct with the first element being a uhtbl_head_t followed by your
137 * desired payload and pass the size of this struct to bucketsize.
139 * sizehint is a hint on how many distinct entries will be stored in the hash
140 * table. This will be used to preallocate space for the buckets and is useful
141 * if you know how many entries will be stored in the hash table as it avoids
142 * expensive rehashing cycles. sizehint should be a power of 2.
144 * fct_hash is the hash function used. It takes a constant void pointer and a
145 * integer as size parameter and returns an unsigned (32bit) int.
147 * fct_gc is the garbage collector for buckets. Every time a bucket gets unset
148 * or the hash table gets cleared or finalized the garbage collector function
149 * taking a pointer to a bucket will take care of doing any finalization for
150 * the buckets' payload and key data. You may use uhtbl_key() to get a reference
151 * to your key pointer or handle for deallocation or cleaning up any other
152 * references. There is an optionally selectable garbage collector that will
153 * take care of free()ing key pointers if your keys point to memory areas.
154 * You have to pass uhtbl_gc_key as fct_gc parameter to use it. You may also
155 * call this function in your custom garbage collector.
157 * WARNING: Your garbage collector function must otherwise not change the
158 * metadata in the uhtbl_head_t structure of the bucket else behaviour will be
159 * undefined for all subsequent actions.
170 * uhtbl_init(&table, sizeof(struct mybucket), 32, MurmurHash2, NULL);
172 * Returns 0 on success or a negative error code.
174 UHTBL_API
int uhtbl_init(uhtbl_t
*tbl
, uint32_t bucketsize
,
175 uhtbl_size_t sizehint
, uhtbl_hash_t
*fct_hash
, uhtbl_gc_t
*fct_gc
);
178 * uhtbl_get() - Get a bucket by its key.
181 * @len: length of key
183 * Finds and returns the bucket with a given key.
186 * 1. A pointer to a memory area then len is its length (must be < 64KB)
187 * 2. A NULL-pointer then len is a locally stored numerical key
191 * struct mybucket *bucket;
192 * bucket = uhtbl_get(table, "foo", sizeof("foo"));
193 * printf("%i", bucket->mypayload1);
195 * bucket = uhtbl_get(table, NULL, 42);
196 * printf("%i", bucket->mypayload1);
198 * Returns the bucket or NULL if no bucket with given key was found.
200 UHTBL_API
void* uhtbl_get(uhtbl_t
*tbl
, const void *key
, long len
);
203 * uhtbl_set() - Sets a bucket for given key.
206 * @len: length of key
208 * Sets a new bucket for the given key and returns a pointer to the bucket for
209 * you to assign your payload data. If there is already a bucket with that key
210 * it will be unset first.
213 * 1. A pointer to a memory area then len is its length (must be < 64KB)
214 * 2. A NULL-pointer then len is a locally stored numerical key
216 * NOTE: If key is a pointer memory management of it will be your business.
217 * You might want to use a garbage collection function (see uhtbl_init())
219 * NOTE: The payload area of your bucket is NOT initialized to zeroes.
221 * WARNING: Note the following side effects when setting previously unset keys:
222 * 1. A set may trigger several moving actions changing the order of buckets.
223 * 2. A set may trigger a rehashing cycle if all buckets are occupied.
224 * Therefore accessing any previously acquired pointers to any bucket results in
225 * undefined behaviour. In addition iterations which have started before may
226 * result in unwanted behaviour (e.g. buckets may be skipped or visited twice).
230 * struct mybucket *bucket;
231 * bucket = uhtbl_set(table, "foo", sizeof("foo"));
232 * bucket->mypayload1 = 42:
234 * bucket = uhtbl_set(table, NULL, 42);
235 * bucket->mypayload1 = 1337;
238 * Returns the bucket or NULL if no bucket could be allocated (out of memory).
240 UHTBL_API
void* uhtbl_set(uhtbl_t
*tbl
, void *key
, long len
);
243 * uhtbl_next() - Iterates over all entries of the hash table.
245 * @iter: Iteration counter
247 * Iterates over all entries of the hash table.
249 * iter is a pointer to a numeric variable that should be set to zero before
250 * the first call and will save the iteration state.
252 * NOTE: You may safely do several iterations in parallel. You may also safely
253 * unset any buckets of the hashtable or set keys that are currently in the
254 * hash table. However setting buckets with keys that don't have an assigned
255 * bucket yet results in undefined behaviour.
259 * struct mybucket *bucket;
260 * while ((bucket = uhtbl_next(table, &iter))) {
261 * printf("%i", bucket->mypayload1);
264 * Return the next bucket or NULL if all buckets were already visited.
266 UHTBL_API
void* uhtbl_next(uhtbl_t
*tbl
, uhtbl_size_t
*iter
);
269 * uhtbl_unset() - Unsets the bucket with given key.
272 * @len: length of key (optional)
274 * Unsets the bucket with given key and calls the garbage collector to free
275 * any payload resources - if any.
278 * 1. A pointer to a memory area then len is its length (must be < 64KB)
279 * 2. A NULL-pointer then len is a locally stored numerical key
282 * uhtbl_unset(table, NULL, 42);
284 * Returns 0 on success or a negative error code if there was no matching bucket
286 UHTBL_API
int uhtbl_unset(uhtbl_t
*tbl
, const void *key
, long len
);
289 * uhtbl_remove() - Unsets a bucket.
293 * Unsets the bucket with given address and calls the garbage collector to free
294 * any payload resources - if any.
297 * uhtbl_remove(table, &bucket->head);
299 * Returns 0 on success or a negative error code if the bucket was not found
301 UHTBL_API
int uhtbl_remove(uhtbl_t
*tbl
, uhtbl_head_t
*head
);
304 * uhtbl_clear() - Clears the hashtable without freeing its memory.
307 * Clears all buckets of the hashtable invoking the garbage collector - if any
308 * but does not free the memory of the hash table. This is usually more
309 * efficient than iterating and using unset.
313 UHTBL_API
void uhtbl_clear(uhtbl_t
*tbl
);
316 * uhtbl_resize() - Resizes and rehashes the hash table.
318 * @payload: Buckets to reserve.
320 * Resizes the hash table and rehashes its entries.
322 * payload is the number of buckets the hashtable should allocate. It must be
323 * greater or at least equal to the number of buckets currently occupied.
325 * NOTE: Rehashing is an expensive process which should be avoided if possible.
326 * However resizing will be automatically done if you try to set a new bucket
327 * but all buckets are already occupied. In this case the payload bucket count
328 * is usually doubled. There is currently no automatic resizing done when the
329 * bucket usage count decreases. You have to take care of this by yourself.
331 * Returns 0 on success or a negative error code if out of memory.
333 UHTBL_API
int uhtbl_resize(uhtbl_t
*tbl
, uhtbl_size_t payload
);
336 * uhtbl_clear() - Clears the hashtable and frees the bucket memory.
339 * Clears all buckets of the hashtable invoking the garbage collector - if any
340 * and frees the memory occupied by the buckets.
344 UHTBL_API
void uhtbl_finalize(uhtbl_t
*tbl
);
347 * uhtbl_key() - Returns the key parameters as passed to set.
349 * @key: Pointer where key pointer should be stored (optional)
350 * @len: Pointer where key len should be stored (optional)
352 * This function might be useful to obtain the key parameters of a bucket
353 * when doing garbage collection.
357 UHTBL_API
void uhtbl_key(uhtbl_head_t
*head
, void **key
, long *len
);
360 * uhtbl_gc_key() - Basic garbage collector that frees key memory.
363 * This function is a basic garbage collector that will free any key pointers.
364 * However it will not touch your payload data.
366 * WARNING: You must not call this function directly on any bucket otherwise
367 * behaviour will be unspecified. Instead you may pass this function to the
368 * uhtbl_init function. You may also call this function from inside a custom
373 UHTBL_API
void uhtbl_gc_key(void *bucket
);
375 #endif /* UHTBL_H_ */