Initial import
[project/libubox.git] / uhtbl.h
1 /**
2 * uhtbl - Generic coalesced hash table implementation
3 * Copyright (C) 2010 Steven Barth <steven@midlink.org>
4 * Copyright (C) 2010 John Crispin <blogic@openwrt.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21 /**
22 * uhtbl is a coalesced cellared generic hash table implementation with the aim
23 * to be both small in code size and heap memory requirements. This hash table
24 * uses a hybrid approach called coalesced addressing which means that pointers
25 * to other buckets will be used in the case of a collisions. In this case no
26 * linked lists have to be used and less allocation calls have to be done.
27 * To improve performance this hash table carries a cellar for collision
28 * handling which is an additional address area that lies behind the
29 * hash-addressable space.
30 *
31 * Overhead (on x86 32bit):
32 * Bookkeeping: 32 Bytes (per hash table)
33 * Metadata: 12 Bytes including pointer to key and keysize (per bucket)
34 *
35 */
36
37 #ifndef UHTBL_H_
38 #define UHTBL_H_
39
40
41 /* compile-time configurables */
42
43 #ifndef UHTBL_PAYLOADFACTOR
44 #define UHTBL_PAYLOADFACTOR 0.86
45 #endif
46
47 #ifndef UHTBL_GROWFACTOR
48 #define UHTBL_GROWFACTOR 2
49 #endif
50
51 #ifndef UHTBL_MINIMUMSIZE
52 #define UHTBL_MINIMUMSIZE 16
53 #endif
54
55
56 #include <stdint.h>
57
58 /* Internal flags and values */
59 #define UHTBL_FLAG_OCCUPIED 0x01
60 #define UHTBL_FLAG_STRANGER 0x02
61 #define UHTBL_FLAG_WITHNEXT 0x04
62 #define UHTBL_FLAG_LOCALKEY 0x08
63 #define UHTBL_MAXIMUMSIZE 2147483648
64
65 /* Status codes */
66 #define UHTBL_OK 0
67 #define UHTBL_EINVAL -1
68 #define UHTBL_ENOMEM -2
69 #define UHTBL_ENOENT -3
70
71 /* API */
72 #if __GNUC__ >= 4
73 #ifndef UHTBL_API
74 #define UHTBL_API
75 #endif
76 #define UHTBL_INLINE static inline __attribute__((always_inline))
77 #else
78 #ifndef UHTBL_API
79 #define UHTBL_API
80 #endif
81 #define UHTBL_INLINE static inline
82 #endif
83
84
85 typedef union uhtbl_key uhtbl_key_t;
86 typedef struct uhtbl_head uhtbl_head_t;
87 typedef struct uhtbl_bucket uhtbl_bucket_t;
88 typedef struct uhtbl_config uhtbl_config_t;
89 typedef struct uhtbl uhtbl_t;
90 typedef uint32_t uhtbl_size_t;
91
92 typedef uhtbl_size_t(uhtbl_hash_t)(const void*, int len);
93 typedef void(uhtbl_gc_t)(void *bucket);
94
95 union uhtbl_key {
96 void *ptr;
97 long handle;
98 };
99
100 struct uhtbl_head {
101 uint8_t user;
102 uint8_t flags;
103 uint16_t keysize;
104 uhtbl_size_t next;
105 uhtbl_key_t key;
106 };
107
108 struct uhtbl_bucket {
109 uhtbl_head_t head;
110 };
111
112 struct uhtbl {
113 uint32_t bucketsize;
114 uhtbl_size_t size;
115 uhtbl_size_t used;
116 uhtbl_size_t payload;
117 uhtbl_size_t nextfree;
118 uhtbl_hash_t *fct_hash;
119 uhtbl_gc_t *fct_gc;
120 void *buckets;
121 };
122
123 /**
124 * uhtbl_init() - Initialize a hash table.
125 * @tbl: hash table
126 * @bucketsize: size of a bucket
127 * @sizehint: estimated maximum of needed buckets (optional)
128 * @fct_hash: hash function
129 * @fct_gc: bucket garbage collector (optional)
130 *
131 * Initializes a new hash table and preallocates memory.
132 *
133 * bucketsize is the size in Bytes each bucket will use but note the following:
134 * Each bucket needs to begin with a struct uhtbl_head_t that keeps its metadata
135 * in addition to the payload you want it to carry. You are advised to define a
136 * bucket struct with the first element being a uhtbl_head_t followed by your
137 * desired payload and pass the size of this struct to bucketsize.
138 *
139 * sizehint is a hint on how many distinct entries will be stored in the hash
140 * table. This will be used to preallocate space for the buckets and is useful
141 * if you know how many entries will be stored in the hash table as it avoids
142 * expensive rehashing cycles. sizehint should be a power of 2.
143 *
144 * fct_hash is the hash function used. It takes a constant void pointer and a
145 * integer as size parameter and returns an unsigned (32bit) int.
146 *
147 * fct_gc is the garbage collector for buckets. Every time a bucket gets unset
148 * or the hash table gets cleared or finalized the garbage collector function
149 * taking a pointer to a bucket will take care of doing any finalization for
150 * the buckets' payload and key data. You may use uhtbl_key() to get a reference
151 * to your key pointer or handle for deallocation or cleaning up any other
152 * references. There is an optionally selectable garbage collector that will
153 * take care of free()ing key pointers if your keys point to memory areas.
154 * You have to pass uhtbl_gc_key as fct_gc parameter to use it. You may also
155 * call this function in your custom garbage collector.
156 *
157 * WARNING: Your garbage collector function must otherwise not change the
158 * metadata in the uhtbl_head_t structure of the bucket else behaviour will be
159 * undefined for all subsequent actions.
160 *
161 *
162 * Example:
163 * struct mybucket {
164 * uhtbl_head_t head;
165 * int mypayload1;
166 * int mypayload2;
167 * }
168 *
169 * uhtbl_t table;
170 * uhtbl_init(&table, sizeof(struct mybucket), 32, MurmurHash2, NULL);
171 *
172 * Returns 0 on success or a negative error code.
173 */
174 UHTBL_API int uhtbl_init(uhtbl_t *tbl, uint32_t bucketsize,
175 uhtbl_size_t sizehint, uhtbl_hash_t *fct_hash, uhtbl_gc_t *fct_gc);
176
177 /**
178 * uhtbl_get() - Get a bucket by its key.
179 * @tbl: hash table
180 * @key: key
181 * @len: length of key
182 *
183 * Finds and returns the bucket with a given key.
184 *
185 * Key can either be:
186 * 1. A pointer to a memory area then len is its length (must be < 64KB)
187 * 2. A NULL-pointer then len is a locally stored numerical key
188 *
189 *
190 * Example:
191 * struct mybucket *bucket;
192 * bucket = uhtbl_get(table, "foo", sizeof("foo"));
193 * printf("%i", bucket->mypayload1);
194 *
195 * bucket = uhtbl_get(table, NULL, 42);
196 * printf("%i", bucket->mypayload1);
197 *
198 * Returns the bucket or NULL if no bucket with given key was found.
199 */
200 UHTBL_API void* uhtbl_get(uhtbl_t *tbl, const void *key, long len);
201
202 /**
203 * uhtbl_set() - Sets a bucket for given key.
204 * @tbl: hash table
205 * @key: key
206 * @len: length of key
207 *
208 * Sets a new bucket for the given key and returns a pointer to the bucket for
209 * you to assign your payload data. If there is already a bucket with that key
210 * it will be unset first.
211 *
212 * Key can either be:
213 * 1. A pointer to a memory area then len is its length (must be < 64KB)
214 * 2. A NULL-pointer then len is a locally stored numerical key
215 *
216 * NOTE: If key is a pointer memory management of it will be your business.
217 * You might want to use a garbage collection function (see uhtbl_init())
218 *
219 * NOTE: The payload area of your bucket is NOT initialized to zeroes.
220 *
221 * WARNING: Note the following side effects when setting previously unset keys:
222 * 1. A set may trigger several moving actions changing the order of buckets.
223 * 2. A set may trigger a rehashing cycle if all buckets are occupied.
224 * Therefore accessing any previously acquired pointers to any bucket results in
225 * undefined behaviour. In addition iterations which have started before may
226 * result in unwanted behaviour (e.g. buckets may be skipped or visited twice).
227 *
228 *
229 * Example:
230 * struct mybucket *bucket;
231 * bucket = uhtbl_set(table, "foo", sizeof("foo"));
232 * bucket->mypayload1 = 42:
233 *
234 * bucket = uhtbl_set(table, NULL, 42);
235 * bucket->mypayload1 = 1337;
236 *
237 *
238 * Returns the bucket or NULL if no bucket could be allocated (out of memory).
239 */
240 UHTBL_API void* uhtbl_set(uhtbl_t *tbl, void *key, long len);
241
242 /**
243 * uhtbl_next() - Iterates over all entries of the hash table.
244 * @tbl: hash table
245 * @iter: Iteration counter
246 *
247 * Iterates over all entries of the hash table.
248 *
249 * iter is a pointer to a numeric variable that should be set to zero before
250 * the first call and will save the iteration state.
251 *
252 * NOTE: You may safely do several iterations in parallel. You may also safely
253 * unset any buckets of the hashtable or set keys that are currently in the
254 * hash table. However setting buckets with keys that don't have an assigned
255 * bucket yet results in undefined behaviour.
256 *
257 * Example:
258 * uint32_t iter = 0;
259 * struct mybucket *bucket;
260 * while ((bucket = uhtbl_next(table, &iter))) {
261 * printf("%i", bucket->mypayload1);
262 * }
263 *
264 * Return the next bucket or NULL if all buckets were already visited.
265 */
266 UHTBL_API void* uhtbl_next(uhtbl_t *tbl, uhtbl_size_t *iter);
267
268 /**
269 * uhtbl_unset() - Unsets the bucket with given key.
270 * @tbl: hash table
271 * @key: key
272 * @len: length of key (optional)
273 *
274 * Unsets the bucket with given key and calls the garbage collector to free
275 * any payload resources - if any.
276 *
277 * Key can either be:
278 * 1. A pointer to a memory area then len is its length (must be < 64KB)
279 * 2. A NULL-pointer then len is a locally stored numerical key
280 *
281 * Example:
282 * uhtbl_unset(table, NULL, 42);
283 *
284 * Returns 0 on success or a negative error code if there was no matching bucket
285 */
286 UHTBL_API int uhtbl_unset(uhtbl_t *tbl, const void *key, long len);
287
288 /**
289 * uhtbl_remove() - Unsets a bucket.
290 * @tbl: hash table
291 * @head: bucket head
292 *
293 * Unsets the bucket with given address and calls the garbage collector to free
294 * any payload resources - if any.
295 *
296 * Example:
297 * uhtbl_remove(table, &bucket->head);
298 *
299 * Returns 0 on success or a negative error code if the bucket was not found
300 */
301 UHTBL_API int uhtbl_remove(uhtbl_t *tbl, uhtbl_head_t *head);
302
303 /**
304 * uhtbl_clear() - Clears the hashtable without freeing its memory.
305 * @tbl: hash table
306 *
307 * Clears all buckets of the hashtable invoking the garbage collector - if any
308 * but does not free the memory of the hash table. This is usually more
309 * efficient than iterating and using unset.
310 *
311 * Returns nothing.
312 */
313 UHTBL_API void uhtbl_clear(uhtbl_t *tbl);
314
315 /**
316 * uhtbl_resize() - Resizes and rehashes the hash table.
317 * @tbl: hash table
318 * @payload: Buckets to reserve.
319 *
320 * Resizes the hash table and rehashes its entries.
321 *
322 * payload is the number of buckets the hashtable should allocate. It must be
323 * greater or at least equal to the number of buckets currently occupied.
324 *
325 * NOTE: Rehashing is an expensive process which should be avoided if possible.
326 * However resizing will be automatically done if you try to set a new bucket
327 * but all buckets are already occupied. In this case the payload bucket count
328 * is usually doubled. There is currently no automatic resizing done when the
329 * bucket usage count decreases. You have to take care of this by yourself.
330 *
331 * Returns 0 on success or a negative error code if out of memory.
332 */
333 UHTBL_API int uhtbl_resize(uhtbl_t *tbl, uhtbl_size_t payload);
334
335 /**
336 * uhtbl_clear() - Clears the hashtable and frees the bucket memory.
337 * @tbl: hash table
338 *
339 * Clears all buckets of the hashtable invoking the garbage collector - if any
340 * and frees the memory occupied by the buckets.
341 *
342 * Returns nothing.
343 */
344 UHTBL_API void uhtbl_finalize(uhtbl_t *tbl);
345
346 /**
347 * uhtbl_key() - Returns the key parameters as passed to set.
348 * @head: Bucket head
349 * @key: Pointer where key pointer should be stored (optional)
350 * @len: Pointer where key len should be stored (optional)
351 *
352 * This function might be useful to obtain the key parameters of a bucket
353 * when doing garbage collection.
354 *
355 * Returns nothing.
356 */
357 UHTBL_API void uhtbl_key(uhtbl_head_t *head, void **key, long *len);
358
359 /**
360 * uhtbl_gc_key() - Basic garbage collector that frees key memory.
361 * @bucket: Bucket
362 *
363 * This function is a basic garbage collector that will free any key pointers.
364 * However it will not touch your payload data.
365 *
366 * WARNING: You must not call this function directly on any bucket otherwise
367 * behaviour will be unspecified. Instead you may pass this function to the
368 * uhtbl_init function. You may also call this function from inside a custom
369 * garbage collector.
370 *
371 * Returns nothing.
372 */
373 UHTBL_API void uhtbl_gc_key(void *bucket);
374
375 #endif /* UHTBL_H_ */