1LIBCDT(3) Library Functions Manual LIBCDT(3)
2
3
4
6 Cdt - container data types
7
9 #include <cdt.h>
10
11 DICTIONARY TYPES
12 Dt_t;
13 Dtdisc_t;
14 Dtmethod_t;
15 Dtlink_t;
16 Dtstat_t;
17
18 DICTIONARY CONTROL
19 Dt_t* dtopen(const Dtdisc_t* disc, const Dtmethod_t* meth);
20 int dtclose(Dt_t* dt);
21 void dtclear(dt);
22 Dtmethod_t* dtmethod(Dt_t* dt, const Dtmethod_t* meth);
23 Dtdisc_t* dtdisc(Dt_t* dt, const Dtdisc_t* disc, int type);
24 Dt_t* dtview(Dt_t* dt, Dt_t* view);
25 int dttreeset(Dt_t* dt, int minp, int balance);
26
27 STORAGE METHODS
28 Dtmethod_t* Dtset;
29 Dtmethod_t* Dtbag;
30 Dtmethod_t* Dtoset;
31 Dtmethod_t* Dtobag;
32 Dtmethod_t* Dtlist;
33 Dtmethod_t* Dtstack;
34 Dtmethod_t* Dtqueue;
35 Dtmethod_t* Dtdeque;
36
37 DISCIPLINE
38 #define DTOFFSET(struct_s,member)
39 #define DTDISC(disc,key,size,link,makef,freef,comparf,hashf,memoryf,eventf)
40 typedef void* (*Dtmake_f)(Dt_t*, void*, Dtdisc_t*);
41 typedef void (*Dtfree_f)(Dt_t*, void*, Dtdisc_t*);
42 typedef int (*Dtcompar_f)(Dt_t*, void*, void*, Dtdisc_t*);
43 typedef unsigned int (*Dthash_f)(Dt_t*, void*, Dtdisc_t*);
44 typedef void* (*Dtmemory_f)(Dt_t*, void*, size_t, Dtdisc_t*);
45 typedef int (*Dtevent_f)(Dt_t*, int, void*, Dtdisc_t*);
46
47 OBJECT OPERATIONS
48 void* dtinsert(Dt_t* dt, void* obj);
49 void* dtappend(Dt_t* dt, void* obj);
50 void* dtdelete(Dt_t* dt, void* obj);
51 void* dtattach(Dt_t* dt, void* obj);
52 void* dtdetach(Dt_t* dt, void* obj);
53 void* dtsearch(Dt_t* dt, void* obj);
54 void* dtmatch(Dt_t* dt, void* key);
55 void* dtfirst(Dt_t* dt);
56 void* dtnext(Dt_t* dt, void* obj);
57 void* dtlast(Dt_t* dt);
58 void* dtprev(Dt_t* dt, void* obj);
59 void* dtfinger(Dt_t* dt);
60 void* dtrenew(Dt_t* dt, void* obj);
61 int dtwalk(Dt_t* dt, int (*userf)(Dt_t*, void*, void*), void*);
62 Dtlink_t* dtflatten(Dt_t* dt);
63 Dtlink_t* dtlink(Dt_t*, Dtlink_t* link);
64 void* dtobj(Dt_t* dt, Dtlink_t* link);
65 Dtlink_t* dtextract(Dt_t* dt);
66 int dtrestore(Dt_t* dt, Dtlink_t* link);
67
68 #define DTTREESEARCH(Dt_t* dt, void* obj, action)
69 #define DTTREEMATCH(Dt_t* dt, void* key, action)
70
71 DICTIONARY STATUS
72 Dt_t* dtvnext(Dt_t* dt);
73 int dtvcount(Dt_t* dt);
74 Dt_t* dtvhere(Dt_t* dt);
75 int dtsize(Dt_t* dt);
76 int dtstat(Dt_t* dt, Dtstat_t*, int all);
77
78 HASH FUNCTIONS
79 unsigned int dtstrhash(unsigned int h, char* str, int n);
80 unsigned int dtcharhash(unsigned int h, unsigned char c);
81
83 Cdt manages run-time dictionaries using standard container data types:
84 unordered set/multiset, ordered set/multiset, list, stack, and queue.
85
86 DICTIONARY TYPES
87 Dt_t
88 This is the type of a dictionary handle.
89
90 Dtdisc_t
91 This defines the type of a discipline structure which describes object
92 lay-out and manipulation functions.
93
94 Dtmethod_t
95 This defines the type of a container method.
96
97 Dtlink_t
98 This is the type of a dictionary object holder (see dtdisc().)
99
100 Dtstat_t
101 This is the type of a structure to return dictionary statistics (see
102 dtstat().)
103
104 DICTIONARY CONTROL
105 Dt_t* dtopen(const Dtdisc_t* disc, const Dtmethod_t* meth)
106 This creates a new dictionary. disc is a discipline structure to
107 describe object format. meth specifies a manipulation method.
108 dtopen() returns the new dictionary or NULL on error. See also the
109 events DT_OPEN and DT_ENDOPEN below.
110
111 int dtclose(Dt_t* dt)
112 This deletes dt and its objects. Note that dtclose() fails if dt is
113 being viewed by some other dictionaries (see dtview()). dtclose()
114 returns 0 on success and -1 on error. See also the events DT_CLOSE and
115 DT_ENDCLOSE below.
116
117 void dtclear(Dt_t* dt)
118 This deletes all objects in dt without closing dt.
119
120 Dtmethod_t dtmethod(Dt_t* dt, const Dtmethod_t* meth)
121 If meth is NULL, dtmethod() returns the current method. Otherwise, it
122 changes the storage method of dt to meth. Object order remains the
123 same during a method switch among Dtlist, Dtstack, Dtqueue and Dtdeque.
124 Switching to and from Dtset/Dtbag and Dtoset/Dtobag may cause objects
125 to be rehashed, reordered, or removed as the case requires. dtmethod()
126 returns the previous method or NULL on error.
127
128 Dtdisc_t* dtdisc(Dt_t* dt, const Dtdisc_t* disc, int type)
129 If disc is NULL, dtdisc() returns the current discipline. Otherwise,
130 it changes the discipline of dt to disc. Objects may be rehashed,
131 reordered, or removed as appropriate. type can be any bit combination
132 of DT_SAMECMP and DT_SAMEHASH. DT_SAMECMP means that objects will com‐
133 pare exactly the same as before thus obviating the need for reordering
134 or removing new duplicates. DT_SAMEHASH means that hash values of
135 objects remain the same thus obviating the need to rehash. dtdisc()
136 returns the previous discipline on success and NULL on error.
137
138 Dt_t* dtview(Dt_t* dt, Dt_t* view)
139 A viewpath allows a search or walk starting from a dictionary to con‐
140 tinue to another. dtview() first terminates any current view from dt
141 to another dictionary. Then, if view is NULL, dtview returns the ter‐
142 minated view dictionary. If view is not NULL, a viewpath from dt to
143 view is established. dtview() returns dt on success and NULL on error.
144
145 It is an error to have dictionaries on a viewpath with different stor‐
146 age methods. In addition, dictionaries on the same view path should
147 treat objects in a consistent manner with respect to comparison or
148 hashing. If not, undefined behaviors may result.
149
150 int dttreeset(Dt_t* dt, int minp, int balance)
151 This function only applies to dictionaries operated under the method
152 Dtoset which uses top-down splay trees (see below). It returns 0 on
153 success and -1 on error.
154
155 minp: This parameter defines the minimum path length before a search
156 path is adjusted. For example, minp equal 0 would mean that
157 search paths are always adjusted. If minp is negative, the min‐
158 imum search path is internally computed based on a function of
159 the current dictionary size. This computed value is such that if
160 the tree is balanced, it will never require adjusting.
161
162 balance:
163 If this is non-zero, the tree will be made balanced.
164
165 STORAGE METHODS
166 Storage methods are of type Dtmethod_t*. Cdt supports the following
167 methods:
168
169 Dtoset
170 Dtobag
171 Objects are ordered by comparisons. Dtoset keeps unique objects. Dto‐
172 bag allows repeatable objects.
173
174 Dtset
175 Dtbag
176 Objects are unordered. Dtset keeps unique objects. Dtbag allows
177 repeatable objects and always keeps them together (note the effect on
178 dictionary walking.) These methods use a hash table with chaining to
179 manage the objects. See also the event DT_HASHSIZE below on how to
180 manage hash table resizing when objects are inserted.
181
182 Dtlist
183 Objects are kept in a list. The call dtinsert() inserts a new object
184 in front of the current object (see dtfinger()) if it is defined or at
185 list front if no current object is defined. Similarly, the call dtap‐
186 pend() appends a new object after the current object (see dtfinger())
187 if it is defined or at list end if no current object is defined.
188
189 Dtdeque
190 Objects are kept in a deque. This is similar to Dtlist except that
191 objects are always inserted at the front and appended at the tail of
192 the list.
193
194 Dtstack
195 Objects are kept in a stack, i.e., in reverse order of insertion.
196 Thus, the last object inserted is at stack top and will be the first to
197 be deleted.
198
199 Dtqueue
200 Objects are kept in a queue, i.e., in order of insertion. Thus, the
201 first object inserted is at queue head and will be the first to be
202 deleted.
203
204 DISCIPLINE
205 Object format and associated management functions are defined in the
206 type Dtdisc_t:
207 typedef struct
208 { int key, size;
209 int link;
210 Dtmake_f makef;
211 Dtfree_f freef;
212 Dtcompar_f comparf;
213 Dthash_f hashf;
214 Dtmemory_f memoryf;
215 Dtevent_f eventf;
216 } Dtdisc_t;
217
218 int key, size
219 Each object obj is identified by a key used for object comparison or
220 hashing. key should be non-negative and defines an offset into obj.
221 If size is negative, the key is a null-terminated string with starting
222 address *(void**)((char*)obj+key). If size is zero, the key is a null-
223 terminated string with starting address (void*)((char*)obj+key).
224 Finally, if size is positive, the key is a byte array of length size
225 starting at (void*)((char*)obj+key).
226
227 int link
228 Let obj be an object to be inserted into dt as discussed below. If
229 link is negative, an internally allocated object holder is used to hold
230 obj. Otherwise, obj should have a Dtlink_t structure embedded link
231 bytes into it, i.e., at address (Dtlink_t*)((char*)obj+link).
232
233 void* (*makef)(Dt_t* dt, void* obj, Dtdisc_t* disc)
234 If makef is not NULL, dtinsert(dt,obj) or dtappend() will call it to
235 make a copy of obj suitable for insertion into dt. If makef is NULL,
236 obj itself will be inserted into dt.
237
238 void (*freef)(Dt_t* dt, void* obj, Dtdisc_t* disc)
239 If not NULL, freef is used to destroy data associated with obj.
240
241 int (*comparf)(Dt_t* dt, void* key1, void* key2, Dtdisc_t* disc)
242 If not NULL, comparf is used to compare two keys. Its return value
243 should be <0, =0, or >0 to indicate whether key1 is smaller, equal to,
244 or larger than key2. All three values are significant for method
245 Dtoset and Dtobag. For other methods, a zero value indicates equality
246 and a non-zero value indicates inequality. If (*comparf)() is NULL, an
247 internal function is used to compare the keys as defined by the
248 Dtdisc_t.size field.
249
250 unsigned int (*hashf)(Dt_t* dt, void* key, Dtdisc_t* disc)
251 If not NULL, hashf is used to compute the hash value of key. It is
252 required that keys compared equal will also have same hash values. If
253 hashf is NULL, an internal function is used to hash the key as defined
254 by the Dtdisc_t.size field.
255
256 void* (*memoryf)(Dt_t* dt, void* addr, size_t size, Dtdisc_t* disc)
257 If not NULL, memoryf is used to allocate and free memory. When addr is
258 NULL, a memory segment of size size is requested. If addr is not NULL
259 and size is zero, addr is to be freed. If addr is not NULL and size is
260 positive, addr is to be resized to the given size. If memoryf is NULL,
261 malloc(3) is used.
262
263 int (*eventf)(Dt_t* dt, int type, void* data, Dtdisc_t* disc)
264 If not NULL, eventf announces various events. Each event may have par‐
265 ticular handling of the return values from eventf. But a negative
266 return value typically means failure. Following are the events:
267
268 DT_OPEN:
269 dt is being opened. If eventf returns negative, the opening
270 process terminates with failure. If eventf returns zero, the
271 opening process proceeds in a default manner. A positive return
272 value indicates special treatment of memory as follows. If
273 *(void**)data is set to point to some memory segment as dis‐
274 cussed in memoryf, that segment of memory is used to start the
275 dictionary. If *(void**)data is NULL, all memory including that
276 of the dictionary handle itself will be allocated via memoryf.
277
278 DT_ENDOPEN:
279 This event announces that dtopen() has successfully opened a
280 dictionary and is about to return. The data argument of eventf
281 should be the new dictionary handle itself.
282
283 DT_CLOSE:
284 dt is about to be closed. If eventf returns negative, the clos‐
285 ing process stops immediately and dtclose() returns -1. Objects
286 in the dictionary are deleted only if eventf returns zero. The
287 dictionary handle itself is processed as follows. If it was
288 allocated via malloc(), it will be freed. If it was allocated
289 via memoryf (see dtopen()) and eventf returns 0, a call to memo‐
290 ryf will be issued to attempt freeing the handle. Otherwise,
291 nothing will be done to its memory.
292
293 As should be clear from their description, the events DT_OPEN
294 and DT_CLOSE are designed to be used along with memoryf to man‐
295 age the allocation and deallocation of dictionary and object
296 memory across dictionaries. In fact, they can be used to manage
297 dictionaries based on shared and/or persistent memory.
298
299 DT_ENDCLOSE:
300 This event announces that dtclose() has successfully closed a
301 dictionary and is about to return.
302
303 DT_DISC:
304 The discipline of dt is being changed to a new one given in
305 (Dtdisc_t*)data.
306
307 DT_METH:
308 The method of dt is being changed to a new one given in
309 (Dtmethod_t*)data.
310
311 DT_HASHSIZE:
312 The hash table (for Dtset and Dtbag) is being resized. In this
313 case, *(int*)data has the current size of the table. The appli‐
314 cation can set the new table size by first changing *(int*)data
315 to the desired size, then return a positive value. The applica‐
316 tion can also fix the table size at the current value forever by
317 setting *(int*)data to a negative value, then again return a
318 positive value. A non-positive return value from the event han‐
319 dling function means that Cdt will be responsible for choosing
320 the hash table size.
321
322 #define DTOFFSET(struct_s,member)
323 This macro function computes the offset of member from the start of
324 structure struct_s. It is useful for getting the offset of a Dtlink_t
325 embedded inside an object.
326
327 #define DTDISC(disc,key,size,link,makef,freef,comparf,hashf,memoryf,eventf)
328 This macro function initializes the discipline pointed to by disc with
329 the given values.
330
331 OBJECT OPERATIONS
332 void* dtinsert(Dt_t* dt, void* obj)
333 void* dtappend(Dt_t* dt, void* obj)
334 These functions add an object prototyped by obj into dt. dtinsert()
335 and dtappend() perform the same function for all methods except for
336 Dtlist. See Dtlist for details. If there is an existing object in dt
337 matching obj and the storage method is Dtset or Dtoset, dtinsert() and
338 dtappend() will simply return the matching object. Otherwise, a new
339 object is inserted according to the method in use. See Dtdisc_t.makef
340 for object construction. The new object or a matching object as noted
341 will be returned on success while NULL is returned on error.
342
343 void* dtdelete(Dt_t* dt, void* obj)
344 If obj is NULL, methods Dtstack and Dtqueue delete respectively stack
345 top or queue head while other methods do nothing. If obj is not NULL,
346 there are two cases. If the method in use is not Dtbag or Dtobag, the
347 first object matching obj is deleted. On the other hand, if the method
348 in use is Dtbag or Dtobag, the library check to see if obj is in the
349 dictionary and delete it. If obj is not in the dictionary, some object
350 matching it will be deleted. See Dtdisc_t.freef for object destruc‐
351 tion. dtdelete() returns the deleted object (even if it was deallo‐
352 cated) or NULL on error.
353
354 void* dtattach(Dt_t* dt, void* obj)
355 This function is similar to dtinsert() but obj itself will be inserted
356 into dt even if a discipline function makef is defined.
357
358 void* dtdetach(Dt_t* dt, void* obj)
359 This function is similar to dtdelete() but the object to be deleted
360 from dt will not be freed (via the discipline freef function).
361
362 void* dtsearch(Dt_t* dt, void* obj)
363 void* dtmatch(Dt_t* dt, void* key)
364 These functions find an object matching obj or key either from dt or
365 from some dictionary accessible from dt via a viewpath (see dtview().)
366 dtsearch() and dtmatch() return the matching object or NULL on failure.
367
368 void* dtfirst(Dt_t* dt)
369 void* dtnext(Dt_t* dt, void* obj)
370 dtfirst() returns the first object in dt. dtnext() returns the object
371 following obj. Objects are ordered based on the storage method in use.
372 For Dtoset and Dtobag, objects are ordered by object comparisons. For
373 Dtstack, objects are ordered in reverse order of insertion. For
374 Dtqueue, objects are ordered in order of insertion. For Dtlist,
375 objects are ordered by list position. For Dtset and Dtbag, objects are
376 ordered by some internal order (more below). Thus, objects in a dic‐
377 tionary or a viewpath can be walked using a for(;;) loop as below.
378 for(obj = dtfirst(dt); obj; obj = dtnext(dt,obj))
379 When a dictionary uses Dtset or Dtbag, the object order is determined
380 upon a call to dtfirst()/dtlast(). This order is frozen until a call
381 dtnext()/dtprev() returns NULL or when these same functions are called
382 with a NULL object argument. It is important that a dtfirst()/dtlast()
383 call be balanced by a dtnext()/dtprev() call as described. Nested
384 loops will require multiple balancing, once per loop. If loop balanc‐
385 ing is not done carefully, either performance is degraded or unexpected
386 behaviors may result.
387
388 void* dtlast(Dt_t* dt)
389 void* dtprev(Dt_t* dt, void* obj)
390 dtlast() and dtprev() are like dtfirst() and dtnext() but work in
391 reverse order. Note that dictionaries on a viewpath are still walked
392 in order but objects in each dictionary are walked in reverse order.
393
394 void* dtfinger(Dt_t* dt)
395 This function returns the current object of dt, if any. The current
396 object is defined after a successful call to one of dtsearch(),
397 dtmatch(), dtinsert(), dtfirst(), dtnext(), dtlast(), or dtprev(). As
398 a side effect of this implementation of Cdt, when a dictionary is based
399 on Dtoset and Dtobag, the current object is always defined and is the
400 root of the tree.
401
402 void* dtrenew(Dt_t* dt, void* obj)
403 This function repositions and perhaps rehashes an object obj after its
404 key has been changed. dtrenew() only works if obj is the current
405 object (see dtfinger()).
406
407 dtwalk(Dt_t* dt, int (*userf)(Dt_t*, void*, void*), void* data)
408 This function calls (*userf)(walk,obj,data) on each object in dt and
409 other dictionaries viewable from it. walk is the dictionary containing
410 obj. If userf() returns a <0 value, dtwalk() terminates and returns
411 the same value. dtwalk() returns 0 on completion.
412
413 Dtlink_t* dtflatten(Dt_t* dt)
414 Dtlink_t* dtlink(Dt_t* dt, Dtlink_t* link)
415 void* dtobj(Dt_t* dt, Dtlink_t* link)
416 Using dtfirst()/dtnext() or dtlast()/dtprev() to walk a single dictio‐
417 nary can incur significant cost due to function calls. For efficient
418 walking of a single directory (i.e., no viewpathing), dtflatten() and
419 dtlink() can be used. Objects in dt are made into a linked list and
420 walked as follows:
421 for(link = dtflatten(dt); link; link = dtlink(dt,link) )
422
423 Note that dtflatten() returns a list of type Dtlink_t*, not void*. That
424 is, it returns a dictionary holder pointer, not a user object pointer
425 (although both are the same if the discipline field link is zero.) The
426 macro function dtlink() returns the dictionary holder object following
427 link. The macro function dtobj(dt,link) returns the user object asso‐
428 ciated with link, Beware that the flattened object list is unflattened
429 on any dictionary operations other than dtlink().
430
431 Dtlink_t* dtextract(Dt_t* dt)
432 int dtrestore(Dt_t* dt, Dtlink_t* link)
433 dtextract() extracts all objects from dt and makes it appear empty.
434 dtrestore() repopulates dt with objects previously obtained via dtex‐
435 tract(). dtrestore() will fail if dt is not empty. These functions
436 can be used to share a same dt handle among many sets of objects. They
437 are useful to reduce dictionary overhead in an application that creates
438 many concurrent dictionaries. It is important that the same discipline
439 and method are in use at both extraction and restoration. Otherwise,
440 undefined behaviors may result.
441
442 #define DTTREESEARCH(Dt_t* dt, void* obj, action)
443 #define DTTREEMATCH(Dt_t* dt, void* key, action)
444 These macro functions are analogues of dtsearch() and dtmatch() but
445 they can only be used on a dictionary based on a binary search tree,
446 i.e., Dtoset or Dtobag.
447
448 obj or key:
449 These are used to find a matching object. If there is no match,
450 the result is NULL.
451
452 action:
453 The matching object o (which may be NULL) will be processed as
454 follow:
455
456 action (o);
457
458 Since action is used verbatim, it can be any C code fragment
459 combinable with (o) to form a syntactically correct C statement.
460 For example, suppose that the matching object is an integer, the
461 below code accumulates the integer value in a variable total:
462
463 DTTREEMATCH(dt, key, total += (int));
464
465
466 DICTIONARY INFORMATION
467 Dt_t* dtvnext(Dt_t* dt)
468 This returns the dictionary that dt is viewing, if any.
469
470 int dtvcount(Dt_t* dt)
471 This returns the number of dictionaries that view dt.
472
473 Dt_t* dtvhere(Dt_t* dt)
474 This returns the dictionary v viewable from dt where an object was
475 found from the most recent search or walk operation.
476
477 int dtsize(Dt_t* dt)
478 This function returns the number of objects stored in dt.
479
480 int dtstat(Dt_t *dt, Dtstat_t* st, int all)
481 This function reports dictionary statistics. If all is non-zero, all
482 fields of st are filled. Otherwise, only the dt_type and dt_size
483 fields are filled. It returns 0 on success and -1 on error.
484
485 Dtstat_t contains the below fields:
486
487 int dt_type:
488 This is one of DT_SET, DT_BAG, DT_OSET, DT_OBAG, DT_LIST,
489 DT_STACK, and DT_QUEUE.
490
491 int dt_size:
492 This contains the number of objects in the dictionary.
493
494 int dt_n:
495 For Dtset and Dtbag, this is the number of non-empty chains in
496 the hash table. For Dtoset and Dtobag, this is the deepest
497 level in the tree (counting from zero.) Each level in the tree
498 contains all nodes of equal distance from the root node. dt_n
499 and the below two fields are undefined for other methods.
500
501 int dt_max:
502 For Dtbag and Dtset, this is the size of a largest chain. For
503 Dtoset and Dtobag, this is the size of a largest level.
504
505 int* dt_count:
506 For Dtset and Dtbag, this is the list of counts for chains of
507 particular sizes. For example, dt_count[1] is the number of
508 chains of size 1. For Dtoset and Dtobag, this is the list of
509 sizes of the levels. For example, dt_count[1] is the size of
510 level 1.
511
512 HASH FUNCTIONS
513 unsigned int dtcharhash(unsigned int h, char c)
514 unsigned int dtstrhash(unsigned int h, char* str, int n)
515 These functions compute hash values from bytes or strings.
516 dtcharhash() computes a new hash value from byte c and seed value h.
517 dtstrhash() computes a new hash value from string str and seed value h.
518 If n is positive, str is a byte array of length n; otherwise, str is a
519 null-terminated string.
520
522 Dtset and Dtbag are based on hash tables with move-to-front collision
523 chains. Dtoset and Dtobag are based on top-down splay trees. Dtlist,
524 Dtstack and Dtqueue are based on doubly linked list.
525
527 Kiem-Phong Vo, kpv@research.att.com
528
529
530
531 LIBCDT(3)