1cdb(3)                     Library Functions Manual                     cdb(3)
2
3
4

NAME

6       cdb - Constant DataBase library
7
8

SYNOPSIS

10        #include <cdb.h>
11        cc ... -lcdb
12
13

DESCRIPTION

15       cdb  is  a  library to create and access Constant DataBase files.  File
16       stores (key,value) pairs and is used to quickly find a value based on a
17       given  key.   Cdb  files  are create-once files, that is, once created,
18       file cannot be updated, only recreated from scratch -- this is why data‐
19       base is called constant.  Cdb file is optimized for quick access.  For‐
20       mat of such file is described in cdb(5) manpage.  This manual page corre‐
21       sponds to version 0.78 of tinycdb package.
22
23       Library  defines  two  non-interlaced interfaces: for querying existing
24       cdb file data (read-only mode) and for creating  such  a  file  (almost
25       write-only).  Strictly speaking, those modes allow a very limited set of
26       opposite operations as well (e.g. in query mode, it is possible to
27       update key's value).
28
29       All routines in this library are thread-safe as no global data is used,
30       except for the errno variable for error indication.
31
32       cdb datafiles may be moved between systems safely,  since  format  does
33       not depend on architecture.
34
35

QUERY MODE

37       There  are two query modes available.  First uses a structure that rep‐
38       resents a cdb database, just like FILE structure in stdio library,  and
39       another  works with plain filedescriptor.  First mode is more sophisti‐
40       cated and flexible, and  usually  somewhat  faster.   It  uses  mmap(2)
41       internally.   This mode may look more "natural" or object-oriented com‐
42       pared to second one.
43
44       The following routine works with any mode:
45
46       unsigned cdb_unpack(buf)
47          const unsigned char buf[4];
48              helper routine to convert 32-bit integer from internal represen‐
49              tation  to  machine  format.   May be used to handle application
50              integers in a portable way.  There is no error return.
51
52
53   Query Mode 1
54       All query operations in first mode deal with a common data structure,
55       struct cdb, associated with an open file descriptor.  This structure is
56       opaque to application.
57
58       The following routines exist for accessing cdb database:
59
60       int cdb_init(cdbp, fd)
61          struct cdb *cdbp;
62          int fd;
63              initializes structure given by cdbp pointer  and  associates  it
64              with opened file descriptor fd.  Memory allocation for structure
65              itself if needed and file  open  operation  should  be  done  by
66              application.   File  fd should be opened at least read-only, and
67              should be seekable.  Routine returns 0 on  success  or  negative
68              value on error.
69
70       void cdb_free(cdbp)
71          struct cdb *cdbp;
72              frees internal resources held by structure.  Note that this rou‐
73              tine does not close the file.
74
75       int cdb_fileno(cdbp)
76         const struct cdb *cdbp;
77              returns filedescriptor associated with cdb  (as  was  passed  to
78              cdb_init()).
79
80       int cdb_read(cdbp, buf, len, pos)
81       int cdb_readdata(cdbp, buf, len, pos)
82       int cdb_readkey(cdbp, buf, len, pos)
83          const struct cdb *cdbp;
84          void *buf;
85          unsigned len;
86          unsigned pos;
87              reads data from cdb file, starting at position pos  of  length
88              len, placing result to buf.  This routine may  be  used  to  get
89              actual  value found by cdb_find() or other routines that return
90              position and length of a data.  Returns 0 on success or negative
91              value  on  error.  Routines cdb_readdata() and cdb_readkey() are
92              shorthands to read current (after e.g. cdb_find()) data and  key
93              respectively, using cdb_read().
94
95       const void *cdb_get(cdbp, len, pos)
96       const void *cdb_getdata(cdbp)
97       const void *cdb_getkey(cdbp)
98          const struct cdb *cdbp;
99          unsigned len;
100          unsigned pos;
101              Internally, cdb library uses memory-mapped region to access the
102              on-disk database.  cdb_get() allows accessing internal memory in
103              a way similar to cdb_read() but without extra copying and buffer
104              allocation.  Returns pointer to actual data on success  or  NULL
105              on error (position points to outside of the database).  Routines
106              cdb_getdata() and cdb_getkey() are shorthands to access  current
107              (after   e.g.  cdb_find())  data  and  key  respectively,  using
108              cdb_get().
109
110       int cdb_find(cdbp, key, klen)
111       unsigned cdb_datapos(cdbp)
112       unsigned cdb_datalen(cdbp)
113       unsigned cdb_keypos(cdbp)
114       unsigned cdb_keylen(cdbp)
115          struct cdb *cdbp;
116          const void *key;
117          unsigned klen;
118              attempts to find a key given by (key,klen) parameters.   If  key
119              exists  in  database,  routine returns 1 and places position and
120              length of value associated with  this  key  to  internal  fields
121              inside cdbp structure, to be accessible by cdb_datapos(cdbp) and
122              cdb_datalen(cdbp)  routines.   If  key  is  not   in   database,
123              cdb_find()  returns  0.   On  error, negative value is returned.
124              Data pointers (available via  cdb_datapos()  and  cdb_datalen())
125              get updated only in case of successful search.  Note that using
126              cdb_find() it is possible to lookup only  first  record  with  a
127              given key.
128
129       int cdb_findinit(cdbfp, cdbp, key, klen)
130       int cdb_findnext(cdbfp)
131         struct cdb_find *cdbfp;
132         const struct cdb *cdbp;
133         const void *key;
134         unsigned klen;
135              sequential-find routines that use a separate structure.   It  is
136              possible to have more than one record with the  same  key  in  a
137              database, and these routines allow enumerating all of them.
138              cdb_findinit() initializes search structure pointed to by cdbfp.
139              It  will return negative value on error or non-negative value on
140              success.  cdb_findnext()  attempts  to  find  next  (first  when
141              called  right  after cdb_findinit()) matching key, setting value
142              position and length in cdbfp structure.  It will return positive
143              value if given key was found, 0 if there is no more such key(s),
144              or negative value on error.  To access value position and length
145              after  successful call to cdb_findnext() (when it returned posi‐
146              tive result), use cdb_datapos(cdbp) and  cdb_datalen(cdbp)  rou‐
147              tines.  It is an error to continue using cdb_findnext() after it
148              returned 0 or an error condition (cdb_findinit() should be called
149              again).   Current data pointers (available via cdb_datapos() and
150              cdb_datalen()) get updated only on successful search.
151
152       void cdb_seqinit(cptr, cdbp)
153       int cdb_seqnext(cptr, cdbp)
154         unsigned *cptr;
155         struct cdb *cdbp;
156              sequential enumeration  of  all  records  stored  in  cdb  file.
157              cdb_seqinit() initializes the current data pointer cptr to
158              point before the first record in a cdb file.  cdb_seqnext() updates
159              data  pointers  in  cdbp to point to the next record and updates
160              cptr, returning positive value on success, 0 on end of data con‐
161              dition  and  negative  value  on  error.  Current record will be
162              available after successful operation using cdb_datapos(cdbp) and
163              cdb_datalen(cdbp)   (for  the  data)  and  cdb_keypos(cdbp)  and
164              cdb_keylen(cdbp) (for the key of  the  record).   Data  pointers
165              get updated only in case of successful operation.
166
167
168   Query Mode 2
169       In this mode, one needs to open a cdb file using one of standard system
170       calls (such as open(2)) to obtain a filedescriptor, and then pass  that
171       filedescriptor to cdb routines.  Available methods to query a cdb data‐
172       base using only a filedescriptor include:
173
174       int cdb_seek(fd, key, klen, dlenp)
175         int fd;
176         const void *key;
177         unsigned klen;
178         unsigned *dlenp;
179              searches a cdb database (as pointed to by fd filedescriptor) for
180              a  key given by (key, klen), and positions file pointer to start
181              of data associated with that key if found,  so  that  next  read
182              operation  from  this  filedescriptor  will read that value, and
183              places length of value, in bytes,  to  variable  pointed  to  by
184              dlenp.  Returns positive value if operation was successful, 0 if
185              key was not found, or negative value on error.  To read the data
186              from a cdb file, cdb_bread() routine below can be used.
187
188       int cdb_bread(fd, buf, len)
189         int fd;
190         void *buf;
191         int len;
192              reads  data from a file (as pointed to by fd filedescriptor) and
193              places len bytes from this file to a buffer pointed to  by  buf.
194              Returns 0 if exactly len bytes were read, or a negative value in
195              case of error or end-of-file.  This  routine  ignores  interrupt
196              errors  (EINTR).   Sets errno variable to EIO in case of end-of-
197              file condition (when there is less than len bytes  available  to
198              read).
199
200
201   Notes
202       Note  that  value  of  any given key may be updated in place by another
203       value of the same size,  by  writing  to  file  at  position  found  by
204       cdb_find()  or  cdb_seek().   However  one  should be very careful when
205       doing so, since write operation may not succeed in case of  e.g.  power
206       failure,  thus  leaving  corrupted data.  When database is (re)created,
207       one can guarantee that no incorrect data will be written  to  database,
208       but  not  with  inplace  update.   Note also that it is not possible to
209       update any key or to change length of value.
210
211
212
213

CREATING MODE

215       cdb database file should usually be created in two steps: first, tempo‐
216       rary  file created and written to disk, and second, that temporary file
217       is renamed to permanent place.  Unix rename(2) call  is  atomic  opera‐
218       tion, it removes destination file if any AND renames another file in one
219       step.  This way it is guaranteed that readers will not  see  incomplete
220       database.   To  prevent multiple simultaneous updates, locking may also
221       be used.
222
223       All routines used to create cdb database  work  with  struct  cdb_make
224       object  that  is  opaque  to  application.  Application may assume that
225       struct cdb_make has at least the same member(s) as published in  struct
226       cdb above.
227
228       int cdb_make_start(cdbmp, fd)
229          struct cdb_make *cdbmp;
230          int fd;
231              initializes  structure  to create a database.  File fd should be
232              opened read-write and should be seekable.  Returns 0 on  success
233              or negative value on error.
234
235       int cdb_make_add(cdbmp, key, klen, val, vlen)
236          struct cdb_make *cdbmp;
237          const void *key, *val;
238          unsigned klen, vlen;
239              adds  record with key (key,klen) and value (val,vlen) to a data‐
240              base.  Returns 0 on success or negative value  on  error.   Note
241              that  this  routine does not check if given key already exists,
242              but cdb_find() will not see second record with the same key.  It
243              is   not   possible   to   continue   building   a  database  if
244              cdb_make_add() returned error indicator.
245
246       int cdb_make_finish(cdbmp)
247          struct cdb_make *cdbmp;
248              finalizes database file, constructing all  needed  indexes,  and
249              frees memory structures.  It does not close the filedescriptor.
250              Returns 0 on success or negative value on error.
251
252       int cdb_make_exists(cdbmp, key, klen)
253          struct cdb_make *cdbmp;
254          const void *key;
255          unsigned klen;
256              This routine attempts to find given by (key,klen) key in a  not-
257              yet-complete database.  It may significantly slow down the whole
258              process, and currently it flushes internal  buffer  to  disk  on
259              every  call  with  key  whose  hash  value already exists in db.
260              Returns 0 if such key doesn't exist, 1 if it does,  or  negative
261              value  on error.  Note that database file should be opened read-
262              write   (not   write-only)   to   use    this    routine.     If
263              cdb_make_exists() returned error, it may be not possible to con‐
264              tinue constructing database.
265
266       int cdb_make_find(cdbmp, key, klen, mode)
267          struct cdb_make *cdbmp;
268          const void *key;
269          unsigned klen;
270          int mode;
271              This routine attempts to find the key given by (key,klen) in the
272              database being created.  If the given key already exists,
273              an action specified by mode will be performed:
274
275              CDB_FIND
276                     checks whether the given record is already in the  data‐
277                     base.
278
279              CDB_FIND_REMOVE
280                     removes  all  matching records by re-writing the database
281                     file accordingly.
282
283              CDB_FIND_FILL0
284                     fills all matching records with zeros  and  removes  them
285                     from  index  so  that the records in question will not be
286                     findable  with   cdb_find().    This   is   faster   than
287                     CDB_FIND_REMOVE,  but leaves zero "gaps" in the database.
288                     Lastly inserted records, if matched, are always removed.
289
290              If no matching keys were found, routine returns 0.  In  case  at
291              least  one record has been found/removed, positive value will be
292              returned.  On error, negative value will be returned  and  errno
293              will  be set appropriately.  When cdb_make_find() returned nega‐
294              tive value in case of error, it is not possible to continue con‐
295              structing the database.
296
297              cdb_make_exists()  is  the  same as calling cdb_make_find() with
298              mode set to CDB_FIND.
299
300       int cdb_make_put(cdbmp, key, klen, val, vlen, mode)
301          struct cdb_make *cdbmp;
302          const void *key, *val;
303          unsigned klen, vlen;
304          int mode;
305              This is a somewhat combined cdb_make_exists() and cdb_make_add()
306              routines.   mode  argument controls how repeated (already exist‐
307              ing) keys will be treated:
308
309              CDB_PUT_ADD
310                     no duplicate checking will be performed.   This  mode  is
311                     the same as cdb_make_add() routine does.
312
313              CDB_PUT_REPLACE
314                     If  the  key  already exists, it will be removed from the
315                     database before adding new key,value pair.  This requires
316                     moving  data  in  the  file, and can be quite slow if the
317                     file is large.  All matching old records will be  removed
318                     this  way.   This  is the same as calling cdb_make_find()
319                     with CDB_FIND_REMOVE mode argument  followed  by  calling
320                     cdb_make_add().
321
322              CDB_PUT_REPLACE0
323                     If the key already exists and it isn't the last record in
324                     the file, old record will be zeroed out before adding new
325                     key,value    pair.     This    is    alot   faster   than
326                     CDB_PUT_REPLACE,  but  some  extra  data  will  still  be
327                     present  in the file.  The data -- old record -- will not
328                     be accessible by normal  searches,  but  will  appear  in
329                     sequential database traversal.  This is the same as call‐
330                     ing cdb_make_find()  with  CDB_FIND_FILL0  mode  argument
331                     followed by cdb_make_add().
332
333              CDB_PUT_INSERT
334                     add  key,value pair only if such key does not exists in a
335                     database.  Note that since query (see query  mode  above)
336                     will  find first added record, this mode is somewhat use‐
337                     less (but allows to  reduce  database  size  in  case  of
338                     repeated   keys).    This   is   the   same   as  calling
339                     cdb_make_exists(), followed by cdb_make_add() if the  key
340                     was not found.
341
342              CDB_PUT_WARN
343                     add  key,value  pair  unconditionally,  but also check if
344                     this  key  already  exists.   This   is   equivalent   of
345                     cdb_make_exists()  to  check  existence of the given key,
346                     unconditionally followed by cdb_make_add().
347
348              If any error occurred during operations, the routine will return
349              negative  integer and will set global variable errno to indicate
350              reason of failure.  In  case  of  successful  operation  and  no
351              duplicates found, routine will return 0.  If any duplicates have
352              been found or removed (which, in case  of  CDB_PUT_INSERT  mode,
353              indicates  that  the  new  record  was  not added), routine will
354              return positive value.  If an error occurred and  cdb_make_put()
355              returned negative error, it is not possible to continue database
356              construction process.
357
358              As with cdb_make_exists() and  cdb_make_find(),  usage  of  this
359              routine  with  any  but  CDB_PUT_ADD mode can significantly slow
360              down database creation process, especially when mode is equal to
361              CDB_PUT_REPLACE0.
362
363       void cdb_pack(num, buf)
364          unsigned num;
365          unsigned char buf[4];
366              helper routine that is used internally to convert machine integer
367              num to internal form to be stored in datafile.  32-bit integer is
368              stored in 4 bytes in little-endian byte order.  May be used to handle
369              application data.  There is no error return.
370
371       unsigned cdb_hash(buf, len)
372          const void *buf;
373          unsigned len;
374              helper routine that calculates cdb hash value  of  given  bytes.
375              CDB hash function is
376                hash[n] = (hash[n-1] + (hash[n-1] << 5)) ^ buf[n]
377              starting with
378                hash[-1] = 5381
379
380

ERRORS

382       cdb library may set errno to the following values on error:
383
384
385       EPROTO database file is corrupted in some way
386
387       EINVAL the same as EPROTO above if system lacks EPROTO constant
388
389       EINVAL flag argument for cdb_make_put() is invalid
390
391       EEXIST flag  argument  for  cdb_make_put()  is  CDB_PUT_INSERT, and key
392              already exists
393
394       ENOMEM not enough memory to  complete  operation  (cdb_make_finish  and
395              cdb_make_add)
396
397       EIO    set  by  cdb_bread  and  cdb_seek  if a cdb file is shorter than
398              expected or corrupted in some other way.
399
400

EXAMPLES

402       Note: in all examples below, error checking is not shown for brevity.
403
404
405   Query Mode
406        int fd;
407        struct cdb cdb;
408        char *key, *data;
409        unsigned keylen, datalen;
410
411        /* opening the database */
412        fd = open(filename, O_RDONLY);
413        cdb_init(&cdb, fd);
414        /* initialize key and keylen here */
415
416        /* single-record search. */
417        if (cdb_find(&cdb, key, keylen) > 0) {
418          datalen = cdb_datalen(&cdb);
419          data = malloc(datalen + 1);
420          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
421          data[datalen] = '\0';
422          printf("key=%s data=%s\n", key, data);
423          free(data);
424        }
425        else
426          printf("key=%s not found\n", key);
427
428        /* multiple record search */
429        struct cdb_find cdbf;
430        int n;
431        cdb_findinit(&cdbf, &cdb, key, keylen);
432        n = 0;
433        while(cdb_findnext(&cdbf) > 0) {
434          datalen = cdb_datalen(&cdb);
435          data = malloc(datalen + 1);
436          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
437          data[datalen] = '\0';
438          printf("key=%s data=%s\n", key, data);
439          free(data);
440          ++n;
441        }
442        printf("key=%s %d records found\n", key, n);
443
444        /* sequential database access */
445        unsigned pos;
446        int n;
447        cdb_seqinit(&pos, &cdb);
448        n = 0;
449        while(cdb_seqnext(&pos, &cdb) > 0) {
450          keylen = cdb_keylen(&cdb);
451          key = malloc(keylen + 1);
452          cdb_read(&cdb, key, keylen, cdb_keypos(&cdb));
453          key[keylen] = '\0';
454          datalen = cdb_datalen(&cdb);
455          data = malloc(datalen + 1);
456          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
457          data[datalen] = '\0';
458          ++n;
459          printf("record %d: key=%s data=%s\n", n, key, data);
460          free(data); free(key);
461        }
462        printf("total records found: %d\n", n);
463
464        /* close the database */
465        cdb_free(&cdb);
466        close(fd);
467
468        /* simplistic query mode */
469        fd = open(filename, O_RDONLY);
470        if (cdb_seek(fd, key, keylen, &datalen) > 0) {
471          data = malloc(datalen + 1);
472          cdb_bread(fd, data, datalen);
473          data[datalen] = '\0';
474          printf("key=%s data=%s\n", key, data);
475        }
476        else
477          printf("key=%s not found\n", key);
478        close(fd);
479
480
481   Create Mode
482        int fd;
483        struct cdb_make cdbm;
484        char *key, *data;
485        unsigned keylen, datalen;
486
487        /* initialize the database */
488        fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0644);
489        cdb_make_start(&cdbm, fd);
490
491        while(have_more_data()) {
492          /* initialize key and data */
493          if (cdb_make_exists(&cdbm, key, keylen) == 0)
494            cdb_make_add(&cdbm, key, keylen, data, datalen);
495          /* or use cdb_make_put() with appropriate flags */
496        }
497
498        /* finalize and close the database */
499        cdb_make_finish(&cdbm);
500        close(fd);
501
502

SEE ALSO

504       cdb(5), cdb(1), dbm(3), db(3), open(2).
505
506

AUTHOR

508       The tinycdb package was written by Michael Tokarev <mjt@corpit.ru>, based
509       on  ideas and shares file format with original cdb library by Dan Bern‐
510       stein.
511
512

LICENSE

514       Public domain.
515
516
517
518                                   Jun 2006                             cdb(3)
Impressum