1dbh.h(0) DBHashTables Programmers' Manual dbh.h(0)
2
3
4
5‐
6
8 dbh.h - Dbh header file
9
11 #include <dbh.h>
12
13 cc -ldbh
14
16 Disk Based Hashtables (DBH) 64 bit
17 Library to create and manage hash tables residing on disk. Associations
18 are made between keys and values so that for a given a key the value
19 can be found and loaded into memory quickly. Being disk based allows
20 for large and persistent hashes. 64 bit support allows for hashtables
21 with sizes over 4 Gigabytes on 32 bit systems. Cuantified key genera‐
22 tion allows for minimum access time on balanced multidimensional trees.
23
24 Functionality
25 A DBHashTable provides associations between keys and values which is
26 optimized so that given a key, the associated value can be found very
27 quickly.
28
29 Note that only one hash record is loaded from disk to memory at any
30 given moment for a DBHashTable. Both keys and values should be copied
31 into the DBHashTable record, so they need not exist for the lifetime of
32 the DBHashTable. This means that the use of static strings and tempo‐
33 rary strings (i.e. those created in buffers and those returned by GTK+
34 widgets) should be copied with dbh_set_key() and dbh_set_data() into
35 the DBHashTable record before being inserted.
36
37 You must be careful to ensure that copied key length matches the
38 defined key length of the DBHashTable, and also that the copied data
39 does not exceed the maximum length of the DBHashTable record (1024
40 bytes by default, and expandable by dbh_set_size(). If the DBHashTable
41 record length is to be variable, be sure to set the appropriate length
42 before each dbh_update(), with dbh_set_recordsize(), otherwise the
43 record length need only be set before the first dbh_update().
44
45 To create a DBHashTable, use dbh_create(). To insert a key and value
46 into a DBHashTable, use dbh_update(). The DBHashTable will not be mod‐
47 ified until this command is given. All changes to the current
48 DBHashTable record only reside in memory: dbh_update() is necessary to
49 commit the changes to the DBHashTable. To lookup a value corresponding
50 to a given key, use dbh_load(). To erase and unerase a key and value,
51 use dbh_erase() dbh_unerase().
52
53 To call a function for each key and value pair (using a sweep route)
54 use dbh_foreach_sweep() and dbh_sweep(). To call a function for each
55 key and value pair (using a fanout route) use dbh_foreach_fanout() and
56 dbh_foreach_fanout(). To destroy a DBHashTable use dbh_destroy().
57
58 This is dbh version 2, incompatible with dbh version 1 files. The main
59 difference between the two version is the handling of file pointers. In
60 version 1, file pointers were 32 bits in length, while in version 2,
61 file pointers are 64 bits in length. This allows for DBHashTables with
62 sizes greater than 2 GBytes.
63
64 Cuantified numbers
65 Cuantified numbers are an alternate way to view the set of natural num‐
66 bers {1, 2, 3, ...} where order is defined in two levels. In natural
67 numbers there is only one level of order (defined by the > boolean
68 operator). In cuantified numbers the first level of order is defined by
69 the cuanta or quantity. The cuanta is obtained by adding all the digits
70 of the cuantified number.
71
72 Thus, for example, 10022, 5, 32, and 11111 are all equal at the first
73 level of order since they all add up to 5. The second level or order
74 may be obtained in different manners. In functions dbh_genkey() and
75 dbh_genkey2() the corresponding order of the natural numbers from which
76 they are associated is not conserved.
77
78 In dbh_orderkey() the corresponding order of the natural numbers from
79 which they are associated is conserved, but at a price. The base, or
80 maximum value each digit may reach, must be defined. This effectively
81 puts a limit on the number of keys which may be generated for a given
82 number of digits.
83
84 When a DBHashTable is constructed with cuantified keys, the maximum
85 amount of disk access instructions generated to access any given record
86 is equal to the cuanta of the cuantified number represented by the key.
87 This allows a DBHashTable to be constructed with minimum access time
88 across all records.
89
90
91 FILE_POINTER
92 FILE_POINTER is an architecture independent 64 bit integer type.
93
94 Structures
95 typedef struct dbh_header_t;
96 dbh_header_t is the structural information written at the first 256
97 bytes of a DBHashTable file.
98
99 typedef struct DBHashTable;
100 DBHashTable is a data structure containing the record information for
101 an open DBHashTable file.
102
103
104 Macros
105 unsigned char DBH_KEYLENGTH (DBHashTable * dbh);
106 FILE_POINTER DBH_RECORD_SIZE (DBHashTable * dbh);
107 void *DBH_KEY (DBHashTable * dbh);
108 void *DBH_DATA (DBHashTable * dbh);
109 FILE_POINTER DBH_ERASED_SPACE (DBHashTable * dbh);
110 FILE_POINTER DBH_DATA_SPACE (DBHashTable * dbh);
111 FILE_POINTER DBH_TOTAL_SPACE (DBHashTable * dbh);
112 FILE_POINTER DBH_FORMAT_SPACE (DBHashTable * dbh);
113 FILE_POINTER DBH_RECORDS (DBHashTable * dbh);
114 FILE_POINTER DBH_MAXIMUM_RECORD_SIZE (DBHashTable * dbh);
115 char *DBH_PATH (DBHashTable * dbh);
116
117 Functions
118 int dbh_close (DBHashTable *dbh);
119 int dbh_destroy (DBHashTable *dbh);
120
121 DBHashTable *dbh_open(constchar*path);
122 DBHashTable *dbh_openR(constchar*path);
123 DBHashTable *dbh_create (const char *path, unsigned char key_length);
124
125 int dbh_erase (DBHashTable *dbh);
126 int dbh_unerase (DBHashTable *dbh);
127 int dbh_prune (DBHashTable *dbh, unsigned char *key, unsigned char sub‐
128 tree_length);
129 int dbh_unprune (DBHashTable *dbh, unsigned char *key, unsigned char
130 subtree_length);
131
132 FILE_POINTER dbh_find (DBHashTable *dbh, int n);
133
134 void dbh_genkey (unsigned char *key, unsigned char length, unsigned int
135 n);
136 void dbh_genkey2 (unsigned char *key, unsigned char length, unsigned
137 int n);
138 void dbh_orderkey (unsigned char *key, unsigned char length, unsigned
139 int n, unsigned char base); sp FILE_POINTER dbh_load (DBHashTable
140 *dbh);
141 unsigned char dbh_load_address (DBHashTable *dbh, FILE_POINTERcur‐
142 rentseek);
143 FILE_POINTER dbh_load_parent (DBHashTable *dbh);
144 FILE_POINTER dbh_load_child (DBHashTable *dbh, unsigned char
145 key_index);
146
147 DBHashTable *dbh_regen_sweep (DBHashTable *dbh);
148 DBHashTable *dbh_regen_fanout (DBHashTable *dbh);
149 int dbh_settempdir (DBHashTable *dbh, char *temp_dir);
150
151 void dbh_set_data (DBHashTable *dbh, void *data, FILE_POINTER size);
152
153 void dbh_set_key (DBHashTable *dbh, unsigned char *key);
154
155 int dbh_set_size (DBHashTable *dbh, FILE_POINTERsize);
156 void dbh_set_recordsize (DBHashTable *dbh, int record_size );
157
158 int dbh_sweep (DBHashTable *dbh, DBHashFunc operate, unsigned char
159 *key1, unsigned char *key2, unsigned char ignore_portion);
160 int dbh_fanout (DBHashTable *dbh, DBHashFunc operate, unsigned char
161 *key1, unsigned char *key2, unsigned char ignore_portion);
162 int dbh_foreach_sweep (DBHashTable *dbh, DBHashFunc operate);
163 int dbh_foreach_fanout (DBHashTable *dbh, DBHashFunc operate);
164 void dbh_exit_sweep (DBHashTable *dbh);
165 void dbh_exit_fanout (DBHashTable *dbh);
166
167 FILE_POINTER dbh_update (DBHashTable *dbh);
168 int dbh_writeheader (DBHashTable *dbh);
169
170
172 dbh_macros (3), dbh_close (3), dbh_create (3), dbh_erase (3), dbh_find
173 (3), dbh_genkey [22m(3), dbh_load (3), dbh_regen_sweep (3), dbh_set_data
174 (3), dbh_set_size (3), dbh_sweep (3), dbh_update (3)
175
177 Edscott Wilson Garcia <edscott@xfce.org>
178
179
180
181DBH DBHashTables dbh.h(0)