1ODEUM(3)                    Quick Database Manager                    ODEUM(3)
2
3
4

NAME

6       Odeum - the inverted API of QDBM
7
8

SYNOPSIS

10       #include <depot.h>
11       #include <cabin.h>
12       #include <odeum.h>
13       #include <stdlib.h>
14
15       typedef struct { int id; int score; } ODPAIR;
16
17       ODEUM *odopen(const char *name, int omode);
18
19       int odclose(ODEUM *odeum);
20
21       int odput(ODEUM *odeum, const ODDOC *doc, int wmax, int over);
22
23       int odout(ODEUM *odeum, const char *uri);
24
25       int odoutbyid(ODEUM *odeum, int id);
26
27       ODDOC *odget(ODEUM *odeum, const char *uri);
28
29       ODDOC *odgetbyid(ODEUM *odeum, int id);
30
31       int odgetidbyuri(ODEUM *odeum, const char *uri);
32
33       int odcheck(ODEUM *odeum, int id);
34
35       ODPAIR *odsearch(ODEUM *odeum, const char *word, int max, int *np);
36
37       int odsearchdnum(ODEUM *odeum, const char *word);
38
39       int oditerinit(ODEUM *odeum);
40
41       ODDOC *oditernext(ODEUM *odeum);
42
43       int odsync(ODEUM *odeum);
44
45       int odoptimize(ODEUM *odeum);
46
47       char *odname(ODEUM *odeum);
48
49       double odfsiz(ODEUM *odeum);
50
51       int odbnum(ODEUM *odeum);
52
53       int odbusenum(ODEUM *odeum);
54
55       int oddnum(ODEUM *odeum);
56
57       int odwnum(ODEUM *odeum);
58
59       int odwritable(ODEUM *odeum);
60
61       int odfatalerror(ODEUM *odeum);
62
63       int odinode(ODEUM *odeum);
64
65       time_t odmtime(ODEUM *odeum);
66
67       int odmerge(const char *name, const CBLIST *elemnames);
68
69       int odremove(const char *name);
70
71       ODDOC *oddocopen(const char *uri);
72
73       void oddocclose(ODDOC *doc);
74
75       void oddocaddattr(ODDOC *doc, const char *name, const char *value);
76
77       void oddocaddword(ODDOC *doc, const char *normal, const char *asis);
78
79       int oddocid(const ODDOC *doc);
80
81       const char *oddocuri(const ODDOC *doc);
82
83       const char *oddocgetattr(const ODDOC *doc, const char *name);
84
85       const CBLIST *oddocnwords(const ODDOC *doc);
86
87       const CBLIST *oddocawords(const ODDOC *doc);
88
89       CBMAP *oddocscores(const ODDOC *doc, int max, ODEUM *odeum);
90
91       CBLIST *odbreaktext(const char *text);
92
93       char *odnormalizeword(const char *asis);
94
95       ODPAIR  *odpairsand(ODPAIR *apairs, int anum, ODPAIR *bpairs, int bnum,
96       int *np);
97
98       ODPAIR *odpairsor(ODPAIR *apairs, int anum, ODPAIR *bpairs,  int  bnum,
99       int *np);
100
101       ODPAIR  *odpairsnotand(ODPAIR  *apairs,  int  anum, ODPAIR *bpairs, int
102       bnum, int *np);
103
104       void odpairssort(ODPAIR *pairs, int pnum);
105
106       double odlogarithm(double x);
107
108       double odvectorcosine(const int *avec, const int *bvec, int vnum);
109
110       void odsettuning(int ibnum, int idnum, int cbnum, int csiz);
111
112       void odanalyzetext(ODEUM *odeum,  const  char  *text,  CBLIST  *awords,
113       CBLIST *nwords);
114
115       void  odsetcharclass(ODEUM  *odeum,  const char *spacechars, const char
116       *delimchars, const char *gluechars);
117
118       ODPAIR *odquery(ODEUM  *odeum,  const  char  *query,  int  *np,  CBLIST
119       *errors);
120
121

DESCRIPTION

123       Odeum is the API which handles an inverted index.  An inverted index is
124       a data structure to retrieve a list of some documents that include  one
125       of  words  which  were extracted from a population of documents.  It is
126       easy to realize a full-text  search  system  with  an  inverted  index.
127       Odeum  provides  an abstract data structure which consists of words and
128       attributes of a document.  It is used when an application stores a doc‐
129       ument  into a database and when an application retrieves some documents
130       from a database.
131
132       Odeum does not provide methods to extract the text  from  the  original
133       data  of  a  document.   It  should  be  implemented  by  applications.
134       Although Odeum provides utilities to extract words from a text,  it  is
135       oriented to such languages whose words are separated with space charac‐
136       ters as English.  If an application handles such languages  which  need
137       morphological  analysis or N-gram analysis as Japanese, or if an appli‐
138       cation performs more such rarefied analysis  of  natural  languages  as
139       stemming, its own analyzing method can be adopted.  The result of a search
140       is expressed as an array containing elements which are structures composed
141       of  the ID number of a document and its score.  In order to search with
142       two or more words, Odeum provides utilities of set operations.
143
144       Odeum is implemented, based on Curia, Cabin, and Villa.  Odeum  creates
145       a  database  with  a directory name.  Some databases of Curia and Villa
146       are placed in the specified  directory.   For  example,  `casket/docs',
147       `casket/index', and `casket/rdocs' are created in the case that a data‐
148       base directory is named `casket'.  `docs' is a  database  directory  of
149       Curia.   The key of each record is the ID number of a document, and the
150       value is such attributes as URI.  `index' is a  database  directory  of
151       Curia.   The  key  of each record is the normalized form of a word, and
152       the value is an array whose element is a pair of the  ID  number  of  a
153       document  including the word and its score.  `rdocs' is a database file
154       of Villa.  The key of each record is the URI of  a  document,  and  the
155       value is its ID number.
156
157       In  order  to  use  Odeum,  you  should  include  `depot.h', `cabin.h',
158       `odeum.h' and `stdlib.h' in the source files.  Usually,  the  following
159       description will be near the beginning of a source file.
160
161              #include <depot.h>
162              #include <cabin.h>
163              #include <odeum.h>
164              #include <stdlib.h>
165
166       A  pointer  to `ODEUM' is used as a database handle.  A database handle
167       is opened with the function `odopen' and closed  with  `odclose'.   You
168       should  not  refer  directly  to  any member of the handle.  If a fatal
169       error occurs in a database, any access method  via  the  handle  except
170       `odclose' will not work and return error status.  Although a process is
171       allowed to use multiple database handles at the same time,  handles  of
172       the same database file should not be used.
173
174       A  pointer  to `ODDOC' is used as a document handle.  A document handle
175       is opened with the function `oddocopen' and closed  with  `oddocclose'.
176       You  should not refer directly to any member of the handle.  A document
177       consists of attributes and words.  Each word is expressed as a pair  of
178       a normalized form and an appearance form.
179
180       Odeum also assigns the external variable `dpecode' with the error code.
181       The function `dperrmsg' is used in order to  get  the  message  of  the
182       error code.
183
184       Structures  of  `ODPAIR'  type  are  used  in order to handle results of
185       search.
186
187       typedef struct { int id; int score; } ODPAIR;
188              `id' specifies the ID number of a document.   `score'  specifies
189              the  score  calculated from the number of searching words in the
190              document.
191
192       The function `odopen' is used in order to get a database handle.
193
194       ODEUM *odopen(const char *name, int omode);
195              `name' specifies the name  of  a  database  directory.   `omode'
196              specifies   the  connection  mode:  `OD_OWRITER'  as  a  writer,
197              `OD_OREADER' as a reader.  If the mode is `OD_OWRITER', the fol‐
198              lowing  may  be added by bitwise or: `OD_OCREAT', which means it
199              creates a new database if one does not exist, `OD_OTRUNC', which means it
200              creates  a  new  database  regardless  if  one  exists.  Both of
201              `OD_OREADER' and `OD_OWRITER' can be added  to  by  bitwise  or:
202              `OD_ONOLCK',  which  means it opens a database directory without
203              file locking, or `OD_OLCKNB', which means locking  is  performed
204              without  blocking.   The  return value is the database handle or
205              `NULL' if it is not successful.  While connecting as  a  writer,
206              an  exclusive  lock is invoked to the database directory.  While
207              connecting as a reader, a shared lock is invoked to the database
208              directory.   The  thread  blocks until the lock is acquired.  If
209              `OD_ONOLCK' is used, the application is responsible  for  exclu‐
210              sion control.
211
212       The function `odclose' is used in order to close a database handle.
213
214       int odclose(ODEUM *odeum);
215              `odeum'  specifies a database handle.  If successful, the return
216              value is true, else, it is  false.   Because  the  region  of  a
217              closed handle is released, it becomes impossible to use the han‐
218              dle.  Updating a database is assured to be written when the han‐
219              dle  is closed.  If a writer opens a database but does not close
220              it appropriately, the database will be broken.
221
222       The function `odput' is used in order to store a document.
223
224       int odput(ODEUM *odeum, const ODDOC *doc, int wmax, int over);
225              `odeum' specifies a  database  handle  connected  as  a  writer.
226              `doc'  specifies  a  document  handle.  `wmax' specifies the max
227              number of words to be stored in the document database.  If it is
228              negative, the number is unlimited.  `over' specifies whether the
229              data of the duplicated document is overwritten or not.  If it is
230              false  and  the  URI of the document is duplicated, the function
231              returns as an error.  If successful, the return value  is  true,
232              else, it is false.
233
234       The function `odout' is used in order to delete a document specified by
235       a URI.
236
237       int odout(ODEUM *odeum, const char *uri);
238              `odeum' specifies a  database  handle  connected  as  a  writer.
239              `uri'  specifies  the  string of the URI of a document.  If suc‐
240              cessful, the return value is true, else, it is false.  False  is
241              returned when no document corresponds to the specified URI.
242
243       The  function  `odoutbyid' is used in order to delete a document speci‐
244       fied by an ID number.
245
246       int odoutbyid(ODEUM *odeum, int id);
247              `odeum' specifies a database handle connected as a writer.  `id'
248              specifies  the  ID  number  of  a  document.  If successful, the
249              return value is true, else, it is false.  False is returned when
250              no document corresponds to the specified ID number.
251
252       The  function `odget' is used in order to retrieve a document specified
253       by a URI.
254
255       ODDOC *odget(ODEUM *odeum, const char *uri);
256              `odeum' specifies a database handle.  `uri' specifies the string
257              of  the  URI  of a document.  If successful, the return value is
258              the handle of the corresponding document, else,  it  is  `NULL'.
259              `NULL' is returned when no document corresponds to the specified
260              URI.  Because the handle of the return value is opened with  the
261              function  `oddocopen',  it  should  be  closed with the function
262              `oddocclose'.
263
264       The function `odgetbyid' is used in order to retrieve a document by  an
265       ID number.
266
267       ODDOC *odgetbyid(ODEUM *odeum, int id);
268              `odeum' specifies a database handle.  `id' specifies the ID num‐
269              ber of a document.  If successful, the return value is the  han‐
270              dle  of  the corresponding document, else, it is `NULL'.  `NULL'
271              is returned when no document corresponds  to  the  specified  ID
272              number.   Because  the handle of the return value is opened with
273              the function `oddocopen', it should be closed with the  function
274              `oddocclose'.
275
276       The  function `odgetidbyuri' is used in order to retrieve the ID of the
277       document specified by a URI.
278
279       int odgetidbyuri(ODEUM *odeum, const char *uri);
280              `odeum' specifies a database handle.  `uri' specifies the string
281              of the URI of a document.  If successful, the return value is the
282              ID number of the document, else, it is -1.  -1 is returned  when
283              no document corresponds to the specified URI.
284
285       The  function  `odcheck' is used in order to check whether the document
286       specified by an ID number exists.
287
288       int odcheck(ODEUM *odeum, int id);
289              `odeum' specifies a database handle.  `id' specifies the ID num‐
290              ber  of  a  document.   The return value is true if the document
291              exists, else, it is false.
292
293       The function `odsearch' is used in order to search the  inverted  index
294       for documents including a particular word.
295
296       ODPAIR *odsearch(ODEUM *odeum, const char *word, int max, int *np);
297              `odeum' specifies a database handle.  `word' specifies a search‐
298              ing word.  `max' specifies the max number  of  documents  to  be
299              retrieved.  `np' specifies the pointer to a variable to which the
300              number of the elements of the return value is assigned.  If suc‐
301              cessful,  the  return value is the pointer to an array, else, it
302              is `NULL'.  Each element of the array is a pair of the ID number
303              and  the  score of a document, and sorted in descending order of
304              their scores.  Even if no document corresponds to the  specified
305              word, it is not an error but returns a dummy array.  Because the
306              region of the return value is allocated with the `malloc'  call,
307              it should be released with the `free' call if it is no longer in
308              use.  Note that each element of the array of  the  return  value
309              can be data of a deleted document.
310
311       The function `odsearchdnum' is used in order to get the number of docu‐
312       ments including a word.
313
314       int odsearchdnum(ODEUM *odeum, const char *word);
315              `odeum' specifies a database handle.  `word' specifies a search‐
316              ing word.  If successful, the return value is the number of doc‐
317              uments including the word, else, it is -1.  Because  this  func‐
318              tion  does  not  read  the  entity  of the inverted index, it is
319              faster than `odsearch'.
320
321       The function `oditerinit' is used in order to initialize  the  iterator
322       of a database handle.
323
324       int oditerinit(ODEUM *odeum);
325              `odeum'  specifies a database handle.  If successful, the return
326              value is true, else, it is false.  The iterator is used in order
327              to access every document stored in a database.
328
329       The  function  `oditernext' is used in order to get the next document of the
330       iterator.
331
332       ODDOC *oditernext(ODEUM *odeum);
333              `odeum' specifies a database handle.  If successful, the  return
334              value  is  the  handle of the next document, else, it is `NULL'.
335              `NULL' is returned when no document is to  be  got  out  of  the
336              iterator.   It is possible to access every document by iteration
337              of calling this function.  However, it is not assured if the data‐
338              base is updated during the iteration.  Besides, the
339              order of this traversal access method is arbitrary, so it is not
340              assured  that the order of string matches the one of the traver‐
341              sal access.  Because the handle of the return  value  is  opened
342              with  the  function  `oddocopen',  it  should be closed with the
343              function `oddocclose'.
344
345       The function `odsync' is used in order to synchronize updating contents
346       with the files and the devices.
347
348       int odsync(ODEUM *odeum);
349              `odeum'  specifies  a database handle connected as a writer.  If
350              successful, the return value is true, else, it is  false.   This
351              function is useful when another process uses the connected data‐
352              base directory.
353
354       The function `odoptimize' is used in order to optimize a database.
355
356       int odoptimize(ODEUM *odeum);
357              `odeum' specifies a database handle connected as a  writer.   If
358              successful,  the  return value is true, else, it is false.  Ele‐
359              ments of the deleted documents in the inverted index are purged.
360
361       The function `odname' is used in order to get the name of a database.
362
363       char *odname(ODEUM *odeum);
364              `odeum' specifies a database handle.  If successful, the  return
365              value  is the pointer to the region of the name of the database,
366              else, it is `NULL'.  Because the region of the return  value  is
367              allocated with the `malloc' call, it should be released with the
368              `free' call if it is no longer in use.
369
370       The function `odfsiz' is used in order to get the total size  of  data‐
371       base files.
372
373       double odfsiz(ODEUM *odeum);
374              `odeum'  specifies a database handle.  If successful, the return
375              value is the total size of the database files, else, it is -1.0.
376
377       The function `odbnum' is used in order to get the total number  of  the
378       elements of the bucket arrays in the inverted index.
379
380       int odbnum(ODEUM *odeum);
381              `odeum'  specifies a database handle.  If successful, the return
382              value is the total number of the elements of the bucket  arrays,
383              else, it is -1.
384
385       The  function  `odbusenum'  is used in order to get the total number of
386       the used elements of the bucket arrays in the inverted index.
387
388       int odbusenum(ODEUM *odeum);
389              `odeum' specifies a database handle.  If successful, the  return
390              value  is  the  total  number of the used elements of the bucket
391              arrays, else, it is -1.
392
393       The function `oddnum' is used in order to get the number of  the  docu‐
394       ments stored in a database.
395
396       int oddnum(ODEUM *odeum);
397              `odeum'  specifies a database handle.  If successful, the return
398              value is the number of the documents  stored  in  the  database,
399              else, it is -1.
400
401       The  function  `odwnum' is used in order to get the number of the words
402       stored in a database.
403
404       int odwnum(ODEUM *odeum);
405              `odeum' specifies a database handle.  If successful, the  return
406              value  is  the number of the words stored in the database, else,
407              it is -1.  Because of the I/O buffer, the return  value  may  be
408              less than the actual number.
409
410       The  function `odwritable' is used in order to check whether a database
411       handle is a writer or not.
412
413       int odwritable(ODEUM *odeum);
414              `odeum' specifies a database handle.  The return value  is  true
415              if the handle is a writer, false if not.
416
417       The  function  `odfatalerror' is used in order to check whether a data‐
418       base has a fatal error or not.
419
420       int odfatalerror(ODEUM *odeum);
421              `odeum' specifies a database handle.  The return value  is  true
422              if the database has a fatal error, false if not.
423
424       The  function  `odinode'  is used in order to get the inode number of a
425       database directory.
426
427       int odinode(ODEUM *odeum);
428              `odeum' specifies a database handle.  The return  value  is  the
429              inode number of the database directory.
430
431       The  function  `odmtime' is used in order to get the last modified time
432       of a database.
433
434       time_t odmtime(ODEUM *odeum);
435              `odeum' specifies a database handle.  The return  value  is  the
436              last modified time of the database.
437
438       The function `odmerge' is used in order to merge plural database direc‐
439       tories.
440
441       int odmerge(const char *name, const CBLIST *elemnames);
442              `name' specifies the name of a  database  directory  to  create.
443              `elemnames'  specifies a list of names of element databases.  If
444              successful, the return value is true, else, it is false.  If two
445              or more documents which have the same URI come in, the first one
446              is adopted and the others are ignored.
447
448       The function `odremove' is used in order to remove  a  database  direc‐
449       tory.
450
451       int odremove(const char *name);
452              `name'  specifies the name of a database directory.  If success‐
453              ful, the return value is true, else, it is  false.   A  database
454              directory  can contain databases of other APIs of QDBM; they are
455              also removed by this function.
456
457       The function `oddocopen' is used in order to get a document handle.
458
459       ODDOC *oddocopen(const char *uri);
460              `uri' specifies the URI of a document.  The return  value  is  a
461              document  handle.   The  ID  number  of  a  new  document is not
462              defined.  It is defined when the document is stored in  a  data‐
463              base.
464
465       The function `oddocclose' is used in order to close a document handle.
466
467       void oddocclose(ODDOC *doc);
468              `doc'  specifies  a  document  handle.   Because the region of a
469              closed handle is released, it becomes impossible to use the han‐
470              dle.
471
472       The  function  `oddocaddattr' is used in order to add an attribute to a
473       document.
474
475       void oddocaddattr(ODDOC *doc, const char *name, const char *value);
476              `doc' specifies a document handle.  `name' specifies the  string
477              of  the  name  of an attribute.  `value' specifies the string of
478              the value of the attribute.
479
480       The function `oddocaddword' is used in order to add a word to  a  docu‐
481       ment.
482
483       void oddocaddword(ODDOC *doc, const char *normal, const char *asis);
484              `doc'  specifies  a  document  handle.   `normal'  specifies the
485              string of the normalized form of a word.  Normalized  forms  are
486              treated  as  keys of the inverted index.  If the normalized form
487              of a word is an empty string, the word is not reflected  in  the
488              inverted  index.   `asis' specifies the string of the appearance
489              form of the word.  Appearance forms are used after the  document
490              is retrieved by an application.
491
492       The function `oddocid' is used in order to get the ID number of a docu‐
493       ment.
494
495       int oddocid(const ODDOC *doc);
496              `doc' specifies a document handle.  The return value is  the  ID
497              number of a document.
498
499       The function `oddocuri' is used in order to get the URI of a document.
500
501       const char *oddocuri(const ODDOC *doc);
502              `doc'  specifies  a  document  handle.   The return value is the
503              string of the URI of a document.
504
505       The function `oddocgetattr' is used in order to get  the  value  of  an
506       attribute of a document.
507
508       const char *oddocgetattr(const ODDOC *doc, const char *name);
509              `doc'  specifies a document handle.  `name' specifies the string
510              of the name of an attribute.  The return value is the string  of
511              the  value  of  the  attribute, or `NULL' if no attribute corre‐
512              sponds.
513
514       The function `oddocnwords' is used in order to get the list handle con‐
515       taining words in normalized form of a document.
516
517       const CBLIST *oddocnwords(const ODDOC *doc);
518              `doc' specifies a document handle.  The return value is the list
519              handle containing words in normalized form.
520
521       The function `oddocawords' is used in order to get the list handle con‐
522       taining words in appearance form of a document.
523
524       const CBLIST *oddocawords(const ODDOC *doc);
525              `doc' specifies a document handle.  The return value is the list
526              handle containing words in appearance form.
527
528       The function `oddocscores' is used in order to get the map handle  con‐
529       taining keywords in normalized form and their scores.
530
531       CBMAP *oddocscores(const ODDOC *doc, int max, ODEUM *odeum);
532              `doc' specifies a document handle.  `max' specifies the max num‐
533              ber of keywords to get.  `odeum'  specifies  a  database  handle
534              with which the IDF for weighting is calculated.  If it is `NULL',
535              it is not used.  The return value is the map handle containing
536              keywords  and  their  scores.   Scores  are expressed as decimal
537              strings.  Because the handle of the return value is opened  with
538              the  function `cbmapopen', it should be closed with the function
539              `cbmapclose' if it is no longer in use.
540
541       The function `odbreaktext' is used in order to break a text into  words
542       in appearance form.
543
544       CBLIST *odbreaktext(const char *text);
545              `text'  specifies the string of a text.  The return value is the
546              list handle containing words in appearance form.  Words are sepa‐
547              rated with space characters and such delimiters as period, comma
548              and so on.  Because the handle of the  return  value  is  opened
549              with  the  function  `cblistopen',  it should be closed with the
550              function `cblistclose' if it is no longer in use.
551
552       The function `odnormalizeword' is used in order to make the  normalized
553       form of a word.
554
555       char *odnormalizeword(const char *asis);
556              `asis'  specifies  the  string of the appearance form of a word.
557              The return value is the string of the normalized  form  of  the
558              word.  Alphabets of the ASCII code are unified into lower cases.
559              Words composed of only delimiters are treated as empty  strings.
560              Because  the  region  of  the return value is allocated with the
561              `malloc' call, it should be released with the `free' call if  it
562              is no longer in use.
563
564       The  function  `odpairsand' is used in order to get the common elements
565       of two sets of documents.
566
567       ODPAIR *odpairsand(ODPAIR *apairs, int anum, ODPAIR *bpairs, int  bnum,
568       int *np);
569              `apairs'  specifies  the  pointer  to the former document array.
570              `anum' specifies the number of the elements of the former  docu‐
571              ment  array.  `bpairs' specifies the pointer to the latter docu‐
572              ment array.  `bnum' specifies the number of the elements of  the
573              latter document array.  `np' specifies the pointer to a variable
574              to which the number of the  elements  of  the  return  value  is
575              assigned.   The  return  value  is the pointer to a new document
576              array whose elements commonly belong to the specified two  sets.
577              Elements  of  the  array are sorted in descending order of their
578              scores.  Because the region of the  return  value  is  allocated
579              with  the  `malloc'  call, it should be released with the `free'
580              call if it is no longer in use.
581
582       The function `odpairsor' is used in order to get the sum of elements of
583       two sets of documents.
584
585       ODPAIR  *odpairsor(ODPAIR  *apairs, int anum, ODPAIR *bpairs, int bnum,
586       int *np);
587              `apairs' specifies the pointer to  the  former  document  array.
588              `anum'  specifies the number of the elements of the former docu‐
589              ment array.  `bpairs' specifies the pointer to the latter  docu‐
590              ment  array.  `bnum' specifies the number of the elements of the
591              latter document array.  `np' specifies the pointer to a variable
592              to  which  the  number  of  the  elements of the return value is
593              assigned.  The return value is the pointer  to  a  new  document
594              array  whose  elements belong to both or either of the specified
595              two sets.  Elements of the array are sorted in descending  order
596              of  their  scores.   Because  the  region of the return value is
597              allocated with the `malloc' call, it should be released with the
598              `free' call if it is no longer in use.
599
600       The function `odpairsnotand' is used in order to get the difference set
601       of documents.
602
603       ODPAIR *odpairsnotand(ODPAIR *apairs, int  anum,  ODPAIR  *bpairs,  int
604       bnum, int *np);
605              `apairs'  specifies  the  pointer  to the former document array.
606              `anum' specifies the number of the elements of the former  docu‐
607              ment  array.  `bpairs' specifies the pointer to the latter docu‐
608              ment array of the sum of elements.  `bnum' specifies the  number
609              of  the  elements  of the latter document array.  `np' specifies
610              the pointer to a variable to which the number of the elements of
611              the  return  value is assigned.  The return value is the pointer
612              to a new document array whose elements belong to the former  set
613              but  not to the latter set.  Elements of the array are sorted in
614              descending order of their scores.  Because  the  region  of  the
615              return  value  is allocated with the `malloc' call, it should be
616              released with the `free' call if it is no longer in use.
617
618       The function `odpairssort' is used in order to sort a set of  documents
619       in descending order of scores.
620
621       void odpairssort(ODPAIR *pairs, int pnum);
622              `pairs' specifies the pointer to a document array.  `pnum' spec‐
623              ifies the number of the elements of the document array.
624
625       The function `odlogarithm' is used in order to get  the  natural  loga‐
626       rithm of a number.
627
628       double odlogarithm(double x);
629              `x'  specifies  a number.  The return value is the natural loga‐
630              rithm of the number.  If the number is equal  to  or  less  than
631              1.0,  the  return value is 0.0.  This function is useful when an
632              application calculates the IDF of search results.
633
634       The function `odvectorcosine' is used in order to get the cosine of the
635       angle of two vectors.
636
637       double odvectorcosine(const int *avec, const int *bvec, int vnum);
638              `avec'  specifies  the  pointer to one array of numbers.  `bvec'
639              specifies the pointer to the other  array  of  numbers.   `vnum'
640              specifies  the  number  of  elements  of each array.  The return
641              value is the cosine of the angle of two vectors.  This  function
642              is  useful  when  an  application calculates similarity of docu‐
643              ments.
644
645       The function `odsettuning' is used in order to set  the  global  tuning
646       parameters.
647
648       void odsettuning(int ibnum, int idnum, int cbnum, int csiz);
649              `ibnum'  specifies  the  number of buckets for inverted indexes.
650              `idnum'  specifies  the division number of the inverted index.
651              `cbnum'  specifies  the  number  of  buckets  for dirty buffers.
652              `csiz' specifies the maximum number of bytes of memory to use for dirty buf‐
653              fers.   The default setting is equivalent to `odsettuning(32749,
654              7, 262139, 8388608)'.  This function  should  be  called  before
655              opening a handle.
656
657       The  function  `odanalyzetext'  is  used  in order to break a text into
658       words and store appearance forms and normalized forms into lists.
659
660       void odanalyzetext(ODEUM *odeum,  const  char  *text,  CBLIST  *awords,
661       CBLIST *nwords);
662              `odeum'  specifies  a  database  handle.   `text'  specifies the
663              string of a text.  `awords' specifies a list handle  into  which
664              appearance forms are stored.  `nwords' specifies a list handle into
665              which normalized forms are stored.  If it is `NULL', it is ignored.
666              Words are separated with space characters and such delimiters as
667              period, comma and so on.
668
669       The function `odsetcharclass' is used in order to set  the  classes  of
670       characters used by `odanalyzetext'.
671
672       void  odsetcharclass(ODEUM  *odeum,  const char *spacechars, const char
673       *delimchars, const char *gluechars);
674              `odeum' specifies a database handle.  `spacechars'  specifies  a
675              string  containing  space  characters.   `delimchars'  specifies a
676              string containing delimiter characters.  `gluechars'  specifies  a
677              string containing glue characters.
678
679       The  function  `odquery'  is  used in order to query a database using a
680       small boolean query language.
681
682       ODPAIR *odquery(ODEUM  *odeum,  const  char  *query,  int  *np,  CBLIST
683       *errors);
684              `odeum' specifies a database handle.  `query' specifies the text
685              of the query.  `np' specifies the pointer to a variable to which
686              the  number  of  the  elements  of the return value is assigned.
687              `errors' specifies a list handle into which error  messages  are
688              stored.   If  it  is  `NULL', it is ignored.  If successful, the
689              return value is the pointer to an array,  else,  it  is  `NULL'.
690              Each  element  of  the  array is a pair of the ID number and the
691              score of a document, and sorted in  descending  order  of  their
692              scores.  Even if no document corresponds to the specified condi‐
693              tion, it is not an error but returns a dummy array.   Because  the
694              region  of the return value is allocated with the `malloc' call,
695              it should be released with the `free' call if it is no longer in
696              use.   Note  that  each element of the array of the return value
697              can be data of a deleted document.
698
699       If QDBM was built  with  POSIX  thread  enabled,  the  global  variable
700       `dpecode'  is  treated  as thread specific data, and functions of Odeum
701       are reentrant.  In that case, they are thread-safe as long as a  handle
702       is  not  accessed  by  threads at the same time, on the assumption that
703       `errno', `malloc', and so on are thread-safe.
704
705       If QDBM was built with ZLIB enabled, records in the database for  docu‐
706       ment attributes are compressed.  In that case, the size of the database
707       is reduced to 30% or less.  Thus, you should enable  ZLIB  if  you  use
708       Odeum.   A database of Odeum created without ZLIB enabled is not avail‐
709       able in an environment with ZLIB enabled, and vice versa.  If ZLIB was
710       not enabled but LZO was, LZO is used instead.
711
712       The  query  language of the function `odquery' is a basic language fol‐
713       lowing this grammar:
714
715              expr ::= subexpr ( op subexpr )*
716              subexpr ::= WORD
717              subexpr ::= LPAREN expr RPAREN
718
719       Operators are "&" (AND), "|" (OR),  and  "!"  (NOTAND).   You  can  use
720       parentheses  to group sub-expressions together in order to change the order
721       of operations.  The given query is broken up using the function `odana‐
722       lyzetext',  so  if  you  want to specify different text breaking rules,
723       then make sure that you at least set "&", "|", "!", "(", and ")" to  be
724       delimiter  characters.   Consecutive  words  are  treated  as having an
725       implicit "&" operator between them, so "zed shaw" is  actually  "zed  &
726       shaw".
727
728       The  encoding of the query text should be the same as the encoding of
729       target documents.  Moreover, each of space characters, delimiter  char‐
730       acters, and glue characters should be single byte.
731
732

SEE ALSO

734       qdbm(3),  depot(3),  curia(3),  relic(3), hovel(3), cabin(3), villa(3),
735       ndbm(3), gdbm(3)
736
737
738
739Man Page                          2004-04-22                          ODEUM(3)
Impressum