1LIBEXTRACTOR(3)            Library Functions Manual            LIBEXTRACTOR(3)
2
3
4

NAME

6       libextractor - meta-information extraction library 0.5.11
7

SYNOPSIS

9       #include <extractor.h>
10
11        typedef struct EXTRACTOR_Keywords {
12          char * keyword;
13          EXTRACTOR_KeywordType keywordType;
14          struct EXTRACTOR_Keywords * next;
15        } EXTRACTOR_KeywordList;
16
17
18        EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries ();
19
20        const  char  *  EXTRACTOR_getKeywordTypeAsString (const EXTRACTOR_Key‐
21       wordType type);
22
23        EXTRACTOR_ExtractorList   *   EXTRACTOR_loadConfigLibraries   (EXTRAC‐
24       TOR_ExtractorList * prev, const char * config);
25
26        EXTRACTOR_ExtractorList   *   EXTRACTOR_addLibrary  (EXTRACTOR_Extrac‐
27       torList * prev, const char * library);
28
29        EXTRACTOR_ExtractorList * EXTRACTOR_addLibraryLast  (EXTRACTOR_Extrac‐
30       torList * prev, const char * library);
31
32        EXTRACTOR_ExtractorList  *  EXTRACTOR_removeLibrary (EXTRACTOR_Extrac‐
33       torList * prev, const char * library);
34
35        void EXTRACTOR_removeAll (EXTRACTOR_ExtractorList * prev);
36
37        EXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList
38       * extractor, const char * filename);
39
40        EXTRACTOR_KeywordList * EXTRACTOR_getKeywords2 (EXTRACTOR_ExtractorList
41       * extractor, const char * data, size_t size);
42
43        EXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords  (EXTRACTOR_Key‐
44       wordList * list);
45
46        EXTRACTOR_KeywordList   *  EXTRACTOR_removeDuplicateKeywords  (EXTRAC‐
47       TOR_KeywordList * list, const unsigned int options);
48
49        void EXTRACTOR_printKeywords (FILE * handle,  EXTRACTOR_KeywordList  *
50       keywords);
51
52        void EXTRACTOR_freeKeywords (EXTRACTOR_KeywordList * keywords);
53
54        const  char  *  EXTRACTOR_extractLast  (const  EXTRACTOR_KeywordType *
55       type, EXTRACTOR_KeywordList * keywords);
56
57        const char * EXTRACTOR_extractLastByString (const char * type, EXTRAC‐
58       TOR_KeywordList * keywords);
59
60        unsigned  int  EXTRACTOR_countKeywords  (EXTRACTOR_KeywordList  * key‐
61       words);
62
63        EXTRACTOR_DEFAULT_LIBRARIES
64
65        EXTRACTOR_VERSION
66
67

DESCRIPTION

69       libextractor is a simple library for keyword extraction.   libExtractor
70       does  not  support all formats but supports a simple plugin  mechanism
71       such that you can quickly add extractors for additional  formats,  even
72       without  recompiling  libExtractor.   libExtractor typically ships with
73       one or more helper-libraries that can be used to obtain  keywords  from
74       common  file-types.   If  you want to write your own extractor for some
75       filetype, all you need to do is write a little library that  implements
76       a single method with this signature:
77
78        EXTRACTOR_KeywordList * LIBRARYNAME_extract(const char * filename,
79                                                    char * data,
80                                                    size_t size,
81                                                    EXTRACTOR_KeywordList    *
82       prev);
83
84
85       The filename is the name of the file, data is a pointer to the contents
86       of  the file and size is the size of the file.  The extract method must
87       prepend keywords that it finds to the linked list 'prev' and return the
88       new  head.  The library must allocate (malloc) the entry in the keyword
89       list and the memory for the keyword string, since both will be freed by
90       libExtractor once the application calls EXTRACTOR_freeKeywords. An example imple‐
91       mentation can be found  in  mp3extractor.c.   The  application  extract
92       gives an example of how to use libExtractor.
93
94
95       The  basic use of libextractor is to load the plugins (for example with
96       EXTRACTOR_loadDefaultLibraries), then to extract the keyword list using
97       EXTRACTOR_getKeywords, to process the list (using application-specific
98       code and possibly some of the postprocessing convenience functions like
99       EXTRACTOR_removeDuplicateKeywords), to free the  keyword  list  (using
100       EXTRACTOR_freeKeywords) and finally to unload the plugins (with EXTRAC‐
101       TOR_removeAll).
102
103       The  keywords  obtained  from  libextractor  are  supposed  to be UTF-8
104       encoded.  The EXTRACTOR_printKeywords function converts the UTF-8  key‐
105       words  to  the  character  set  from the current locale before printing
106       them.  Plugins are supposed to convert meta-data to UTF-8 if necessary.
107

SEE ALSO

109       extract(1)
110
111

LEGAL NOTICE

113       libextractor  is  released  under  the  GPL  and  is  a  GNU   project
114       (http://www.gnu.org/).
115
116

BUGS

118       A couple of file-formats (on the order of 10^3) are not recognized...
119
120

AUTHORS

122       extract   was   originally   written   by  Christian  Grothoff  <chris‐
123       tian@grothoff.org> and Vidyut Samanta <vids@cs.ucla.edu>.  Use  <libex‐
124       tractor@gnu.org> to contact the current maintainer(s).
125
126

AVAILABILITY

128       You   can   obtain   the   original   author's   latest   version  from
129       http://gnunet.org/libextractor/.
130
131
132
133                                 Jul 14, 2005                  LIBEXTRACTOR(3)
Impressum