1LIBEXTRACTOR(3)            Library Functions Manual            LIBEXTRACTOR(3)
2
3
4

NAME

6       libextractor - meta-information extraction library 1.0.0
7

SYNOPSIS

9       #include <extractor.h>
10
11       const   char   *EXTRACTOR_metatype_to_string  (enum  EXTRACTOR_MetaType
12       type);
13
14       const char *EXTRACTOR_metatype_to_description (enum  EXTRACTOR_MetaType
15       type);
16
17       enum EXTRACTOR_MetaType EXTRACTOR_metatype_get_max (void);
18
19       struct    EXTRACTOR_PluginList   *EXTRACTOR_plugin_add_defaults   (enum
20       EXTRACTOR_Options flags);
21
22       struct  EXTRACTOR_PluginList  *EXTRACTOR_plugin_add   (struct   EXTRAC‐
23       TOR_PluginList  *prev,  const  char *library, const char *options, enum
24       EXTRACTOR_Options flags);
25
26
27       struct EXTRACTOR_PluginList *EXTRACTOR_plugin_add_last (struct  EXTRAC‐
28       TOR_PluginList  *prev,  const  char *library, const char *options, enum
29       EXTRACTOR_Options flags);
30
31       struct   EXTRACTOR_PluginList   *EXTRACTOR_plugin_add_config    (struct
32       EXTRACTOR_PluginList  *prev, const char *config, enum EXTRACTOR_Options
33       flags);

34       struct EXTRACTOR_PluginList *EXTRACTOR_plugin_remove (struct EXTRACTOR_PluginList *prev, const char *library);
35
36       void  EXTRACTOR_plugin_remove_all  (struct  EXTRACTOR_PluginList *plug‐
37       ins);
38
39       void EXTRACTOR_extract  (struct  EXTRACTOR_PluginList  *plugins,  const
40       char  *filename,  const void *data, size_t size, EXTRACTOR_MetaDataPro‐
41       cessor proc, void *proc_cls);
42
43       int EXTRACTOR_meta_data_print (void *handle, const  char  *plugin_name,
44       enum  EXTRACTOR_MetaType  type, enum EXTRACTOR_MetaFormat format, const
45       char *data_mime_type, const char *data, size_t data_len);
46
47       EXTRACTOR_VERSION
48
49

DESCRIPTION

51       GNU libextractor is a simple library for  keyword  extraction.   libex‐
52       tractor  does  not  support  all formats but supports a simple plugging
53       mechanism such that you can quickly add extractors for additional  for‐
54       mats,  even  without  recompiling libextractor.  libextractor typically
55       ships with dozens of plugins that can be used to obtain meta data  from
56       common file-types.  If you want to write your own plugin for some file‐
57       type, all you need to do is write a little library  that  implements  a
58       single method with this signature:
59
60        void   EXTRACTOR_XXX_extract_method  (struct  EXTRACTOR_ExtractContext
61       *ec);
62
63
64       ec contains function pointers for reading, seeking, getting the overall
65       file  size and returning meta data.  There is also a field with options
66       for the plugin.  New plugins will be  automatically  located  and  used
67       once  they  are  installed in the respective directory (typically some‐
68       thing like /usr/lib/libextractor/).
69
70       The application extract gives an example of how to use libextractor.
71
72       The basic use of libextractor is to load the plugins (for example  with
73       EXTRACTOR_plugin_add_defaults),  then to extract the keyword list using
74       EXTRACTOR_extract, and finally to  unload  the  plugins  (with  EXTRAC‐
75       TOR_plugin_remove_all).
76
77       Textual  meta  data  obtained from libextractor is supposed to be UTF-8
78       encoded if the text encoding is known.  Plugins are supposed to convert
79       meta-data to UTF-8 if necessary.    The EXTRACTOR_meta_data_print func‐
80       tion converts the UTF-8 keywords to the character set from the  current
81       locale before printing them.
82

SEE ALSO

84       extract(1)
85
86

LEGAL NOTICE

88       libextractor  is  released  under  the  GPL  and  is  a  GNU   package
89       (http://www.gnu.org/).
90
91

BUGS

93       A couple of file-formats (on the order of 10^3) are not recognized...
94
95

AUTHORS

97       extract  was  originally  written   by   Christian   Grothoff   <chris‐
98       tian@grothoff.org>  and  Vidyut Samanta <vids@cs.ucla.edu>. Use <libex‐
99       tractor@gnu.org> to contact the current maintainer(s).
100
101

AVAILABILITY

103       You  can   obtain   the   original   author's   latest   version   from
104       http://www.gnu.org/software/libextractor/.
105
106
107
108GNU libextractor 1.0.0           Sept 4, 2012                  LIBEXTRACTOR(3)
Impressum