1ESTCMD(1)                       Hyper Estraier                       ESTCMD(1)
2
3
4

NAME

6       estcmd - command line interface of the core API
7
8

SYNOPSIS

10       estcmd  create  [-tr] [-apn|-acc] [-xs|-xl|-xh|-xh2|-xh3] [-sv|-si|-sa]
11       [-attr name type] db
12
13       estcmd  put  [-tr]  [-cl]  [-ws]  [-apn|-acc]  [-xs|-xl|-xh|-xh2|-xh3]
14       [-sv|-si|-sa] db [file]
15
16       estcmd out [-cl] [-pc enc] db expr
17
18       estcmd edit [-pc enc] db expr name [value]
19
20       estcmd get [-nl|-nb] [-pidx path] [-pc enc] db expr [attr]
21
22       estcmd list [-nl|-nb] [-lp] db
23
24       estcmd uriid [-nl|-nb] [-pidx path] [-pc enc] db expr
25
26       estcmd meta db [name [value]]
27
28       estcmd inform [-nl|-nb] db
29
30       estcmd optimize [-onp] [-ond] db
31
32       estcmd merge [-cl] db target
33
34       estcmd repair [-rst|-rsh] db
35
36       estcmd      search     [-nl|-nb]     [-pidx     path]     [-ic     enc]
37       [-vu|-va|-vf|-vs|-vh|-vx|-dd] [-sn wnum hnum anum] [-kn num] [-um] [-ec
38       rn]  [-gs|-gf|-ga]  [-cd] [-ni] [-sf|-sfr|-sfu|-sfi] [-hs] [-attr expr]
39       [-ord expr] [-max num] [-sk num] [-aux num] [-dis name]  [-sim  id]  db
40       [phrase]
41
42       estcmd  gather [-tr] [-cl] [-ws] [-no] [-fe|-ft|-fh|-fm] [-fx sufs cmd]
43       [-fz] [-fo] [-rm sufs] [-ic enc] [-il lang] [-bc] [-lt num]  [-lf  num]
44       [-pc     enc]    [-px    name]    [-aa    name    value]    [-apn|-acc]
45       [-xs|-xl|-xh|-xh2|-xh3] [-sv|-si|-sa] [-ss name] [-sd] [-cm] [-cs  num]
46       [-ncm] [-kn num] [-um] db [file|dir]
47
48       estcmd purge [-cl] [-no] [-fc] [-pc enc] [-attr expr] db [prefix]
49
50       estcmd  extkeys  [-no]  [-fc] [-dfdb file] [-ncm] [-ni] [-kn num] [-um]
51       [-attr expr] db [prefix]
52
53       estcmd words [-nl|-nb] [-dfdb file] [-kw|-kt] db
54
55       estcmd draft [-ft|-fh|-fm] [-ic enc] [-il lang] [-bc]  [-lt  num]  [-kn
56       num] [-um] [file]
57
58       estcmd break [-ic enc] [-il lang] [-apn|-acc] [-wt] [file]
59
60       estcmd iconv [-ic enc] [-il lang] [-oc enc] [file]
61
62       estcmd regex [-inv] [-repl str] expr [file]
63
64       estcmd scandir [-tf|-td] [-pa|-pu] [dir]
65
66       estcmd  multi  [-db  db]  [-nl|-nb] [-ic enc] [-gs|-gf|-ga] [-cd] [-ni]
67       [-sf|-sfr|-sfu|-sfi] [-hs] [-hu] [-attr expr] [-ord  expr]  [-max  num]
68       [-sk num] [-aux num] [-dis name] [phrase]
69
70       estcmd randput [-ren|-rla|-reu|-ror|-rjp|-rch] [-cs num] db dnum
71
72       estcmd wicked db dnum
73
74       estcmd regression db
75
76       estcmd version
77
78

DESCRIPTION

80       estcmd is an aggregation of sub commands.  The name of a sub command is
81       specified by the first argument.  Other arguments are parsed  according
82       to each sub command.  The argument db specifies the path of an index.
83
84       estcmd  create  [-tr] [-apn|-acc] [-xs|-xl|-xh|-xh2|-xh3] [-sv|-si|-sa]
85       [-attr name type] db
86              Create an index.
87              If -tr is specified, a new index is created  regardless  if  one
88              exists.
89              If -apn is specified, N-gram analysis is performed against Euro‐
90              pean text also.
91              If -acc is specified, character category analysis  is  performed
92              instead of N-gram analysis.
93              If  -xs  is  specified, the index is tuned to register less than
94              50000 documents.
95              If -xl is specified, the index is tuned to  register  more  than
96              300000 documents.
97              If  -xh  is  specified, the index is tuned to register more than
98              1000000 documents.
99              If -xh2 is specified, the index is tuned to register  more  than
100              5000000 documents.
101              If  -xh3  is specified, the index is tuned to register more than
102              10000000 documents.
103              If -sv is specified, scores are stored as void.
104              If -si is specified, scores are stored as 32-bit integer.
105              If -sa is specified, scores are stored as-is and marked  not  to
106              be tuned when searching.
107              -attr  specifies  an  attribute  index  and its data type.  This
108              option can be specified multiple times.
109
110       estcmd  put  [-tr]  [-cl]  [-ws]  [-apn|-acc]  [-xs|-xl|-xh|-xh2|-xh3]
111       [-sv|-si|-sa] db [file]
112              Register a document in document draft format to an index.
113              file  specifies  a  target file.  If it is omitted, the standard
114              input is read.
115              If -tr is specified, a new index is created  regardless  if  one
116              exists.
117              If  -cl  is  specified,  regions  of an overwritten document are
118              cleaned up.
119              If -ws is specified, scores are weighted statically  with  score
120              weighting attribute.
121              If -apn is specified, N-gram analysis is performed against Euro‐
122              pean text also.
123              If -acc is specified, character category analysis  is  performed
124              instead of N-gram analysis.
125              If  -xs  is  specified, the index is tuned to register less than
126              50000 documents.
127              If -xl is specified, the index is tuned to  register  more  than
128              300000 documents.
129              If  -xh  is  specified, the index is tuned to register more than
130              1000000 documents.
131              If -xh2 is specified, the index is tuned to register  more  than
132              5000000 documents.
133              If  -xh3  is specified, the index is tuned to register more than
134              10000000 documents.
135              If -sv is specified, scores are stored as void.
136              If -si is specified, scores are stored as 32-bit integer.
137              If -sa is specified, scores are stored as-is and marked  not  to
138              be tuned when searching.
139
140       estcmd out [-pc enc] [-cl] db expr
141              Remove information of a document from an index.
142              expr  specifies  the  ID number, the URI, or the local path of a
143              document.
144              If -cl is specified, regions of the document are cleaned up.
145              -pc specifies the encoding of file paths.   By  default,  it  is
146              ISO-8859-1.
147
148       estcmd edit [-pc enc] db expr name [value]
149              Edit an attribute of a document in an index.
150              expr  specifies  the  ID number, the URI, or the local path of a
151              document.
152              name specifies the name of an attribute.
153              value specifies the value of the attribute.  If it  is  omitted,
154              the attribute is removed.
155              -pc  specifies  the  encoding of the file path and the attribute
156              value.  By default, it is ISO-8859-1.
157
158       estcmd get [-nl|-nb] [-pidx path] [-pc enc] db expr [attr]
159              Output document draft of a document in an index.
160              expr specifies the ID number, the URI, or the local  path  of  a
161              document.
162              If attr is specified, only the value of the attribute is output.
163              If -nl is specified, the index is opened without file locking.
164              If -nb is specified, file locking is performed without blocking.
165              -pidx  specifies the path of a pseudo index.  This option can be
166              specified multiple times.
167              -pc specifies the encoding of file paths.   By  default,  it  is
168              ISO-8859-1.
169
170       estcmd list [-nl|-nb] [-lp] db
171              Output a list of all documents in an index.
172              If -nl is specified, the index is opened without file locking.
173              If -nb is specified, file locking is performed without blocking.
174              If  -lp  is specified, local path equivalent to URL of "file://"
175              is output.
176
177       estcmd uriid [-nl|-nb] [-pidx path] [-pc enc] db expr
178              Output the ID number of a document specified by URI.
179              expr specifies the URI or the local path of a document.
180              If -nl is specified, the index is opened without file locking.
181              If -nb is specified, file locking is performed without blocking.
182              -pidx specifies the path of a pseudo index.  This option can  be
183              specified multiple times.
184              -pc  specifies  the  encoding  of file paths.  By default, it is
185              ISO-8859-1.
186
187       estcmd meta db [name [value]]
188              Handle meta data.
189              name specifies the name of a piece of meta data.  If it is omit‐
190              ted, a list of all names is output.
191              value  specifies  the value of the meta data to be recorded.  If
192              it is omitted, the current value is output.  If it is  an  empty
193              string, the meta data is removed.
194
195       estcmd inform [-nl|-nb] db
196              Output the number of documents and the number of unique words in
197              an index.
198              If -nl is specified, the index is opened without file locking.
199              If -nb is specified, file locking is performed without blocking.
200
201       estcmd optimize [-onp] [-ond] db
202              Optimize an index and clean up dispensable regions.
203              If -onp is specified, cleaning up dispensable regions is
204              omitted.
205              If  -ond  is  specified,  optimizing the database files is
206              omitted.
207
208       estcmd merge [-cl] db target
209              Merge another index.
210              target specifies the path of another index.
211              If -cl  is  specified,  regions  of  overwritten  documents  are
212              cleaned up.
213
214       estcmd repair [-rst|-rsh] db
215              Repair a broken index.
216              If -rst is specified, strict consistency check is performed.
217              If -rsh is specified, consistency check is omitted.
218
219       estcmd      search     [-nl|-nb]     [-pidx     path]     [-ic     enc]
220       [-vu|-va|-vf|-vs|-vh|-vx|-dd] [-sn wnum hnum anum] [-kn num] [-um] [-ec
221       rn]  [-gs|-gf|-ga]  [-cd] [-ni] [-sf|-sfr|-sfu|-sfi] [-hs] [-attr expr]
222       [-ord expr] [-max num] [-sk num] [-aux num] [-dis name]  [-sim  id]  db
223       [phrase]
224              Search an index for documents.
225              phrase specifies the search phrase.
226              If -nl is specified, the index is opened without file locking.
227              If -nb is specified, file locking is performed without blocking.
228              -pidx  specifies the path of a pseudo index.  This option can be
229              specified multiple times.
230              -ic specifies the input encoding.  By default, it is UTF-8.
231              If -vu is specified, TSV of ID number and URI are output.
232              If -va is specified, multipart format  including  attributes  is
233              output.
234              If  -vf  is specified, multipart format including document draft
235              is output.
236              If -vs is specified, multipart format including  attributes  and
237              snippets is output.
238              If  -vh is specified, human readable format including attributes
239              and snippets is output.
240              If -vx is specified, XML including attributes and
241              snippets is output.
242              If  -dd  is  specified, document draft data are dumped and saved
243              into separated files.
244              -sn specifies the number of whole width of snippet and width  of
245              strings  picked  up  from the beginning of the text and width of
246              strings picked up around each highlighted word.
247              -kn specifies the  number  of  keywords  to  be  extracted.   By
248              default, keyword extraction is not performed.
249              If  -um  is specified, morphological analyzers are used for key‐
250              word extraction.
251              -ec specifies lower limit of similarity eclipse.
252              If -gs is  specified,  every  key  of  N-gram  is  checked.   By
253              default, keys are checked alternately.
254              If -gf is specified, keys of N-gram are checked every three.
255              If -ga is specified, keys of N-gram are checked every four.
256              If  -cd  is specified, whether documents match the search phrase
257              definitely is checked.
258              If -ni is specified, TF-IDF tuning is omitted.
259              If -sf is specified, the phrase is treated as a simplified form.
260              If -sfr is specified, the phrase is treated as a rough form.
261              If -sfu is specified, the phrase is treated as a union form.
262              If -sfi is specified, the phrase is treated as  an  intersection
263              form.
264              If   -hs  is  specified,  score  information  is  output  as  an
265              attribute.
266              -attr specifies an attribute search condition.  This option  can
267              be specified multiple times.
268              -ord specifies the order expression.  By default, it is descend‐
269              ing by score.
270              -max specifies the maximum number of shown documents.   Negative
271              means unlimited.  By default, it is 10.
272              -sk  specifies  the  number  of  documents  to  be  skipped.  By
273              default, it is 0.
274              -aux specifies permission  to  adopt  result  of  the  auxiliary
275              index.   If  it  is  not more than 0, the auxiliary index is not
276              used.  By default, it is 32.
277              -dis specifies the name of the distinct attribute.
278              -sim specifies the ID number of the seed document for similarity
279              search.
280
281       estcmd  gather [-tr] [-cl] [-ws] [-no] [-fe|-ft|-fh|-fm] [-fx sufs cmd]
282       [-fz] [-fo] [-rm sufs] [-ic enc] [-il lang] [-bc] [-lt num]  [-lf  num]
283       [-pc     enc]    [-px    name]    [-aa    name    value]    [-apn|-acc]
284       [-xs|-xl|-xh|-xh2|-xh3] [-sv|-si|-sa] [-ss name] [-sd] [-cm] [-cs  num]
285       [-ncm] [-kn num] [-um] db [file|dir]
286              Scan the local file system and register documents into an index.
287              If  the third argument is the name of a file, a list of paths of
288              target documents is read from it.  If it is "-",  the  standard
289              input is specified.
290              If  the  third  argument  is the name of a directory, all files
291              under the directory are treated as target documents.
292              If -tr is specified, a new index is created  regardless  if  one
293              exists.
294              If  -cl  is  specified,  regions  of  overwritten  documents are
295              cleaned up.
296              If -ws is specified, scores are weighted statically  with  score
297              weighting attribute.
298              If  -no  is  specified,  operations are printed but not executed
299              actually.
300              If -fe is specified, target files are treated as document draft.
301              By  default,  the format is detected by the suffix of each docu‐
302              ment.
303              If -ft is specified, target files are treated as plain text.
304              If -fh is specified, target files are treated as HTML.
305              If -fm is specified, target files are treated as MIME.
306              If -fx is specified, target files with  the  specified  suffixes
307              are  processed  by the specified outer command.  "*" matches any
308              file.  If the command is prefixed with "T@", the output of the com‐
309              mand  is  treated  as  plain  text.  If the command is prefixed with
310              "H@", the output of the command is treated as HTML.  If the com‐
311              mand  is prefixed with "M@", the output of the command is treated as
312              MIME.  Else, the output is  treated  as  document  draft.   This
313              option can be specified multiple times.
314              If -fz is specified, documents which do not correspond to the
315              condition of -fx are ignored.
316              If -fo is specified, target files are not read.   It  is  useful
317              for efficient process of the outer command.
318              If  -rm  is  specified, target files with the specified suffixes
319              are removed.  "*" matches any file.  This option can  be  speci‐
320              fied multiple times.
321              -ic  specifies  the  input encoding.  By default, it is detected
322              automatically.
323              -il specifies the preferred input language.  By default, English
324              is preferred.
325              If -bc is specified, binary files are detected and ignored.
326              -lt  specifies  the  text  size  limitation  by  kilo bytes.  By
327              default, it is 128KB.  If it is negative, the size is unlimited.
328              -lf specifies the  file  size  limitation  by  mega  bytes.   By
329              default, it is 32MB.  If it is negative, the size is unlimited.
330              -pc  specifies  the  encoding  of file paths.  By default, it is
331              ISO-8859-1.
332              -px specifies the name of an attribute read  from  the  list  of
333              paths.   As  the  list  of paths can be in TSV format, the first
334              field is treated as the path of a target  document,  the  second
335              field  and  the  followers  are definitions of attribute values.
336              -px specifies the name of each value of the  second  field  and
337              the followers.  This option can be specified multiple times.
338              -aa specifies the name and the value of an additional attribute.
339              This option can be specified multiple times.
340              If -apn is specified, N-gram analysis is performed against Euro‐
341              pean text also.
342              If  -acc  is specified, character category analysis is performed
343              instead of N-gram analysis.
344              If -xs is specified, the index is tuned to  register  less  than
345              50000 documents.
346              If  -xl  is  specified, the index is tuned to register more than
347              300000 documents.
348              If -xh is specified, the index is tuned to  register  more  than
349              1000000 documents.
350              If  -xh2  is specified, the index is tuned to register more than
351              5000000 documents.
352              If -xh3 is specified, the index is tuned to register  more  than
353              10000000 documents.
354              If -sv is specified, scores are stored as void.
355              If -si is specified, scores are stored as 32-bit integer.
356              If  -sa  is specified, scores are stored as-is and marked not to
357              be tuned when searching.
358              -ss specifies the name of an attribute for substitute score.
359              If -sd is specified, the  modification  date  of  each  file  is
360              recorded as an attribute.
361              If  -cm  is specified, documents whose modification date has not
362              changed are ignored.
363              -cs specifies the size  of  cache  memory  by  mega  bytes.   By
364              default, it is 64MB.
365              If  -ncm is specified, checking availability of the virtual mem‐
366              ory is omitted.
367              -kn specifies the  number  of  keywords  to  be  extracted.   By
368              default, keyword extraction is not performed.
369              If  -um  is specified, morphological analyzers are used for key‐
370              word extraction.
371
372       estcmd purge [-cl] [-no] [-fc] [-pc enc] [-attr expr] db [prefix]
373              Purge information of documents which do not exist  on  the  file
374              system.
375              If  prefix  is  specified,  only documents whose URIs begin with
376              it are targeted.  It can be specified by the local path of a directory.
377              If -cl is  specified,  regions  of  the  deleted  documents  are
378              cleaned up.
379              If  -no  is  specified,  operations are printed but not executed
380              actually.
381              If -fc is specified, information of  all  target  documents  is
382              deleted.
383              -pc  specifies  the  encoding  of file paths.  By default, it is
384              ISO-8859-1.
385              -attr specifies an attribute search condition.  This option  can
386              be specified multiple times.
387
388       estcmd  extkeys  [-no]  [-fc] [-dfdb file] [-ncm] [-ni] [-kn num] [-um]
389       [-attr expr] db [prefix]
390              Create a database of keywords extracted from documents.
391              If prefix is specified, only documents whose URIs begin with  it
392              are processed.
393              If  -no  is  specified,  operations are printed but not executed
394              actually.
395              If -fc is specified, all target documents are processed  whether
396              or not they have existing records.
397              -dfdb  specifies  an  outer  database of document frequency.  By
398              default, document frequency is calculated dynamically  according
399              to the index.
400              If  -ncm is specified, checking availability of the virtual mem‐
401              ory is omitted.
402              If -ni is specified, TF-IDF tuning is omitted.
403              -kn specifies the  number  of  keywords  to  be  extracted.   By
404              default, it is 32.
405              If  -um  is specified, morphological analyzers are used for key‐
406              word extraction.
407              -attr specifies an attribute search condition.  This option  can
408              be specified multiple times.
409
410       estcmd words [-nl|-nb] [-dfdb file] [-kw|-kt] db
411              Output  a list of all unique words and each record size which is
412              treated as document frequency.
413              If -nl is specified, the index is opened without file locking.
414              If -nb is specified, file locking is performed without blocking.
415              -dfdb specifies an outer database where the  result  is  stored.
416              By  default, the result is output to the standard output as TSV.
417              If the outer database already exists, the value of  each  record
418              is incremented.
419              If -kw is specified, keywords and numbers of corresponding docu‐
420              ments are output.
421              If -kt is specified, keywords and their related terms  are  out‐
422              put.
423
424       estcmd  draft  [-ft|-fh|-fm]  [-ic enc] [-il lang] [-bc] [-lt num] [-kn
425       num] [-um] [file]
426              For test and debug.
427
428       estcmd break [-ic enc] [-il lang] [-apn|-acc] [-wt] [file]
429              For test and debug.
430
431       estcmd iconv [-ic enc] [-il lang] [-oc enc] [file]
432              For test and debug.
433
434       estcmd regex [-inv] [-repl str] expr [file]
435              For test and debug.
436
437       estcmd scandir [-tf|-td] [-pa|-pu] [dir]
438              For test and debug.
439
440       estcmd multi [-db db] [-nl|-nb] [-ic  enc]  [-gs|-gf|-ga]  [-cd]  [-ni]
441       [-sf|-sfr|-sfu|-sfi]  [-hs]  [-hu]  [-attr expr] [-ord expr] [-max num]
442       [-sk num] [-aux num] [-dis name] [phrase]
443              For test and debug.
444
445       estcmd randput [-ren|-rla|-reu|-ror|-rjp|-rch] [-cs num] db dnum
446              For test and debug.
447
448       estcmd wicked db dnum
449              For test and debug.
450
451       estcmd regression db
452              For test and debug.
453
454       estcmd version
455              Show the version information.
456
457       All sub commands return 0 if the operation succeeds, else  return  1.
458       As  for  put, out, gather, purge, randput, wicked, and regression, they
459       finish with closing the database when they catch the signal 1 (SIGHUP),
460       2 (SIGINT), 3 (SIGQUIT), 13 (SIGPIPE), or 15 (SIGTERM).
461
462       The  data type of attribute indexes specified by -attr option of create
463       sub command should be "seq" for sequential type, "str" for string type,
464       or "num" for number type.
465
466       Each  pseudo  index specified by -pidx option of search sub command and
467       so on is a directory containing files of document draft.  If you search
468       a  main  index  with  pseudo indexes, meta search of the main index and
469       pseudo indexes is performed.
470
471       The encoding name specified by -ic option should be  such  name  regis‐
472       tered to IETF as UTF-8, ISO-8859-1, and so on.  The language name spec‐
473       ified by -il option should be one of "en"  (English),  "ja"  (Japanese),
474       "zh" (Chinese), "ko" (Korean).
475
476       The  outer  command specified by -fx option of gather receives the path
477       of the target document by the first argument and the path for output by
478       the second argument.  The original path of the target document is given
479       as the value of the environment variable `ESTORIGFILE'.
480
481       Note that similarity search is very slow, by default.  To  improve  the
482       performance  of  similarity search, running "estcmd extkeys" beforehand
483       is strongly recommended.
484
485

SEE ALSO

487       estconfig(1), estmaster(1), estcall(1), estwaver(1), estraier(3), estn‐
488       ode(3)
489
490       Please   see   http://hyperestraier.sourceforge.net/uguide-en.html  for
491       detail.
492
493
494
495Man Page                          2007-03-06                         ESTCMD(1)
Impressum