1UUlib(3)              User Contributed Perl Documentation             UUlib(3)
2
3
4

NAME

6       Convert::UUlib - Perl interface to the uulib library (a.k.a.
7       uudeview/uuenview).
8

SYNOPSIS

10        use Convert::UUlib ':all';
11
12        # read all the files named on the commandline and decode them
13        # into the CURRENT directory. See below for a longer example.
14        LoadFile $_ for @ARGV;
15        for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
16           if ($uu->state & FILE_OK) {
17             $uu->decode;
18             print $uu->filename, "\n";
19           }
20        }
21

DESCRIPTION

23       Read the file doc/library.pdf from the distribution for in-depth
24       information about the C-library used in this interface, and the rest of
25       this document and especially the non-trivial decoder program at the
26       end.
27

EXPORTED CONSTANTS

29   Action code constants
30         ACT_IDLE      we don't do anything
31         ACT_SCANNING  scanning an input file
32         ACT_DECODING  decoding into a temp file
33         ACT_COPYING   copying temp to target
34         ACT_ENCODING  encoding a file
35
36   Message severity levels
37         MSG_MESSAGE   just a message, nothing important
38         MSG_NOTE      something that should be noticed
39         MSG_WARNING   important msg, processing continues
40         MSG_ERROR     processing has been terminated
41         MSG_FATAL     decoder cannot process further requests
42         MSG_PANIC     recovery impossible, app must terminate
43
44   Options
45         OPT_VERSION   version number MAJOR.MINORplPATCH (ro)
46         OPT_FAST      assumes only one part per file
47         OPT_DUMBNESS  switch off the program's intelligence
48         OPT_BRACKPOL  give numbers in [] higher precedence
49         OPT_VERBOSE   generate informative messages
50         OPT_DESPERATE try to decode incomplete files
51         OPT_IGNREPLY  ignore RE:plies (off by default)
52         OPT_OVERWRITE whether it's OK to overwrite ex. files
53         OPT_SAVEPATH  prefix to save-files on disk
54         OPT_IGNMODE   ignore the original file mode
55         OPT_DEBUG     print messages with FILE/LINE info
56         OPT_ERRNO     get last error code for RET_IOERR (ro)
57         OPT_PROGRESS  retrieve progress information
58         OPT_USETEXT   handle text messages
59         OPT_PREAMB    handle Mime preambles/epilogues
60         OPT_TINYB64   detect short B64 outside of Mime
61         OPT_ENCEXT    extension for single-part encoded files
62         OPT_REMOVE    remove input files after decoding (dangerous)
63         OPT_MOREMIME  strict MIME adherence
64         OPT_DOTDOT    ".."-unescaping has not yet been done on input files
65         OPT_RBUF      set default read I/O buffer size in bytes
66         OPT_WBUF      set default write I/O buffer size in bytes
67         OPT_AUTOCHECK automatically check file list after every loadfile
68
69   Result/Error codes
70         RET_OK        everything went fine
71         RET_IOERR     I/O Error - examine errno
72         RET_NOMEM     not enough memory
73         RET_ILLVAL    illegal value for operation
74         RET_NODATA    decoder didn't find any data
75         RET_NOEND     encoded data wasn't ended properly
76         RET_UNSUP     unsupported function (encoding)
77         RET_EXISTS    file exists (decoding)
78         RET_CONT      continue -- special from ScanPart
79         RET_CANCEL    operation canceled
80
81   File States
82        This code is zero, i.e. "false":
83
84         UUFILE_READ   Read in, but not further processed
85
86        The following state codes are or'ed together:
87
88         FILE_MISPART  Missing Part(s) detected
89         FILE_NOBEGIN  No 'begin' found
90         FILE_NOEND    No 'end' found
91         FILE_NODATA   File does not contain valid uudata
92         FILE_OK       All Parts found, ready to decode
93         FILE_ERROR    Error while decoding
94         FILE_DECODED  Successfully decoded
95         FILE_TMPFILE  Temporary decoded file exists
96
97   Encoding types
98         UU_ENCODED    UUencoded data
99         B64_ENCODED   Mime-Base64 data
100         XX_ENCODED    XXencoded data
101         BH_ENCODED    Binhex encoded
102         PT_ENCODED    Plain-Text encoded (MIME)
103         QP_ENCODED    Quoted-Printable (MIME)
104         YENC_ENCODED  yEnc encoded (non-MIME)
105

EXPORTED FUNCTIONS

107   Initializing and cleanup
108       Initialize is automatically called when the module is loaded and
109       allocates quite a small amount of memory for today's machines ;) CleanUp
110       releases that again.
111
112       On my machine, a fairly complete decode with DBI backend needs about
113       10MB RSS to decode 20000 files.
114
115       Initialize
116           Not normally necessary, (re-)initializes the library.
117
118       CleanUp
119           Not normally necessary, could be called at the end to release
120           memory before starting a new decoding round.
121
122   Setting and querying options
123       $option = GetOption OPT_xxx
124       SetOption OPT_xxx, opt-value
125
126       See the "OPT_xxx" constants above to see which options exist.
127
128   Setting various callbacks
129       SetMsgCallback [callback-function]
130       SetBusyCallback [callback-function]
131       SetFileCallback [callback-function]
132       SetFNameFilter [callback-function]
133
134   Call the currently selected FNameFilter
135       $file = FNameFilter $file
136
137   Loading sourcefiles, optionally fuzzy merge and start decoding
138       ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
139           Load the given file and scan it for encoded contents. Optionally
140           tag it with the given id, and if $delflag is true, delete the file
141           after it is no longer necessary. If you are certain of the part
142           number, you can specify it as the last argument.
143
144           A better (usually faster) way of doing this is using the
145           "SetFNameFilter" functionality.
146
147       $retval = Smerge $pass
148           If you are desperate, try to call "Smerge" with increasing $pass
149           values, beginning at 0, to try to merge parts that usually would
150           not have been merged.
151
152           Most probably this will result in garbled files, so never do this
153           by default, except:
154
155           If the "OPT_AUTOCHECK" option has been disabled (by default it is
156           enabled) to speed up file loading, then you have to call "Smerge
157           -1" after loading all files as an additional pre-pass (which is
158           normally done by "LoadFile").
159
160       $item = GetFileListItem $item_number
161           Return the $item structure for the $item_number'th found file, or
162           "undef" if no file with that number exists.
163
164           The first file has number 0, and the series has no holes, so you
165           can iterate over all files by starting with zero and incrementing
166           until you hit "undef".
167
168   Decoding files
169       $retval = $item->rename($newname)
170           Change the ondisk filename where the decoded file will be saved.
171
172       $retval = $item->decode_temp
173           Decode the file into a temporary location, use "$item->infile" to
174           retrieve the temporary filename.
175
176       $retval = $item->remove_temp
177           Remove the temporarily decoded file again.
178
179       $retval = $item->decode([$target_path])
180           Decode the file to it's destination, or the given target path.
181
182       $retval = $item->info(callback-function)
183
184   Querying (and setting) item attributes
185       $state    = $item->state
186       $mode     = $item->mode([newmode])
187       $uudet    = $item->uudet
188       $size     = $item->size
189       $filename = $item->filename([newfilename])
190       $subfname = $item->subfname
191       $mimeid   = $item->mimeid
192       $mimetype = $item->mimetype
193       $binfile  = $item->binfile
194
195   Information about source parts
196       $parts = $item->parts
197           Return information about all parts (source files) used to decode
198           the file as a list of hashrefs with the following structure:
199
200            {
201              partno   => <integer describing the part number, starting with 1>,
202              # the following members only exist when they contain useful information
203              sfname   => <local pathname of the file where this part is from>,
204              filename => <the ondisk filename of the decoded file>,
205              subfname => <used to cluster postings, possibly the posting filename>,
206              subject  => <the subject of the posting/mail>,
207              origin   => <the possible source (From) address>,
208              mimetype => <the possible mimetype of the decoded file>,
209              mimeid   => <the id part of the Content-Type>,
210            }
211
212           Usually you are interested mostly in the "sfname" and possibly the
213           "partno" and "filename" members.
214
215   Functions below are not documented and not very well tested
216         QuickDecode
217         EncodeMulti
218         EncodePartial
219         EncodeToStream
220         EncodeToFile
221         E_PrepSingle
222         E_PrepPartial
223
224   EXTENSION FUNCTIONS
225       Functions found in this module but not documented in the uulib
226       documentation:
227
228       $msg = straction ACT_xxx
229           Return a human readable string representing the given action code.
230
231       $msg = strerror RET_xxx
232           Return a human readable string representing the given error code.
233
234       $str = strencoding xxx_ENCODED
235           Return the name of the encoding type as a string.
236
237       $str = strmsglevel MSG_xxx
238           Returns the message level as a string.
239
240       SetFileNameCallback $cb
241           Sets (or queries) the FileNameCallback, which is called whenever
242           the decoding library can't find a filename and wants to extract a
243           filename from the subject line of a posting. The callback will be
244           called with two arguments, the subject line and the current
245           candidate for the filename. The latter argument can be "undef",
246           which means that no filename could be found (and likely none
247           exists, so it is safe to also return "undef" in this case). If it
248           doesn't return anything (not even "undef"!), then nothing happens,
249           so this is a no-op callback:
250
251              sub cb {
252                 return ();
253              }
254
255           If it returns "undef", then this indicates that no filename could
256           be found. In all other cases, the return value is taken to be the
257           filename.
258
259           This is a slightly more useful callback:
260
261             sub cb {
262                return unless $_[1]; # skip "Re:"-plies et al.
263                my ($subject, $filename) = @_;
264                # if we find some *.rar, take it
265                return $1 if $subject =~ /(\w+\.rar)/;
266                # otherwise just pass what we have
267                return ();
268             }
269

LARGE EXAMPLE DECODER

271       This is the file "example-decoder" from the distribution, put here
272       instead of more thorough documentation.
273
274          #!/usr/bin/perl
275
276          # decode all the files in the directory uusrc/ and copy
277          # the resulting files to uudst/
278
279          use Convert::UUlib ':all';
280
281          sub namefilter {
282             my ($path) = @_;
283
284             $path=~s/^.*[\/\\]//;
285
286             $path
287          }
288
289          sub busycb {
290             my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
291             $_[0]=straction($action);
292             print "busy_callback(", (join ",",@_), ")\n";
293             0
294          }
295
296          SetOption OPT_RBUF, 128*1024;
297          SetOption OPT_WBUF, 1024*1024;
298          SetOption OPT_IGNMODE, 1;
299          SetOption OPT_IGNMODE, 1;
300          SetOption OPT_VERBOSE, 1;
301
302          # show the three ways you can set callback functions. I normally
303          # prefer the one with the sub inplace.
304          SetFNameFilter \&namefilter;
305
306          SetBusyCallback "busycb", 333;
307
308          SetMsgCallback sub {
309             my ($msg, $level) = @_;
310             print uc strmsglevel $_[1], ": $msg\n";
311          };
312
313          # the following non-trivial FileNameCallback takes care
314          # of some subject lines not detected properly by uulib:
315          SetFileNameCallback sub {
316             return unless $_[1]; # skip "Re:"-plies et al.
317             local $_ = $_[0];
318
319             # the following rules are rather effective on some newsgroups,
320             # like alt.binaries.games.anime, where non-mime, uuencoded data
321             # is very common
322
323             # if we find some *.rar, take it as the filename
324             return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
325
326             # one common subject format
327             return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
328
329             # - filename.par (04/55)
330             return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
331
332             # - (xxx) No. 1 sayuri81.jpg 756565 bytes
333             # - (20 files) No.17 Roseanne.jpg [2/2]
334             return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
335
336             # try to detect some common forms of filenames
337             return $1 if /([a-z0-9_\-+.]{3,}\.[a-z]{3,4}(?:.\d+))/i;
338
339             # otherwise just pass what we have
340             ()
341          };
342
343          # now read all files in the directory uusrc/*
344          for(<uusrc/*>) {
345             my ($retval, $count) = LoadFile ($_, $_, 1);
346             print "file($_), status(", strerror $retval, ") parts($count)\n";
347          }
348
349          SetOption OPT_SAVEPATH, "uudst/";
350
351          # now wade through all files and their source parts
352          $i = 0;
353          while ($uu = GetFileListItem $i) {
354             $i++;
355             print "file nr. $i";
356             print " state ", $uu->state;
357             print " mode ", $uu->mode;
358             print " uudet ", strencoding $uu->uudet;
359             print " size ", $uu->size;
360             print " filename ", $uu->filename;
361             print " subfname ", $uu->subfname;
362             print " mimeid ", $uu->mimeid;
363             print " mimetype ", $uu->mimetype;
364             print "\n";
365
366             # print additional info about all parts
367             for ($uu->parts) {
368                while (my ($k, $v) = each %$_) {
369                   print "$k > $v, ";
370                }
371                print "\n";
372             }
373
374             print $uu->filename;
375
376             $uu->remove_temp;
377
378             if (my $err = $uu->decode ()) {
379                print ", ", strerror $err, "\n";
380             } else {
381                print ", saved as uudst/", $uu->filename, "\n";
382             }
383          }
384
385          print "cleanup...\n";
386
387          CleanUp;
388

AUTHOR

390       Marc Lehmann <schmorp@schmorp.de>, the original uulib library was
391       written by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later
392       heavily bugfixed by Marc Lehmann.
393

SEE ALSO

395       perl(1), uudeview homepage at
396       http://www.uni-frankfurt.de/~fp/uudeview/.
397
398
399
400perl v5.12.4                      2011-05-29                          UUlib(3)
Impressum