1XML::TreePP(3)        User Contributed Perl Documentation       XML::TreePP(3)
2
3
4

NAME

6       XML::TreePP -- Pure Perl implementation for parsing/writing XML
7       documents
8

SYNOPSIS

10       parse an XML document from file into hash tree:
11
12           use XML::TreePP;
13           my $tpp = XML::TreePP->new();
14           my $tree = $tpp->parsefile( "index.rdf" );
15           print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n";
16           print "URL:   ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n";
17
18       write an XML document as string from hash tree:
19
20           use XML::TreePP;
21           my $tpp = XML::TreePP->new();
22           my $tree = { rss => { channel => { item => [ {
23               title   => "The Perl Directory",
24               link    => "http://www.perl.org/",
25           }, {
26               title   => "The Comprehensive Perl Archive Network",
27               link    => "http://cpan.perl.org/",
28           } ] } } };
29           my $xml = $tpp->write( $tree );
30           print $xml;
31
32       get a remote XML document by HTTP-GET and parse it into hash tree:
33
34           use XML::TreePP;
35           my $tpp = XML::TreePP->new();
36           my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" );
37           print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n";
38           print "URL:   ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n";
39
40       get a remote XML document by HTTP-POST and parse it into hash tree:
41
42           use XML::TreePP;
43           my $tpp = XML::TreePP->new( force_array => [qw( item )] );
44           my $cgiurl = "http://search.hatena.ne.jp/keyword";
45           my $keyword = "ajax";
46           my $cgiquery = "mode=rss2&word=".$keyword;
47           my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery );
48           print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n";
49           print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n";
50

DESCRIPTION

52       The XML::TreePP module parses an XML document and expands it into a
53       hash tree.  Conversely, it also generates an XML document from a hash
54       tree.  This is a pure Perl implementation and requires no other
55       modules.  It can also fetch and parse an XML document from a remote
56       web server, like the XMLHttpRequest object does in JavaScript.
57

EXAMPLES

59   Parse XML file
60       Sample XML document:
61
62           <?xml version="1.0" encoding="UTF-8"?>
63           <family name="Kawasaki">
64               <father>Yasuhisa</father>
65               <mother>Chizuko</mother>
66               <children>
67                   <girl>Shiori</girl>
68                   <boy>Yusuke</boy>
69                   <boy>Kairi</boy>
70               </children>
71           </family>
72
73       Sample program to read an XML file and dump it:
74
75           use XML::TreePP;
76           use Data::Dumper;
77           my $tpp = XML::TreePP->new();
78           my $tree = $tpp->parsefile( "family.xml" );
79           my $text = Dumper( $tree );
80           print $text;
81
82       Result dumped:
83
84           $VAR1 = {
85               'family' => {
86                   '-name' => 'Kawasaki',
87                   'father' => 'Yasuhisa',
88                   'mother' => 'Chizuko',
89                   'children' => {
90                       'girl' => 'Shiori',
91                       'boy' => [
92                           'Yusuke',
93                           'Kairi'
94                       ]
95                   }
96               }
97           };
98
99       Details:
100
101           print $tree->{family}->{father};        # the father's given name.
102
103       The prefix '-' is added on every attribute's name.
104
105           print $tree->{family}->{"-name"};       # the family name of the family
106
107       The array is used because the family has two boys.
108
109           print $tree->{family}->{children}->{boy}->[1];  # The second boy's name
110           print $tree->{family}->{children}->{girl};      # The girl's name
111
112   Text node and attributes:
113       If a element has both of a text node and attributes or both of a text
114       node and other child nodes, value of a text node is moved to "#text"
115       like child nodes.
116
117           use XML::TreePP;
118           use Data::Dumper;
119           my $tpp = XML::TreePP->new();
120           my $source = '<span class="author">Kawasaki Yusuke</span>';
121           my $tree = $tpp->parse( $source );
122           my $text = Dumper( $tree );
123           print $text;
124
125       The dumped result is as follows:
126
127           $VAR1 = {
128               'span' => {
129                   '-class' => 'author',
130                   '#text'  => 'Kawasaki Yusuke'
131               }
132           };
133
134       The special node name of "#text" is used because this element has
135       attribute(s) in addition to the text node.  See also the
136       "text_node_key" option.
137

METHODS

139   new
140       This constructor method returns a new XML::TreePP object with %options.
141
142           $tpp = XML::TreePP->new( %options );
143
144   set
145       This method sets an option value for "option_name".  If $option_value
146       is not defined, the option is deleted.
147
148           $tpp->set( option_name => $option_value );
149
150       See OPTIONS section below for details.
151
152   get
153       This method returns the current option value for "option_name".
154
155           $tpp->get( 'option_name' );
156
157   parse
158       This method reads an XML document by string and returns a hash tree
159       converted.  The first argument is a scalar or a reference to a scalar.
160
161               $tree = $tpp->parse( $source );
162
163   parsefile
164       This method reads an XML document by file and returns a hash tree
165       converted.  The first argument is a filename.
166
167           $tree = $tpp->parsefile( $file );
168
169   parsehttp
170       This method receives an XML document from a remote server via HTTP and
171       returns a hash tree converted.
172
173           $tree = $tpp->parsehttp( $method, $url, $body, $head );
174
175       $method is the HTTP request method: GET/POST/PUT/DELETE.  $url is the
176       URI of an XML file.  $body is the request body when you use the POST
177       method.  $head is the request headers as a hash ref.  LWP::UserAgent
178       or HTTP::Lite is required to fetch a file.
179
180           ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head );
181
182       In array context, this method also returns the raw XML document
183       received and the HTTP response's status code.
184
185   write
186       This method parses a hash tree and returns an XML document as a string.
187
188           $source = $tpp->write( $tree, $encode );
189
190       $tree is a reference to a hash tree.
191
192   writefile
193       This method parses a hash tree and writes an XML document into a file.
194
195           $tpp->writefile( $file, $tree, $encode );
196
197       $file is a filename to create.  $tree is a reference to a hash tree.
198

OPTIONS FOR PARSING XML

200       This module accepts the following option parameters:
201
202   force_array
203       This option allows you to specify a list of element names which should
204       always be forced into an array representation.
205
206           $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] );
207
208       The default value is null, which means that the context of each
209       element determines whether it becomes an array or stays a scalar or
210       hash.  Note that the special wildcard name '*' means all elements.
211
212   force_hash
213       This option allows you to specify a list of element names which should
214       always be forced into a hash representation.
215
216           $tpp->set( force_hash => [ 'item', 'image' ] );
217
218       The default value is null, it means that context of the elements will
219       determine to make hash or to keep it scalar as a text node.  See also
220       "text_node_key" option below.  Note that the special wildcard name '*'
221       means all elements.
222
223   cdata_scalar_ref
224       This option allows you to convert a cdata section into a reference for
225       scalar on parsing an XML document.
226
227           $tpp->set( cdata_scalar_ref => 1 );
228
229       The default value is false, it means that each cdata section is
230       converted into a scalar.
231
232   user_agent
233       This option allows you to specify a HTTP_USER_AGENT string which is
234       used by parsehttp() method.
235
236           $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' );
237
238       The default string is 'XML-TreePP/#.##', where '#.##' is substituted
239       with the version number of this library.
240
241   http_lite
242       This option forces parsehttp() method to use a HTTP::Lite instance.
243
244           my $http = HTTP::Lite->new();
245           $tpp->set( http_lite => $http );
246
247   lwp_useragent
248       This option forces parsehttp() method to use a LWP::UserAgent
249       instance.
250
251           my $ua = LWP::UserAgent->new();
252           $ua->timeout( 60 );
253           $ua->env_proxy;
254           $tpp->set( lwp_useragent => $ua );
255
256       You may use this with LWP::UserAgent::WithCache.
257
258   base_class
259       This blesses each element's hashref into a class.  Each class is
260       named straight as a child class of its parent class.
261
262           $tpp->set( base_class => 'MyElement' );
263           my $xml  = '<root><parent><child key="val">text</child></parent></root>';
264           my $tree = $tpp->parse( $xml );
265           print ref $tree->{root}->{parent}->{child}, "\n";
266
267       A hash for <child> element above is blessed to
268       "MyElement::root::parent::child" class. You may use this with
269       Class::Accessor.
270
271   elem_class
272       This blesses class name for each element's hashref.  Each class is
273       named horizontally under the direct child of "MyElement".
274
275           $tpp->set( elem_class => 'MyElement' );
276           my $xml  = '<root><parent><child key="val">text</child></parent></root>';
277           my $tree = $tpp->parse( $xml );
278           print ref $tree->{root}->{parent}->{child}, "\n";
279
280       A hash for <child> element above is blessed to "MyElement::child"
281       class.
282
283   xml_deref
284       This option dereferences the numeric character references, like &#xEB;,
285       &#28450;, etc., in an XML document when this value is true.
286
287           $tpp->set( xml_deref => 1 );
288
289       Note that, for security reasons and your convenience, this module
290       dereferences the predefined character entity references, &amp;, &lt;,
291       &gt;, &apos; and &quot;, and the numeric character references up to
292       U+007F without xml_deref per default.
293

OPTIONS FOR WRITING XML

295   first_out
296       This option allows you to specify a list of element/attribute names
297       which should always appear at first on output XML document.
298
299           $tpp->set( first_out => [ 'link', 'title', '-type' ] );
300
301       The default value is null, it means alphabetical order is used.
302
303   last_out
304       This option allows you to specify a list of element/attribute names
305       which should always appear at last on output XML document.
306
307           $tpp->set( last_out => [ 'items', 'item', 'entry' ] );
308
309   indent
310       This makes the output more human readable by indenting appropriately.
311
312           $tpp->set( indent => 2 );
313
314       This doesn't strictly follow the XML specification but does look nice.
315
316   xml_decl
317       This module inserts an XML declaration on top of the XML document
318       generated per default. This option forces to change it to another or
319       just remove it.
320
321           $tpp->set( xml_decl => '' );
322
323   output_encoding
324       This option allows you to specify an encoding of the XML document
325       generated by write/writefile methods.
326
327           $tpp->set( output_encoding => 'UTF-8' );
328
329       On Perl 5.8.0 and later, you can select any encoding supported by
330       Encode.pm. On Perl 5.6.x and before with Jcode.pm, you can use
331       "Shift_JIS", "EUC-JP", "ISO-2022-JP" and "UTF-8". The default value
332       is "UTF-8", which is the recommended encoding.
333

OPTIONS FOR BOTH

335   utf8_flag
336       This makes utf8 flag on for every element's value parsed and makes it
337       on for the XML document generated as well.
338
339           $tpp->set( utf8_flag => 1 );
340
341       Perl 5.8.1 or later is required to use this.
342
343   attr_prefix
344       This option allows you to specify the prefix character(s) which are
345       inserted before each attribute name.
346
347           $tpp->set( attr_prefix => '@' );
348
349       The default character is '-'.  Or set '@' to access attribute values
350       like E4X, ECMAScript for XML.  Zero-length prefix '' is available as
351       well, it means no prefix is added.
352
353   text_node_key
354       This option allows you to specify a hash key for text nodes.
355
356           $tpp->set( text_node_key => '#text' );
357
358       The default key is "#text".
359
360   ignore_error
361       This module calls Carp::croak function on an error per default.  This
362       option makes all errors ignored and just returns.
363
364           $tpp->set( ignore_error => 1 );
365
366   use_ixhash
367       This option keeps the order for each element appeared in XML.
368       Tie::IxHash module is required.
369
370           $tpp->set( use_ixhash => 1 );
371
372       This makes parsing performance slow.  (about 100% slower than default)
373

AUTHOR

375       Yusuke Kawasaki, http://www.kawa.net/
376

COPYRIGHT AND LICENSE

378       Copyright (c) 2006-2009 Yusuke Kawasaki. All rights reserved.  This
379       program is free software; you can redistribute it and/or modify it
380       under the same terms as Perl itself.
381
382
383
384perl v5.12.1                      2009-06-30                    XML::TreePP(3)
Impressum