1XML::TreePP(3)        User Contributed Perl Documentation       XML::TreePP(3)
2
3
4

NAME

6       XML::TreePP -- Pure Perl implementation for parsing/writing XML
7       documents
8

SYNOPSIS

10       parse an XML document from file into hash tree:
11
12           use XML::TreePP;
13           my $tpp = XML::TreePP->new();
14           my $tree = $tpp->parsefile( "index.rdf" );
15           print "Title: ", $tree->{"rdf:RDF"}->{item}->[0]->{title}, "\n";
16           print "URL:   ", $tree->{"rdf:RDF"}->{item}->[0]->{link}, "\n";
17
18       write an XML document as string from hash tree:
19
20           use XML::TreePP;
21           my $tpp = XML::TreePP->new();
22           my $tree = { rss => { channel => { item => [ {
23               title   => "The Perl Directory",
24               link    => "http://www.perl.org/",
25           }, {
26               title   => "The Comprehensive Perl Archive Network",
27               link    => "http://cpan.perl.org/",
28           } ] } } };
29           my $xml = $tpp->write( $tree );
30           print $xml;
31
32       get a remote XML document by HTTP-GET and parse it into hash tree:
33
34           use XML::TreePP;
35           my $tpp = XML::TreePP->new();
36           my $tree = $tpp->parsehttp( GET => "http://use.perl.org/index.rss" );
37           print "Title: ", $tree->{"rdf:RDF"}->{channel}->{title}, "\n";
38           print "URL:   ", $tree->{"rdf:RDF"}->{channel}->{link}, "\n";
39
40       get a remote XML document by HTTP-POST and parse it into hash tree:
41
42           use XML::TreePP;
43           my $tpp = XML::TreePP->new( force_array => [qw( item )] );
44           my $cgiurl = "http://search.hatena.ne.jp/keyword";
45           my $keyword = "ajax";
46           my $cgiquery = "mode=rss2&word=".$keyword;
47           my $tree = $tpp->parsehttp( POST => $cgiurl, $cgiquery );
48           print "Link: ", $tree->{rss}->{channel}->{item}->[0]->{link}, "\n";
49           print "Desc: ", $tree->{rss}->{channel}->{item}->[0]->{description}, "\n";
50

DESCRIPTION

52       XML::TreePP module parses an XML document and expands it into a hash
53       tree.  It can also do the reverse and generate an XML document from a
54       hash tree.  This is a pure Perl implementation and does not depend on
55       any other modules.  It can also fetch and parse an XML document from a
56       remote web server, like the XMLHttpRequest object does in JavaScript.
57

EXAMPLES

59   Parse XML file
60       Sample XML document:
61
62           <?xml version="1.0" encoding="UTF-8"?>
63           <family name="Kawasaki">
64               <father>Yasuhisa</father>
65               <mother>Chizuko</mother>
66               <children>
67                   <girl>Shiori</girl>
68                   <boy>Yusuke</boy>
69                   <boy>Kairi</boy>
70               </children>
71           </family>
72
73       Sample program to read an XML file and dump it:
74
75           use XML::TreePP;
76           use Data::Dumper;
77           my $tpp = XML::TreePP->new();
78           my $tree = $tpp->parsefile( "family.xml" );
79           my $text = Dumper( $tree );
80           print $text;
81
82       Result dumped:
83
84           $VAR1 = {
85               'family' => {
86                   '-name' => 'Kawasaki',
87                   'father' => 'Yasuhisa',
88                   'mother' => 'Chizuko',
89                   'children' => {
90                       'girl' => 'Shiori',
91                       'boy' => [
92                           'Yusuke',
93                           'Kairi'
94                       ],
95                   }
96               }
97           };
98
99       Details:
100
101           print $tree->{family}->{father};        # the father's given name.
102
103       The prefix '-' is added to every attribute's name.
104
105           print $tree->{family}->{"-name"};       # the family name of the family
106
107       The array is used because the family has two boys.
108
109           print $tree->{family}->{children}->{boy}->[1];  # The second boy's name
110           print $tree->{family}->{children}->{girl};      # The girl's name
111
112   Text node and attributes:
113       If an element has both a text node and attributes, or both a text
114       node and other child nodes, the value of the text node is moved to
115       "#text" like child nodes.
116
117           use XML::TreePP;
118           use Data::Dumper;
119           my $tpp = XML::TreePP->new();
120           my $source = '<span class="author">Kawasaki Yusuke</span>';
121           my $tree = $tpp->parse( $source );
122           my $text = Dumper( $tree );
123           print $text;
124
125       The dumped result is as follows:
126
127           $VAR1 = {
128               'span' => {
129                   '-class' => 'author',
130                   '#text'  => 'Kawasaki Yusuke'
131               }
132           };
133
134       The special node name of "#text" is used because this element has
135       attribute(s) in addition to the text node.  See also "text_node_key"
136       option.
137

METHODS

139   new
140       This constructor method returns a new XML::TreePP object with %options.
141
142           $tpp = XML::TreePP->new( %options );
143
144   set
145       This method sets an option value for "option_name".  If $option_value
146       is not defined, the option is deleted.
147
148           $tpp->set( option_name => $option_value );
149
150       See OPTIONS section below for details.
151
152   get
153       This method returns a current option value for "option_name".
154
155           $tpp->get( 'option_name' );
156
157   parse
158       This method reads an XML document by string and returns a hash tree
159       converted.  The first argument is a scalar or a reference to a scalar.
160
161               $tree = $tpp->parse( $source );
162
163   parsefile
164       This method reads an XML document by file and returns a hash tree
165       converted.  The first argument is a filename.
166
167           $tree = $tpp->parsefile( $file );
168
169   parsehttp
170       This method receives an XML document from a remote server via HTTP and
171       returns a hash tree converted.
172
173           $tree = $tpp->parsehttp( $method, $url, $body, $head );
174
175       $method is a method of HTTP connection: GET/POST/PUT/DELETE.  $url is
176       the URI of an XML file.  $body is the request body when you use the
177       POST method.  $head is the request headers as a hash ref.  The
178       LWP::UserAgent module or HTTP::Lite module is required to fetch a file.
179
180           ( $tree, $xml, $code ) = $tpp->parsehttp( $method, $url, $body, $head );
181
182       In array context, this method also returns the raw XML document
183       received and the HTTP response's status code.
184
185   write
186       This method parses a hash tree and returns an XML document as a string.
187
188           $source = $tpp->write( $tree, $encode );
189
190       $tree is a reference to a hash tree.
191
192   writefile
193       This method parses a hash tree and writes an XML document into a file.
194
195           $tpp->writefile( $file, $tree, $encode );
196
197       $file is a filename to create.  $tree is a reference to a hash tree.
198

OPTIONS FOR PARSING XML

200       This module accepts the following option parameters:
201
202   force_array
203       This option allows you to specify a list of element names which should
204       always be forced into an array representation.
205
206           $tpp->set( force_array => [ 'rdf:li', 'item', '-xmlns' ] );
207
208       The default value is null, which means that the context of each
209       element determines whether it becomes an array or stays a scalar or
210       hash.  Note that the special wildcard name '*' means all elements.
211
212   force_hash
213       This option allows you to specify a list of element names which should
214       always be forced into a hash representation.
215
216           $tpp->set( force_hash => [ 'item', 'image' ] );
217
218       The default value is null, which means that the context of each
219       element determines whether it becomes a hash or stays a scalar text
220       node.  See also "text_node_key" option below.  Note that the special
221       wildcard name '*' means all elements.
222
223   cdata_scalar_ref
224       This option allows you to convert a cdata section into a reference to
225       a scalar when parsing an XML document.
226
227           $tpp->set( cdata_scalar_ref => 1 );
228
229       The default value is false, which means that each cdata section is
230       converted into a scalar.
231
232   user_agent
233       This option allows you to specify a HTTP_USER_AGENT string which is
234       used by parsehttp() method.
235
236           $tpp->set( user_agent => 'Mozilla/4.0 (compatible; ...)' );
237
238       The default string is 'XML-TreePP/#.##', where '#.##' is substituted
239       with the version number of this library.
240
241   http_lite
242       This option forces parsehttp() method to use a HTTP::Lite instance.
243
244           my $http = HTTP::Lite->new();
245           $tpp->set( http_lite => $http );
246
247   lwp_useragent
248       This option forces parsehttp() method to use a LWP::UserAgent instance.
249
250           my $ua = LWP::UserAgent->new();
251           $ua->timeout( 60 );
252           $ua->env_proxy;
253           $tpp->set( lwp_useragent => $ua );
254
255       You may use this with LWP::UserAgent::WithCache.
256
257   base_class
258       This blesses class name for each element's hashref.  Each class is
259       named straight as a child class of its parent class.
260
261           $tpp->set( base_class => 'MyElement' );
262           my $xml  = '<root><parent><child key="val">text</child></parent></root>';
263           my $tree = $tpp->parse( $xml );
264           print ref $tree->{root}->{parent}->{child}, "\n";
265
266       A hash for <child> element above is blessed to
267       "MyElement::root::parent::child" class. You may use this with
268       Class::Accessor.
269
270   elem_class
271       This blesses class name for each element's hashref.  Each class is
272       named horizontally under the direct child of "MyElement".
273
274           $tpp->set( elem_class => 'MyElement' );
275           my $xml  = '<root><parent><child key="val">text</child></parent></root>';
276           my $tree = $tpp->parse( $xml );
277           print ref $tree->{root}->{parent}->{child}, "\n";
278
279       A hash for <child> element above is blessed to "MyElement::child"
280       class.
281
282   xml_deref
283       This option dereferences the numeric character references, like &#xEB;,
284       &#28450;, etc., in an XML document when this value is true.
285
286           $tpp->set( xml_deref => 1 );
287
288       Note that, for security reasons and your convenience, this module
289       dereferences the predefined character entity references, &amp;, &lt;,
290       &gt;, &apos; and &quot;, and the numeric character references up to
291       U+007F without xml_deref per default.
292
293   require_xml_decl
294       This option requires XML declaration at the top of XML document to
295       parse.
296
297           $tpp->set( require_xml_decl => 1 );
298
299       This will die when the <?xml ... ?> declaration is not found.
300

OPTIONS FOR WRITING XML

302   first_out
303       This option allows you to specify a list of element/attribute names
304       which should always appear first in the output XML document.
305
306           $tpp->set( first_out => [ 'link', 'title', '-type' ] );
307
308       The default value is null, which means alphabetical order is used.
309
310   last_out
311       This option allows you to specify a list of element/attribute names
312       which should always appear last in the output XML document.
313
314           $tpp->set( last_out => [ 'items', 'item', 'entry' ] );
315
316   indent
317       This makes the output more human readable by indenting appropriately.
318
319           $tpp->set( indent => 2 );
320
321       This doesn't strictly follow the XML specification but does look nice.
322
323   xml_decl
324       This module inserts an XML declaration on top of the XML document
325       generated per default. This option lets you replace it with another
326       declaration or remove it entirely.
327
328           $tpp->set( xml_decl => '' );
329
330   output_encoding
331       This option allows you to specify the encoding of the XML document
332       generated by write/writefile methods.
333
334           $tpp->set( output_encoding => 'UTF-8' );
335
336       On Perl 5.8.0 and later, you can select any encoding supported by
337       Encode.pm. On Perl 5.6.x and before with Jcode.pm, you can use
338       "Shift_JIS", "EUC-JP", "ISO-2022-JP" and "UTF-8". The default value is
339       "UTF-8", which is the recommended encoding.
340
341   empty_element_tag_end
342           $tpp->set( empty_element_tag_end => '>' );
343
344       Set characters which close empty tag. The default value is ' />'.
345

OPTIONS FOR BOTH

347   utf8_flag
348       This turns the utf8 flag on for every element's value parsed and turns
349       it on for the XML document generated as well.
350
351           $tpp->set( utf8_flag => 1 );
352
353       Perl 5.8.1 or later is required to use this.
354
355   attr_prefix
356       This option allows you to specify the prefix character(s) inserted
357       before each attribute name.
358
359           $tpp->set( attr_prefix => '@' );
360
361       The default character is '-'.  Or set '@' to access attribute values
362       like E4X, ECMAScript for XML.  A zero-length prefix '' is available as
363       well, which means no prefix is added.
364
365   text_node_key
366       This option allows you to specify a hash key for text nodes.
367
368           $tpp->set( text_node_key => '#text' );
369
370       The default key is "#text".
371
372   ignore_error
373       This module calls the Carp::croak function on an error per default.
374       This option causes all errors to be ignored; the method just returns.
375
376           $tpp->set( ignore_error => 1 );
377
378   use_ixhash
379       This option preserves the order in which elements appear in the XML.
380       Tie::IxHash module is required.
381
382           $tpp->set( use_ixhash => 1 );
383
384       This makes parsing performance slow.  (about 100% slower than default)
385

AUTHOR

387       Yusuke Kawasaki, http://www.kawa.net/
388

REPOSITORY

390       https://github.com/kawanet/XML-TreePP
391

COPYRIGHT

393       The following copyright notice applies to all the files provided in
394       this distribution, including binary files, unless explicitly noted
395       otherwise.
396
397       Copyright 2006-2010 Yusuke Kawasaki
398

LICENSE

400       This library is free software; you can redistribute it and/or modify it
401       under the same terms as Perl itself.
402
403
404
405perl v5.36.0                      2022-07-22                    XML::TreePP(3)
Impressum