1XML::LibXML::Document(3)  User Contributed Perl Documentation  XML::LibXML::Document(3)
2
3
4

NAME

6       XML::LibXML::Document - XML::LibXML DOM Document Class
7

SYNOPSIS

9         use XML::LibXML;
10         # Only methods specific to Document nodes are listed here,
11         # see the XML::LibXML::Node manpage for other methods
12
13         $dom = XML::LibXML::Document->new( $version, $encoding );
14         $dom = XML::LibXML::Document->createDocument( $version, $encoding );
15         $strURI = $doc->URI();
16         $doc->setURI($strURI);
17         $strEncoding = $doc->encoding();
18         $strEncoding = $doc->actualEncoding();
19         $doc->setEncoding($new_encoding);
20         $strVersion = $doc->version();
21         $doc->standalone
22         $doc->setStandalone($numvalue);
23         my $compression = $doc->compression;
24         $doc->setCompression($ziplevel);
25         $docstring = $dom->toString($format);
26         $c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
27         $ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
28         $str = $doc->serialize($format);
29         $state = $doc->toFile($filename, $format);
30         $state = $doc->toFH($fh, $format);
31         $str = $document->toStringHTML();
32         $str = $document->serialize_html();
33         $bool = $dom->is_valid();
34         $dom->validate();
35         $root = $dom->documentElement();
36         $dom->setDocumentElement( $root );
37         $element = $dom->createElement( $nodename );
38         $element = $dom->createElementNS( $namespaceURI, $nodename );
39         $text = $dom->createTextNode( $content_text );
40         $comment = $dom->createComment( $comment_text );
41         $attrnode = $doc->createAttribute($name [,$value]);
42         $attrnode = $doc->createAttributeNS( $namespaceURI, $name [,$value] );
43         $fragment = $doc->createDocumentFragment();
44         $cdata = $dom->createCDATASection( $cdata_content );
45         my $pi = $doc->createProcessingInstruction( $target, $data );
46         my $entref = $doc->createEntityReference($refname);
47         $dtd = $document->createInternalSubset( $rootnode, $public, $system);
48         $dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
49         $document->importNode( $node );
50         $document->adoptNode( $node );
51         my $dtd = $doc->externalSubset;
52         my $dtd = $doc->internalSubset;
53         $doc->setExternalSubset($dtd);
54         $doc->setInternalSubset($dtd);
55         my $dtd = $doc->removeExternalSubset();
56         my $dtd = $doc->removeInternalSubset();
57         my @nodelist = $doc->getElementsByTagName($tagname);
58         my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
59         my @nodelist = $doc->getElementsByLocalName($localname);
60         my $node = $doc->getElementById($id);
61         $dom->indexElements();
62

DESCRIPTION

64       The Document Class is in most cases the result of a parsing process.
65       But sometimes it is necessary to create a Document from scratch. The
66       DOM Document Class provides functions that conform to the DOM Core
67       naming style.
68
69       It inherits all functions from XML::LibXML::Node as specified in the
70       DOM specification. This enables access to the nodes besides the root
71       element on document level - a "DTD" for example. The support for these
72       nodes is limited at the moment.
73
74       While generally nodes are bound to a document in the DOM concept it is
75       suggested that one should always create a node not bound to any
76       document. There is no need of really including the node to the
77       document, but once the node is bound to a document, it is quite safe
78       that all strings have the correct encoding. If an unbound text node
79       with an ISO encoded string is created (e.g.  with $CLASS->new()), the
80       "toString" function may not return the expected result.
81
82       To prevent such problems, it is recommended to pass all data to
83       XML::LibXML methods as character strings (i.e. UTF-8 encoded, with the
84       UTF8 flag on).
85

METHODS

87       Many functions listed here are extensively documented in the DOM Level
88       3 specification (<http://www.w3.org/TR/DOM-Level-3-Core/>). Please
89       refer to the specification for extensive documentation.
90
91       new
92             $dom = XML::LibXML::Document->new( $version, $encoding );
93
94           alias for createDocument()
95
96       createDocument
97             $dom = XML::LibXML::Document->createDocument( $version, $encoding );
98
99           The constructor for the document class. As parameters it takes the
100           version string and (optionally) the encoding string. Simply calling
101           createDocument() will create the document:
102
103             <?xml version="your version" encoding="your encoding"?>
104
105           Both parameters are optional. The default value for $version is 1.0,
106           of course. If the $encoding parameter is not set, the encoding will
107           be left unset, which means UTF-8 is implied.
108
109           Calling createDocument() without any parameters will result in the
110           following code:
111
112             <?xml version="1.0"?>
113
114           Alternatively one can call this constructor directly from the
115           XML::LibXML class level, to avoid some typing. This will not have
116           any effect on the class instance, which is always
117           XML::LibXML::Document.
118
119             my $document = XML::LibXML->createDocument( "1.0", "UTF-8" );
120
121           is therefore a shortcut for
122
123             my $document = XML::LibXML::Document->createDocument( "1.0", "UTF-8" );
124
125       URI
126             $strURI = $doc->URI();
127
128           Returns the URI (or filename) of the original document. For
129           documents obtained by parsing a string or a FH without using the
130           URI parsing argument of the corresponding "parse_*" function, the
131           result is a generated string unknown-XYZ where XYZ is some number;
132           for documents created with the constructor "new", the URI is
133           undefined.
134
135           The value can be modified by calling "setURI" method on the
136           document node.
137
138       setURI
139             $doc->setURI($strURI);
140
141           Sets the URI of the document reported by the method URI (see also
142           the URI argument to the various "parse_*" functions).
143
144       encoding
145             $strEncoding = $doc->encoding();
146
147           returns the encoding string of the document.
148
149             my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
150             print $doc->encoding; # prints ISO-8859-15
151
152       actualEncoding
153             $strEncoding = $doc->actualEncoding();
154
155           returns the encoding in which the XML will be returned by
156           $doc->toString().  This is usually the original encoding of the
157           document as declared in the XML declaration and returned by
158           $doc->encoding. If the original encoding is not known (e.g. if
159           created in memory or parsed from a XML without a declared
160           encoding), 'UTF-8' is returned.
161
162             my $doc = XML::LibXML->createDocument( "1.0", "ISO-8859-15" );
163             print $doc->actualEncoding; # prints ISO-8859-15
164
165       setEncoding
166             $doc->setEncoding($new_encoding);
167
168           This method allows one to change the declaration of encoding in the
169           XML declaration of the document. The value also affects the
170           encoding in which the document is serialized to XML by
171           $doc->toString(). Use setEncoding() to remove the encoding
172           declaration.
173
174       version
175             $strVersion = $doc->version();
176
177           returns the version string of the document
178
179           getVersion() is an alternative form of this function.
180
181       standalone
182             $doc->standalone
183
184           This function returns the numerical value of a document's XML
185           declaration's standalone attribute. It returns 1 if standalone="yes"
186           was found, 0 if standalone="no" was found and -1 if standalone was
187           not specified (default on creation).
188
189       setStandalone
190             $doc->setStandalone($numvalue);
191
192           Through this method it is possible to alter the value of a
193           document's standalone attribute. Set it to 1 to set
194           standalone="yes", to 0 to set standalone="no" or set it to -1 to
195           remove the standalone attribute from the XML declaration.
196
197       compression
198             my $compression = $doc->compression;
199
200           libxml2 allows reading of documents directly from gzipped files. In
201           this case the compression variable is set to the compression level
202           of that file (0-8). If XML::LibXML parsed a different source or the
203           file wasn't compressed, the returned value will be -1.
204
205       setCompression
206             $doc->setCompression($ziplevel);
207
208           If one intends to write the document directly to a file, it is
209           possible to set the compression level for a given document. This
210           level can be in the range from 0 to 8. If XML::LibXML should not
211           try to compress use -1 (default).
212
213           Note that this feature will only work if libxml2 is compiled with
214           zlib support and toFile() is used for output.
215
216       toString
217             $docstring = $dom->toString($format);
218
219           toString is a DOM serializing function, so the DOM Tree is
220           serialized into an XML string, ready for output.
221
222           IMPORTANT: unlike toString for other nodes, on document nodes this
223           function returns the XML as a byte string in the original encoding
224           of the document (see the actualEncoding() method)! This means you
225           can simply do:
226
227             open my $out_fh, '>', $file;
228             print {$out_fh} $doc->toString;
229
230           regardless of the actual encoding of the document. See the section
231           on encodings in XML::LibXML for more details.
232
233           The optional $format parameter sets the indenting of the output.
234           This parameter is expected to be an "integer" value, that specifies
235           that indentation should be used. The format parameter can have
236           three different values if it is used:
237
238           If $format is 0, then the document is dumped as it was originally
239           parsed
240
241           If $format is 1, libxml2 will add ignorable white spaces, so the
242           nodes content is easier to read. Existing text nodes will not be
243           altered
244
245           If $format is 2 (or higher), libxml2 will act as $format == 1 but
246           it adds a leading and a trailing line break to each text node.
247
248           libxml2 uses a hard-coded indentation of 2 space characters per
249           indentation level. This value cannot be altered at run-time.
250
251       toStringC14N
252             $c14nstr = $doc->toStringC14N($comment_flag, $xpath [, $xpath_context ]);
253
254           See the documentation in XML::LibXML::Node.
255
256       toStringEC14N
257             $ec14nstr = $doc->toStringEC14N($comment_flag, $xpath [, $xpath_context ], $inclusive_prefix_list);
258
259           See the documentation in XML::LibXML::Node.
260
261       serialize
262             $str = $doc->serialize($format);
263
264           An alias for toString(). This function name was added to be more
265           consistent with libxml2.
266
267       serialize_c14n
268           An alias for toStringC14N().
269
270       serialize_exc_c14n
271           An alias for toStringEC14N().
272
273       toFile
274             $state = $doc->toFile($filename, $format);
275
276           This function is similar to toString(), but it writes the document
277           directly into a filesystem. This function is very useful, if one
278           needs to store large documents.
279
280           The format parameter has the same behaviour as in toString().
281
282       toFH
283             $state = $doc->toFH($fh, $format);
284
285           This function is similar to toString(), but it writes the document
286           directly to a filehandle or a stream. A byte stream in the document
287           encoding is passed to the file handle. Do NOT apply any
288           ":encoding(...)" or ":utf8" PerlIO layer to the filehandle! See the
289           section on encodings in XML::LibXML for more details.
290
291           The format parameter has the same behaviour as in toString().
292
293       toStringHTML
294             $str = $document->toStringHTML();
295
296           toStringHTML serializes the tree to a byte string in the document
297           encoding as HTML. With this method indenting is automatic and
298           managed by libxml2 internally.
299
300       serialize_html
301             $str = $document->serialize_html();
302
303           An alias for toStringHTML().
304
305       is_valid
306             $bool = $dom->is_valid();
307
308           Returns either TRUE or FALSE depending on whether the DOM Tree is a
309           valid Document or not.
310
311           You may also pass in a XML::LibXML::Dtd object, to validate against
312           an external DTD:
313
314             if (!$dom->is_valid($dtd)) {
315                  warn("document is not valid!");
316              }
317
318       validate
319             $dom->validate();
320
321           This is an exception throwing equivalent of is_valid. If the
322           document is not valid it will throw an exception containing the
323           error. This allows you much better error reporting than simply
324           is_valid or not.
325
326           Again, you may pass in a DTD object
327
328       documentElement
329             $root = $dom->documentElement();
330
331           Returns the root element of the Document. A document can have just
332           one root element to contain the document's data.
333
334           Optionally one can use getDocumentElement.
335
336       setDocumentElement
337             $dom->setDocumentElement( $root );
338
339           This function enables you to set the root element for a document.
340           The function supports the import of a node from a different
341           document tree, but does not support a document fragment as $root.
342
343       createElement
344             $element = $dom->createElement( $nodename );
345
346           This function creates a new Element Node bound to the DOM with the
347           name $nodename.
348
349       createElementNS
350             $element = $dom->createElementNS( $namespaceURI, $nodename );
351
352           This function creates a new Element Node bound to the DOM with the
353           name $nodename and placed in the given namespace.
354
355       createTextNode
356             $text = $dom->createTextNode( $content_text );
357
358           As an equivalent of createElement, but it creates a Text Node bound
359           to the DOM.
360
361       createComment
362             $comment = $dom->createComment( $comment_text );
363
364           As an equivalent of createElement, but it creates a Comment Node
365           bound to the DOM.
366
367       createAttribute
368             $attrnode = $doc->createAttribute($name [,$value]);
369
370           Creates a new Attribute node.
371
372       createAttributeNS
373             $attrnode = $doc->createAttributeNS( $namespaceURI, $name [,$value] );
374
375           Creates an Attribute bound to a namespace.
376
377       createDocumentFragment
378             $fragment = $doc->createDocumentFragment();
379
380           This function creates a DocumentFragment.
381
382       createCDATASection
383             $cdata = $dom->createCDATASection( $cdata_content );
384
385           Similar to createTextNode and createComment, this function creates
386           a CDataSection bound to the current DOM.
387
388       createProcessingInstruction
389             my $pi = $doc->createProcessingInstruction( $target, $data );
390
391           create a processing instruction node.
392
393           Since this method is quite long one may use its short form
394           createPI().
395
396       createEntityReference
397             my $entref = $doc->createEntityReference($refname);
398
399           If a document has a DTD specified, one can create entity references
400           by using this function. If one wants to add an entity reference to
401           the document, this reference has to be created by this function.
402
403           An entity reference is unique to a document and cannot be passed to
404           other documents as other nodes can be passed.
405
406           NOTE: A text content containing something that looks like an entity
407           reference, will not be expanded to a real entity reference unless
408           it is a predefined entity
409
410             my $string = "&foo;";
411              $some_element->appendText( $string );
412              print $some_element->textContent; # prints "&amp;foo;"
413
414       createInternalSubset
415             $dtd = $document->createInternalSubset( $rootnode, $public, $system);
416
417           This function creates and adds an internal subset to the given
418           document.  Because the function automatically adds the DTD to the
419           document there is no need to add the created node explicitly to the
420           document.
421
422             my $document = XML::LibXML::Document->new();
423              my $dtd      = $document->createInternalSubset( "foo", undef, "foo.dtd" );
424
425           will result in the following XML document:
426
427             <?xml version="1.0"?>
428              <!DOCTYPE foo SYSTEM "foo.dtd">
429
430           By setting the public parameter it is possible to set PUBLIC DTDs
431           to a given document. So
432
433             my $document = XML::LibXML::Document->new();
434             my $dtd      = $document->createInternalSubset( "foo", "-//FOO//DTD FOO 0.1//EN", undef );
435
436           will cause the following declaration to be created on the document:
437
438             <?xml version="1.0"?>
439             <!DOCTYPE foo PUBLIC "-//FOO//DTD FOO 0.1//EN">
440
441       createExternalSubset
442             $dtd = $document->createExternalSubset( $rootnode_name, $publicId, $systemId);
443
444           This function is similar to "createInternalSubset()" but this DTD
445           is considered to be external and is therefore not added to the
446           document itself. Nevertheless it can be used for validation
447           purposes.
448
449       importNode
450             $document->importNode( $node );
451
452           If a node is not part of a document, it can be imported to another
453           document. As specified in DOM Level 2 Specification the Node will
454           not be altered or removed from its original document
455           ("$node->cloneNode(1)" will get called implicitly).
456
457           NOTE: Don't try to use importNode() to import sub-trees that
458           contain an entity reference - even if the entity reference is the
459           root node of the sub-tree. This will cause serious problems to your
460           program. This is a limitation of libxml2 and not of XML::LibXML
461           itself.
462
463       adoptNode
464             $document->adoptNode( $node );
465
466           If a node is not part of a document, it can be imported to another
467           document. As specified in DOM Level 3 Specification the Node will
468           not be altered but it will be removed from its original document.
469
470           After a document adopted a node, the node, its attributes and all
471           its descendants belong to the new document. Because the node does
472           not belong to the old document, it will be unlinked from its old
473           location first.
474
475           NOTE: Don't try to use adoptNode() to import sub-trees that contain
476           entity references - even if the entity reference is the root node
477           of the sub-tree. This will cause serious problems to your program.
478           This is a limitation of libxml2 and not of XML::LibXML itself.
479
480       externalSubset
481             my $dtd = $doc->externalSubset;
482
483           If a document has an external subset defined it will be returned by
484           this function.
485
486           NOTE: Dtd nodes are not ordinary nodes in libxml2. The support for
487           these nodes in XML::LibXML is still limited. In particular one may
488           not want to use common node functions on doctype declaration nodes!
489
490       internalSubset
491             my $dtd = $doc->internalSubset;
492
493           If a document has an internal subset defined it will be returned by
494           this function.
495
496           NOTE: Dtd nodes are not ordinary nodes in libxml2. The support for
497           these nodes in XML::LibXML is still limited. In particular one may
498           not want to use common node functions on doctype declaration nodes!
499
500       setExternalSubset
501             $doc->setExternalSubset($dtd);
502
503           EXPERIMENTAL!
504
505           This method sets a DTD node as an external subset of the given
506           document.
507
508       setInternalSubset
509             $doc->setInternalSubset($dtd);
510
511           EXPERIMENTAL!
512
513           This method sets a DTD node as an internal subset of the given
514           document.
515
516       removeExternalSubset
517             my $dtd = $doc->removeExternalSubset();
518
519           EXPERIMENTAL!
520
521           If a document has an external subset defined it can be removed from
522           the document by using this function. The removed dtd node will be
523           returned.
524
525       removeInternalSubset
526             my $dtd = $doc->removeInternalSubset();
527
528           EXPERIMENTAL!
529
530           If a document has an internal subset defined it can be removed from
531           the document by using this function. The removed dtd node will be
532           returned.
533
534       getElementsByTagName
535             my @nodelist = $doc->getElementsByTagName($tagname);
536
537           Implements the DOM Level 2 function
538
539           In SCALAR context this function returns an XML::LibXML::NodeList
540           object.
541
542       getElementsByTagNameNS
543             my @nodelist = $doc->getElementsByTagNameNS($nsURI,$tagname);
544
545           Implements the DOM Level 2 function
546
547           In SCALAR context this function returns an XML::LibXML::NodeList
548           object.
549
550       getElementsByLocalName
551             my @nodelist = $doc->getElementsByLocalName($localname);
552
553           This allows the fetching of all nodes from a given document with
554           the given Localname.
555
556           In SCALAR context this function returns an XML::LibXML::NodeList
557           object.
558
559       getElementById
560             my $node = $doc->getElementById($id);
561
562           Returns the element that has an ID attribute with the given value.
563           If no such element exists, this returns undef.
564
565           Note: the ID of an element may change while manipulating the
566           document. For documents with a DTD, the information about ID
567           attributes is only available if DTD loading/validation has been
568           requested. For HTML documents parsed with the HTML parser ID
569           detection is done automatically. In XML documents, all "xml:id"
570           attributes are considered to be of type ID. You can test ID-ness of
571           an attribute node with $attr->isId().
572
573           In versions 1.59 and earlier this method was called
574           getElementsById() (plural) by mistake. Starting from 1.60 this name
575           is maintained as an alias only for backward compatibility.
576
577       indexElements
578             $dom->indexElements();
579
580           This function causes libxml2 to stamp all elements in a document
581           with their document position index which considerably speeds up
582           XPath queries for large documents. It should only be used with
583           static documents that won't be further changed by any DOM methods,
584           because once a document is indexed, XPath will always prefer the
585           index to other methods of determining the document order of nodes.
586           XPath could therefore return improperly ordered node-lists when
587           applied on a document that has been changed after being indexed. It
588           is of course possible to use this method to re-index a modified
589           document before using it with XPath again. This function is not a
590           part of the DOM specification.
591
592           This function returns the number of elements indexed, -1 if an error
593           occurred, or -2 if this feature is not available in the running
594           libxml2.
595

AUTHORS

597       Matt Sergeant, Christian Glahn, Petr Pajas
598

VERSION

600       2.0207
601

COPYRIGHT

603       2001-2007, AxKit.com Ltd.
604
605       2002-2006, Christian Glahn.
606
607       2006-2009, Petr Pajas.
608

LICENSE

610       This program is free software; you can redistribute it and/or modify it
611       under the same terms as Perl itself.
612
613
614
615perl v5.34.0                      2021-07-23          XML::LibXML::Document(3)
Impressum