PYTHON The XMLReader class

is this article helpful?
|
Python replacement for PHP's The XMLReader class [ edit | history ]
Module libxml2

http://dev.laptop.org/~cscott/joyride-api/libxml2-module.html

Below is just a sample for those who like the PHP style. The code is untested; use it at your own risk.
import libxml2

class XMLReader(object):
    """PHP-style ``XMLReader`` facade over a libxml2 text reader.

    After ``open()`` or ``xml()`` succeeds, the PHP method names listed in
    ``_METHOD_MAP`` are bound on the instance as direct references to the
    corresponding libxml2 reader methods.  The PHP read-only properties
    listed in ``_PROPERTY_MAP`` are plain instance attributes that are
    refreshed by ``read()`` only; the delegated cursor-moving methods
    (``next``, ``moveToAttribute`` ...) do not refresh them.
    """

    # PHP method name -> libxml2 reader method name.
    # NOTE(review): in the libxml2 Python bindings RelaxNGSetSchema and
    # SetSchema appear to expect a pre-parsed schema object, not the
    # filename/source string PHP's methods take -- confirm; RelaxNGValidate
    # and SchemaValidate may be the filename-based equivalents.
    _METHOD_MAP = {
        'close': 'Close',
        'expand': 'Expand',
        'getAttribute': 'GetAttribute',
        'getAttributeNo': 'GetAttributeNo',
        'getAttributeNs': 'GetAttributeNs',
        'getParserProperty': 'GetParserProp',
        'isValid': 'IsValid',
        'lookupNamespace': 'LookupNamespace',
        'moveToAttribute': 'MoveToAttribute',
        'moveToAttributeNo': 'MoveToAttributeNo',
        'moveToAttributeNs': 'MoveToAttributeNs',
        'moveToElement': 'MoveToElement',
        'moveToFirstAttribute': 'MoveToFirstAttribute',
        'moveToNextAttribute': 'MoveToNextAttribute',
        'next': 'Next',
        'readInnerXML': 'ReadInnerXml',
        'readOuterXML': 'ReadOuterXml',
        'readString': 'ReadString',
        'setParserProperty': 'SetParserProp',
        'setRelaxNGSchema': 'RelaxNGSetSchema',
        'setRelaxNGSchemaSource': 'RelaxNGSetSchema',
        'setSchema': 'SetSchema',
    }

    # PHP property name -> libxml2 reader accessor method name.
    _PROPERTY_MAP = {
        'attributeCount': 'AttributeCount',
        'baseURI': 'BaseUri',
        'depth': 'Depth',
        'localName': 'LocalName',
        'name': 'Name',
        'nodeType': 'NodeType',
        'prefix': 'Prefix',
        'value': 'Value',
        'namespaceURI': 'NamespaceUri',
        'xmlLang': 'XmlLang',
        'hasAttributes': 'HasAttributes',
        'hasValue': 'HasValue',
        'isDefault': 'IsDefault',
        'isEmptyElement': 'IsEmptyElement',
    }

    def __init__(self):
        """Create a reader with no document attached and default properties."""
        self.attributeCount = self.depth = 0
        self.baseURI = self.localName = self.name = self.namespaceURI = self.nodeType = self.prefix = self.value = self.xmlLang = ''
        self.hasAttributes = self.hasValue = self.isDefault = self.isEmptyElement = False
        # Underlying libxml2 reader; None until open() or xml() succeeds.
        self.xmlDoc = None

    def __createFunctions(self):
        """Bind every PHP-style method name to the current reader's method."""
        for php_name, lib_name in self._METHOD_MAP.items():
            setattr(self, php_name, getattr(self.xmlDoc, lib_name))

    def open(self, str_URI, str_encoding='', int_options=0):
        """Attach the reader to the document at *str_URI*.

        Returns True on success.  If libxml2 raises ``treeError`` the error
        is printed and False is returned.
        """
        try:
            # An empty encoding is passed as None so libxml2 is not handed
            # an empty encoding name.
            self.xmlDoc = libxml2.readerForFile(str_URI, str_encoding or None, int_options)
        except libxml2.treeError as e:
            print(e)
            return False
        self.__createFunctions()
        return True

    def read(self):
        """Advance to the next node and refresh the PHP-style properties.

        Returns True when a node was read.  Returns False at the end of the
        document, on a reader error, or when no document has been opened
        (in which case a hint is printed as well).
        """
        if self.xmlDoc is None:
            print('Please use function open() to open a XML file')
            return False
        # Advance first so the properties describe the node just read, in
        # step with the delegated methods such as getAttribute().  Read()
        # yields 1 for a node, 0 at end of input and -1 on error; only 1
        # may be reported as success or callers would loop on errors.
        if self.xmlDoc.Read() != 1:
            return False
        for php_name, lib_name in self._PROPERTY_MAP.items():
            setattr(self, php_name, getattr(self.xmlDoc, lib_name)())
        return True

    def xml(self, str_source, str_encoding='', int_options=0):
        """Attach the reader to the XML text in *str_source*.

        Returns True on success.  If libxml2 raises ``treeError`` the error
        is printed and False is returned.
        """
        try:
            self.xmlDoc = libxml2.readerForDoc(str_source, None, str_encoding or None, int_options)
        except libxml2.treeError as e:
            print(e)
            return False
        # Bind the delegated methods here too, exactly as open() does.
        self.__createFunctions()
        return True
    
if __name__ == '__main__':
    # Sample documents to try; only f3 is used by the demo below.
    # The forum "[url]...[/url]" markup that wrapped these literals has been
    # removed so that each string is a URL the reader can be pointed at.
    f = 'http://php.net/manual/en/book.xmlreader.php'
    f1 = 'http://soap.amazon.com/schemas2/AmazonWebServices.wsdl'
    f2 = 'http://www.google.com/sitemap.xml'
    f3 = 'http://news.google.com/?output=rss'

    # Each line is annotated with the PHP statement it mirrors.
    x = XMLReader()           #php > $o = new XMLReader();
    if x.open(f3):          #php > if($o->open($f3)){
        while x.read():         #php > while($o->read()){
            print(x.name)           #php > echo $o->name;
            print(x.getAttribute('isPermaLink')) #php > echo $o->getAttribute('isPermaLink');
            print('=========')      #php > echo '=========' }}
    else:                   #php > else{
        print('Can not open file %s' % f3) # echo 'balah balah'}

PHP The XMLReader class

PHP original manual for The XMLReader class [ show | php.net ]

The XMLReader class

Introduction

The XMLReader extension is an XML Pull parser. The reader acts as a cursor going forward on the document stream and stopping at each node on the way.

Class synopsis

XMLReader
XMLReader {
/* Constants */
const int XMLReader::NONE = 0 ;
const int XMLReader::ELEMENT = 1 ;
const int XMLReader::ATTRIBUTE = 2 ;
const int XMLReader::TEXT = 3 ;
const int XMLReader::CDATA = 4 ;
const int XMLReader::ENTITY_REF = 5 ;
const int XMLReader::ENTITY = 6 ;
const int XMLReader::PI = 7 ;
const int XMLReader::COMMENT = 8 ;
const int XMLReader::DOC = 9 ;
const int XMLReader::DOC_TYPE = 10 ;
const int XMLReader::DOC_FRAGMENT = 11 ;
const int XMLReader::NOTATION = 12 ;
const int XMLReader::WHITESPACE = 13 ;
const int XMLReader::SIGNIFICANT_WHITESPACE = 14 ;
const int XMLReader::END_ELEMENT = 15 ;
const int XMLReader::END_ENTITY = 16 ;
const int XMLReader::XML_DECLARATION = 17 ;
const int XMLReader::LOADDTD = 1 ;
const int XMLReader::DEFAULTATTRS = 2 ;
const int XMLReader::VALIDATE = 3 ;
const int XMLReader::SUBST_ENTITIES = 4 ;
/* Properties */
public readonly int $attributeCount ;
public readonly string $baseURI ;
public readonly int $depth ;
public readonly bool $hasAttributes ;
public readonly bool $hasValue ;
public readonly bool $isDefault ;
public readonly bool $isEmptyElement ;
public readonly string $localName ;
public readonly string $name ;
public readonly string $namespaceURI ;
public readonly int $nodeType ;
public readonly string $prefix ;
public readonly string $value ;
public readonly string $xmlLang ;
/* Methods */
bool close ( void )
DOMNode expand ( void )
string getAttribute ( string $name )
string getAttributeNo ( int $index )
string getAttributeNs ( string $localName , string $namespaceURI )
bool getParserProperty ( int $property )
bool isValid ( void )
bool lookupNamespace ( string $prefix )
bool moveToAttribute ( string $name )
bool moveToAttributeNo ( int $index )
bool moveToAttributeNs ( string $localName , string $namespaceURI )
bool moveToElement ( void )
bool moveToFirstAttribute ( void )
bool moveToNextAttribute ( void )
bool next ([ string $localname ] )
bool open ( string $URI [, string $encoding [, int $options= 0 ]] )
bool read ( void )
string readInnerXML ( void )
string readOuterXML ( void )
string readString ( void )
bool setParserProperty ( int $property , bool $value )
bool setRelaxNGSchema ( string $filename )
bool setRelaxNGSchemaSource ( string $source )
bool setSchema ( string $filename )
bool xml ( string $source [, string $encoding [, int $options= 0 ]] )
}

Properties

attributeCount

The number of attributes on the node

baseURI

The base URI of the node

depth

Depth of the node in the tree, starting at 0

hasAttributes

Indicates if node has attributes

hasValue

Indicates if node has a text value

isDefault

Indicates if attribute is defaulted from DTD

isEmptyElement

Indicates if node is an empty element tag

localName

The local name of the node

name

The qualified name of the node

namespaceURI

The URI of the namespace associated with the node

nodeType

The node type for the node

prefix

The prefix of the namespace associated with the node

value

The text value of the node

xmlLang

The xml:lang scope within which the node resides

Predefined Constants

XMLReader Node Types

XMLReader::NONE

No node type

XMLReader::ELEMENT

Start element

XMLReader::ATTRIBUTE

Attribute node

XMLReader::TEXT

Text node

XMLReader::CDATA

CDATA node

XMLReader::ENTITY_REF

Entity Reference node

XMLReader::ENTITY

Entity Declaration node

XMLReader::PI

Processing Instruction node

XMLReader::COMMENT

Comment node

XMLReader::DOC

Document node

XMLReader::DOC_TYPE

Document Type node

XMLReader::DOC_FRAGMENT

Document Fragment node

XMLReader::NOTATION

Notation node

XMLReader::WHITESPACE

Whitespace node

XMLReader::SIGNIFICANT_WHITESPACE

Significant Whitespace node

XMLReader::END_ELEMENT

End Element

XMLReader::END_ENTITY

End Entity

XMLReader::XML_DECLARATION

XML Declaration node

XMLReader Parser Options

XMLReader::LOADDTD

Load DTD but do not validate

XMLReader::DEFAULTATTRS

Load DTD and default attributes but do not validate

XMLReader::VALIDATE

Load DTD and validate while parsing

XMLReader::SUBST_ENTITIES

Substitute entities and expand references

Table of Contents