< Python Programming
Introduction
Python includes several modules for manipulating xml.
- TODO:Must include parenthesis for "print" in(python 3.8)
xml.sax.handler
import xml.sax.handler as saxhandler
import xml.sax as saxparser
class MyReport:
def __init__(self):
self.Y = 1
class MyCH(saxhandler.ContentHandler):
def __init__(self, report):
self.X = 1
self.report = report
def startDocument(self):
print 'startDocument'
def startElement(self, name, attrs):
print 'Element:', name
report = MyReport() #for future use
ch = MyCH(report)
xml = """\
<collection>
<comic title=\"Sandman\" number='62'>
<writer>Neil Gaiman</writer>
<penciller pages='1-9,18-24'>Glyn Dillon</penciller>
<penciller pages="10-17">Charles Vess</penciller>
</comic>
</collection>
"""
print xml
saxparser.parseString(xml, ch)
xml.dom.minidom
An example of doing RSS feed parsing with DOM
from xml.dom import minidom as dom
import urllib2
def fetchPage(url):
a = urllib2.urlopen(url)
return ''.join(a.readlines())
def extract(page):
a = dom.parseString(page)
item = a.getElementsByTagName('item')
for i in item:
if i.hasChildNodes():
t = i.getElementsByTagName('title')[0].firstChild.wholeText
l = i.getElementsByTagName('link')[0].firstChild.wholeText
d = i.getElementsByTagName('description')[0].firstChild.wholeText
print t, l, d
if __name__=='__main__':
page = fetchPage("http://rss.slashdot.org/Slashdot/slashdot")
extract(page)
XML document provided by pyxml documentation.
This article is issued from Wikibooks. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.