""" Markup Example 11
Defines the method getLinkFromXhtml() which is later used to return the href
for a given regular expression, if it exists.
(Includes the code from markup example 07)
author: Julian Harty
edited: 03 December 2007
"""
import re
import sys
import urllib
import BeautifulSoup
from xml.dom import minidom
def getLinkFromXhtml(content, text_regex):
    """Return the href of the first link whose text matches text_regex.

    The content is parsed with xml.dom.minidom. <a> elements are examined in
    the order getElementsByTagName() returns them, and elements without an
    href attribute are skipped. Only text nodes that are direct children of
    the <a> element contribute to the text being matched; text nested inside
    child elements (e.g. <b>) is not included.

    Args:
      content: the source content e.g. an xHTML response.
      text_regex: the link text to match, as a regular expression. It is
        applied with search(), so it may match anywhere in the link text.

    Returns:
      The value of the href attribute of the first matching link, else None.
    """
    doc = minidom.parseString(content)
    links = doc.getElementsByTagName('a')
    rx = re.compile(text_regex)
    for anchor in links:
        if not anchor.hasAttribute('href'):
            continue
        # Concatenate the anchor's own text nodes, skipping child elements.
        text = "".join(child.data for child in anchor.childNodes
                       if child.nodeType == child.TEXT_NODE)
        if rx.search(text):
            # Return the attribute value itself, as documented, rather than
            # the serialized <a> element.
            return anchor.getAttribute('href')
    return None
# Fetch the page, asking for XHTML and sending a Nokia 6230 User-Agent string.
request = urllib.FancyURLopener()
request.addheader('Accept', 'application/xhtml+xml')
request.addheader('User-Agent',
                  'Nokia6230/2.0+(04.43)+Profile/MIDP-2.0+'
                  'Configuration/CLDC-1.1+UP.Link/6.3.0.0.0')
response = request.open("http://www.google.co.uk/m")
try:
    content = response.read()
finally:
    # Close the response even if the read fails.
    response.close()

# Parenthesised so the line parses both as a print statement and as a call;
# the output for a single value is the same.
print(getLinkFromXhtml(content, 'Maps with My Location'))

soup = BeautifulSoup.BeautifulSoup(content)
#print soup.prettify()

# write the content to a file so it can be displayed in a browser
# NOTE(review): despite the "prettified" file name, the raw response is
# written here, not soup.prettify() -- confirm which was intended.
f = open("markup_ex11b_prettified.xml", "wb")
try:
    f.write(content)
finally:
    # Make sure the file handle is released even if the write fails.
    f.close()
# (Residue from the blog page this script was copied from; not part of the code.)
# Monday, 3 December 2007
# Subscribe to:
# Post Comments (Atom)
# No comments:
# Post a Comment