Monday, 3 December 2007

""" Markup Example 12

Adding a user-agent string to emulate a Nokia 6230

author: Julian Harty
edited: 03 December 2007
"""
import urllib
import re
import amara


def getHrefFromXML(doc, search_regex):
  """Return the href of the first link whose text matches search_regex.

  Walks every div under doc.html.body and searches the first child of the
  div's `a` element with the compiled pattern. The first div that matches
  wins; later divs are not examined.

  Assumes the links are in the html body's div tags.

  Args:
    doc: an amara xml object exposing html.body.div as an iterable.
    search_regex: the regular expression to match in the link's first
      child (presumably its text node -- confirm against amara's binding).

  Returns:
    the matching anchor's href if the pattern is found, else None.
  """
  pattern = re.compile(search_regex)
  for item in doc.html.body.div:
    try:
      anchor = item.a
      if pattern.search(anchor.xml_children[0]):
        return anchor.href
    except (AttributeError, IndexError, TypeError):
      # Skip divs that cannot match: no <a> child or no href attribute
      # (AttributeError), an anchor with no children (IndexError), or a
      # first child that is not text (TypeError from the regex search).
      # Anything else is a real error and is allowed to propagate.
      continue

  return None

if __name__ == "__main__":
  # Fetch Google's mobile home page while presenting ourselves as a
  # Nokia 6230 handset, then look up the "Maps" link in the returned markup.
  request = urllib.FancyURLopener()
  request.addheader('Accept',
    'application/xhtml+xml')
  request.addheader('User-Agent',
    'Nokia6230/2.0+(04.43)'
    '+Profile/MIDP-2.0+Configuration'
    '/CLDC-1.1+UP.Link/6.3.0.0.0')
  response = request.open("http://www.google.co.uk/m")
  try:
    content = response.read()
  finally:
    # Always release the connection, even if the read fails. try/finally
    # is used because this urllib's response is not a context manager.
    response.close()

  # Use the live content
  doc = amara.parse(content)
  # Single-argument parenthesized prints produce the same output under
  # the Python 2 print statement this script was written for.
  print("should return: '/gmm?source=m&dc=mobile-promotion'")
  print(getHrefFromXML(doc, "Maps"))

No comments: