Monday, 3 December 2007

Markup exercise 12 using cached file

""" Markup Example 12 local

Adding a user-agent string to emulate a Nokia 6230
Reads the content from a cached file

author: Julian Harty
edited: 03 December 2007
"""
import re
import amara


def getHrefFromXML(doc, search_regex):
  """Returns the href of the first div link whose text matches search_regex.

  Walks the items of doc.html.body.div in order and, for each one, searches
  the first child of its `a` element for the pattern. Items that cannot be
  inspected (see the except clause below) are skipped rather than aborting
  the search.

  Assumes the links are in the html body's div tags.

  Args:
    doc: an amara xml object exposing html.body.div
    search_regex: the regular expression to search for in the link text

  Returns:
    the href attribute of the first matching link, else None.
  """
  pattern = re.compile(search_regex)
  for item in doc.html.body.div:
    try:
      link = item.a
      if pattern.search(link.xml_children[0]):
        return link.href
    except (AttributeError, IndexError, TypeError):
      # Best effort: skip a div that has no `a` element (AttributeError),
      # a link with no children (IndexError), a link whose first child is
      # not text (TypeError), or a matching link without an href
      # (AttributeError). Anything else is a real error and propagates.
      continue

  return None

if __name__ == "__main__":
  # Use a local (cached) copy of the content instead of fetching it.
  cached_page = open("mobile-homepage.xhtml")
  try:
    doc = amara.parse(cached_page)
  finally:
    # Close the handle even if parsing fails; the original leaked it.
    cached_page.close()

  # Single-argument print(...) emits the same text on Python 2 and is also
  # valid syntax on Python 3.
  print("should return: '/gmm?source=m&dc=mobile-promotion'")
  print(getHrefFromXML(doc, "Maps"))