Monday 3 December 2007

Markup exercise 11b prettified

""" Markup Example 11

Defines the function getLinkFromXhtml(), which is later used to look up the
link (an <a> element with an href) whose text matches a given regular
expression, if it exists.

(Includes the code from markup example 07)

author: Julian Harty
edited: 03 December 2007
"""
import re
import sys
import urllib
import BeautifulSoup

from xml.dom import minidom

def getLinkFromXhtml(content, text_regex):
  """Find the first link whose text matches a regular expression.

  Only <a> elements that carry an href attribute are considered. The text
  that is tested is the concatenation of the element's direct child text
  nodes; text inside nested elements (e.g. <a><b>label</b></a>) is not
  included.

  Args:
    content: the source content e.g. an xHTML response. It is handed to
      minidom.parseString, so it has to be well-formed XML.
    text_regex: the regular expression to search for in the link text.
  Returns:
    The first matching <a> element serialised as an XML string, or None if
    no link matches.

  NOTE(review): the whole element is returned, not just the value of its
  href attribute; a caller that only wants the URL has to extract it.
  Confirm that this is the intended contract.
  """

  doc = minidom.parseString(content)
  rx = re.compile(text_regex)

  for link in doc.getElementsByTagName('a'):
    # Anchors without an href (e.g. named anchors) are not links.
    if not link.hasAttribute('href'):
      continue
    # Gather only the direct text children, in document order.
    text = "".join(node.data for node in link.childNodes
                   if node.nodeType == node.TEXT_NODE)
    if rx.search(text):
      return str(link.toxml())

  return None


# Fetch the page while identifying as a Nokia 6230 handset that accepts
# XHTML, via the Accept and User-Agent request headers.
request = urllib.FancyURLopener()
request.addheader('Accept', 'application/xhtml+xml')
request.addheader('User-Agent',
  'Nokia6230/2.0+(04.43)+Profile/MIDP-2.0+'
  'Configuration/CLDC-1.1+UP.Link/6.3.0.0.0')
response = request.open("http://www.google.co.uk/m")
try:
  content = response.read()
finally:
  # Release the connection even if reading the body fails.
  response.close()

# With a single argument, print(...) outputs the same as the print statement.
print(getLinkFromXhtml(content, 'Maps with My Location'))

soup = BeautifulSoup.BeautifulSoup(content)
#print soup.prettify()

# write the content to a file so it can be displayed in a browser
# NOTE(review): despite the file name, this writes the raw response body,
# not soup.prettify() -- confirm which one is intended.
f = open("markup_ex11b_prettified.xml", "wb")
try:
  f.write(content)
finally:
  # Always close the file, even if the write fails.
  f.close()