markup_ex12b.py - Copyright 2009 mwta committers Licensed u…

/staticmobilewebsites/py/markup_ex12b.py

http://mwta.googlecode.com/ · Python · 64 lines · 24 code · 6 blank · 34 comment · 3 complexity · c2b359eb7b96e58871510517bb870c54 MD5 · raw file


# Copyright 2009 mwta committers

#

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

#     http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License



""" Markup Example 12



Adding a user-agent string to emulate a Nokia 6230

"""



import urllib

import re

import amara





def getHrefFromXML(doc, search_regex):
  """Return the href of the first <div> link whose content matches.

  Iterates over doc.html.body.div in order. For each item, the first
  child node of its <a> element is searched with search_regex, and the
  href of the first anchor that matches is returned.

  Assumes the links are in the html body's div tags. Items that cannot
  be inspected (no <a>, an <a> with no children, a first child that is
  not text, or a matching <a> without an href) are skipped.

  Args:
    doc: an amara xml object
    search_regex: the regular expression to search for in the first
                  child node (presumably the link text) of each anchor

  Returns:
    the anchor's href if the pattern is found, else None.
  """
  pattern = re.compile(search_regex)
  for item in doc.html.body.div:
    try:
      if pattern.search(item.a.xml_children[0]):
        return item.a.href
    except (AttributeError, IndexError, TypeError):
      # Only the failures expected from an unusable div are ignored:
      #   AttributeError - missing <a> element or href attribute
      #   IndexError     - the <a> element has no children
      #   TypeError      - the first child is not searchable text
      # Anything else (including KeyboardInterrupt) now propagates
      # instead of being silently swallowed.
      continue

  return None



if __name__ == "__main__":
  # Fetch Google's mobile page with headers that emulate a Nokia 6230,
  # then look up the link whose text matches "Maps".
  request = urllib.FancyURLopener()
  request.addheader('Accept', 'application/xhtml+xml')
  # NOTE(review): addheader() appends to the opener's header list, which
  # presumably already holds urllib's default User-Agent -- confirm the
  # server actually honours the Nokia value below.
  request.addheader('User-Agent', 'Nokia6230/2.0+(04.43)'
    '+Profile/MIDP-2.0+Configuration'
    '/CLDC-1.1+UP.Link/6.3.0.0.0')
  response = request.open("http://www.google.co.uk/m")
  try:
    content = response.read()
  finally:
    # Release the connection even if reading the body fails.
    response.close()

  # Use the live content
  doc = amara.parse(content)
  # Single-argument parenthesised print gives the same output on Python 2.
  print("should return: '/gmm?source=m&dc=mobile-promotion'")
  print(getHrefFromXML(doc, "Maps"))

Alerts (2)

'type(' Use isinstance() for type checking instead of type()
43
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
47