/staticmobilewebsites/py/markup_ex12b.py
http://mwta.googlecode.com/ · Python · 64 lines · 24 code · 6 blank · 34 comment · 3 complexity · c2b359eb7b96e58871510517bb870c54 MD5 · raw file
- # Copyright 2009 mwta committers
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License
-
- """ Markup Example 12
-
- Adding a user-agent string to emulate a Nokia 6230
- """
-
- import urllib
- import re
- import amara
-
-
def getHrefFromXML(doc, search_regex):
    """Return the href of the first <div> link whose text matches a pattern.

    Walks the items of doc.html.body.div and, for each one, searches the
    first child of its <a> element with search_regex. The href of the
    first link that matches is returned.

    Items that cannot be inspected (no <a> element, an <a> without
    children, a first child that is not text, or a matching <a> without
    an href) are skipped rather than treated as errors.

    Assumes the links are in the html body's div tags.

    Args:
      doc: an amara xml object
      search_regex: the regular expression to match in
          the link text

    Returns:
      the href of the first matching link, else None.
    """
    pattern = re.compile(search_regex)
    for item in doc.html.body.div:
        try:
            link = item.a
            if pattern.search(link.xml_children[0]):
                return link.href
        except (AttributeError, IndexError, TypeError):
            # Best-effort scan: only the structural problems listed in the
            # docstring are ignored. Anything else (including
            # KeyboardInterrupt / SystemExit) propagates to the caller
            # instead of being silently swallowed.
            continue

    return None
-
if __name__ == "__main__":
    # Headers that make the request look like it comes from a Nokia 6230
    # that accepts XHTML.
    nokia_headers = [
        ('Accept', 'application/xhtml+xml'),
        ('User-Agent', 'Nokia6230/2.0+(04.43)'
                       '+Profile/MIDP-2.0+Configuration'
                       '/CLDC-1.1+UP.Link/6.3.0.0.0'),
    ]

    request = urllib.FancyURLopener()
    # Replace the opener's header list instead of calling addheader():
    # addheader() only appends, so urllib's own default User-Agent
    # ("Python-urllib/...") would still be sent alongside the Nokia one
    # and the emulation could be ignored by the server.
    request.addheaders = nokia_headers

    response = request.open("http://www.google.co.uk/m")
    try:
        content = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    # Use the live content
    doc = amara.parse(content)
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement, so the output is unchanged.
    print("should return: '/gmm?source=m&dc=mobile-promotion'")
    print(getHrefFromXML(doc, "Maps"))