/staticmobilewebsites/py/markup_ex12b.py
Python | 64 lines | 24 code | 6 blank | 34 comment | 3 complexity | c2b359eb7b96e58871510517bb870c54 MD5 | raw file
# Copyright 2009 mwta committers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

""" Markup Example 12

Adding a user-agent string to emulate a Nokia 6230
"""

import urllib
import re
import amara


def getHrefFromXML(doc, search_regex):
    """Return the href of the first <div> link whose text matches search_regex.

    Assumes the links are in the html body's div tags: each item of
    doc.html.body.div is expected to expose an ``a`` child whose first
    entry in ``xml_children`` is the link text and whose ``href``
    attribute is the link target.

    Args:
        doc: an amara xml object
        search_regex: the regular expression to search for in the
            link text

    Returns:
        the href of the first matching link, or None if no link text
        matches the pattern.
    """
    pattern = re.compile(search_regex)
    for item in doc.html.body.div:
        try:
            link = item.a
            if pattern.search(link.xml_children[0]):
                return link.href
        except (AttributeError, IndexError, TypeError):
            # Best effort: skip any <div> that does not have the expected
            # shape (missing child or attribute, no children at all, or a
            # first child that is not text the regex can search). Unlike a
            # bare ``except``, this lets KeyboardInterrupt and SystemExit
            # propagate.
            continue

    return None


def main():
    """Fetch Google's mobile page as a Nokia 6230 and print the Maps link."""
    request = urllib.FancyURLopener()
    request.addheader('Accept', 'application/xhtml+xml')
    request.addheader('User-Agent', 'Nokia6230/2.0+(04.43)'
                                    '+Profile/MIDP-2.0+Configuration'
                                    '/CLDC-1.1+UP.Link/6.3.0.0.0')
    response = request.open("http://www.google.co.uk/m")
    try:
        content = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # Use the live content
    doc = amara.parse(content)
    print("should return: '/gmm?source=m&dc=mobile-promotion'")
    print(getHrefFromXML(doc, "Maps"))


if __name__ == "__main__":
    main()