PageRenderTime 16ms CodeModel.GetById 13ms app.highlight 2ms RepoModel.GetById 0ms app.codeStats 0ms

/staticmobilewebsites/py/markup_ex12b.py

http://mwta.googlecode.com/
Python | 64 lines | 24 code | 6 blank | 34 comment | 3 complexity | c2b359eb7b96e58871510517bb870c54 MD5 | raw file
 1# Copyright 2009 mwta committers
 2#
 3# Licensed under the Apache License, Version 2.0 (the "License");
 4# you may not use this file except in compliance with the License.
 5# You may obtain a copy of the License at
 6#
 7#     http://www.apache.org/licenses/LICENSE-2.0
 8#
 9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License
14
15""" Markup Example 12
16
17Adding a user-agent string to emulate a Nokia 6230
18"""
19
20import urllib
21import re
22import amara
23
24
def getHrefFromXML(doc, search_regex):
  """Returns the href of the first <div> link whose text matches search_regex.

  Walks doc.html.body.div in order and, for each div, searches the first
  child of its <a> element with the compiled pattern.

  Assumes the links are in the html body's div tags. If doc has no
  html.body.div, the resulting lookup error propagates to the caller.

  Args:
    doc: an amara xml object
    search_regex: the regular expression to search for in the first
                  child node (the link text) of each div's <a> element

  Returns:
    the href of the first matching link, else None.
  """
  pattern = re.compile(search_regex)
  for item in doc.html.body.div:
    try:
      link = item.a
      if pattern.search(link.xml_children[0]):
        return link.href
    except (AttributeError, IndexError, TypeError):
      # Best-effort scan: skip divs with no <a> (or no href), links with
      # no children, and links whose first child is not text. Only these
      # lookup/match failures are ignored; anything else (for example
      # KeyboardInterrupt) is allowed to propagate.
      continue

  return None
51
if __name__ == "__main__":
  # Build an opener that asks for XHTML and sends a Nokia 6230
  # user-agent string along with the request.
  request = urllib.FancyURLopener()
  request.addheader('Accept', 'application/xhtml+xml')
  request.addheader('User-Agent', 'Nokia6230/2.0+(04.43)'
    '+Profile/MIDP-2.0+Configuration'
    '/CLDC-1.1+UP.Link/6.3.0.0.0')
  response = request.open("http://www.google.co.uk/m")
  try:
    content = response.read()
  finally:
    # Always release the connection, even if the read fails.
    response.close()

  # Use the live content
  doc = amara.parse(content)
  # Single-argument parenthesised prints emit the same text under the
  # Python 2 print statement and also parse as Python 3 calls.
  print("should return: '/gmm?source=m&dc=mobile-promotion'")
  print(getHrefFromXML(doc, "Maps"))