curso_python_basico /modulos/html-re.py

Language Python Lines 54
MD5 Hash 130deb463dd5d772c51f08b12dc422d7 Estimated Cost $848 (why?)
Repository https://bitbucket.org/alfonsodg/curso_python_basico.git View Raw File View Project SPDX
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding:utf-8 -*-

import re

# common variables

# Non-greedy pattern: matches a single markup tag, from "<" up to the
# nearest following ">".
rawstr = r"<.*?>"
# Same pattern with the flags embedded in the expression itself:
# (?i) is IGNORECASE and (?s) is DOTALL.
embedded_rawstr = r"(?is)<.*?>"
# Sample HTML fragment that the three searches below are run against.
matchstr = """			<table cellspacing="0" cellpadding="0" width="560" border="0" ID="Table12"> 
			<tr> 
				<td bgcolor='#DCE2EE'><div align="left"><a href="#" class="linkhover1"><b>DE LA GUARDA CHUMPITAZ HAYDEE</b></a></div></td> 
			</tr> 
			<tr> 
				<td><div align="left"> 
				<table cellspacing="0" cellpadding="0" width="100%" border="0" ID="Table13"> 
					<tr> 
					<td class="cel05">Sn Sn Bl. 49 Int. B1 Altura Del Estadio San Marcos Urb. Unidad Vecinal No.3<br>El Cercado, LIMA<br /> 
						(+51) (1) <b>538-0229</b> 
					</td> 
					<td width="175" align="right" valign="top"></td> 
					</tr> 
				</table> 
				</div></td> 
			</tr> 
			<tr height="15"><td></td></tr> 
			</table> 
			 
			<table cellspacing="0" cellpadding="0" width="560" border="0" ID="Table12"> 
			<tr> 
				<td bgcolor='#DCE2EE'><div align="left"><a href="#" class="linkhover1"><b>DE LA GUARDA GONZALES DANIEL MARTIN</b></a></div></td> 
			</tr> 
			<tr> 
				<td><div align="left"> 
				<table cellspacing="0" cellpadding="0" width="100%" border="0" ID="Table13"> 
					<tr> 
					<td class="cel05">JR Saenz PeĂąa 1412 Int. 102<br>La Victoria, LIMA<br />"""

# All three methods below are equivalent: search() returns only the FIRST
# match, which here is the opening <table ...> tag of the sample.
# DOTALL lets "." also match newlines, so a tag broken across lines would
# still match. IGNORECASE has no effect on this pattern (it contains no
# letters); it is kept so the external flags mirror the embedded "(?is)".

# method 1: using a compile object
compile_obj = re.compile(rawstr, re.IGNORECASE | re.DOTALL)
match_obj = compile_obj.search(matchstr)
# print(...) with a single argument produces the same output on Python 2
# and Python 3; the bare print statement is a SyntaxError on Python 3.
print(match_obj.group(0))

# method 2: using search function (w/ external flags)
match_obj = re.search(rawstr, matchstr, re.IGNORECASE | re.DOTALL)
print(match_obj.group(0))

# method 3: using search function (w/ embedded flags)
match_obj = re.search(embedded_rawstr, matchstr)
print(match_obj.group(0))

# Replace string: subn(repl, string) returns a (new_string, count) tuple
#newstr, count = compile_obj.subn(' ', matchstr)
Back to Top