/modulos/html-re.py
Python | 53 lines | 47 code | 4 blank | 2 comment | 0 complexity | 1091b30e0f690c4b773a8c76f45c8336 MD5 | raw file
# -*- coding:utf-8 -*-
"""Show three equivalent ways to find the first HTML tag in a string.

The same non-greedy pattern is applied to a sample HTML fragment using
(1) a pre-compiled pattern object, (2) ``re.search`` with external flags
and (3) ``re.search`` with the flags embedded in the pattern. Each method
prints the text of the first match, so the script prints the same tag
three times.
"""
import re

# common variables
# Non-greedy pattern: shortest run of characters from "<" up to the next ">".
rawstr = r"""<.*?>"""
# Same pattern with IGNORECASE (i) and DOTALL (s) given as inline flags.
embedded_rawstr = r"""(?is)<.*?>"""
# Sample HTML fragment that the patterns are searched against.
matchstr = """ <table cellspacing="0" cellpadding="0" width="560" border="0" ID="Table12">
<tr>
<td bgcolor='#DCE2EE'><div align="left"><a href="#" class="linkhover1"><b>DE LA GUARDA CHUMPITAZ HAYDEE</b></a></div></td>
</tr>
<tr>
<td><div align="left">
<table cellspacing="0" cellpadding="0" width="100%" border="0" ID="Table13">
<tr>
<td class="cel05">Sn Sn Bl. 49 Int. B1 Altura Del Estadio San Marcos Urb. Unidad Vecinal No.3<br>El Cercado, LIMA<br />
(+51) (1) <b>538-0229</b>
</td>
<td width="175" align="right" valign="top"></td>
</tr>
</table>
</div></td>
</tr>
<tr height="15"><td></td></tr>
</table>

<table cellspacing="0" cellpadding="0" width="560" border="0" ID="Table12">
<tr>
<td bgcolor='#DCE2EE'><div align="left"><a href="#" class="linkhover1"><b>DE LA GUARDA GONZALES DANIEL MARTIN</b></a></div></td>
</tr>
<tr>
<td><div align="left">
<table cellspacing="0" cellpadding="0" width="100%" border="0" ID="Table13">
<tr>
<td class="cel05">JR Saenz Peña 1412 Int. 102<br>La Victoria, LIMA<br />"""

# method 1: using a compile object
compile_obj = re.compile(rawstr, re.IGNORECASE | re.DOTALL)
match_obj = compile_obj.search(matchstr)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(match_obj.group(0))

# method 2: using search function (w/ external flags)
match_obj = re.search(rawstr, matchstr, re.IGNORECASE | re.DOTALL)
print(match_obj.group(0))

# method 3: using search function (w/ embedded flags)
match_obj = re.search(embedded_rawstr, matchstr)
print(match_obj.group(0))

# Replace string
# NOTE: subn() takes (replacement, string) and returns (new_string, count);
# to replace every tag with a space, the call would be:
#     newstr, count = compile_obj.subn(' ', matchstr)