| Check-in Number: | 472 |
| Date: | 2010-Jul-27 09:06:18 (local) / 2010-Jul-27 16:06:18 (UTC) |
| User: | majid |
| Branch: | |
| Comment: | get rid of yet more blegs and gunk |
| Tickets: | |
| Inspections: | |
| Files: | |
temboz/transform.py 1.40 -> 1.41
--- /tmp/T007aqo6 Sun Sep 5 16:46:38 2010
+++ /tmp/T117aqo6 Sun Sep 5 16:46:38 2010
@@ -42,6 +42,8 @@
re.MULTILINE + re.DOTALL + re.IGNORECASE),
degunk.Re('<a href="[^">]*.tweetmeme.com.*?</a>',
re.MULTILINE + re.DOTALL + re.IGNORECASE),
+ degunk.Re('<a [^>]* href="http://twitter.com/home/[?]status.*?</a>',
+ re.MULTILINE + re.DOTALL + re.IGNORECASE),
# Feedburner annoyances
degunk.Re('<a href[^>]*><img src="http://feeds.feedburner[^>]*></a>'),
degunk.Re('<p><a href="(http://feeds\\.[^"/>]*/~./)[^"]*">'
@@ -165,6 +167,7 @@
# unwarranted multiple empty lines
degunk.Re('<br>\s*(<br>\s*)+', 0, '<br>'),
degunk.Re('<p> </p>'),
+ degunk.Re('<p [^>]*></p>'),
degunk.Re('<p>-</p>'),
degunk.Re('<span[^>]*></span>', 0, '', iterate=True),
# junk