|
Check-in Number:
|
419 | |
| Date: |
2009-Sep-18 01:35:06 (local)
2009-Sep-18 08:35:06 (UTC) |
| User: | majid |
| Branch: | |
| Comment: |
updated feedparser |
| Tickets: |
|
| Inspections: |
|
| Files: |
|
temboz/feedparser.py 1.11 -> 1.12
--- /tmp/T0v5aWe2 Mon Sep 6 18:11:51 2010
+++ /tmp/T1w5aWe2 Mon Sep 6 18:11:51 2010
@@ -11,7 +11,7 @@
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
"""
-__version__ = "4.2-pre-" + "$Revision: 1.11 $"[11:14] + "-svn"
+__version__ = "4.2-pre-" + "$Revision: 1.12 $"[11:14] + "-svn"
__license__ = """Copyright (c) 2002-2008, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -523,7 +523,7 @@
attrs.append(('xmlns',namespace))
if tag=='svg' and namespace=='http://www.w3.org/2000/svg':
attrs.append(('xmlns',namespace))
- if tag == 'svg': self.svgOK = 1
+ if tag == 'svg': self.svgOK += 1
return self.handle_data('<%s%s>' % (tag, self.strattrs(attrs)), escape=0)
# match namespaces
@@ -559,7 +559,7 @@
prefix = self.namespacemap.get(prefix, prefix)
if prefix:
prefix = prefix + '_'
- if suffix == 'svg': self.svgOK = 0
+ if suffix == 'svg' and self.svgOK: self.svgOK -= 1
# call special handler (if defined) or default handler
methodname = '_end_' + prefix + suffix
@@ -1532,6 +1532,24 @@
value = self.pop('itunes_explicit', 0)
self._getContext()['itunes_explicit'] = (value == 'yes') and 1 or 0
+ def _start_media_content(self, attrsD):
+ context = self._getContext()
+ context.setdefault('media_content', [])
+ context['media_content'].append(attrsD)
+
+ def _start_media_thumbnail(self, attrsD):
+ context = self._getContext()
+ context.setdefault('media_thumbnail', [])
+ self.push('url', 1) # new
+ context['media_thumbnail'].append(attrsD)
+
+ def _end_media_thumbnail(self):
+ url = self.pop('url')
+ context = self._getContext()
+ if url != None and len(url.strip()) != 0:
+ if not context['media_thumbnail'][-1].has_key('url'):
+ context['media_thumbnail'][-1]['url'] = url
+
if _XML_AVAILABLE:
class _StrictFeedParser(_FeedParserMixin, xml.sax.handler.ContentHandler):
def __init__(self, baseuri, baselang, encoding):
@@ -1654,7 +1672,7 @@
def feed(self, data):
data = re.compile(r'<!((?!DOCTYPE|--|\[))', re.IGNORECASE).sub(r'&lt;!\1', data)
#data = re.sub(r'<(\S+?)\s*?/>', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace
- data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data)
+ data = re.sub(r'<([^<>\s]+?)\s*/>', self._shorttag_replace, data)
data = data.replace('&#39;', "'")
data = data.replace('&#34;', '"')
if self.encoding and type(data) == type(u''):
@@ -2282,14 +2300,14 @@
'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
- 'action', 'align', 'alt', 'autoplay', 'autocomplete', 'autofocus', 'axis',
+ 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols',
- 'colspan', 'compact', 'contenteditable', 'coords', 'data', 'datafld',
- 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', 'dir',
- 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for',
+ 'colspan', 'compact', 'contenteditable', 'controls', 'coords', 'data',
+ 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay',
+ 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for',
'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus',
'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode',
'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc',
@@ -2404,9 +2422,9 @@
# not otherwise acceptable, perhaps it is MathML or SVG?
if tag=='math' and ('xmlns','http://www.w3.org/1998/Math/MathML') in attrs:
- self.mathmlOK = 1
+ self.mathmlOK += 1
if tag=='svg' and ('xmlns','http://www.w3.org/2000/svg') in attrs:
- self.svgOK = 1
+ self.svgOK += 1
# chose acceptable attributes based on tag class, else bail
if self.mathmlOK and tag in self.mathml_elements:
@@ -2451,10 +2469,10 @@
if tag in self.unacceptable_elements_with_end_tag:
self.unacceptablestack -= 1
if self.mathmlOK and tag in self.mathml_elements:
- if tag == 'math': self.mathmlOK = 0
+ if tag == 'math' and self.mathmlOK: self.mathmlOK -= 1
elif self.svgOK and tag in self.svg_elements:
tag = self.svg_elem_map.get(tag,tag)
- if tag == 'svg': self.svgOK = 0
+ if tag == 'svg' and self.svgOK: self.svgOK -= 1
else:
return
_BaseHTMLProcessor.unknown_endtag(self, tag)