mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-03-31 04:12:23 +10:00
fix RSS parser bailing out when lines have whitespace before tags
This commit is contained in:
@@ -161,7 +161,7 @@ def parse_rss_export(rss_file):
|
||||
rows = leading_removed.split('\n')
|
||||
|
||||
def get_row(key):
|
||||
- return [r for r in rows if r.startswith('<{}>'.format(key))][0]
|
||||
+ return [r for r in rows if r.strip().startswith('<{}>'.format(key))][0]
|
||||
|
||||
title = str_between(get_row('title'), '<![CDATA[', ']]').strip()
|
||||
url = str_between(get_row('link'), '<link>', '</link>')
|
||||
@@ -209,7 +209,6 @@ def parse_shaarli_rss_export(rss_file):
|
||||
ts_str = str_between(get_row('published'), '<published>', '</published>')
|
||||
time = datetime.strptime(ts_str, "%Y-%m-%dT%H:%M:%S%z")
|
||||
|
||||
|
||||
info = {
|
||||
'url': url,
|
||||
'domain': domain(url),
|
||||
|
||||
Reference in New Issue
Block a user