Fix XML parsing errors.
author: Deryck Hodge <deryck@samba.org>
Thu, 10 Mar 2005 21:10:51 +0000 (21:10 +0000)
committer: Deryck Hodge <deryck@samba.org>
Thu, 10 Mar 2005 21:10:51 +0000 (21:10 +0000)
deryck

git-svn-id: file:///home/svn/samba-web/trunk@564 44aeb9d7-1cd8-0310-b257-a505e0beeac2

scripts/updateNews.py

index beb7a8013ff3ab9bad7b912c65a716b3434c5535..70980589e3d6296890b2a1498f03dc81c4492df5 100755 (executable)
@@ -192,15 +192,24 @@ feed.write('<title>news.samba.org</title>\n')
 feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
 feed.write('<link>http://news.samba.org/</link>\n\n')
 
+# Characters to avoid as "undefined entities" in XML
+ents = { '&mdash;' : '--', '&amp;' : 'and' }
+
 count = 10
 for date in post_dates:
+       item_text = all_stories[date]
+       if '&' in item_text and ';' in item_text:
+               for ent in ents.keys():
+                       item_text = item_text.replace(ent, ents[ent])
+                               
        if count > 0:
-               title = re.search('(?<=\"\>).+(?=\<\/a)', all_stories[date])
-               link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', all_stories[date])
+               title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
+               link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', item_text)
 
-               begin = all_stories[date].find('<p>')
-               end = all_stories[date].find('</p>')
-               descrip = all_stories[date][begin:end]
+               # Index out the HTML tags for XML
+               begin = item_text.find('<p>') + 3
+               end = item_text.find('</p>') 
+               descrip = item_text[begin:end]
 
                feed.write('<item>\n')
                feed.write('<title>' + title.group(0) + '</title>\n')