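Fix XML parsing errors in the generated news feed: replace the `&mdash;` and `&amp;` entities in story text with plain-text equivalents (`--`, `and`), and skip the opening `<p>` tag when extracting each item's description.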

author Deryck Hodge <deryck@samba.org>
Thu, 10 Mar 2005 21:10:51 +0000 (21:10 +0000)
committer Deryck Hodge <deryck@samba.org>
Thu, 10 Mar 2005 21:10:51 +0000 (21:10 +0000)
diff --git a/scripts/updateNews.py b/scripts/updateNews.py

index beb7a8013ff3ab9bad7b912c65a716b3434c5535..70980589e3d6296890b2a1498f03dc81c4492df5 100755 (executable)
--- a/scripts/updateNews.py
+++ b/scripts/updateNews.py
@@ -192,15 +192,24 @@ feed.write('<title>news.samba.org</title>\n')
  feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
  feed.write('<link>http://news.samba.org/</link>\n\n')
  
+# Characters to avoid as "undefined entities" in XML
+ents = { '&mdash;' : '--', '&amp;' : 'and' }
+
  count = 10
  for date in post_dates:
+       item_text = all_stories[date]
+       if '&' in item_text and ';' in item_text:
+               for ent in ents.keys():
+                       item_text = item_text.replace(ent, ents[ent])
+                               
         if count > 0:
-               title = re.search('(?<=\"\>).+(?=\<\/a)', all_stories[date])
-               link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', all_stories[date])
+               title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
+               link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', item_text)
  
-               begin = all_stories[date].find('<p>')
-               end = all_stories[date].find('</p>')
-               descrip = all_stories[date][begin:end]
+               # Index out the HTML tags for XML
+               begin = item_text.find('<p>') + 3
+               end = item_text.find('</p>') 
+               descrip = item_text[begin:end]
  
                 feed.write('<item>\n')
                 feed.write('<title>' + title.group(0) + '</title>\n')
author	Deryck Hodge <deryck@samba.org>
	Thu, 10 Mar 2005 21:10:51 +0000 (21:10 +0000)
committer	Deryck Hodge <deryck@samba.org>
	Thu, 10 Mar 2005 21:10:51 +0000 (21:10 +0000)