feed.write('<description>Latest news and happenings with Samba and Samba development.</description>\n')
feed.write('<link>http://news.samba.org/</link>\n\n')
+# Characters to avoid in the XML output (they show up as "undefined entities"), mapped to plain-text replacements
+ents = { '—' : '--', '&' : 'and' }
+
count = 10
for date in post_dates:
+ item_text = all_stories[date]
+ if '&' in item_text and ';' in item_text:
+ for ent in ents.keys():
+ item_text = item_text.replace(ent, ents[ent])
+
if count > 0:
- title = re.search('(?<=\"\>).+(?=\<\/a)', all_stories[date])
- link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', all_stories[date])
+ title = re.search('(?<=\"\>).+(?=\<\/a)', item_text)
+ link = re.search('(?<=\<div class=\"reference\">Link: \<a href=\").+(?=\"\>)', item_text)
- begin = all_stories[date].find('<p>')
- end = all_stories[date].find('</p>')
- descrip = all_stories[date][begin:end]
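+ # Slice out the text between the first <p> and the first </p>; the +3 skips the <p> tag itself so the paragraph tags are left out of the description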
+ begin = item_text.find('<p>') + 3
+ end = item_text.find('</p>')
+ descrip = item_text[begin:end]
feed.write('<item>\n')
feed.write('<title>' + title.group(0) + '</title>\n')