class SimpleRSS
Monkey patches for outstanding issues logged in the simple-rss project.
* Add support for issued time field: http://rubyforge.org/tracker/index.php?func=detail&aid=13980&group_id=893&atid=3517
* The '+' symbol is lost when escaping fields: http://rubyforge.org/tracker/index.php?func=detail&aid=10852&group_id=893&atid=3517
Public Instance Methods
clean_content(tag, attrs, content)
click to toggle source
# File lib/parsers/simple-rss.rb, line 13
#
# Converts the raw content of a feed element into its cleaned value.
#
# tag     - Symbol naming the element (e.g. :pubDate, :author).
# attrs   - The element's attribute text; only interpolated into a string
#           and searched for an href value.
# content - The element's body; coerced with #to_s before use.
#
# Returns a Time for date-like tags when the text parses, otherwise a String.
def clean_content(tag, attrs, content)
  text = content.to_s
  date_tags = [:pubDate, :lastBuildDate, :published, :updated,
               :expirationDate, :modified, :'dc:date', :issued]
  markup_free_tags = [:author, :contributor, :skipHours, :skipDays]

  if date_tags.include?(tag)
    begin
      return Time.parse(text)
    rescue StandardError
      # Unparseable date: fall back to the unescaped raw text.
      return unescape(text)
    end
  end

  # These tags have anything that looks like a tag stripped before unescaping.
  return unescape(text.gsub(/<.*?>/, '')) if markup_free_tags.include?(tag)

  # An element with no body may carry its value in an href attribute.
  # The trailing space lets an unquoted value at the end of attrs terminate.
  if text.empty?
    href = "#{attrs} ".match(/href=['"]?([^'"]*)['" ]/mi)
    return href[1].strip if href
  end

  unescape(text)
end
unescape(s)
click to toggle source
# File lib/parsers/simple-rss.rb, line 26
#
# Returns +s+ in HTML form, choosing one of three treatments:
#
# * A line in +s+ opens (after optional whitespace) with a CDATA marker:
#   every CDATA opening/closing marker is removed and the rest is returned
#   untouched, since the wrapped text is raw HTML.
# * +s+ contains '<' or '>': it already looks like HTML and is returned as is.
# * Otherwise +s+ is passed to FeedNormalizer::HtmlCleaner.unescapeHTML
#   (defined outside this listing; presumably decodes HTML entities).
def unescape(s)
  cdata_markers = /(<!\[CDATA\[|\]\]>)/

  return s.gsub(cdata_markers, '') if s =~ /^\s*(<!\[CDATA\[|\]\]>)/
  return s if s =~ /[<>]/

  FeedNormalizer::HtmlCleaner.unescapeHTML(s)
end