From 9c8b19ff68ba55d870edbf528120d6979fc3250b Mon Sep 17 00:00:00 2001 From: Frederic Guillot Date: Sat, 29 Jun 2013 13:41:36 -0400 Subject: [PATCH] Update picoFeed to support broken feeds and new blacklist media --- vendor/PicoFeed/Filter.php | 3 + vendor/PicoFeed/Parser.php | 10 +++ vendor/PicoFeed/Parsers/Atom.php | 4 +- vendor/PicoFeed/Parsers/Rss10.php | 4 +- vendor/PicoFeed/Parsers/Rss20.php | 15 +++- vendor/PicoFeed/Writer.php | 10 ++- vendor/PicoFeed/Writers/Atom.php | 130 ++++++++++++++++++------------ vendor/PicoFeed/Writers/Rss20.php | 125 ++++++++++++++++++++++++++++ 8 files changed, 239 insertions(+), 62 deletions(-) create mode 100644 vendor/PicoFeed/Writers/Rss20.php diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php index 99ce254..7aa4193 100644 --- a/vendor/PicoFeed/Filter.php +++ b/vendor/PicoFeed/Filter.php @@ -74,6 +74,9 @@ class Filter 'da.feedsportal.com', 'rss.feedsportal.com', 'res.feedsportal.com', + 'res1.feedsportal.com', + 'res2.feedsportal.com', + 'res3.feedsportal.com', 'pi.feedsportal.com', 'rss.nytimes.com', 'feeds.wordpress.com', diff --git a/vendor/PicoFeed/Parser.php b/vendor/PicoFeed/Parser.php index 2153bc1..9056fcc 100644 --- a/vendor/PicoFeed/Parser.php +++ b/vendor/PicoFeed/Parser.php @@ -77,4 +77,14 @@ abstract class Parser return $data; } + + + // Strip CR, tab and LF characters from anywhere in a string, then trim both ends, without breaking utf-8 strings + public function stripWhiteSpace($value) + { + $value = str_replace("\r", "", $value); + $value = str_replace("\t", "", $value); + $value = str_replace("\n", "", $value); + return trim($value); + } } diff --git a/vendor/PicoFeed/Parsers/Atom.php b/vendor/PicoFeed/Parsers/Atom.php index 05cefda..b873ca4 100644 --- a/vendor/PicoFeed/Parsers/Atom.php +++ b/vendor/PicoFeed/Parsers/Atom.php @@ -16,7 +16,7 @@ class Atom extends \PicoFeed\Parser } $this->url = $this->getUrl($xml); - $this->title = (string) $xml->title; + $this->title = $this->stripWhiteSpace((string) 
$xml->title); $this->id = (string) $xml->id; $this->updated = strtotime((string) $xml->updated); $author = (string) $xml->author->name; @@ -30,7 +30,7 @@ class Atom extends \PicoFeed\Parser $item = new \StdClass; $item->id = (string) $entry->id; - $item->title = (string) $entry->title; + $item->title = $this->stripWhiteSpace((string) $entry->title); $item->url = $this->getUrl($entry); $item->updated = strtotime((string) $entry->updated); $item->author = $author; diff --git a/vendor/PicoFeed/Parsers/Rss10.php b/vendor/PicoFeed/Parsers/Rss10.php index 6b9ef9c..d386b78 100644 --- a/vendor/PicoFeed/Parsers/Rss10.php +++ b/vendor/PicoFeed/Parsers/Rss10.php @@ -17,7 +17,7 @@ class Rss10 extends \PicoFeed\Parser $namespaces = $xml->getNamespaces(true); - $this->title = (string) $xml->channel->title; + $this->title = $this->stripWhiteSpace((string) $xml->channel->title); $this->url = (string) $xml->channel->link; $this->id = $this->url; @@ -34,7 +34,7 @@ class Rss10 extends \PicoFeed\Parser foreach ($xml->item as $entry) { $item = new \StdClass; - $item->title = (string) $entry->title; + $item->title = $this->stripWhiteSpace((string) $entry->title); $item->url = ''; $item->author= ''; $item->updated = ''; diff --git a/vendor/PicoFeed/Parsers/Rss20.php b/vendor/PicoFeed/Parsers/Rss20.php index 88a2851..2f1bca1 100644 --- a/vendor/PicoFeed/Parsers/Rss20.php +++ b/vendor/PicoFeed/Parsers/Rss20.php @@ -35,7 +35,7 @@ class Rss20 extends \PicoFeed\Parser $this->url = (string) $xml->channel->link; } - $this->title = (string) $xml->channel->title; + $this->title = $this->stripWhiteSpace((string) $xml->channel->title); $this->id = $this->url; $this->updated = isset($xml->channel->pubDate) ? (string) $xml->channel->pubDate : (string) $xml->channel->lastBuildDate; $this->updated = $this->updated ? 
strtotime($this->updated) : time(); @@ -46,7 +46,7 @@ class Rss20 extends \PicoFeed\Parser foreach ($xml->channel->item as $entry) { $item = new \StdClass; - $item->title = (string) $entry->title; + $item->title = $this->stripWhiteSpace((string) $entry->title); $item->url = ''; $item->author= ''; $item->updated = ''; @@ -63,7 +63,16 @@ class Rss20 extends \PicoFeed\Parser if (! $item->content && ! empty($namespace->encoded)) $item->content = (string) $namespace->encoded; } - if (empty($item->url)) $item->url = (string) $entry->link; + if (empty($item->url)) { + + if (isset($entry->link)) { + $item->url = (string) $entry->link; + } + else if (isset($entry->guid)) { + $item->url = (string) $entry->guid; + } + } + if (empty($item->updated)) $item->updated = strtotime((string) $entry->pubDate) ?: $this->updated; if (empty($item->content)) { diff --git a/vendor/PicoFeed/Writer.php b/vendor/PicoFeed/Writer.php index c9fc8e4..3049968 100644 --- a/vendor/PicoFeed/Writer.php +++ b/vendor/PicoFeed/Writer.php @@ -6,13 +6,15 @@ abstract class Writer { public $items = array(); + abstract public function execute($filename = ''); - public function checkRequiredProperties() - { - foreach ($this->required_properties as $property) { - if (! isset($this->$property)) { + public function checkRequiredProperties($properties, $container) + { + foreach ($properties as $property) { + + if ((is_object($container) && ! isset($container->$property)) || (is_array($container) && ! 
isset($container[$property]))) { throw new \RuntimeException('Required property missing: '.$property); } diff --git a/vendor/PicoFeed/Writers/Atom.php b/vendor/PicoFeed/Writers/Atom.php index 084b4f6..5609293 100644 --- a/vendor/PicoFeed/Writers/Atom.php +++ b/vendor/PicoFeed/Writers/Atom.php @@ -6,116 +6,144 @@ require_once __DIR__.'/../Writer.php'; class Atom extends \PicoFeed\Writer { - protected $required_properties = array( + private $required_feed_properties = array( 'title', 'site_url', - 'feed_url' + 'feed_url', + ); + + private $required_item_properties = array( + 'title', + 'url', ); public function execute($filename = '') { - $this->checkRequiredProperties(); + $this->checkRequiredProperties($this->required_feed_properties, $this); - $dom = new \DomDocument('1.0', 'UTF-8'); - $dom->formatOutput = true; + $this->dom = new \DomDocument('1.0', 'UTF-8'); + $this->dom->formatOutput = true; // - $feed = $dom->createElement('feed'); + $feed = $this->dom->createElement('feed'); $feed->setAttributeNodeNS(new \DomAttr('xmlns', 'http://www.w3.org/2005/Atom')); // - $generator = $dom->createElement('generator', 'PicoFeed'); + $generator = $this->dom->createElement('generator', 'PicoFeed'); $generator->setAttribute('url', 'https://github.com/fguillot/picoFeed'); $feed->appendChild($generator); // - $feed->appendChild($dom->createElement('title', $this->title)); + $feed->appendChild($this->dom->createElement('title', $this->title)); + + // <id/> + $feed->appendChild($this->dom->createElement('id', $this->site_url)); // <updated/> - $feed->appendChild($dom->createElement('updated', date(DATE_ATOM, isset($this->updated) ? $this->updated : time()))); + $this->addUpdated($feed, isset($this->updated) ? 
$this->updated : ''); // <link rel="alternate" type="text/html" href="http://example.org/"/> - $link = $dom->createElement('link'); - $link->setAttribute('rel', 'alternate'); - $link->setAttribute('type', 'text/html'); - $link->setAttribute('href', $this->site_url); - $feed->appendChild($link); + $this->addLink($feed, $this->site_url); // <link rel="self" type="application/atom+xml" href="http://example.org/feed.atom"/> - $link = $dom->createElement('link'); - $link->setAttribute('rel', 'self'); - $link->setAttribute('type', 'application/atom+xml'); - $link->setAttribute('href', $this->feed_url); - $feed->appendChild($link); + $this->addLink($feed, $this->feed_url, 'self', 'application/atom+xml'); // <author/> - if (isset($this->author)) { - - $name = $dom->createElement('name', $this->author); - - $author = $dom->createElement('author'); - $author->appendChild($name); - $feed->appendChild($author); - } + if (isset($this->author)) $this->addAuthor($feed, $this->author); // <entry/> foreach ($this->items as $item) { - $entry = $dom->createElement('entry'); + $this->checkRequiredProperties($this->required_item_properties, $item); + + $entry = $this->dom->createElement('entry'); // <title/> - $entry->appendChild($dom->createElement('title', $item['title'])); + $entry->appendChild($this->dom->createElement('title', $item['title'])); + + // <id/> + $entry->appendChild($this->dom->createElement('id', $item['url'])); // <updated/> - $entry->appendChild($dom->createElement('updated', date(DATE_ATOM, isset($item['updated']) ? $item['updated'] : time()))); + $this->addUpdated($entry, isset($item['updated']) ? 
$item['updated'] : ''); // <published/> if (isset($item['published'])) { - $entry->appendChild($dom->createElement('published', date(DATE_ATOM, $item['published']))); + $entry->appendChild($this->dom->createElement('published', date(DATE_ATOM, $item['published']))); } // <link rel="alternate" type="text/html" href="http://example.org/"/> - $link = $dom->createElement('link'); - $link->setAttribute('rel', 'alternate'); - $link->setAttribute('type', 'text/html'); - $link->setAttribute('href', $item['url']); - $entry->appendChild($link); + $this->addLink($entry, $item['url']); // <summary/> if (isset($item['summary'])) { - $entry->appendChild($dom->createElement('summary', $item['summary'])); + $entry->appendChild($this->dom->createElement('summary', $item['summary'])); } // <content/> if (isset($item['content'])) { - $content = $dom->createElement('content'); + $content = $this->dom->createElement('content'); $content->setAttribute('type', 'html'); - $content->appendChild($dom->createCDATASection($item['content'])); + $content->appendChild($this->dom->createCDATASection($item['content'])); $entry->appendChild($content); } // <author/> - if (isset($item['author'])) { - - $name = $dom->createElement('name', $item['author']); - - $author = $dom->createElement('author'); - $author->appendChild($name); - - $entry->appendChild($author); - } + if (isset($item['author'])) $this->addAuthor($entry, $item['author']); $feed->appendChild($entry); } - $dom->appendChild($feed); + $this->dom->appendChild($feed); if ($filename) { - $dom->save($filename); + $this->dom->save($filename); } else { - return $dom->saveXML(); + return $this->dom->saveXML(); } } + + + public function addLink($xml, $url, $rel = 'alternate', $type = 'text/html') + { + $link = $this->dom->createElement('link'); + $link->setAttribute('rel', $rel); + $link->setAttribute('type', $type); + $link->setAttribute('href', $url); + $xml->appendChild($link); + } + + + public function addUpdated($xml, $value = '') + { + 
$xml->appendChild($this->dom->createElement( + 'updated', + date(DATE_ATOM, $value ?: time()) + )); + } + + + public function addAuthor($xml, array $values) + { + $author = $this->dom->createElement('author'); + + if (isset($values['name'])) { + $name = $this->dom->createElement('name', $values['name']); + $author->appendChild($name); + } + + if (isset($values['email'])) { + $email = $this->dom->createElement('email', $values['email']); + $author->appendChild($email); + } + + if (isset($values['url'])) { + $uri = $this->dom->createElement('uri', $values['url']); + $author->appendChild($uri); + } + + $xml->appendChild($author); + } } \ No newline at end of file diff --git a/vendor/PicoFeed/Writers/Rss20.php b/vendor/PicoFeed/Writers/Rss20.php new file mode 100644 index 0000000..ee3d233 --- /dev/null +++ b/vendor/PicoFeed/Writers/Rss20.php @@ -0,0 +1,125 @@ +<?php + +namespace PicoFeed\Writers; + +require_once __DIR__.'/../Writer.php'; + +class Rss20 extends \PicoFeed\Writer +{ + private $required_feed_properties = array( + 'title', + 'site_url', + ); + + private $required_item_properties = array( + 'title', + 'url', + ); + + + public function execute($filename = '') + { + $this->checkRequiredProperties($this->required_feed_properties, $this); + + $this->dom = new \DomDocument('1.0', 'UTF-8'); + $this->dom->formatOutput = true; + + // <rss/> + $rss = $this->dom->createElement('rss'); + $rss->setAttribute('version', '2.0'); + $rss->setAttributeNodeNS(new \DomAttr('xmlns:content', 'http://purl.org/rss/1.0/modules/content/')); + + $channel = $this->dom->createElement('channel'); + + // <generator/> + $generator = $this->dom->createElement('generator', 'PicoFeed (https://github.com/fguillot/picoFeed)'); + $channel->appendChild($generator); + + // <title/> + $channel->appendChild($this->dom->createElement('title', $this->title)); + + // <description/> + $channel->appendChild($this->dom->createElement('description', isset($this->description) ? 
$this->description : $this->title)); + + // <pubDate/> + $this->addPubDate($channel, isset($this->updated) ? $this->updated : ''); + + // <link/> + $channel->appendChild($this->dom->createElement('link', $this->site_url)); + + // <webMaster/> + if (isset($this->author)) $this->addAuthor($channel, 'webMaster', $this->author); + + // <item/> + foreach ($this->items as $item) { + + $this->checkRequiredProperties($this->required_item_properties, $item); + + $entry = $this->dom->createElement('item'); + + // <title/> + $entry->appendChild($this->dom->createElement('title', $item['title'])); + + // <link/> + $entry->appendChild($this->dom->createElement('link', $item['url'])); + + // <guid/> + $guid = $this->dom->createElement('guid', $item['url']); + $guid->setAttribute('isPermaLink', 'true'); + $entry->appendChild($guid); + + // <pubDate/> + $this->addPubDate($entry, isset($item['updated']) ? $item['updated'] : ''); + + // <description/> + if (isset($item['summary'])) { + $entry->appendChild($this->dom->createElement('description', $item['summary'])); + } + + // <content:encoded/> + if (isset($item['content'])) { + $content = $this->dom->createElement('content:encoded'); + $content->appendChild($this->dom->createCDATASection($item['content'])); + $entry->appendChild($content); + } + + // <author/> + if (isset($item['author'])) $this->addAuthor($entry, 'author', $item['author']); + + $channel->appendChild($entry); + } + + $rss->appendChild($channel); + $this->dom->appendChild($rss); + + if ($filename) { + $this->dom->save($filename); + } + else { + return $this->dom->saveXML(); + } + } + + + public function addPubDate($xml, $value = '') + { + $xml->appendChild($this->dom->createElement( + 'pubDate', + date(DATE_RSS, $value ?: time()) + )); + } + + + public function addAuthor($xml, $tag, array $values) + { + $value = ''; + + if (isset($values['email'])) $value .= $values['email']; + if ($value && isset($values['name'])) $value .= ' ('.$values['name'].')'; + + if ($value) { + 
$author = $this->dom->createElement($tag, $value); + $xml->appendChild($author); + } + } +} \ No newline at end of file