Update picoFeed to support broken feeds and new blacklist media

This commit is contained in:
Frederic Guillot 2013-06-29 13:41:36 -04:00
parent d292974394
commit 9c8b19ff68
8 changed files with 239 additions and 62 deletions

View File

@ -74,6 +74,9 @@ class Filter
'da.feedsportal.com', 'da.feedsportal.com',
'rss.feedsportal.com', 'rss.feedsportal.com',
'res.feedsportal.com', 'res.feedsportal.com',
'res1.feedsportal.com',
'res2.feedsportal.com',
'res3.feedsportal.com',
'pi.feedsportal.com', 'pi.feedsportal.com',
'rss.nytimes.com', 'rss.nytimes.com',
'feeds.wordpress.com', 'feeds.wordpress.com',

View File

@ -77,4 +77,14 @@ abstract class Parser
return $data; return $data;
} }
/**
 * Remove line breaks and tabs from a string, then trim both ends.
 *
 * Every carriage return, tab and line feed is deleted wherever it occurs
 * (regular spaces inside the string are kept), and the result is passed
 * through trim(). Only single-byte ASCII characters are searched for, so
 * multi-byte UTF-8 sequences are left intact.
 *
 * @param  string  $value  Raw text
 * @return string          Cleaned text
 */
public function stripWhiteSpace($value)
{
    $unwanted = array("\r", "\t", "\n");

    return trim(str_replace($unwanted, "", $value));
}
} }

View File

@ -16,7 +16,7 @@ class Atom extends \PicoFeed\Parser
} }
$this->url = $this->getUrl($xml); $this->url = $this->getUrl($xml);
$this->title = (string) $xml->title; $this->title = $this->stripWhiteSpace((string) $xml->title);
$this->id = (string) $xml->id; $this->id = (string) $xml->id;
$this->updated = strtotime((string) $xml->updated); $this->updated = strtotime((string) $xml->updated);
$author = (string) $xml->author->name; $author = (string) $xml->author->name;
@ -30,7 +30,7 @@ class Atom extends \PicoFeed\Parser
$item = new \StdClass; $item = new \StdClass;
$item->id = (string) $entry->id; $item->id = (string) $entry->id;
$item->title = (string) $entry->title; $item->title = $this->stripWhiteSpace((string) $entry->title);
$item->url = $this->getUrl($entry); $item->url = $this->getUrl($entry);
$item->updated = strtotime((string) $entry->updated); $item->updated = strtotime((string) $entry->updated);
$item->author = $author; $item->author = $author;

View File

@ -17,7 +17,7 @@ class Rss10 extends \PicoFeed\Parser
$namespaces = $xml->getNamespaces(true); $namespaces = $xml->getNamespaces(true);
$this->title = (string) $xml->channel->title; $this->title = $this->stripWhiteSpace((string) $xml->channel->title);
$this->url = (string) $xml->channel->link; $this->url = (string) $xml->channel->link;
$this->id = $this->url; $this->id = $this->url;
@ -34,7 +34,7 @@ class Rss10 extends \PicoFeed\Parser
foreach ($xml->item as $entry) { foreach ($xml->item as $entry) {
$item = new \StdClass; $item = new \StdClass;
$item->title = (string) $entry->title; $item->title = $this->stripWhiteSpace((string) $entry->title);
$item->url = ''; $item->url = '';
$item->author= ''; $item->author= '';
$item->updated = ''; $item->updated = '';

View File

@ -35,7 +35,7 @@ class Rss20 extends \PicoFeed\Parser
$this->url = (string) $xml->channel->link; $this->url = (string) $xml->channel->link;
} }
$this->title = (string) $xml->channel->title; $this->title = $this->stripWhiteSpace((string) $xml->channel->title);
$this->id = $this->url; $this->id = $this->url;
$this->updated = isset($xml->channel->pubDate) ? (string) $xml->channel->pubDate : (string) $xml->channel->lastBuildDate; $this->updated = isset($xml->channel->pubDate) ? (string) $xml->channel->pubDate : (string) $xml->channel->lastBuildDate;
$this->updated = $this->updated ? strtotime($this->updated) : time(); $this->updated = $this->updated ? strtotime($this->updated) : time();
@ -46,7 +46,7 @@ class Rss20 extends \PicoFeed\Parser
foreach ($xml->channel->item as $entry) { foreach ($xml->channel->item as $entry) {
$item = new \StdClass; $item = new \StdClass;
$item->title = (string) $entry->title; $item->title = $this->stripWhiteSpace((string) $entry->title);
$item->url = ''; $item->url = '';
$item->author= ''; $item->author= '';
$item->updated = ''; $item->updated = '';
@ -63,7 +63,16 @@ class Rss20 extends \PicoFeed\Parser
if (! $item->content && ! empty($namespace->encoded)) $item->content = (string) $namespace->encoded; if (! $item->content && ! empty($namespace->encoded)) $item->content = (string) $namespace->encoded;
} }
if (empty($item->url)) $item->url = (string) $entry->link; if (empty($item->url)) {
if (isset($entry->link)) {
$item->url = (string) $entry->link;
}
else if (isset($entry->guid)) {
$item->url = (string) $entry->guid;
}
}
if (empty($item->updated)) $item->updated = strtotime((string) $entry->pubDate) ?: $this->updated; if (empty($item->updated)) $item->updated = strtotime((string) $entry->pubDate) ?: $this->updated;
if (empty($item->content)) { if (empty($item->content)) {

View File

@ -6,13 +6,15 @@ abstract class Writer
{ {
public $items = array(); public $items = array();
abstract public function execute($filename = ''); abstract public function execute($filename = '');
public function checkRequiredProperties()
{
foreach ($this->required_properties as $property) {
if (! isset($this->$property)) { public function checkRequiredProperties($properties, $container)
{
foreach ($properties as $property) {
if ((is_object($container) && ! isset($container->$property)) || (is_array($container) && ! isset($container[$property]))) {
throw new \RuntimeException('Required property missing: '.$property); throw new \RuntimeException('Required property missing: '.$property);
} }

View File

@ -6,116 +6,144 @@ require_once __DIR__.'/../Writer.php';
class Atom extends \PicoFeed\Writer class Atom extends \PicoFeed\Writer
{ {
protected $required_properties = array( private $required_feed_properties = array(
'title', 'title',
'site_url', 'site_url',
'feed_url' 'feed_url',
);
private $required_item_properties = array(
'title',
'url',
); );
public function execute($filename = '') public function execute($filename = '')
{ {
$this->checkRequiredProperties(); $this->checkRequiredProperties($this->required_feed_properties, $this);
$dom = new \DomDocument('1.0', 'UTF-8'); $this->dom = new \DomDocument('1.0', 'UTF-8');
$dom->formatOutput = true; $this->dom->formatOutput = true;
// <feed/> // <feed/>
$feed = $dom->createElement('feed'); $feed = $this->dom->createElement('feed');
$feed->setAttributeNodeNS(new \DomAttr('xmlns', 'http://www.w3.org/2005/Atom')); $feed->setAttributeNodeNS(new \DomAttr('xmlns', 'http://www.w3.org/2005/Atom'));
// <generator/> // <generator/>
$generator = $dom->createElement('generator', 'PicoFeed'); $generator = $this->dom->createElement('generator', 'PicoFeed');
$generator->setAttribute('url', 'https://github.com/fguillot/picoFeed'); $generator->setAttribute('url', 'https://github.com/fguillot/picoFeed');
$feed->appendChild($generator); $feed->appendChild($generator);
// <title/> // <title/>
$feed->appendChild($dom->createElement('title', $this->title)); $feed->appendChild($this->dom->createElement('title', $this->title));
// <id/>
$feed->appendChild($this->dom->createElement('id', $this->site_url));
// <updated/> // <updated/>
$feed->appendChild($dom->createElement('updated', date(DATE_ATOM, isset($this->updated) ? $this->updated : time()))); $this->addUpdated($feed, isset($this->updated) ? $this->updated : '');
// <link rel="alternate" type="text/html" href="http://example.org/"/> // <link rel="alternate" type="text/html" href="http://example.org/"/>
$link = $dom->createElement('link'); $this->addLink($feed, $this->site_url);
$link->setAttribute('rel', 'alternate');
$link->setAttribute('type', 'text/html');
$link->setAttribute('href', $this->site_url);
$feed->appendChild($link);
// <link rel="self" type="application/atom+xml" href="http://example.org/feed.atom"/> // <link rel="self" type="application/atom+xml" href="http://example.org/feed.atom"/>
$link = $dom->createElement('link'); $this->addLink($feed, $this->feed_url, 'self', 'application/atom+xml');
$link->setAttribute('rel', 'self');
$link->setAttribute('type', 'application/atom+xml');
$link->setAttribute('href', $this->feed_url);
$feed->appendChild($link);
// <author/> // <author/>
if (isset($this->author)) { if (isset($this->author)) $this->addAuthor($feed, $this->author);
$name = $dom->createElement('name', $this->author);
$author = $dom->createElement('author');
$author->appendChild($name);
$feed->appendChild($author);
}
// <entry/> // <entry/>
foreach ($this->items as $item) { foreach ($this->items as $item) {
$entry = $dom->createElement('entry'); $this->checkRequiredProperties($this->required_item_properties, $item);
$entry = $this->dom->createElement('entry');
// <title/> // <title/>
$entry->appendChild($dom->createElement('title', $item['title'])); $entry->appendChild($this->dom->createElement('title', $item['title']));
// <id/>
$entry->appendChild($this->dom->createElement('id', $item['url']));
// <updated/> // <updated/>
$entry->appendChild($dom->createElement('updated', date(DATE_ATOM, isset($item['updated']) ? $item['updated'] : time()))); $this->addUpdated($entry, isset($item['updated']) ? $item['updated'] : '');
// <published/> // <published/>
if (isset($item['published'])) { if (isset($item['published'])) {
$entry->appendChild($dom->createElement('published', date(DATE_ATOM, $item['published']))); $entry->appendChild($this->dom->createElement('published', date(DATE_ATOM, $item['published'])));
} }
// <link rel="alternate" type="text/html" href="http://example.org/"/> // <link rel="alternate" type="text/html" href="http://example.org/"/>
$link = $dom->createElement('link'); $this->addLink($entry, $item['url']);
$link->setAttribute('rel', 'alternate');
$link->setAttribute('type', 'text/html');
$link->setAttribute('href', $item['url']);
$entry->appendChild($link);
// <summary/> // <summary/>
if (isset($item['summary'])) { if (isset($item['summary'])) {
$entry->appendChild($dom->createElement('summary', $item['summary'])); $entry->appendChild($this->dom->createElement('summary', $item['summary']));
} }
// <content/> // <content/>
if (isset($item['content'])) { if (isset($item['content'])) {
$content = $dom->createElement('content'); $content = $this->dom->createElement('content');
$content->setAttribute('type', 'html'); $content->setAttribute('type', 'html');
$content->appendChild($dom->createCDATASection($item['content'])); $content->appendChild($this->dom->createCDATASection($item['content']));
$entry->appendChild($content); $entry->appendChild($content);
} }
// <author/> // <author/>
if (isset($item['author'])) { if (isset($item['author'])) $this->addAuthor($entry, $item['author']);
$name = $dom->createElement('name', $item['author']);
$author = $dom->createElement('author');
$author->appendChild($name);
$entry->appendChild($author);
}
$feed->appendChild($entry); $feed->appendChild($entry);
} }
$dom->appendChild($feed); $this->dom->appendChild($feed);
if ($filename) { if ($filename) {
$dom->save($filename); $this->dom->save($filename);
} }
else { else {
return $dom->saveXML(); return $this->dom->saveXML();
} }
} }
/**
 * Append a <link/> element to the given node.
 *
 * @param  \DOMNode  $xml   Node that receives the new <link/> child
 * @param  string    $url   Value of the "href" attribute
 * @param  string    $rel   Value of the "rel" attribute
 * @param  string    $type  Value of the "type" attribute
 */
public function addLink($xml, $url, $rel = 'alternate', $type = 'text/html')
{
    $element = $this->dom->createElement('link');

    // Attributes are written in the order rel, type, href.
    $attributes = array('rel' => $rel, 'type' => $type, 'href' => $url);

    foreach ($attributes as $attribute => $content) {
        $element->setAttribute($attribute, $content);
    }

    $xml->appendChild($element);
}
/**
 * Append an <updated/> element formatted with DATE_ATOM to the given node.
 *
 * @param  \DOMNode    $xml    Node that receives the new <updated/> child
 * @param  int|string  $value  Timestamp handed to date(); any falsy value
 *                             ('' or 0) is replaced by the current time
 */
public function addUpdated($xml, $value = '')
{
    $timestamp = $value ?: time();
    $element = $this->dom->createElement('updated', date(DATE_ATOM, $timestamp));

    $xml->appendChild($element);
}
/**
 * Append an <author/> element to the given node.
 *
 * Child elements are emitted in the order <name/>, <email/>, <uri/>, each
 * only when the matching key is set in $values. The <author/> element is
 * appended even when none of the keys is present.
 *
 * @param  \DOMNode  $xml     Node that receives the new <author/> child
 * @param  array     $values  Author details; recognised keys are
 *                            'name', 'email' and 'url' (written as <uri/>)
 */
public function addAuthor($xml, array $values)
{
    $author = $this->dom->createElement('author');

    // Key in $values => name of the element written to the feed.
    $fields = array(
        'name' => 'name',
        'email' => 'email',
        'url' => 'uri',
    );

    foreach ($fields as $key => $tag) {
        if (isset($values[$key])) {
            // createElement() does not escape its value argument, so a "&"
            // in a name or URL would raise a warning and lose the text.
            // A text node is escaped correctly when the document is saved.
            $element = $this->dom->createElement($tag);
            $element->appendChild($this->dom->createTextNode((string) $values[$key]));
            $author->appendChild($element);
        }
    }

    $xml->appendChild($author);
}
} }

125
vendor/PicoFeed/Writers/Rss20.php vendored Normal file
View File

@ -0,0 +1,125 @@
<?php
namespace PicoFeed\Writers;
require_once __DIR__.'/../Writer.php';
/**
 * RSS 2.0 feed writer.
 *
 * Feed-level values are read from properties of the writer object:
 * "title" and "site_url" are required, "description", "updated" and
 * "author" are optional. Entries are read from $this->items, each one an
 * array with the required keys 'title' and 'url' and the optional keys
 * 'updated', 'summary', 'content' and 'author'.
 */
class Rss20 extends \PicoFeed\Writer
{
    // Properties that must be set on the writer before execute() is called.
    private $required_feed_properties = array(
        'title',
        'site_url',
    );

    // Keys that every entry of $this->items must provide.
    private $required_item_properties = array(
        'title',
        'url',
    );

    /**
     * Build the RSS 2.0 document.
     *
     * @param  string  $filename  When non-empty, the document is saved to this
     *                            path and nothing is returned
     * @return string|null        The XML document when no filename is given
     * @throws \RuntimeException  Raised by checkRequiredProperties() when a
     *                            required feed property or item key is missing
     */
    public function execute($filename = '')
    {
        $this->checkRequiredProperties($this->required_feed_properties, $this);

        $this->dom = new \DomDocument('1.0', 'UTF-8');
        $this->dom->formatOutput = true;

        // <rss/>
        $rss = $this->dom->createElement('rss');
        $rss->setAttribute('version', '2.0');
        $rss->setAttributeNodeNS(new \DomAttr('xmlns:content', 'http://purl.org/rss/1.0/modules/content/'));

        $channel = $this->dom->createElement('channel');

        // <generator/>
        $channel->appendChild($this->createTextElement('generator', 'PicoFeed (https://github.com/fguillot/picoFeed)'));

        // <title/>
        $channel->appendChild($this->createTextElement('title', $this->title));

        // <description/> falls back to the feed title when no description is set
        $channel->appendChild($this->createTextElement(
            'description',
            isset($this->description) ? $this->description : $this->title
        ));

        // <pubDate/>
        $this->addPubDate($channel, isset($this->updated) ? $this->updated : '');

        // <link/>
        $channel->appendChild($this->createTextElement('link', $this->site_url));

        // <webMaster/>
        if (isset($this->author)) {
            $this->addAuthor($channel, 'webMaster', $this->author);
        }

        // <item/>
        foreach ($this->items as $item) {

            $this->checkRequiredProperties($this->required_item_properties, $item);

            // RSS 2.0 entries are <item/> elements; <entry/> belongs to Atom.
            $entry = $this->dom->createElement('item');

            // <title/>
            $entry->appendChild($this->createTextElement('title', $item['title']));

            // <link/>
            $entry->appendChild($this->createTextElement('link', $item['url']));

            // <guid/> reuses the item URL and is flagged as a permalink
            $guid = $this->createTextElement('guid', $item['url']);
            $guid->setAttribute('isPermaLink', 'true');
            $entry->appendChild($guid);

            // <pubDate/>
            $this->addPubDate($entry, isset($item['updated']) ? $item['updated'] : '');

            // <description/>
            if (isset($item['summary'])) {
                $entry->appendChild($this->createTextElement('description', $item['summary']));
            }

            // <content:encoded/>
            if (isset($item['content'])) {
                $content = $this->dom->createElement('content:encoded');
                $content->appendChild($this->dom->createCDATASection($item['content']));
                $entry->appendChild($content);
            }

            // <author/>
            if (isset($item['author'])) {
                $this->addAuthor($entry, 'author', $item['author']);
            }

            $channel->appendChild($entry);
        }

        $rss->appendChild($channel);
        $this->dom->appendChild($rss);

        if ($filename) {
            $this->dom->save($filename);
        }
        else {
            return $this->dom->saveXML();
        }
    }

    /**
     * Append a <pubDate/> element to the given node.
     *
     * @param  \DOMNode    $xml    Node that receives the new <pubDate/> child
     * @param  int|string  $value  Timestamp handed to date(); any falsy value
     *                             ('' or 0) is replaced by the current time
     */
    public function addPubDate($xml, $value = '')
    {
        // DATE_RSS is the RFC 822 layout with a four-digit year, the form the
        // RSS 2.0 specification prefers; DATE_RFC822 emits a two-digit year.
        $xml->appendChild($this->dom->createElement(
            'pubDate',
            date(DATE_RSS, $value ?: time())
        ));
    }

    /**
     * Append an author-like element formatted as "email (name)".
     *
     * Nothing is appended when $values has no 'email' key (or it is empty):
     * the name alone is never written.
     *
     * @param  \DOMNode  $xml     Node that receives the new child
     * @param  string    $tag     Element name, 'webMaster' or 'author' in execute()
     * @param  array     $values  Author details; recognised keys are 'email' and 'name'
     */
    public function addAuthor($xml, $tag, array $values)
    {
        $value = '';

        if (isset($values['email'])) $value .= $values['email'];
        if ($value && isset($values['name'])) $value .= ' ('.$values['name'].')';

        if ($value) {
            $xml->appendChild($this->createTextElement($tag, $value));
        }
    }

    /**
     * Create an element whose content is a properly escaped text node.
     *
     * createElement() does not escape its value argument, so a "&" in a title
     * or in a URL query string would raise a warning and lose the text.
     *
     * @param  string  $name   Element name
     * @param  mixed   $value  Text content, cast to string
     * @return \DOMElement
     */
    private function createTextElement($name, $value)
    {
        $element = $this->dom->createElement($name);
        $element->appendChild($this->dom->createTextNode((string) $value));

        return $element;
    }
}