Implements a fallback url for broken feeds

This commit is contained in:
Frédéric Guillot 2014-10-22 22:10:17 -04:00
parent 85d4e9231d
commit 6efd1ff538
3 changed files with 60 additions and 4 deletions

View File

@ -134,6 +134,15 @@ abstract class Client
*/ */
protected $max_body_size = 2097152; // 2MB protected $max_body_size = 2097152; // 2MB
/**
 * Perform the HTTP request.
 *
 * Declared abstract: each concrete client class must provide its own
 * implementation of this method.
 *
 * @abstract
 * @access public
 * @return array  NOTE(review): the structure of the returned array is not visible here; confirm against the concrete implementations
 */
abstract public function doRequest();
/** /**
* Get client instance: curl or stream driver * Get client instance: curl or stream driver
* *

View File

@ -45,6 +45,14 @@ abstract class Parser
*/ */
protected $content = ''; protected $content = '';
/**
* Fallback url
*
* @access protected
* @var string
*/
protected $fallback_url = '';
/** /**
* XML namespaces * XML namespaces
* *
@ -81,11 +89,13 @@ abstract class Parser
* Constructor * Constructor
* *
* @access public * @access public
* @param string $content Feed content * @param string $content Feed content
* @param string $http_encoding HTTP encoding (headers) * @param string $http_encoding HTTP encoding (headers)
* @param string $fallback_url Fallback url when the feed provides a relative or broken url
*/ */
public function __construct($content, $http_encoding = '') public function __construct($content, $http_encoding = '', $fallback_url = '')
{ {
$this->fallback_url = $fallback_url;
$xml_encoding = XmlParser::getEncodingFromXmlTag($content); $xml_encoding = XmlParser::getEncodingFromXmlTag($content);
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing // Strip XML tag to avoid multiple encoding/decoding in the next XML processing
@ -120,7 +130,10 @@ abstract class Parser
$this->namespaces = $xml->getNamespaces(true); $this->namespaces = $xml->getNamespaces(true);
$feed = new Feed; $feed = new Feed;
$this->findFeedUrl($xml, $feed); $this->findFeedUrl($xml, $feed);
$this->checkFeedUrl($feed);
$this->findFeedTitle($xml, $feed); $this->findFeedTitle($xml, $feed);
$this->findFeedDescription($xml, $feed); $this->findFeedDescription($xml, $feed);
$this->findFeedLanguage($xml, $feed); $this->findFeedLanguage($xml, $feed);
@ -132,7 +145,10 @@ abstract class Parser
$item = new Item; $item = new Item;
$this->findItemAuthor($xml, $entry, $item); $this->findItemAuthor($xml, $entry, $item);
$this->findItemUrl($entry, $item); $this->findItemUrl($entry, $item);
$this->checkItemUrl($feed, $item);
$this->findItemTitle($entry, $item); $this->findItemTitle($entry, $item);
$this->findItemId($entry, $item, $feed); $this->findItemId($entry, $item, $feed);
$this->findItemDate($entry, $item); $this->findItemDate($entry, $item);
@ -151,6 +167,37 @@ abstract class Parser
return $feed; return $feed;
} }
/**
 * Replace a relative feed url with the fallback url
 *
 * The url currently stored on the feed is wrapped in a Url object; when that
 * object reports it as relative, the feed url is overwritten with the
 * fallback url given to the parser constructor. Any other url is kept as-is.
 *
 * @access public
 * @param  Feed    $feed     Feed object (modified in place)
 */
public function checkFeedUrl(Feed $feed)
{
    $feed_url = new Url($feed->getUrl());

    // Nothing to repair when the feed url is not relative
    if (! $feed_url->isRelativeUrl()) {
        return;
    }

    $feed->url = $this->fallback_url;
}
/**
 * Resolve a relative item url against the feed url
 *
 * When the item url is reported as relative, it is replaced by the result of
 * Url::resolve() called with the item url and the feed url. Any other item
 * url is left unchanged.
 *
 * @access public
 * @param  Feed    $feed     Feed object (its url is used as the base for resolution)
 * @param  Item    $item     Item object (modified in place)
 */
public function checkItemUrl(Feed $feed, Item $item)
{
    $item_url = $item->getUrl();
    $parsed_url = new Url($item_url);

    // Leave the item untouched unless its url is relative
    if (! $parsed_url->isRelativeUrl()) {
        return;
    }

    $item->url = Url::resolve($item_url, $feed->getUrl());
}
/** /**
* Fetch item content with the content grabber * Fetch item content with the content grabber
* *

View File

@ -104,7 +104,7 @@ class Reader
require_once __DIR__.'/Parsers/'.ucfirst($name).'.php'; require_once __DIR__.'/Parsers/'.ucfirst($name).'.php';
$name = '\PicoFeed\Parsers\\'.$name; $name = '\PicoFeed\Parsers\\'.$name;
$parser = new $name($this->content, $this->encoding); $parser = new $name($this->content, $this->encoding, $this->getUrl());
$parser->setHashAlgo($this->config->getParserHashAlgo()); $parser->setHashAlgo($this->config->getParserHashAlgo());
$parser->setTimezone($this->config->getTimezone()); $parser->setTimezone($this->config->getTimezone());
$parser->setConfig($this->config); $parser->setConfig($this->config);