Implements a fallback url for broken feeds

This commit is contained in:
Frédéric Guillot 2014-10-22 22:10:17 -04:00
parent 85d4e9231d
commit 6efd1ff538
3 changed files with 60 additions and 4 deletions

View File

@ -134,6 +134,15 @@ abstract class Client
*/
protected $max_body_size = 2097152; // 2MB
/**
* Do the HTTP request
*
* Declared abstract: every concrete client class has to supply its own
* implementation of this method.
*
* @abstract
* @access public
* @return array   NOTE(review): the array layout is defined by the implementations, which are not visible here - confirm
*/
abstract public function doRequest();
/**
* Get client instance: curl or stream driver
*

View File

@ -45,6 +45,14 @@ abstract class Parser
*/
protected $content = '';
/**
* Fallback url
*
* Set from the third constructor argument (empty string by default) and
* used as the feed url when the url found in the feed is relative.
*
* @access protected
* @var string
*/
protected $fallback_url = '';
/**
* XML namespaces
*
@ -81,11 +89,13 @@ abstract class Parser
* Constructor
*
* @access public
* @param string $content Feed content
* @param string $http_encoding HTTP encoding (headers)
* @param string $content Feed content
* @param string $http_encoding HTTP encoding (headers)
* @param string $fallback_url Fallback url used when the feed provides a relative or broken url
*/
public function __construct($content, $http_encoding = '')
public function __construct($content, $http_encoding = '', $fallback_url = '')
{
$this->fallback_url = $fallback_url;
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing
@ -120,7 +130,10 @@ abstract class Parser
$this->namespaces = $xml->getNamespaces(true);
$feed = new Feed;
$this->findFeedUrl($xml, $feed);
$this->checkFeedUrl($feed);
$this->findFeedTitle($xml, $feed);
$this->findFeedDescription($xml, $feed);
$this->findFeedLanguage($xml, $feed);
@ -132,7 +145,10 @@ abstract class Parser
$item = new Item;
$this->findItemAuthor($xml, $entry, $item);
$this->findItemUrl($entry, $item);
$this->checkItemUrl($feed, $item);
$this->findItemTitle($entry, $item);
$this->findItemId($entry, $item, $feed);
$this->findItemDate($entry, $item);
@ -151,6 +167,37 @@ abstract class Parser
return $feed;
}
/**
* Make sure the feed url is usable
*
* When the url stored in the feed is relative, it is replaced by the
* fallback url given to the parser; otherwise the feed is left untouched.
*
* @access public
* @param Feed $feed Feed object
*/
public function checkFeedUrl(Feed $feed)
{
    $feed_url = new Url($feed->getUrl());

    // Nothing to repair when the feed already exposes a non-relative url
    if (! $feed_url->isRelativeUrl()) {
        return;
    }

    $feed->url = $this->fallback_url;
}
/**
* Make sure the item url is usable
*
* When the item url is relative, it is resolved against the feed url
* and the result is stored back on the item; otherwise the item is
* left untouched.
*
* @access public
* @param Feed $feed Feed object
* @param Item $item Item object
*/
public function checkItemUrl(Feed $feed, Item $item)
{
    $item_url = new Url($item->getUrl());

    // Non-relative item urls are kept as they are
    if (! $item_url->isRelativeUrl()) {
        return;
    }

    $relative_url = $item->getUrl();
    $base_url = $feed->getUrl();

    $item->url = Url::resolve($relative_url, $base_url);
}
/**
* Fetch item content with the content grabber
*

View File

@ -104,7 +104,7 @@ class Reader
require_once __DIR__.'/Parsers/'.ucfirst($name).'.php';
$name = '\PicoFeed\Parsers\\'.$name;
$parser = new $name($this->content, $this->encoding);
$parser = new $name($this->content, $this->encoding, $this->getUrl());
$parser->setHashAlgo($this->config->getParserHashAlgo());
$parser->setTimezone($this->config->getTimezone());
$parser->setConfig($this->config);