miniflux-legacy/vendor/PicoFeed/Reader.php

173 lines
4.5 KiB
PHP
Raw Normal View History

2013-02-18 03:48:21 +01:00
<?php
namespace PicoFeed;
2013-04-07 03:14:52 +02:00
require_once __DIR__.'/Parser.php';
require_once __DIR__.'/RemoteResource.php';
2013-02-18 03:48:21 +01:00
/**
 * Fetches a document, works out which feed format it is and hands back the
 * matching parser. When the document is an HTML page instead of a feed, the
 * reader looks for an advertised feed link and follows it.
 */
class Reader
{
    /**
     * URL of the last downloaded resource, as reported by RemoteResource.
     *
     * @var string
     */
    private $url = '';

    /**
     * Raw body of the document being inspected.
     *
     * @var string
     */
    private $content = '';

    /**
     * @param string $content Optional document body to inspect without downloading anything
     */
    public function __construct($content = '')
    {
        $this->content = $content;
    }

    /**
     * Download a resource and keep its body and URL for later inspection.
     *
     * @param  string $url           Resource URL; "http://" is prepended when no http(s) scheme is given
     * @param  string $last_modified Value forwarded to RemoteResource::setLastModified()
     * @param  string $etag          Value forwarded to RemoteResource::setEtag()
     * @param  int    $timeout       Timeout handed to RemoteResource
     * @param  string $user_agent    User agent handed to RemoteResource
     * @return RemoteResource        The executed resource object
     */
    public function download($url, $last_modified = '', $etag = '', $timeout = 5, $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)')
    {
        // Test for a real scheme: a bare "http" prefix check would mistake hosts
        // such as "httpbin.org" for absolute URLs and would double-prefix "HTTP://".
        if (! preg_match('#^https?://#i', $url)) {
            $url = 'http://'.$url;
        }

        $resource = new RemoteResource($url, $timeout, $user_agent);
        $resource->setLastModified($last_modified);
        $resource->setEtag($etag);
        $resource->execute();

        $this->content = $resource->getContent();
        $this->url = $resource->getUrl();

        return $resource;
    }

    /**
     * @return string Body of the current document
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * @return string URL of the current document (empty when nothing was downloaded)
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Extract the first element tag that follows the XML declaration.
     *
     * Comments and an early DOCTYPE are removed first. When the document has
     * no XML declaration, the data (minus comments/DOCTYPE) is returned whole
     * so that callers can still search it for a feed root element.
     *
     * @param  string $data Document body
     * @return string       The first tag, e.g. '<rss version="2.0">', the stripped
     *                      document when there is no XML declaration, or '' when
     *                      no complete tag can be found
     */
    public function getFirstTag($data)
    {
        $data = (string) $data;

        // Strip comments, each capped at 5,000 characters to keep PCRE backtracking bounded.
        // The quantifier must be lazy: a greedy match would swallow everything between the
        // first and the last comment, including the root element.
        $stripped = preg_replace('/<!--.{0,5000}?-->/is', '', $data);

        if ($stripped !== null) {
            $data = $stripped;
        }

        // Strip everything up to and including a DOCTYPE found within the first 500
        // characters; searching further risks hitting the PCRE backtrack limit.
        $stripped = preg_replace('/^.{0,500}?<!DOCTYPE[^>]*>/is', '', $data);

        if ($stripped !== null) {
            $data = $stripped;
        }

        $offset = strpos($data, '<?xml');

        if ($offset === false) {
            return $data;
        }

        // Skip the XML declaration and any processing instruction that directly follows it
        // (xml-stylesheet for instance). Only leading instructions are skipped, so an
        // instruction terminator inside the feed content no longer hides the root element.
        while (substr($data, $offset, 2) === '<?') {

            $end = strpos($data, '?>', $offset);

            if ($end === false) {
                return '';
            }

            $offset = $end + 2;
            $offset += (int) strspn($data, " \t\r\n", $offset);
        }

        $open_tag = strpos($data, '<', $offset);

        if ($open_tag === false) {
            return '';
        }

        $close_tag = strpos($data, '>', $open_tag);

        if ($close_tag === false) {
            return '';
        }

        // substr() expects a length, not an end position
        return substr($data, $open_tag, $close_tag - $open_tag + 1);
    }

    /**
     * Pick the parser matching the format of the current content.
     *
     * When the content is not a recognised feed, discover() is tried once
     * and detection runs again on whatever it downloaded.
     *
     * @param  bool $discover True when called after a discovery attempt (prevents endless recursion)
     * @return Parser|bool    Parser instance, or false when no supported format is found
     */
    public function getParser($discover = false)
    {
        $first_tag = $this->getFirstTag($this->content);

        if (strpos($first_tag, '<feed') !== false) {
            require_once __DIR__.'/Parsers/Atom.php';
            return new Parsers\Atom($this->content);
        }

        if ($this->isRssVersion($first_tag, '2.0')) {
            require_once __DIR__.'/Parsers/Rss20.php';
            return new Parsers\Rss20($this->content);
        }

        if ($this->isRssVersion($first_tag, '0.92')) {
            require_once __DIR__.'/Parsers/Rss92.php';
            return new Parsers\Rss92($this->content);
        }

        if ($this->isRssVersion($first_tag, '0.91')) {
            require_once __DIR__.'/Parsers/Rss91.php';
            return new Parsers\Rss91($this->content);
        }

        if (strpos($first_tag, '<rdf:') !== false && strpos($first_tag, 'xmlns="http://purl.org/rss/1.0/"') !== false) {
            require_once __DIR__.'/Parsers/Rss10.php';
            return new Parsers\Rss10($this->content);
        }

        // Not a feed: try the links advertised by the page, but only once
        if ($discover !== true && $this->discover()) {
            return $this->getParser(true);
        }

        return false;
    }

    /**
     * Look for a feed advertised by the current HTML content and download it.
     *
     * Atom links are preferred over RSS links. The first link carrying a
     * non-empty href is downloaded, replacing the current content and URL.
     *
     * @return bool True when a feed link was found and a download was attempted
     */
    public function discover()
    {
        if (! $this->content) {
            return false;
        }

        \libxml_use_internal_errors(true);

        $dom = new \DOMDocument;
        $dom->loadHTML($this->content);

        // Real-world HTML produces many parse errors; drop them instead of letting them pile up
        \libxml_clear_errors();

        $xpath = new \DOMXPath($dom);

        $queries = array(
            "//link[@type='application/atom+xml']",
            "//link[@type='application/rss+xml']"
        );

        foreach ($queries as $query) {

            $nodes = $xpath->query($query);

            if ($nodes === false) {
                continue;
            }

            foreach ($nodes as $node) {

                $link = trim($node->getAttribute('href'));

                if ($link !== '') {
                    $this->download($this->resolveUrl($link));
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Tell whether a tag is an <rss> root element declaring the given version.
     *
     * @param  string $tag     Tag as returned by getFirstTag()
     * @param  string $version Version number, e.g. "2.0"
     * @return bool
     */
    private function isRssVersion($tag, $version)
    {
        if (strpos($tag, '<rss') === false) {
            return false;
        }

        return strpos($tag, 'version="'.$version.'"') !== false
            || strpos($tag, 'version=\''.$version.'\'') !== false;
    }

    /**
     * Turn a link found in the current document into an absolute URL.
     *
     * Handles protocol-relative ("//host/feed"), root-relative ("/feed") and
     * path-relative ("feed.xml") links. Dot segments are not normalised. The
     * link is returned untouched when it is already absolute or when the
     * document URL is unknown.
     *
     * @param  string $link Non-empty href value
     * @return string
     */
    private function resolveUrl($link)
    {
        if (preg_match('#^https?://#i', $link)) {
            return $link;
        }

        $base = parse_url($this->url);

        if (! is_array($base) || empty($base['scheme']) || empty($base['host'])) {
            return $link;
        }

        if (strpos($link, '//') === 0) {
            return $base['scheme'].':'.$link;
        }

        $root = $base['scheme'].'://'.$base['host'];

        if (! empty($base['port'])) {
            $root .= ':'.$base['port'];
        }

        if ($link[0] === '/') {
            return $root.$link;
        }

        // Path-relative links are resolved against the directory of the document
        $path = isset($base['path']) ? $base['path'] : '/';
        $slash = strrpos($path, '/');
        $directory = $slash === false ? '/' : substr($path, 0, $slash + 1);

        return $root.$directory.$link;
    }
}