From 3a1050b749334f11ffce33bb20a847a280c113da Mon Sep 17 00:00:00 2001 From: Frederic Guillot Date: Sat, 6 Apr 2013 21:14:52 -0400 Subject: [PATCH] Update of picoFeed --- miniflux/vendor/PicoFeed/Filter.php | 51 +++--- miniflux/vendor/PicoFeed/Reader.php | 45 ++---- miniflux/vendor/PicoFeed/RemoteResource.php | 166 ++++++++++++++++++++ 3 files changed, 211 insertions(+), 51 deletions(-) create mode 100644 miniflux/vendor/PicoFeed/RemoteResource.php diff --git a/miniflux/vendor/PicoFeed/Filter.php b/miniflux/vendor/PicoFeed/Filter.php index f911014..3f16272 100644 --- a/miniflux/vendor/PicoFeed/Filter.php +++ b/miniflux/vendor/PicoFeed/Filter.php @@ -7,11 +7,9 @@ class Filter private $data = ''; private $url = ''; private $input = ''; - private $empty_tag = false; + private $empty_tags = array(); private $strip_content = false; - public $ignored_tags = array(); - public $allowed_tags = array( 'dt' => array(), 'dd' => array(), @@ -68,15 +66,20 @@ class Filter public $blacklist_media = array( 'feeds.feedburner.com', - 'feedsportal.com', + 'da.feedsportal.com', + 'rss.feedsportal.com', + 'res.feedsportal.com', + 'pi.feedsportal.com', 'rss.nytimes.com', 'feeds.wordpress.com', - 'stats.wordpress.com' + 'stats.wordpress.com', + 'rss.cnn.com' ); public $required_attributes = array( 'a' => array('href'), - 'img' => array('src') + 'img' => array('src'), + 'iframe' => array('src') ); public $add_attributes = array( @@ -85,13 +88,15 @@ class Filter public $iframe_allowed_resources = array( 'http://www.youtube.com/', - 'http://player.vimeo.com/' + 'https://www.youtube.com/', + 'http://player.vimeo.com/', + 'https://player.vimeo.com/' ); - public function __construct($data, $url) + public function __construct($data, $site_url) { - $this->url = $url; + $this->url = $site_url; // Convert bad formatted documents to XML $dom = new \DOMDocument; @@ -122,12 +127,12 @@ class Filter public function startTag($parser, $name, $attributes) { - $this->empty_tag = false; + $empty_tag = false; $this->strip_content = false; if ($this->isPixelTracker($name, $attributes)) { - $this->empty_tag = true; + $empty_tag = true; } else if ($this->isAllowedTag($name)) { @@ -140,10 +145,13 @@ class Filter if ($this->isResource($attribute)) { - if ($name === 'iframe' && $this->isAllowedIframeResource($value)) { + if ($name === 'iframe') { - $attr_data .= ' '.$attribute.'="'.$value.'"'; - $used_attributes[] = $attribute; + if ($this->isAllowedIframeResource($value)) { + + $attr_data .= ' '.$attribute.'="'.$value.'"'; + $used_attributes[] = $attribute; + } } else if ($this->isRelativePath($value)) { @@ -164,45 +172,46 @@ class Filter } } + // Check for required attributes if (isset($this->required_attributes[$name])) { foreach ($this->required_attributes[$name] as $required_attribute) { if (! in_array($required_attribute, $used_attributes)) { - $this->empty_tag = true; + $empty_tag = true; break; } } } - if (! $this->empty_tag) { + if (! $empty_tag) { $this->data .= '<'.$name.$attr_data; + // Add custom attributes if (isset($this->add_attributes[$name])) { $this->data .= ' '.$this->add_attributes[$name].' '; } + // If img or br, we don't close it here if ($name !== 'img' && $name !== 'br') $this->data .= '>'; } } - else { - - $this->ignored_tags[] = $name; - } if (in_array($name, $this->strip_tags_content)) { $this->strip_content = true; } + + $this->empty_tags[] = $empty_tag; } public function endTag($parser, $name) { - if (! $this->empty_tag && $this->isAllowedTag($name)) { + if (! array_pop($this->empty_tags) && $this->isAllowedTag($name)) { $this->data .= $name !== 'img' && $name !== 'br' ? '' : '/>'; } diff --git a/miniflux/vendor/PicoFeed/Reader.php b/miniflux/vendor/PicoFeed/Reader.php index 13323e9..4fdca9c 100644 --- a/miniflux/vendor/PicoFeed/Reader.php +++ b/miniflux/vendor/PicoFeed/Reader.php @@ -2,6 +2,9 @@ namespace PicoFeed; +require_once __DIR__.'/Parser.php'; +require_once __DIR__.'/RemoteResource.php'; + class Reader { private $url = ''; @@ -16,40 +19,22 @@ class Reader } - public function download($url, $timeout = 5, $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)') + public function download($url, $last_modified = '', $etag = '', $timeout = 5, $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)') { if (strpos($url, 'http') !== 0) { $url = 'http://'.$url; } - $this->url = $url; - $this->content = $this->fetchRemoteFile($url, $timeout, $user_agent); + $resource = new RemoteResource($url, $timeout, $user_agent); + $resource->setLastModified($last_modified); + $resource->setEtag($etag); + $resource->execute(); - return $this; - } + $this->content = $resource->getContent(); + $this->url = $resource->getUrl(); - - public function fetchRemoteFile($url, $timeout, $user_agent) - { - if (! \function_exists('curl_init')) { - - return @file_get_contents($this->url); - } - - $ch = \curl_init(); - - \curl_setopt($ch, CURLOPT_URL, $url); - \curl_setopt($ch, CURLOPT_HEADER, false); - \curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - \curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout); - \curl_setopt($ch, CURLOPT_USERAGENT, $user_agent); - - $content = \curl_exec($ch); - - \curl_close($ch); - - return $content; + return $resource; } @@ -90,24 +75,24 @@ class Reader { $first_tag = $this->getFirstTag($this->content); - if (strpos($first_tag, 'content); } - else if (strpos($first_tag, 'content); } - else if (strpos($first_tag, 'content); } - else if (strpos($first_tag, 'url = $url; + $this->timeout = $timeout; + $this->user_agent = $user_agent; + + return $this; + } + + + public function setLastModified($last_modified) + { + $this->last_modified = $last_modified; + return $this; + } + + + public function getLastModified() + { + return $this->last_modified; + } + + + public function setEtag($etag) + { + $this->etag = $etag; + return $this; + } + + + public function getEtag() + { + return $this->etag; + } + + + public function getUrl() + { + return $this->url; + } + + + public function getContent() + { + return $this->content; + } + + + public function isModified() + { + return $this->is_modified; + } + + + public function execute() + { + $response = $this->makeRequest(); + + $this->etag = isset($response['headers']['ETag']) ? $response['headers']['ETag'] : ''; + $this->last_modified = isset($response['headers']['Last-Modified']) ? $response['headers']['Last-Modified'] : ''; + + if ($response['status'] == 304) { + + $this->is_modified = false; + } + else if ($response['status'] == 301 || $response['status'] == 302) { + + if (isset($response['headers']['Location'])) { + + $this->url = $response['headers']['Location']; + } + else if (isset($response['headers']['location'])) { + + $this->url = $response['headers']['location']; + } + + $this->execute(); + } + else { + + $this->content = $response['body']; + } + } + + + public function makeRequest() + { + $http_code = 200; + $http_body = ''; + $http_headers = array(); + + if (! function_exists('curl_init')) { + + $http_body = @file_get_contents($this->url); + } + else { + + $headers = array('Connection: close'); + + if ($this->etag) $headers[] = 'If-None-Match: '.$this->etag; + if ($this->last_modified) $headers[] = 'If-Modified-Since: '.$this->last_modified; + + $ch = curl_init(); + + curl_setopt($ch, CURLOPT_URL, $this->url); + curl_setopt($ch, CURLOPT_HEADER, true); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout); + curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent); + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); + + $http_response = curl_exec($ch); + $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); + $http_body = ''; + $http_headers = array(); + + curl_close($ch); + + $lines = explode("\r\n", $http_response); + $body_start = 0; + $i = 0; + + foreach ($lines as $line) { + + if ($line === '') { + + $body_start = $i; + break; + } + else if (($p = strpos($line, ':')) !== false) { + + $key = substr($line, 0, $p); + $value = substr($line, $p + 1); + + $http_headers[trim($key)] = trim($value); + } + + $i++; + } + + $http_body = implode("\r\n", array_splice($lines, $i + 1)); + } + + return array( + 'status' => $http_code, + 'body' => $http_body, + 'headers' => $http_headers + ); + } +} \ No newline at end of file