diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php index cf9f80f..c66d99f 100644 --- a/vendor/PicoFeed/Filter.php +++ b/vendor/PicoFeed/Filter.php @@ -9,6 +9,7 @@ class Filter private $input = ''; private $empty_tags = array(); private $strip_content = false; + private $is_code = false; // Allow only these tags and attributes public static $whitelist_tags = array( @@ -186,6 +187,9 @@ class Filter xml_parse($parser, $this->input, true); // We ignore parsing error (for old libxml) xml_parser_free($parser); + $this->data = $this->removeEmptyTags($this->data); + $this->data = $this->removeMultipleTags($this->data); + return $this->data; } @@ -195,6 +199,8 @@ class Filter $empty_tag = false; $this->strip_content = false; + if ($this->is_code === false && $name === 'pre') $this->is_code = true; + if ($this->isPixelTracker($name, $attributes)) { $empty_tag = true; @@ -274,7 +280,6 @@ class Filter } if (in_array($name, self::$blacklist_tags)) { - $this->strip_content = true; } @@ -285,15 +290,25 @@ class Filter public function endTag($parser, $name) { if (! array_pop($this->empty_tags) && $this->isAllowedTag($name)) { - $this->data .= $name !== 'img' && $name !== 'br' ? '' : '/>'; } + + if ($this->is_code && $name === 'pre') $this->is_code = false; } public function dataTag($parser, $content) { - if (! $this->strip_content) $this->data .= htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); + $content = str_replace("\xc2\xa0", ' ', $content); // Replace   with normal space + + // Replace mutliple space by a single one + if (! $this->is_code) { + $content = preg_replace('!\s+!', ' ', $content); + } + + if (! $this->strip_content && trim($content) !== '') { + $this->data .= htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); + } } @@ -420,4 +435,23 @@ class Filter return true; } + + + public function removeMultipleTags($data) + { + // Replace

by only one + return preg_replace("/(\s*)+/", "
", $data); + } + + + public function removeEmptyTags($data) + { + return preg_replace('/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU', '', $data); + } + + + public function removeHTMLTags($data) + { + return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\s*~i', '', $data); + } } diff --git a/vendor/PicoFeed/RemoteResource.php b/vendor/PicoFeed/RemoteResource.php deleted file mode 100644 index f186f20..0000000 --- a/vendor/PicoFeed/RemoteResource.php +++ /dev/null @@ -1,201 +0,0 @@ -url = $url; - $this->timeout = $timeout; - $this->user_agent = $user_agent; - - return $this; - } - - - public function setLastModified($last_modified) - { - $this->last_modified = $last_modified; - return $this; - } - - - public function getLastModified() - { - return $this->last_modified; - } - - - public function setEtag($etag) - { - $this->etag = $etag; - return $this; - } - - - public function getEtag() - { - return $this->etag; - } - - - public function getUrl() - { - return $this->url; - } - - - public function getContent() - { - return $this->content; - } - - - public function isModified() - { - return $this->is_modified; - } - - - public function execute() - { - $response = $this->doRequest(); - - if ($response['status'] == 304) { - - $this->is_modified = false; - } - else if ($response['status'] == 301 || $response['status'] == 302) { - - if (isset($response['headers']['Location'])) { - - $this->url = $response['headers']['Location']; - } - else if (isset($response['headers']['location'])) { - - $this->url = $response['headers']['location']; - } - - $this->execute(); - } - else { - $this->etag = isset($response['headers']['ETag']) ? $response['headers']['ETag'] : ''; - $this->last_modified = isset($response['headers']['Last-Modified']) ? $response['headers']['Last-Modified'] : ''; - $this->content = $response['body']; - } - } - - - public function doRequest() - { - $http_code = 200; - $http_body = ''; - $http_headers = array(); - - if (! function_exists('curl_init')) { - - $http_body = @file_get_contents($this->url); - } - else { - - Logging::log('Fetch URL: '.$this->url); - Logging::log('Etag: '.$this->etag); - Logging::log('Last-Modified: '.$this->last_modified); - - $headers = array('Connection: close'); - - if ($this->etag) $headers[] = 'If-None-Match: '.$this->etag; - if ($this->last_modified) $headers[] = 'If-Modified-Since: '.$this->last_modified; - - $ch = curl_init(); - - curl_setopt($ch, CURLOPT_URL, $this->url); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout); - curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout); - curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent); - curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); - curl_setopt($ch, CURLOPT_ENCODING, ''); - - // Don't check SSL certificates (for auto-signed certificates...) - curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); - - $http_response = curl_exec($ch); - - if (curl_errno($ch)) { - - Logging::log('cURL error: '.curl_error($ch)); - - curl_close($ch); - - return array( - 'status' => $http_code, - 'body' => $http_body, - 'headers' => $http_headers - ); - } - - Logging::log('cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME)); - Logging::log('cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME)); - Logging::log('cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME)); - Logging::log('cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD)); - - $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE); - $http_body = ''; - $http_headers = array(); - - curl_close($ch); - - $lines = explode("\r\n", $http_response); - $body_start = 0; - $i = 0; - - foreach ($lines as $line) { - - if ($line === '') { - - $body_start = $i; - break; - } - else if (($p = strpos($line, ':')) !== false) { - - $key = substr($line, 0, $p); - $value = substr($line, $p + 1); - - $http_headers[trim($key)] = trim($value); - } - - $i++; - } - - $http_body = implode("\r\n", array_splice($lines, $i + 1)); - } - - Logging::log('HTTP status code: '.$http_code); - - foreach ($http_headers as $header_name => $header_value) { - - Logging::log('HTTP headers: '.$header_name.' => '.$header_value); - } - - return array( - 'status' => $http_code, - 'body' => $http_body, - 'headers' => $http_headers - ); - } -} \ No newline at end of file