From 079545daed91f44807377d8f018c694cfe1dcb92 Mon Sep 17 00:00:00 2001 From: Frederic Guillot Date: Tue, 15 Dec 2015 19:26:15 -0500 Subject: [PATCH] Upgrade to PicoFeed 0.1.16 --- composer.json | 2 +- vendor/composer/autoload_classmap.php | 1 + vendor/composer/autoload_namespaces.php | 1 + vendor/composer/installed.json | 66 +++- .../picofeed/lib/PicoFeed/Client/Client.php | 2 +- .../picofeed/lib/PicoFeed/Client/Curl.php | 52 ++- .../PicoFeed/Parser/XmlEntityException.php | 12 + .../lib/PicoFeed/Parser/XmlParser.php | 108 ++--- .../lib/PicoFeed/Rules/bizjournals.com.php | 5 +- .../picofeed/lib/PicoFeed/Rules/cnet.com.php | 29 +- .../lib/PicoFeed/Rules/engadget.com.php | 4 +- .../picofeed/lib/PicoFeed/Rules/heise.de.php | 1 + .../lib/PicoFeed/Rules/nature.com.php | 20 + .../lib/PicoFeed/Rules/networkworld.com.php | 5 +- .../lib/PicoFeed/Rules/neustadt-ticker.de.php | 10 +- .../PicoFeed/Rules/news.sciencemag.org.php | 18 + .../PicoFeed/Rules/retractionwatch.com.php | 18 + .../lib/PicoFeed/Rules/thelocal.se.php | 21 + .../picofeed/lib/PicoFeed/Rules/upi.com.php | 8 +- .../PicoFeed/Rules/www.geekculture.com.php | 13 + vendor/zendframework/zendxml/.gitignore | 5 + vendor/zendframework/zendxml/.travis.yml | 23 ++ vendor/zendframework/zendxml/LICENSE.md | 12 + vendor/zendframework/zendxml/README.md | 50 +++ vendor/zendframework/zendxml/composer.json | 40 ++ .../ZendXml/Exception/ExceptionInterface.php | 14 + .../Exception/InvalidArgumentException.php | 17 + .../ZendXml/Exception/RuntimeException.php | 17 + .../zendxml/library/ZendXml/Security.php | 374 ++++++++++++++++++ .../zendframework/zendxml/tests/Bootstrap.php | 92 +++++ .../tests/ZendXmlTest/MultibyteTest.php | 125 ++++++ .../tests/ZendXmlTest/SecurityTest.php | 135 +++++++ .../zendxml/tests/phpunit.xml.dist | 27 ++ 33 files changed, 1185 insertions(+), 142 deletions(-) create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/nature.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/retractionwatch.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.geekculture.com.php create mode 100644 vendor/zendframework/zendxml/.gitignore create mode 100644 vendor/zendframework/zendxml/.travis.yml create mode 100644 vendor/zendframework/zendxml/LICENSE.md create mode 100644 vendor/zendframework/zendxml/README.md create mode 100644 vendor/zendframework/zendxml/composer.json create mode 100644 vendor/zendframework/zendxml/library/ZendXml/Exception/ExceptionInterface.php create mode 100644 vendor/zendframework/zendxml/library/ZendXml/Exception/InvalidArgumentException.php create mode 100644 vendor/zendframework/zendxml/library/ZendXml/Exception/RuntimeException.php create mode 100644 vendor/zendframework/zendxml/library/ZendXml/Security.php create mode 100644 vendor/zendframework/zendxml/tests/Bootstrap.php create mode 100644 vendor/zendframework/zendxml/tests/ZendXmlTest/MultibyteTest.php create mode 100644 vendor/zendframework/zendxml/tests/ZendXmlTest/SecurityTest.php create mode 100755 vendor/zendframework/zendxml/tests/phpunit.xml.dist diff --git a/composer.json b/composer.json index 5678656..4c320b5 100644 --- a/composer.json +++ b/composer.json @@ -14,7 +14,7 @@ "fguillot/simple-validator": "v1.0.0", "fguillot/json-rpc": "v1.0.2", "fguillot/picodb": "v1.0.2", - "fguillot/picofeed": "v0.1.15" + "fguillot/picofeed": "v0.1.16" }, "require-dev": { "phpunit/phpunit": "4.8.3", diff --git a/vendor/composer/autoload_classmap.php b/vendor/composer/autoload_classmap.php index 34fa836..9838bcd 100644 --- a/vendor/composer/autoload_classmap.php +++ b/vendor/composer/autoload_classmap.php @@ -54,6 +54,7 @@ return array( 'PicoFeed\\Parser\\Rss20' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php', 'PicoFeed\\Parser\\Rss91' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/Rss91.php', 'PicoFeed\\Parser\\Rss92' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/Rss92.php', + 'PicoFeed\\Parser\\XmlEntityException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php', 'PicoFeed\\Parser\\XmlParser' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php', 'PicoFeed\\PicoFeedException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/PicoFeedException.php', 'PicoFeed\\Reader\\Favicon' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php', diff --git a/vendor/composer/autoload_namespaces.php b/vendor/composer/autoload_namespaces.php index ece67a9..d3002d8 100644 --- a/vendor/composer/autoload_namespaces.php +++ b/vendor/composer/autoload_namespaces.php @@ -6,6 +6,7 @@ $vendorDir = dirname(dirname(__FILE__)); $baseDir = dirname($vendorDir); return array( + 'ZendXml\\' => array($vendorDir . '/zendframework/zendxml/library'), 'SimpleValidator' => array($vendorDir . '/fguillot/simple-validator/src'), 'PicoFeed' => array($vendorDir . '/fguillot/picofeed/lib'), 'PicoDb' => array($vendorDir . '/fguillot/picodb/lib'), diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index f96d709..8a13bd0 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -115,18 +115,65 @@ "homepage": "https://github.com/fguillot/picoDb" }, { - "name": "fguillot/picofeed", - "version": "v0.1.15", - "version_normalized": "0.1.15.0", + "name": "zendframework/zendxml", + "version": "1.0.1", + "version_normalized": "1.0.1.0", "source": { "type": "git", - "url": "https://github.com/fguillot/picoFeed.git", - "reference": "ee91bcfd4be93d38ae5f870723c652a9d795c75f" + "url": "https://github.com/zendframework/ZendXml.git", + "reference": "54edb3875aba5b45f02824f65f311c9fb2743a38" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/ee91bcfd4be93d38ae5f870723c652a9d795c75f", - "reference": "ee91bcfd4be93d38ae5f870723c652a9d795c75f", + "url": "https://api.github.com/repos/zendframework/ZendXml/zipball/54edb3875aba5b45f02824f65f311c9fb2743a38", + "reference": "54edb3875aba5b45f02824f65f311c9fb2743a38", + "shasum": "" + }, + "require": { + "php": ">=5.3.3" + }, + "require-dev": { + "phpunit/phpunit": "~3.7", + "squizlabs/php_codesniffer": "~1.5" + }, + "time": "2015-08-03 14:50:10", + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "installation-source": "dist", + "autoload": { + "psr-0": { + "ZendXml\\": "library/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "BSD-3-Clause" + ], + "description": "Utility library for XML usage, best practices, and security in PHP", + "homepage": "http://packages.zendframework.com/", + "keywords": [ + "security", + "xml", + "zf2" + ] + }, + { + "name": "fguillot/picofeed", + "version": "v0.1.16", + "version_normalized": "0.1.16.0", + "source": { + "type": "git", + "url": "https://github.com/fguillot/picoFeed.git", + "reference": "4162314ea083f0957079d6b1a63b2c24b1f6de73" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/4162314ea083f0957079d6b1a63b2c24b1f6de73", + "reference": "4162314ea083f0957079d6b1a63b2c24b1f6de73", "shasum": "" }, "require": { @@ -135,12 +182,13 @@ "ext-libxml": "*", "ext-simplexml": "*", "ext-xml": "*", - "php": ">=5.3.0" + "php": ">=5.3.0", + "zendframework/zendxml": "^1.0" }, "suggest": { "ext-curl": "PicoFeed will use cURL if present" }, - "time": "2015-10-23 21:59:31", + "time": "2015-12-15 17:01:33", "bin": [ "picofeed" ], diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php index 3c64ce5..24c619c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php @@ -496,7 +496,7 @@ abstract class Client } /** - * Set the mximum number of HTTP redirections. + * Set the maximum number of HTTP redirections. * * @param int $max Maximum * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index b07685f..82ea87a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -88,27 +88,6 @@ class Curl extends Client return $length; } - /** - * cURL callback to passthrough the HTTP status header to the client. - * - * @param resource $ch cURL handler - * @param string $buffer Header line - * - * @return int Length of the buffer - */ - public function passthroughHeaders($ch, $buffer) - { - list($status, $headers) = HttpHeaders::parse(array($buffer)); - - if ($status !== 0) { - header(':', true, $status); - } elseif (isset($headers['Content-Type'])) { - header($buffer); - } - - return $this->readHeaders($ch, $buffer); - } - /** * cURL callback to passthrough the HTTP body to the client. * @@ -121,9 +100,27 @@ class Curl extends Client */ public function passthroughBody($ch, $buffer) { + // do it only at the beginning of a transmission + if ($this->body_length === 0) { + list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1])); + + if ($this->isRedirection($status)) { + return $this->handleRedirection($headers['Location']); + } + + header($status); + + if (isset($headers['Content-Type'])) { + header('Content-Type:' .$headers['Content-Type']); + } + } + + $length = strlen($buffer); + $this->body_length += $length; + echo $buffer; - return strlen($buffer); + return $length; } /** @@ -207,7 +204,6 @@ class Curl extends Client if ($this->isPassthroughEnabled()) { $write_function = 'passthroughBody'; - $header_function = 'passthroughHeaders'; } curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function)); @@ -285,17 +281,15 @@ class Curl extends Client /** * Do the HTTP request. * - * @param bool $follow_location Flag used when there is an open_basedir restriction - * * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...] */ - public function doRequest($follow_location = true) + public function doRequest() { $this->executeContext(); list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1])); - if ($follow_location && $this->isRedirection($status)) { + if ($this->isRedirection($status)) { return $this->handleRedirection($headers['Location']); } @@ -307,7 +301,7 @@ class Curl extends Client } /** - * Handle manually redirections when there is an open base dir restriction. + * Handle HTTP redirects * * @param string $location Redirected URL * @@ -330,7 +324,7 @@ class Curl extends Client throw new MaxRedirectException('Maximum number of redirections reached'); } - $result = $this->doRequest(false); + $result = $this->doRequest(); if ($this->isRedirection($result['status'])) { $this->url = Url::resolve($result['headers']['Location'], $this->url); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php new file mode 100644 index 0000000..f3f914d --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php @@ -0,0 +1,12 @@ +childNodes as $child) { - if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { - if ($child->entities->length > 0) { - return false; - } - } - } - - if ($isRunningFpm === false) { - libxml_disable_entity_loader($entityLoaderDisabled); - } - - return $dom; + return self::scan($input); } /** @@ -101,12 +46,7 @@ class XmlParser return false; } - $dom = self::scanInput($input, function ($in) { - $dom = new DomDocument(); - $dom->loadXml($in, LIBXML_NONET); - - return $dom; - }); + $dom = self::scan($input, new DOMDocument()); // The document is empty, there is probably some parsing errors if ($dom && $dom->childNodes->length === 0) { @@ -116,6 +56,22 @@ class XmlParser return $dom; } + /** + * Small wrapper around ZendXml to turn their exceptions into picoFeed + * exceptions + * @param $input the xml to load + * @param $dom pass in a dom document or use null/omit if simpleXml should + * be used + */ + private static function scan($input, $dom=null) + { + try { + return Security::scan($input, $dom); + } catch(\ZendXml\Exception\RuntimeException $e) { + throw new XmlEntityException($e->getMessage()); + } + } + /** * Load HTML document by using a DomDocument instance or return false on failure. * @@ -127,27 +83,21 @@ class XmlParser */ public static function getHtmlDocument($input) { + $dom = new DomDocument(); + if (empty($input)) { - return new DomDocument(); + return $dom; } + libxml_use_internal_errors(true); + if (version_compare(PHP_VERSION, '5.4.0', '>=')) { - $callback = function ($in) { - $dom = new DomDocument(); - $dom->loadHTML($in, LIBXML_NONET); - - return $dom; - }; + $dom->loadHTML($input, LIBXML_NONET); } else { - $callback = function ($in) { - $dom = new DomDocument(); - $dom->loadHTML($in); - - return $dom; - }; + $dom->loadHTML($input); } - return self::scanInput($input, $callback); + return $dom; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bizjournals.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bizjournals.com.php index 44a644a..6d8a5a9 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bizjournals.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bizjournals.com.php @@ -5,9 +5,8 @@ return array( '%.*%' => array( 'test_url' => 'http://www.bizjournals.com/milwaukee/news/2015/09/30/bucks-will-hike-prices-on-best-seats-at-new-arena.html', 'body' => array( - '//p[@class="media__caption"]', - '//figure/div/a/img', - '//p[@class="content__segment"]', + '//figure/div/a/img', + '//p[@class="content__segment"]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cnet.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cnet.com.php index fdffefd..4021968 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cnet.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cnet.com.php @@ -2,6 +2,23 @@ return array( 'grabber' => array( + '%^/products.*%' => array( + 'test_url' => 'http://www.cnet.com/products/fibaro-flood-sensor/#ftag=CADf328eec', + 'body' => array( + '//li[contains(@class,"slide first"] || //figure[contains(@class,(promoFigure))]', + '//div[@class="quickInfo"]', + '//div[@class="col-6 ratings"]', + '//div[@id="editorReview"]', + ), + 'strip' => array( + '//script', + '//a[@class="clickToEnlarge"]', + '//div[@section="topSharebar"]', + '//div[contains(@class,"related")]', + '//div[contains(@class,"ad-")]', + '//div[@section="shortcodeGallery"]', + ), + ), '%.*%' => array( 'test_url' => 'http://cnet.com.feedsportal.com/c/34938/f/645093/s/4a340866/sc/28/l/0L0Scnet0N0Cnews0Cman0Eclaims0Eonline0Epsychic0Emade0Ehim0Ebuy0E10Emillion0Epowerball0Ewinning0Eticket0C0Tftag0FCAD590Aa51e/story01.htm', 'body' => array( @@ -9,12 +26,12 @@ return array( '//div[@itemprop="articleBody"]', ), 'strip' => array( - '//script', - '//a[@class="clickToEnlarge"]', - '//div[@section="topSharebar"]', - '//div[contains(@class,"related")]', - '//div[contains(@class,"ad-")]', - '//div[@section="shortcodeGallery"]', + '//script', + '//a[@class="clickToEnlarge"]', + '//div[@section="topSharebar"]', + '//div[contains(@class,"related")]', + '//div[contains(@class,"ad-")]', + '//div[@section="shortcodeGallery"]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php index 3b7fc5d..87775eb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php @@ -4,8 +4,8 @@ return array( 'grabber' => array( '%.*%' => array( 'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated', - 'body' => array('//div[@class="article-content"]/p[not(@class="read-more")] | //div[@class="article-content"]/div[@style="text-align: center;"]'), - 'strip' => array(), + 'body' => array('//div[@id="page_body"]/div[@class="container@m-"]'), + 'strip' => array('//aside[@role="banner"]'), ), ), ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php index 8a5f312..85904c0 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php @@ -6,6 +6,7 @@ return array( 'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html', 'body' => array( '//div[@class="meldung_wrapper"]', + '//div[@class="artikel_content"]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nature.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nature.com.php new file mode 100644 index 0000000..1dad8b6 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nature.com.php @@ -0,0 +1,20 @@ + array( + 'http://dx.doi.org/10.1038.*%' => array( + 'test_url' => 'http://dx.doi.org/10.1038/525184a', + 'body' => array( + '//div[@class="content "]', + ), + 'strip' => array() + ), + '%.*%' => array( + 'test_url' => 'http://www.nature.com/doifinder/10.1038/nature.2015.18340', + 'body' => array( + '//div[contains(@class,"main-content")]', + ), + 'strip' => array() + ), + ) +); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/networkworld.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/networkworld.com.php index dbb63a2..94f346e 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/networkworld.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/networkworld.com.php @@ -7,10 +7,13 @@ return array( 'body' => array( '//figure/img[@class="hero-img"]', '//section[@class="deck"]', - '//div[@itemprop="articleBody"]', + '//div[@itemprop="articleBody"] || //div[@itemprop="reviewBody"]', + '//div[@class="carousel-inside-crop"]', ), 'strip' => array( '//aside', + '//div[@class="credit"]', + '//div[@class="view-large"]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php index d17ed91..60d9bfa 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php @@ -3,11 +3,13 @@ return array( 'grabber' => array( '%.*%' => array( - 'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet', - 'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'), + 'test_url' => 'http://www.neustadt-ticker.de/41302/alltag/kultur/demo-auf-der-boehmischen', + 'body' => array( + '//div[@class="entry-content"]', + ), 'strip' => array( - '//*[@id="wp_rp_first"]', - '//*[@class="yarpp-related"]', + '//*[contains(@class, "sharedaddy")]', + '//*[contains(@class, "yarpp-related")]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php new file mode 100644 index 0000000..9b572ef --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/news.sciencemag.org.php @@ -0,0 +1,18 @@ + array( + '%.*%' => array( + 'test_url' => 'http://news.sciencemag.org/biology/2015/09/genetic-engineering-turns-common-plant-cancer-fighter', + 'body' => array( + '//div[@class="content"]', + ), + 'strip' => array( + '//h1[@class="snews-article__headline"]', + '//div[contains(@class,"easy_social_box")]', + '//div[@class="author-teaser"]', + '//div[@class="article-byline"]', + ), + ), + ) +); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/retractionwatch.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/retractionwatch.com.php new file mode 100644 index 0000000..b97c73e --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/retractionwatch.com.php @@ -0,0 +1,18 @@ + array( + '%.*%' => array( + 'test_url' => 'http://retractionwatch.com/2015/11/12/psychologist-jens-forster-settles-case-by-agreeing-to-2-retractions/', + 'body' => array( + '//*[@class="main"]', + '//*[@class="entry-content"]', + ), + 'strip' => array( + '//*[contains(@class, "sharedaddy")]', + '//*[contains(@class, "jp-relatedposts")]', + '//p[@class="p1"]', + ) + ) + ) +); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php new file mode 100644 index 0000000..964850f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php @@ -0,0 +1,21 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban', + 'body' => array( + '//article', + ), + 'strip' => array( + '//p[@id="mobile-signature"]', + '//article/div[4]', + '//article/ul[1]', + '//div[@class="clr"]', + '//p[@class="small"]', + '//p[@style="font-weight: bold; font-size: 14px;"]', + '//div[@class="author"]', + ) + ) + ) +); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/upi.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/upi.com.php index 413a172..5d78f8b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/upi.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/upi.com.php @@ -6,16 +6,10 @@ return array( 'test_url' => 'http://www.upi.com/Top_News/US/2015/09/26/Tech-giants-Hollywood-stars-among-guests-at-state-dinner-for-Chinas-Xi-Jinping/4541443281006/', 'body' => array( '//div[@class="img"]', - '//div[@class="st_text_c"]', + '//div/article[@itemprop="articleBody"]', ), 'strip' => array( '//div[@align="center"]', - '//div[@class="ad_slot"]', - '//div[@class="ipara"]', - '//div[@class="st_embed"]', - '//div[contains(@styel,"font-size"]', - '//ul', - '//style[@type="text/css"]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.geekculture.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.geekculture.com.php new file mode 100644 index 0000000..7f03a1d --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.geekculture.com.php @@ -0,0 +1,13 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2180.html', + 'body' => array( + '//p[contains(@class,"Maintext")][2]/img', + ), + 'strip' => array(), + ), + ), +); + diff --git a/vendor/zendframework/zendxml/.gitignore b/vendor/zendframework/zendxml/.gitignore new file mode 100644 index 0000000..0a4f6e2 --- /dev/null +++ b/vendor/zendframework/zendxml/.gitignore @@ -0,0 +1,5 @@ +composer.lock +vendor +.buildpath +.project +.settings diff --git a/vendor/zendframework/zendxml/.travis.yml b/vendor/zendframework/zendxml/.travis.yml new file mode 100644 index 0000000..967d999 --- /dev/null +++ b/vendor/zendframework/zendxml/.travis.yml @@ -0,0 +1,23 @@ +language: php +php: + - 5.3 + - 5.4 + - 5.5 + - 5.6 + - hhvm + +matrix: + allow_failures: + - php: hhvm + +before_script: + - composer self-update + - composer install --dev + +script: + - ./vendor/bin/phpunit -c ./tests + - ./vendor/bin/phpcs --standard=PSR2 --ignore=tests/Bootstrap.php library tests + +notifications: + irc: "irc.freenode.org#zftalk.dev" + email: false diff --git a/vendor/zendframework/zendxml/LICENSE.md b/vendor/zendframework/zendxml/LICENSE.md new file mode 100644 index 0000000..141d3a2 --- /dev/null +++ b/vendor/zendframework/zendxml/LICENSE.md @@ -0,0 +1,12 @@ +Copyright (c) 2014-2015, Zend Technologies USA, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +- Neither the name of Zend Technologies USA, Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/zendframework/zendxml/README.md b/vendor/zendframework/zendxml/README.md new file mode 100644 index 0000000..2c67008 --- /dev/null +++ b/vendor/zendframework/zendxml/README.md @@ -0,0 +1,50 @@ +ZendXml +======= + +An utility component for XML usage and best practices in PHP + +Installation +------------ + +You can install using: + +``` +curl -s https://getcomposer.org/installer | php +php composer.phar install +``` + +Notice that this library doesn't have any external dependencies, the usage of composer is for autoloading and standard purpose. + + +ZendXml\Security +---------------- + +This is a security component to prevent [XML eXternal Entity](https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Processing) (XXE) and [XML Entity Expansion](http://projects.webappsec.org/w/page/13247002/XML%20Entity%20Expansion) (XEE) attacks on XML documents. + +The XXE attack is prevented disabling the load of external entities in the libxml library used by PHP, using the function [libxml_disable_entity_loader](http://www.php.net/manual/en/function.libxml-disable-entity-loader.php). + +The XEE attack is prevented looking inside the XML document for ENTITY usage. If the XML document uses ENTITY the library throw an Exception. + +We have two static methods to scan and load XML document from a string (scan) and from a file (scanFile). You can decide to get a SimpleXMLElement or DOMDocument as result, using the following use cases: + +```php +use ZendXml\Security as XmlSecurity; + +$xml = << + + test + +XML; + +// SimpleXML use case +$simplexml = XmlSecurity::scan($xml); +printf ("SimpleXMLElement: %s\n", ($simplexml instanceof \SimpleXMLElement) ? 'yes' : 'no'); + +// DOMDocument use case +$dom = new \DOMDocument('1.0'); +$dom = XmlSecurity::scan($xml, $dom); +printf ("DOMDocument: %s\n", ($dom instanceof \DOMDocument) ? 'yes' : 'no'); +``` + + diff --git a/vendor/zendframework/zendxml/composer.json b/vendor/zendframework/zendxml/composer.json new file mode 100644 index 0000000..139f1e2 --- /dev/null +++ b/vendor/zendframework/zendxml/composer.json @@ -0,0 +1,40 @@ +{ + "name": "zendframework/zendxml", + "description": "Utility library for XML usage, best practices, and security in PHP", + "type": "library", + "license": "BSD-3-Clause", + "keywords": [ + "zf2", + "xml", + "security" + ], + "homepage": "http://packages.zendframework.com/", + "autoload": { + "psr-0": { + "ZendXml\\": "library/" + } + }, + "autoload-dev": { + "psr-4": { + "ZendTest\\Xml\\": "tests/ZendXmlTest/" + } + }, + "repositories": [ + { + "type": "composer", + "url": "http://packages.zendframework.com/" + } + ], + "require": { + "php": ">=5.3.3" + }, + "extra": { + "branch-alias": { + "dev-master": "1.0-dev" + } + }, + "require-dev": { + "phpunit/phpunit": "~3.7", + "squizlabs/php_codesniffer": "~1.5" + } +} diff --git a/vendor/zendframework/zendxml/library/ZendXml/Exception/ExceptionInterface.php b/vendor/zendframework/zendxml/library/ZendXml/Exception/ExceptionInterface.php new file mode 100644 index 0000000..c55eb90 --- /dev/null +++ b/vendor/zendframework/zendxml/library/ZendXml/Exception/ExceptionInterface.php @@ -0,0 +1,14 @@ + 0) { + return true; + } + return false; + }, E_WARNING); + $result = $dom->loadXml($xml, LIBXML_NONET); + restore_error_handler(); + + if (!$result) { + // Entity load to previous setting + if (!self::isPhpFpm()) { + libxml_disable_entity_loader($loadEntities); + libxml_use_internal_errors($useInternalXmlErrors); + } + return false; + } + + // Scan for potential XEE attacks using ENTITY, if not PHP-FPM + if (!self::isPhpFpm()) { + foreach ($dom->childNodes as $child) { + if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { + if ($child->entities->length > 0) { + throw new Exception\RuntimeException(self::ENTITY_DETECT); + } + } + } + } + + // Entity load to previous setting + if (!self::isPhpFpm()) { + libxml_disable_entity_loader($loadEntities); + libxml_use_internal_errors($useInternalXmlErrors); + } + + if (isset($simpleXml)) { + $result = simplexml_import_dom($dom); + if (!$result instanceof SimpleXMLElement) { + return false; + } + return $result; + } + return $dom; + } + + /** + * Scan XML file for potential XXE/XEE attacks + * + * @param string $file + * @param DOMDocument $dom + * @throws Exception\InvalidArgumentException + * @return SimpleXMLElement|DomDocument + */ + public static function scanFile($file, DOMDocument $dom = null) + { + if (!file_exists($file)) { + throw new Exception\InvalidArgumentException( + "The file $file specified doesn't exist" + ); + } + return self::scan(file_get_contents($file), $dom); + } + + /** + * Return true if PHP is running with PHP-FPM + * + * This method is mainly used to determine whether or not heuristic checks + * (vs libxml checks) should be made, due to threading issues in libxml; + * under php-fpm, threading becomes a concern. + * + * However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the + * libxml support in PHP that makes the libxml checks viable; in such + * versions, this method will return false to enforce those checks, which + * are more strict and accurate than the heuristic checks. + * + * @return boolean + */ + public static function isPhpFpm() + { + $isVulnerableVersion = ( + version_compare(PHP_VERSION, '5.5.22', 'lt') + || ( + version_compare(PHP_VERSION, '5.6', 'gte') + && version_compare(PHP_VERSION, '5.6.6', 'lt') + ) + ); + + if (substr(php_sapi_name(), 0, 3) === 'fpm' && $isVulnerableVersion) { + return true; + } + return false; + } + + /** + * Determine and return the string(s) to use for the $generator) { + $prefix = $generator('<' . '?xml'); + if (0 === strncmp($xml, $prefix, strlen($prefix))) { + return $encoding; + } + } + + // Fallback + return 'UTF-8'; + } + + /** + * Attempt to detect the specified XML encoding. + * + * Using the file's encoding, determines if an "encoding" attribute is + * present and well-formed in the XML declaration; if so, it returns a + * list with both the ASCII representation of that declaration and the + * original file encoding. + * + * If not, a list containing only the provided file encoding is returned. + * + * @param string $xml + * @param string $fileEncoding + * @return string[] Potential XML encodings + */ + protected static function detectXmlEncoding($xml, $fileEncoding) + { + $encodingMap = self::getAsciiEncodingMap(); + $generator = $encodingMap[$fileEncoding]; + $encAttr = $generator('encoding="'); + $quote = $generator('"'); + $close = $generator('>'); + + $closePos = strpos($xml, $close); + if (false === $closePos) { + return array($fileEncoding); + } + + $encPos = strpos($xml, $encAttr); + if (false === $encPos + || $encPos > $closePos + ) { + return array($fileEncoding); + } + + $encPos += strlen($encAttr); + $quotePos = strpos($xml, $quote, $encPos); + if (false === $quotePos) { + return array($fileEncoding); + } + + $encoding = self::substr($xml, $encPos, $quotePos); + return array( + // Following line works because we're only supporting 8-bit safe encodings at this time. + str_replace('\0', '', $encoding), // detected encoding + $fileEncoding, // file encoding + ); + } + + /** + * Return a list of BOM maps. + * + * Returns a list of common encoding -> BOM maps, along with the character + * length to compare against. + * + * @link https://en.wikipedia.org/wiki/Byte_order_mark + * @return array + */ + protected static function getBomMap() + { + return array( + array( + 'encoding' => 'UTF-32BE', + 'bom' => pack('CCCC', 0x00, 0x00, 0xfe, 0xff), + 'length' => 4, + ), + array( + 'encoding' => 'UTF-32LE', + 'bom' => pack('CCCC', 0xff, 0xfe, 0x00, 0x00), + 'length' => 4, + ), + array( + 'encoding' => 'GB-18030', + 'bom' => pack('CCCC', 0x84, 0x31, 0x95, 0x33), + 'length' => 4, + ), + array( + 'encoding' => 'UTF-16BE', + 'bom' => pack('CC', 0xfe, 0xff), + 'length' => 2, + ), + array( + 'encoding' => 'UTF-16LE', + 'bom' => pack('CC', 0xff, 0xfe), + 'length' => 2, + ), + array( + 'encoding' => 'UTF-8', + 'bom' => pack('CCC', 0xef, 0xbb, 0xbf), + 'length' => 3, + ), + ); + } + + /** + * Return a map of encoding => generator pairs. + * + * Returns a map of encoding => generator pairs, where the generator is a + * callable that accepts a string and returns the appropriate byte order + * sequence of that string for the encoding. + * + * @return array + */ + protected static function getAsciiEncodingMap() + { + return array( + 'UTF-32BE' => function ($ascii) { + return preg_replace('/(.)/', "\0\0\0\\1", $ascii); + }, + 'UTF-32LE' => function ($ascii) { + return preg_replace('/(.)/', "\\1\0\0\0", $ascii); + }, + 'UTF-32odd1' => function ($ascii) { + return preg_replace('/(.)/', "\0\\1\0\0", $ascii); + }, + 'UTF-32odd2' => function ($ascii) { + return preg_replace('/(.)/', "\0\0\\1\0", $ascii); + }, + 'UTF-16BE' => function ($ascii) { + return preg_replace('/(.)/', "\0\\1", $ascii); + }, + 'UTF-16LE' => function ($ascii) { + return preg_replace('/(.)/', "\\1\0", $ascii); + }, + 'UTF-8' => function ($ascii) { + return $ascii; + }, + 'GB-18030' => function ($ascii) { + return $ascii; + }, + ); + } + + /** + * Binary-safe substr. + * + * substr() is not binary-safe; this method loops by character to ensure + * multi-byte characters are aggregated correctly. + * + * @param string $string + * @param int $start + * @param int $end + * @return string + */ + protected static function substr($string, $start, $end) + { + $substr = ''; + for ($i = $start; $i < $end; $i += 1) { + $substr .= $string[$i]; + } + return $substr; + } +} diff --git a/vendor/zendframework/zendxml/tests/Bootstrap.php b/vendor/zendframework/zendxml/tests/Bootstrap.php new file mode 100644 index 0000000..a9d0e6a --- /dev/null +++ b/vendor/zendframework/zendxml/tests/Bootstrap.php @@ -0,0 +1,92 @@ +addDirectoryToWhitelist($zfCoreLibrary . '/' . $lastArg); + } elseif (is_file($zfCoreTests . '/' . $lastArg)) { + $codeCoverageFilter->addDirectoryToWhitelist(dirname($zfCoreLibrary . '/' . $lastArg)); + } else { + $codeCoverageFilter->addDirectoryToWhitelist($zfCoreLibrary); + } + + /* + * Omit from code coverage reports the contents of the tests directory + */ + $codeCoverageFilter->addDirectoryToBlacklist($zfCoreTests, ''); + $codeCoverageFilter->addDirectoryToBlacklist(PEAR_INSTALL_DIR, ''); + $codeCoverageFilter->addDirectoryToBlacklist(PHP_LIBDIR, ''); + + unset($codeCoverageFilter); +} + +/* + * Unset global variables that are no longer needed. + */ +unset($phpUnitVersion); diff --git a/vendor/zendframework/zendxml/tests/ZendXmlTest/MultibyteTest.php b/vendor/zendframework/zendxml/tests/ZendXmlTest/MultibyteTest.php new file mode 100644 index 0000000..165e8fa --- /dev/null +++ b/vendor/zendframework/zendxml/tests/ZendXmlTest/MultibyteTest.php @@ -0,0 +1,125 @@ + array('UTF-16LE', pack('CC', 0xff, 0xfe), 3), + 'UTF-16BE' => array('UTF-16BE', pack('CC', 0xfe, 0xff), 3), + 'UTF-32LE' => array('UTF-32LE', pack('CCCC', 0xff, 0xfe, 0x00, 0x00), 4), + 'UTF-32BE' => array('UTF-32BE', pack('CCCC', 0x00, 0x00, 0xfe, 0xff), 4), + ); + } + + public function getXmlWithXXE() + { + return << + +]> + + retrieved: &pocdata; + +XML; + } + + /** + * Invoke ZendXml\Security::heuristicScan with the provided XML. + * + * @param string $xml + * @return void + * @throws Exception\RuntimeException + */ + public function invokeHeuristicScan($xml) + { + $r = new ReflectionMethod('ZendXml\Security', 'heuristicScan'); + $r->setAccessible(true); + return $r->invoke(null, $xml); + } + + /** + * @dataProvider multibyteEncodings + * @group heuristicDetection + */ + public function testDetectsMultibyteXXEVectorsUnderFPMWithEncodedStringMissingBOM($encoding, $bom, $bomLength) + { + $xml = $this->getXmlWithXXE(); + $xml = str_replace('{ENCODING}', $encoding, $xml); + $xml = iconv('UTF-8', $encoding, $xml); + $this->assertNotSame(0, strncmp($xml, $bom, $bomLength)); + $this->setExpectedException('ZendXml\Exception\RuntimeException', 'ENTITY'); + $this->invokeHeuristicScan($xml); + } + + /** + * @dataProvider multibyteEncodings + */ + public function testDetectsMultibyteXXEVectorsUnderFPMWithEncodedStringUsingBOM($encoding, $bom) + { + $xml = $this->getXmlWithXXE(); + $xml = str_replace('{ENCODING}', $encoding, $xml); + $orig = iconv('UTF-8', $encoding, $xml); + $xml = $bom . $orig; + $this->setExpectedException('ZendXml\Exception\RuntimeException', 'ENTITY'); + $this->invokeHeuristicScan($xml); + } + + public function getXmlWithoutXXE() + { + return << + + retrieved: &pocdata; + +XML; + } + + /** + * @dataProvider multibyteEncodings + */ + public function testDoesNotFlagValidMultibyteXmlAsInvalidUnderFPM($encoding) + { + $xml = $this->getXmlWithoutXXE(); + $xml = str_replace('{ENCODING}', $encoding, $xml); + $xml = iconv('UTF-8', $encoding, $xml); + try { + $result = $this->invokeHeuristicScan($xml); + $this->assertNull($result); + } catch (\Exception $e) { + $this->fail('Security scan raised exception when it should not have'); + } + } + + /** + * @dataProvider multibyteEncodings + * @group mixedEncoding + */ + public function testDetectsXXEWhenXMLDocumentEncodingDiffersFromFileEncoding($encoding, $bom) + { + $xml = $this->getXmlWithXXE(); + $xml = str_replace('{ENCODING}', 'UTF-8', $xml); + $xml = iconv('UTF-8', $encoding, $xml); + $xml = $bom . $xml; + $this->setExpectedException('ZendXml\Exception\RuntimeException', 'ENTITY'); + $this->invokeHeuristicScan($xml); + } +} diff --git a/vendor/zendframework/zendxml/tests/ZendXmlTest/SecurityTest.php b/vendor/zendframework/zendxml/tests/ZendXmlTest/SecurityTest.php new file mode 100644 index 0000000..fa3b30b --- /dev/null +++ b/vendor/zendframework/zendxml/tests/ZendXmlTest/SecurityTest.php @@ -0,0 +1,135 @@ + +]> + + This result is &harmless; + +XML; + + $this->setExpectedException('ZendXml\Exception\RuntimeException'); + $result = XmlSecurity::scan($xml); + } + + public function testScanForXXE() + { + $file = tempnam(sys_get_temp_dir(), 'ZendXml_Security'); + file_put_contents($file, 'This is a remote content!'); + $xml = << + +]> + + &foo; + +XML; + + try { + $result = XmlSecurity::scan($xml); + } catch (Exception\RuntimeException $e) { + unlink($file); + return; + } + $this->fail('An expected exception has not been raised.'); + } + + public function testScanSimpleXmlResult() + { + $result = XmlSecurity::scan($this->getXml()); + $this->assertTrue($result instanceof SimpleXMLElement); + $this->assertEquals($result->result, 'test'); + } + + public function testScanDom() + { + $dom = new DOMDocument('1.0'); + $result = XmlSecurity::scan($this->getXml(), $dom); + $this->assertTrue($result instanceof DOMDocument); + $node = $result->getElementsByTagName('result')->item(0); + $this->assertEquals($node->nodeValue, 'test'); + } + + public function testScanInvalidXml() + { + $xml = <<test +XML; + + $result = XmlSecurity::scan($xml); + $this->assertFalse($result); + } + + public function testScanInvalidXmlDom() + { + $xml = <<test +XML; + + $dom = new DOMDocument('1.0'); + $result = XmlSecurity::scan($xml, $dom); + $this->assertFalse($result); + } + + public function testScanFile() + { + $file = tempnam(sys_get_temp_dir(), 'ZendXml_Security'); + file_put_contents($file, $this->getXml()); + + $result = XmlSecurity::scanFile($file); + $this->assertTrue($result instanceof SimpleXMLElement); + $this->assertEquals($result->result, 'test'); + unlink($file); + } + + public function testScanXmlWithDTD() + { + $xml = << + + +]> + + test + +XML; + + $dom = new DOMDocument('1.0'); + $result = XmlSecurity::scan($xml, $dom); + $this->assertTrue($result instanceof DOMDocument); + $this->assertTrue($result->validate()); + } + + protected function getXml() + { + return << + + test + +XML; + } +} diff --git a/vendor/zendframework/zendxml/tests/phpunit.xml.dist b/vendor/zendframework/zendxml/tests/phpunit.xml.dist new file mode 100755 index 0000000..069784b --- /dev/null +++ b/vendor/zendframework/zendxml/tests/phpunit.xml.dist @@ -0,0 +1,27 @@ + + + + ./ZendXmlTest + ./ZendXmlTest/TestAsset + + + + + + + + + + + + + + ./ZendXmlTest + ../vendor + + + + + + +