Upgrade to PicoFeed 0.1.16

This commit is contained in:
Frederic Guillot 2015-12-15 19:26:15 -05:00
parent 42340702b1
commit 079545daed
33 changed files with 1185 additions and 142 deletions

View File

@ -14,7 +14,7 @@
"fguillot/simple-validator": "v1.0.0",
"fguillot/json-rpc": "v1.0.2",
"fguillot/picodb": "v1.0.2",
"fguillot/picofeed": "v0.1.15"
"fguillot/picofeed": "v0.1.16"
},
"require-dev": {
"phpunit/phpunit": "4.8.3",

View File

@ -54,6 +54,7 @@ return array(
'PicoFeed\\Parser\\Rss20' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/Rss20.php',
'PicoFeed\\Parser\\Rss91' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/Rss91.php',
'PicoFeed\\Parser\\Rss92' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/Rss92.php',
'PicoFeed\\Parser\\XmlEntityException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/XmlEntityException.php',
'PicoFeed\\Parser\\XmlParser' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php',
'PicoFeed\\PicoFeedException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/PicoFeedException.php',
'PicoFeed\\Reader\\Favicon' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/Favicon.php',

View File

@ -6,6 +6,7 @@ $vendorDir = dirname(dirname(__FILE__));
$baseDir = dirname($vendorDir);
return array(
'ZendXml\\' => array($vendorDir . '/zendframework/zendxml/library'),
'SimpleValidator' => array($vendorDir . '/fguillot/simple-validator/src'),
'PicoFeed' => array($vendorDir . '/fguillot/picofeed/lib'),
'PicoDb' => array($vendorDir . '/fguillot/picodb/lib'),

View File

@ -115,18 +115,65 @@
"homepage": "https://github.com/fguillot/picoDb"
},
{
"name": "fguillot/picofeed",
"version": "v0.1.15",
"version_normalized": "0.1.15.0",
"name": "zendframework/zendxml",
"version": "1.0.1",
"version_normalized": "1.0.1.0",
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "ee91bcfd4be93d38ae5f870723c652a9d795c75f"
"url": "https://github.com/zendframework/ZendXml.git",
"reference": "54edb3875aba5b45f02824f65f311c9fb2743a38"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/ee91bcfd4be93d38ae5f870723c652a9d795c75f",
"reference": "ee91bcfd4be93d38ae5f870723c652a9d795c75f",
"url": "https://api.github.com/repos/zendframework/ZendXml/zipball/54edb3875aba5b45f02824f65f311c9fb2743a38",
"reference": "54edb3875aba5b45f02824f65f311c9fb2743a38",
"shasum": ""
},
"require": {
"php": ">=5.3.3"
},
"require-dev": {
"phpunit/phpunit": "~3.7",
"squizlabs/php_codesniffer": "~1.5"
},
"time": "2015-08-03 14:50:10",
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "1.0-dev"
}
},
"installation-source": "dist",
"autoload": {
"psr-0": {
"ZendXml\\": "library/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"BSD-3-Clause"
],
"description": "Utility library for XML usage, best practices, and security in PHP",
"homepage": "http://packages.zendframework.com/",
"keywords": [
"security",
"xml",
"zf2"
]
},
{
"name": "fguillot/picofeed",
"version": "v0.1.16",
"version_normalized": "0.1.16.0",
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "4162314ea083f0957079d6b1a63b2c24b1f6de73"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/4162314ea083f0957079d6b1a63b2c24b1f6de73",
"reference": "4162314ea083f0957079d6b1a63b2c24b1f6de73",
"shasum": ""
},
"require": {
@ -135,12 +182,13 @@
"ext-libxml": "*",
"ext-simplexml": "*",
"ext-xml": "*",
"php": ">=5.3.0"
"php": ">=5.3.0",
"zendframework/zendxml": "^1.0"
},
"suggest": {
"ext-curl": "PicoFeed will use cURL if present"
},
"time": "2015-10-23 21:59:31",
"time": "2015-12-15 17:01:33",
"bin": [
"picofeed"
],

View File

@ -496,7 +496,7 @@ abstract class Client
}
/**
* Set the mximum number of HTTP redirections.
* Set the maximum number of HTTP redirections.
*
* @param int $max Maximum
*

View File

@ -88,27 +88,6 @@ class Curl extends Client
return $length;
}
/**
* cURL callback to passthrough the HTTP status header to the client.
*
* @param resource $ch cURL handler
* @param string $buffer Header line
*
* @return int Length of the buffer
*/
public function passthroughHeaders($ch, $buffer)
{
list($status, $headers) = HttpHeaders::parse(array($buffer));
if ($status !== 0) {
header(':', true, $status);
} elseif (isset($headers['Content-Type'])) {
header($buffer);
}
return $this->readHeaders($ch, $buffer);
}
/**
* cURL callback to passthrough the HTTP body to the client.
*
@ -121,9 +100,27 @@ class Curl extends Client
*/
public function passthroughBody($ch, $buffer)
{
    // Headers are forwarded only once, before the first body chunk is echoed
    // (body_length is still 0 at that point).
    if ($this->body_length === 0) {
        // Parse the most recently collected block of response headers.
        list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));

        if ($this->isRedirection($status)) {
            return $this->handleRedirection($headers['Location']);
        }

        // $status is a numeric HTTP status code (it is compared against 0 and
        // passed as the response code elsewhere in this class), so it cannot be
        // given to header() as a header line. Use the response-code argument
        // instead, which also works on PHP 5.3 where http_response_code() is
        // not available.
        header(':', true, $status);

        if (isset($headers['Content-Type'])) {
            header('Content-Type:' .$headers['Content-Type']);
        }
    }

    $length = strlen($buffer);
    $this->body_length += $length;

    echo $buffer;

    // cURL expects the write callback to report how many bytes were consumed.
    return $length;
}
/**
@ -207,7 +204,6 @@ class Curl extends Client
if ($this->isPassthroughEnabled()) {
$write_function = 'passthroughBody';
$header_function = 'passthroughHeaders';
}
curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function));
@ -285,17 +281,15 @@ class Curl extends Client
/**
* Do the HTTP request.
*
* @param bool $follow_location Flag used when there is an open_basedir restriction
*
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
*/
public function doRequest($follow_location = true)
public function doRequest()
{
$this->executeContext();
list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));
if ($follow_location && $this->isRedirection($status)) {
if ($this->isRedirection($status)) {
return $this->handleRedirection($headers['Location']);
}
@ -307,7 +301,7 @@ class Curl extends Client
}
/**
* Handle manually redirections when there is an open base dir restriction.
* Handle HTTP redirects
*
* @param string $location Redirected URL
*
@ -330,7 +324,7 @@ class Curl extends Client
throw new MaxRedirectException('Maximum number of redirections reached');
}
$result = $this->doRequest(false);
$result = $this->doRequest();
if ($this->isRedirection($result['status'])) {
$this->url = Url::resolve($result['headers']['Location'], $this->url);

View File

@ -0,0 +1,12 @@
<?php
namespace PicoFeed\Parser;
/**
 * XmlEntityException Exception.
 *
 * Specialisation of MalformedXmlException that carries no extra state or
 * behaviour of its own; it only gives callers a distinct type to catch.
 * NOTE(review): presumably raised when entity declarations are found in
 * the parsed XML - confirm against the code that throws it.
 *
 * @author Bernhard Posselt
 */
class XmlEntityException extends MalformedXmlException
{
}

View File

@ -2,9 +2,11 @@
namespace PicoFeed\Parser;
use Closure;
use DomDocument;
use SimpleXmlElement;
use Exception;
use ZendXml\Security;
/**
* XML parser class.
@ -26,64 +28,7 @@ class XmlParser
*/
public static function getSimpleXml($input)
{
$dom = self::getDomDocument($input);
if ($dom !== false) {
$simplexml = simplexml_import_dom($dom);
if (!$simplexml instanceof SimpleXmlElement) {
return false;
}
return $simplexml;
}
return false;
}
/**
* Scan the input for XXE attacks.
*
* @param string $input Unsafe input
* @param Closure $callback Callback called to build the dom.
* Must be an instance of DomDocument and receives the input as argument
*
* @return bool|DomDocument False if an XXE attack was discovered,
* otherwise the return of the callback
*/
private static function scanInput($input, Closure $callback)
{
$isRunningFpm = substr(php_sapi_name(), 0, 3) === 'fpm';
if ($isRunningFpm) {
// If running with PHP-FPM and an entity is detected we refuse to parse the feed
// @see https://bugs.php.net/bug.php?id=64938
if (strpos($input, '<!ENTITY') !== false) {
return false;
}
} else {
$entityLoaderDisabled = libxml_disable_entity_loader(true);
}
libxml_use_internal_errors(true);
$dom = $callback($input);
// Scan for potential XEE attacks using ENTITY
foreach ($dom->childNodes as $child) {
if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
if ($child->entities->length > 0) {
return false;
}
}
}
if ($isRunningFpm === false) {
libxml_disable_entity_loader($entityLoaderDisabled);
}
return $dom;
return self::scan($input);
}
/**
@ -101,12 +46,7 @@ class XmlParser
return false;
}
$dom = self::scanInput($input, function ($in) {
$dom = new DomDocument();
$dom->loadXml($in, LIBXML_NONET);
return $dom;
});
$dom = self::scan($input, new DOMDocument());
// The document is empty, there is probably some parsing errors
if ($dom && $dom->childNodes->length === 0) {
@ -116,6 +56,22 @@ class XmlParser
return $dom;
}
/**
 * Delegate XML loading to ZendXml\Security::scan() and translate its
 * runtime exception into a picoFeed exception.
 *
 * @param string            $input XML payload to load
 * @param \DOMDocument|null $dom   Document to load into; when null/omitted,
 *                                 ZendXml builds a SimpleXML result instead
 *
 * @throws XmlEntityException When ZendXml raises its RuntimeException
 *
 * @return mixed Whatever Security::scan() returns for the given arguments
 */
private static function scan($input, $dom = null)
{
    try {
        $scanned = Security::scan($input, $dom);
    } catch (\ZendXml\Exception\RuntimeException $failure) {
        // Only the message is carried over to the picoFeed exception.
        throw new XmlEntityException($failure->getMessage());
    }

    return $scanned;
}
/**
* Load HTML document by using a DomDocument instance or return false on failure.
*
@ -127,27 +83,21 @@ class XmlParser
*/
public static function getHtmlDocument($input)
{
$dom = new DomDocument();
if (empty($input)) {
return new DomDocument();
return $dom;
}
libxml_use_internal_errors(true);
if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
$callback = function ($in) {
$dom = new DomDocument();
$dom->loadHTML($in, LIBXML_NONET);
return $dom;
};
$dom->loadHTML($input, LIBXML_NONET);
} else {
$callback = function ($in) {
$dom = new DomDocument();
$dom->loadHTML($in);
return $dom;
};
$dom->loadHTML($input);
}
return self::scanInput($input, $callback);
return $dom;
}
/**

View File

@ -5,9 +5,8 @@ return array(
'%.*%' => array(
'test_url' => 'http://www.bizjournals.com/milwaukee/news/2015/09/30/bucks-will-hike-prices-on-best-seats-at-new-arena.html',
'body' => array(
'//p[@class="media__caption"]',
'//figure/div/a/img',
'//p[@class="content__segment"]',
'//figure/div/a/img',
'//p[@class="content__segment"]',
),
),
),

View File

@ -2,6 +2,23 @@
return array(
'grabber' => array(
'%^/products.*%' => array(
'test_url' => 'http://www.cnet.com/products/fibaro-flood-sensor/#ftag=CADf328eec',
'body' => array(
'//li[contains(@class,"slide first"] || //figure[contains(@class,(promoFigure))]',
'//div[@class="quickInfo"]',
'//div[@class="col-6 ratings"]',
'//div[@id="editorReview"]',
),
'strip' => array(
'//script',
'//a[@class="clickToEnlarge"]',
'//div[@section="topSharebar"]',
'//div[contains(@class,"related")]',
'//div[contains(@class,"ad-")]',
'//div[@section="shortcodeGallery"]',
),
),
'%.*%' => array(
'test_url' => 'http://cnet.com.feedsportal.com/c/34938/f/645093/s/4a340866/sc/28/l/0L0Scnet0N0Cnews0Cman0Eclaims0Eonline0Epsychic0Emade0Ehim0Ebuy0E10Emillion0Epowerball0Ewinning0Eticket0C0Tftag0FCAD590Aa51e/story01.htm',
'body' => array(
@ -9,12 +26,12 @@ return array(
'//div[@itemprop="articleBody"]',
),
'strip' => array(
'//script',
'//a[@class="clickToEnlarge"]',
'//div[@section="topSharebar"]',
'//div[contains(@class,"related")]',
'//div[contains(@class,"ad-")]',
'//div[@section="shortcodeGallery"]',
'//script',
'//a[@class="clickToEnlarge"]',
'//div[@section="topSharebar"]',
'//div[contains(@class,"related")]',
'//div[contains(@class,"ad-")]',
'//div[@section="shortcodeGallery"]',
),
),
),

View File

@ -4,8 +4,8 @@ return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated',
'body' => array('//div[@class="article-content"]/p[not(@class="read-more")] | //div[@class="article-content"]/div[@style="text-align: center;"]'),
'strip' => array(),
'body' => array('//div[@id="page_body"]/div[@class="container@m-"]'),
'strip' => array('//aside[@role="banner"]'),
),
),
);

View File

@ -6,6 +6,7 @@ return array(
'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html',
'body' => array(
'//div[@class="meldung_wrapper"]',
'//div[@class="artikel_content"]',
),
),
),

View File

@ -0,0 +1,20 @@
<?php
// Grabber rules whose test URLs point at dx.doi.org (10.1038 prefix) and
// www.nature.com. Every rule key is a '%'-delimited regular expression, like
// the catch-all '%.*%' below.
return array(
    'grabber' => array(
        // The opening '%' delimiter was missing here, which made the key an
        // invalid PCRE pattern (it would start with the alphanumeric "h").
        // NOTE(review): confirm which part of the URL the rule consumer matches
        // this pattern against (full URL vs. path only).
        '%http://dx.doi.org/10.1038.*%' => array(
            'test_url' => 'http://dx.doi.org/10.1038/525184a',
            'body' => array(
                '//div[@class="content "]',
            ),
            'strip' => array()
        ),
        '%.*%' => array(
            'test_url' => 'http://www.nature.com/doifinder/10.1038/nature.2015.18340',
            'body' => array(
                '//div[contains(@class,"main-content")]',
            ),
            'strip' => array()
        ),
    )
);

View File

@ -7,10 +7,13 @@ return array(
'body' => array(
'//figure/img[@class="hero-img"]',
'//section[@class="deck"]',
'//div[@itemprop="articleBody"]',
'//div[@itemprop="articleBody"] || //div[@itemprop="reviewBody"]',
'//div[@class="carousel-inside-crop"]',
),
'strip' => array(
'//aside',
'//div[@class="credit"]',
'//div[@class="view-large"]',
),
),
),

View File

@ -3,11 +3,13 @@
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet',
'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'),
'test_url' => 'http://www.neustadt-ticker.de/41302/alltag/kultur/demo-auf-der-boehmischen',
'body' => array(
'//div[@class="entry-content"]',
),
'strip' => array(
'//*[@id="wp_rp_first"]',
'//*[@class="yarpp-related"]',
'//*[contains(@class, "sharedaddy")]',
'//*[contains(@class, "yarpp-related")]',
),
),
),

View File

@ -0,0 +1,18 @@
<?php
// Grabber rule set; the sample article in test_url is on news.sciencemag.org.
// Structure: 'grabber' => pattern => array('test_url', 'body', 'strip').
// 'body' and 'strip' are lists of XPath expressions.
// NOTE(review): presumably 'body' selects the nodes to keep and 'strip' the
// nodes to drop from them - confirm against the code that consumes the rules.
return array(
'grabber' => array(
// '%.*%' is a catch-all pattern: it matches any string.
'%.*%' => array(
'test_url' => 'http://news.sciencemag.org/biology/2015/09/genetic-engineering-turns-common-plant-cancer-fighter',
'body' => array(
'//div[@class="content"]',
),
'strip' => array(
'//h1[@class="snews-article__headline"]',
'//div[contains(@class,"easy_social_box")]',
'//div[@class="author-teaser"]',
'//div[@class="article-byline"]',
),
),
)
);

View File

@ -0,0 +1,18 @@
<?php
// Grabber rule set; the sample article in test_url is on retractionwatch.com.
// Structure: 'grabber' => pattern => array('test_url', 'body', 'strip').
// 'body' and 'strip' are lists of XPath expressions.
// NOTE(review): presumably 'body' selects the nodes to keep and 'strip' the
// nodes to drop from them - confirm against the code that consumes the rules.
return array(
'grabber' => array(
// '%.*%' is a catch-all pattern: it matches any string.
'%.*%' => array(
'test_url' => 'http://retractionwatch.com/2015/11/12/psychologist-jens-forster-settles-case-by-agreeing-to-2-retractions/',
'body' => array(
'//*[@class="main"]',
'//*[@class="entry-content"]',
),
'strip' => array(
'//*[contains(@class, "sharedaddy")]',
'//*[contains(@class, "jp-relatedposts")]',
'//p[@class="p1"]',
)
)
)
);

View File

@ -0,0 +1,21 @@
<?php
// Grabber rule set; the sample article in test_url is on www.thelocal.se.
// Structure: 'grabber' => pattern => array('test_url', 'body', 'strip').
// 'body' and 'strip' are lists of XPath expressions.
// NOTE(review): presumably 'body' selects the nodes to keep and 'strip' the
// nodes to drop from them - confirm against the code that consumes the rules.
return array(
'grabber' => array(
// '%.*%' is a catch-all pattern: it matches any string.
'%.*%' => array(
'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban',
'body' => array(
'//article',
),
'strip' => array(
'//p[@id="mobile-signature"]',
// The next two expressions are positional (4th div / 1st ul directly
// under <article>), so they depend on the page's element order.
'//article/div[4]',
'//article/ul[1]',
'//div[@class="clr"]',
'//p[@class="small"]',
// Matches on the exact inline style string.
'//p[@style="font-weight: bold; font-size: 14px;"]',
'//div[@class="author"]',
)
)
)
);

View File

@ -6,16 +6,10 @@ return array(
'test_url' => 'http://www.upi.com/Top_News/US/2015/09/26/Tech-giants-Hollywood-stars-among-guests-at-state-dinner-for-Chinas-Xi-Jinping/4541443281006/',
'body' => array(
'//div[@class="img"]',
'//div[@class="st_text_c"]',
'//div/article[@itemprop="articleBody"]',
),
'strip' => array(
'//div[@align="center"]',
'//div[@class="ad_slot"]',
'//div[@class="ipara"]',
'//div[@class="st_embed"]',
'//div[contains(@styel,"font-size"]',
'//ul',
'//style[@type="text/css"]',
),
),
),

View File

@ -0,0 +1,13 @@
<?php
// Grabber rule set; the sample page in test_url is on www.geekculture.com.
// Structure: 'grabber' => pattern => array('test_url', 'body', 'strip').
// 'body' and 'strip' are lists of XPath expressions ('strip' is empty here).
// NOTE(review): presumably 'body' selects the nodes to keep and 'strip' the
// nodes to drop from them - confirm against the code that consumes the rules.
return array(
'grabber' => array(
// '%.*%' is a catch-all pattern: it matches any string.
'%.*%' => array(
'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2180.html',
'body' => array(
// Selects <img> children of <p> elements that pass both predicates:
// class containing "Maintext", then position 2 among those matches.
'//p[contains(@class,"Maintext")][2]/img',
),
'strip' => array(),
),
),
);

View File

@ -0,0 +1,5 @@
composer.lock
vendor
.buildpath
.project
.settings

View File

@ -0,0 +1,23 @@
language: php
php:
- 5.3
- 5.4
- 5.5
- 5.6
- hhvm
matrix:
allow_failures:
- php: hhvm
before_script:
- composer self-update
- composer install --dev
script:
- ./vendor/bin/phpunit -c ./tests
- ./vendor/bin/phpcs --standard=PSR2 --ignore=tests/Bootstrap.php library tests
notifications:
irc: "irc.freenode.org#zftalk.dev"
email: false

12
vendor/zendframework/zendxml/LICENSE.md vendored Normal file
View File

@ -0,0 +1,12 @@
Copyright (c) 2014-2015, Zend Technologies USA, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name of Zend Technologies USA, Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

50
vendor/zendframework/zendxml/README.md vendored Normal file
View File

@ -0,0 +1,50 @@
ZendXml
=======
A utility component for XML usage and best practices in PHP
Installation
------------
You can install using:
```
curl -s https://getcomposer.org/installer | php
php composer.phar install
```
Notice that this library doesn't have any external dependencies, the usage of composer is for autoloading and standard purpose.
ZendXml\Security
----------------
This is a security component to prevent [XML eXternal Entity](https://www.owasp.org/index.php/XML_External_Entity_%28XXE%29_Processing) (XXE) and [XML Entity Expansion](http://projects.webappsec.org/w/page/13247002/XML%20Entity%20Expansion) (XEE) attacks on XML documents.
The XXE attack is prevented by disabling the loading of external entities in the libxml library used by PHP, using the function [libxml_disable_entity_loader](http://www.php.net/manual/en/function.libxml-disable-entity-loader.php).
The XEE attack is prevented by looking inside the XML document for ENTITY usage. If the XML document uses ENTITY, the library throws an exception.
We have two static methods to scan and load XML document from a string (scan) and from a file (scanFile). You can decide to get a SimpleXMLElement or DOMDocument as result, using the following use cases:
```php
use ZendXml\Security as XmlSecurity;
$xml = <<<XML
<?xml version="1.0"?>
<results>
<result>test</result>
</results>
XML;
// SimpleXML use case
$simplexml = XmlSecurity::scan($xml);
printf ("SimpleXMLElement: %s\n", ($simplexml instanceof \SimpleXMLElement) ? 'yes' : 'no');
// DOMDocument use case
$dom = new \DOMDocument('1.0');
$dom = XmlSecurity::scan($xml, $dom);
printf ("DOMDocument: %s\n", ($dom instanceof \DOMDocument) ? 'yes' : 'no');
```

View File

@ -0,0 +1,40 @@
{
"name": "zendframework/zendxml",
"description": "Utility library for XML usage, best practices, and security in PHP",
"type": "library",
"license": "BSD-3-Clause",
"keywords": [
"zf2",
"xml",
"security"
],
"homepage": "http://packages.zendframework.com/",
"autoload": {
"psr-0": {
"ZendXml\\": "library/"
}
},
"autoload-dev": {
"psr-4": {
"ZendTest\\Xml\\": "tests/ZendXmlTest/"
}
},
"repositories": [
{
"type": "composer",
"url": "http://packages.zendframework.com/"
}
],
"require": {
"php": ">=5.3.3"
},
"extra": {
"branch-alias": {
"dev-master": "1.0-dev"
}
},
"require-dev": {
"phpunit/phpunit": "~3.7",
"squizlabs/php_codesniffer": "~1.5"
}
}

View File

@ -0,0 +1,14 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendXml\Exception;
/**
 * Marker interface for ZendXml exceptions.
 *
 * Declares no methods; it only provides a common type for exception classes
 * in this namespace to implement.
 */
interface ExceptionInterface
{
}

View File

@ -0,0 +1,17 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendXml\Exception;
/**
 * Invalid argument exception
 *
 * Extends PHP's built-in \InvalidArgumentException and additionally carries
 * the ZendXml ExceptionInterface marker; it adds no behaviour of its own.
 */
class InvalidArgumentException extends \InvalidArgumentException implements ExceptionInterface
{
}

View File

@ -0,0 +1,17 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendXml\Exception;
/**
 * Runtime exception
 *
 * Extends PHP's built-in \RuntimeException and additionally carries the
 * ZendXml ExceptionInterface marker; it adds no behaviour of its own.
 */
class RuntimeException extends \RuntimeException implements ExceptionInterface
{
}

View File

@ -0,0 +1,374 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendXml;
use DOMDocument;
use SimpleXMLElement;
class Security
{
const ENTITY_DETECT = 'Detected use of ENTITY in XML, disabled to prevent XXE/XEE attacks';
/**
* Heuristic scan to detect entity in XML
*
* @param string $xml
* @throws Exception\RuntimeException If entity expansion or external entity declaration was discovered.
*/
protected static function heuristicScan($xml)
{
foreach (self::getEntityComparison($xml) as $compare) {
if (strpos($xml, $compare) !== false) {
throw new Exception\RuntimeException(self::ENTITY_DETECT);
}
}
}
/**
* Scan XML string for potential XXE and XEE attacks
*
* @param string $xml
* @param DomDocument $dom
* @throws Exception\RuntimeException
* @return SimpleXMLElement|DomDocument|boolean
*/
public static function scan($xml, DOMDocument $dom = null)
{
// If running with PHP-FPM we perform an heuristic scan
// We cannot use libxml_disable_entity_loader because of this bug
// @see https://bugs.php.net/bug.php?id=64938
if (self::isPhpFpm()) {
self::heuristicScan($xml);
}
if (null === $dom) {
$simpleXml = true;
$dom = new DOMDocument();
}
if (!self::isPhpFpm()) {
$loadEntities = libxml_disable_entity_loader(true);
$useInternalXmlErrors = libxml_use_internal_errors(true);
}
// Load XML with network access disabled (LIBXML_NONET)
// error disabled with @ for PHP-FPM scenario
set_error_handler(function ($errno, $errstr) {
if (substr_count($errstr, 'DOMDocument::loadXML()') > 0) {
return true;
}
return false;
}, E_WARNING);
$result = $dom->loadXml($xml, LIBXML_NONET);
restore_error_handler();
if (!$result) {
// Entity load to previous setting
if (!self::isPhpFpm()) {
libxml_disable_entity_loader($loadEntities);
libxml_use_internal_errors($useInternalXmlErrors);
}
return false;
}
// Scan for potential XEE attacks using ENTITY, if not PHP-FPM
if (!self::isPhpFpm()) {
foreach ($dom->childNodes as $child) {
if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
if ($child->entities->length > 0) {
throw new Exception\RuntimeException(self::ENTITY_DETECT);
}
}
}
}
// Entity load to previous setting
if (!self::isPhpFpm()) {
libxml_disable_entity_loader($loadEntities);
libxml_use_internal_errors($useInternalXmlErrors);
}
if (isset($simpleXml)) {
$result = simplexml_import_dom($dom);
if (!$result instanceof SimpleXMLElement) {
return false;
}
return $result;
}
return $dom;
}
/**
* Scan XML file for potential XXE/XEE attacks
*
* @param string $file
* @param DOMDocument $dom
* @throws Exception\InvalidArgumentException
* @return SimpleXMLElement|DomDocument
*/
public static function scanFile($file, DOMDocument $dom = null)
{
if (!file_exists($file)) {
throw new Exception\InvalidArgumentException(
"The file $file specified doesn't exist"
);
}
return self::scan(file_get_contents($file), $dom);
}
/**
* Return true if PHP is running with PHP-FPM
*
* This method is mainly used to determine whether or not heuristic checks
* (vs libxml checks) should be made, due to threading issues in libxml;
* under php-fpm, threading becomes a concern.
*
* However, PHP versions 5.5.22+ and 5.6.6+ contain a patch to the
* libxml support in PHP that makes the libxml checks viable; in such
* versions, this method will return false to enforce those checks, which
* are more strict and accurate than the heuristic checks.
*
* @return boolean
*/
public static function isPhpFpm()
{
$isVulnerableVersion = (
version_compare(PHP_VERSION, '5.5.22', 'lt')
|| (
version_compare(PHP_VERSION, '5.6', 'gte')
&& version_compare(PHP_VERSION, '5.6.6', 'lt')
)
);
if (substr(php_sapi_name(), 0, 3) === 'fpm' && $isVulnerableVersion) {
return true;
}
return false;
}
/**
* Determine and return the string(s) to use for the <!ENTITY comparison.
*
* @param string $xml
* @return string[]
*/
protected static function getEntityComparison($xml)
{
$encodingMap = self::getAsciiEncodingMap();
return array_map(function ($encoding) use ($encodingMap) {
$generator = isset($encodingMap[$encoding]) ? $encodingMap[$encoding] : $encodingMap['UTF-8'];
return $generator('<!ENTITY');
}, self::detectXmlEncoding($xml, self::detectStringEncoding($xml)));
}
/**
* Determine the string encoding.
*
* Determines string encoding from either a detected BOM or a
* heuristic.
*
* @param string $xml
* @return string File encoding
*/
protected static function detectStringEncoding($xml)
{
return self::detectBom($xml) ?: self::detectXmlStringEncoding($xml);
}
/**
* Attempt to match a known BOM.
*
* Iterates through the return of getBomMap(), comparing the initial bytes
* of the provided string to the BOM of each; if a match is determined,
* it returns the encoding.
*
* @param string $string
* @return false|string Returns encoding on success.
*/
protected static function detectBom($string)
{
foreach (self::getBomMap() as $criteria) {
if (0 === strncmp($string, $criteria['bom'], $criteria['length'])) {
return $criteria['encoding'];
}
}
return false;
}
/**
* Attempt to detect the string encoding of an XML string.
*
* @param string $xml
* @return string Encoding
*/
protected static function detectXmlStringEncoding($xml)
{
foreach (self::getAsciiEncodingMap() as $encoding => $generator) {
$prefix = $generator('<' . '?xml');
if (0 === strncmp($xml, $prefix, strlen($prefix))) {
return $encoding;
}
}
// Fallback
return 'UTF-8';
}
/**
* Attempt to detect the specified XML encoding.
*
* Using the file's encoding, determines if an "encoding" attribute is
* present and well-formed in the XML declaration; if so, it returns a
* list with both the ASCII representation of that declaration and the
* original file encoding.
*
* If not, a list containing only the provided file encoding is returned.
*
* @param string $xml
* @param string $fileEncoding
* @return string[] Potential XML encodings
*/
protected static function detectXmlEncoding($xml, $fileEncoding)
{
$encodingMap = self::getAsciiEncodingMap();
$generator = $encodingMap[$fileEncoding];
$encAttr = $generator('encoding="');
$quote = $generator('"');
$close = $generator('>');
$closePos = strpos($xml, $close);
if (false === $closePos) {
return array($fileEncoding);
}
$encPos = strpos($xml, $encAttr);
if (false === $encPos
|| $encPos > $closePos
) {
return array($fileEncoding);
}
$encPos += strlen($encAttr);
$quotePos = strpos($xml, $quote, $encPos);
if (false === $quotePos) {
return array($fileEncoding);
}
$encoding = self::substr($xml, $encPos, $quotePos);
return array(
// Following line works because we're only supporting 8-bit safe encodings at this time.
str_replace('\0', '', $encoding), // detected encoding
$fileEncoding, // file encoding
);
}
/**
* Return a list of BOM maps.
*
* Returns a list of common encoding -> BOM maps, along with the character
* length to compare against.
*
* @link https://en.wikipedia.org/wiki/Byte_order_mark
* @return array
*/
protected static function getBomMap()
{
return array(
array(
'encoding' => 'UTF-32BE',
'bom' => pack('CCCC', 0x00, 0x00, 0xfe, 0xff),
'length' => 4,
),
array(
'encoding' => 'UTF-32LE',
'bom' => pack('CCCC', 0xff, 0xfe, 0x00, 0x00),
'length' => 4,
),
array(
'encoding' => 'GB-18030',
'bom' => pack('CCCC', 0x84, 0x31, 0x95, 0x33),
'length' => 4,
),
array(
'encoding' => 'UTF-16BE',
'bom' => pack('CC', 0xfe, 0xff),
'length' => 2,
),
array(
'encoding' => 'UTF-16LE',
'bom' => pack('CC', 0xff, 0xfe),
'length' => 2,
),
array(
'encoding' => 'UTF-8',
'bom' => pack('CCC', 0xef, 0xbb, 0xbf),
'length' => 3,
),
);
}
/**
* Return a map of encoding => generator pairs.
*
* Returns a map of encoding => generator pairs, where the generator is a
* callable that accepts a string and returns the appropriate byte order
* sequence of that string for the encoding.
*
* @return array
*/
protected static function getAsciiEncodingMap()
{
return array(
'UTF-32BE' => function ($ascii) {
return preg_replace('/(.)/', "\0\0\0\\1", $ascii);
},
'UTF-32LE' => function ($ascii) {
return preg_replace('/(.)/', "\\1\0\0\0", $ascii);
},
'UTF-32odd1' => function ($ascii) {
return preg_replace('/(.)/', "\0\\1\0\0", $ascii);
},
'UTF-32odd2' => function ($ascii) {
return preg_replace('/(.)/', "\0\0\\1\0", $ascii);
},
'UTF-16BE' => function ($ascii) {
return preg_replace('/(.)/', "\0\\1", $ascii);
},
'UTF-16LE' => function ($ascii) {
return preg_replace('/(.)/', "\\1\0", $ascii);
},
'UTF-8' => function ($ascii) {
return $ascii;
},
'GB-18030' => function ($ascii) {
return $ascii;
},
);
}
/**
* Binary-safe substr.
*
* substr() is not binary-safe; this method loops by character to ensure
* multi-byte characters are aggregated correctly.
*
* @param string $string
* @param int $start
* @param int $end
* @return string
*/
protected static function substr($string, $start, $end)
{
$substr = '';
for ($i = $start; $i < $end; $i += 1) {
$substr .= $string[$i];
}
return $substr;
}
}

View File

@ -0,0 +1,92 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @package Zend
*/
/**
* Set error reporting to the level to which Zend Framework code must comply.
*/
error_reporting( E_ALL | E_STRICT );
if (class_exists('PHPUnit_Runner_Version', true)) {
$phpUnitVersion = PHPUnit_Runner_Version::id();
if ('@package_version@' !== $phpUnitVersion && version_compare($phpUnitVersion, '3.7.0', '<')) {
echo 'This version of PHPUnit (' .
PHPUnit_Runner_Version::id() .
') is not supported for ZendXml unit tests - use v 3.7.0 or higher.'
. PHP_EOL
;
exit(1);
}
unset($phpUnitVersion);
}
/**
 * Setup autoloading
 *
 * The Composer autoloader is used when this package has its own vendor
 * directory. Otherwise a minimal PSR-0 autoloader is registered that resolves
 * class files through the include path.
 */
if (file_exists(__DIR__ . '/../vendor/autoload.php')) {
    include_once __DIR__ . '/../vendor/autoload.php';
} else {
    // Composer maps the ZendXml namespace to this package's library/
    // directory, so that directory has to be searchable for the fallback
    // autoloader to find the classes under test. ../src stays first so any
    // existing precedence is unchanged.
    set_include_path(implode(PATH_SEPARATOR, array(
        __DIR__ . '/../src',
        __DIR__ . '/../library',
        __DIR__,
        get_include_path(),
    )));

    /**
     * @link https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md#example-implementation
     */
    spl_autoload_register(function ($className) {
        $className = ltrim($className, '\\');
        $fileName = '';
        $lastNsPos = strrpos($className, '\\');
        if ($lastNsPos) {
            // Namespace separators become directory separators; underscores
            // are only translated in the class-name part below.
            $namespace = substr($className, 0, $lastNsPos);
            $className = substr($className, $lastNsPos + 1);
            $fileName = str_replace('\\', DIRECTORY_SEPARATOR, $namespace) . DIRECTORY_SEPARATOR;
        }
        $fileName .= str_replace('_', DIRECTORY_SEPARATOR, $className) . '.php';
        require $fileName;
    });
}
/**
 * Code coverage option
 *
 * Only active when the constant TESTS_GENERATE_REPORT is defined and strictly
 * true. The last command-line argument selects which part of the library is
 * whitelisted: a directory, the directory containing a file, or the whole
 * library when the argument matches neither.
 */
if (defined('TESTS_GENERATE_REPORT') && TESTS_GENERATE_REPORT === true) {
    // These two locations were read below without ever being assigned, which
    // raised notices and produced paths rooted at "/". The tests live next to
    // this bootstrap file; the library sits in the sibling library/ directory.
    $zfCoreTests = __DIR__;
    $zfCoreLibrary = dirname(__DIR__) . '/library';

    $codeCoverageFilter = new PHP_CodeCoverage_Filter();
    $lastArg = end($_SERVER['argv']);
    if (is_dir($zfCoreTests . '/' . $lastArg)) {
        $codeCoverageFilter->addDirectoryToWhitelist($zfCoreLibrary . '/' . $lastArg);
    } elseif (is_file($zfCoreTests . '/' . $lastArg)) {
        $codeCoverageFilter->addDirectoryToWhitelist(dirname($zfCoreLibrary . '/' . $lastArg));
    } else {
        $codeCoverageFilter->addDirectoryToWhitelist($zfCoreLibrary);
    }

    /*
     * Omit from code coverage reports the contents of the tests directory
     */
    $codeCoverageFilter->addDirectoryToBlacklist($zfCoreTests, '');
    $codeCoverageFilter->addDirectoryToBlacklist(PEAR_INSTALL_DIR, '');
    $codeCoverageFilter->addDirectoryToBlacklist(PHP_LIBDIR, '');

    unset($codeCoverageFilter, $zfCoreTests, $zfCoreLibrary);
}

/*
 * Unset global variables that are no longer needed.
 */
unset($phpUnitVersion);

View File

@ -0,0 +1,125 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendTest\Xml;
use ZendXml\Security as XmlSecurity;
use ZendXml\Exception;
use DOMDocument;
use ReflectionMethod;
use SimpleXMLElement;
/**
 * Runs ZendXml\Security::heuristicScan() against XML documents encoded as
 * UTF-16 and UTF-32 (both byte orders), with and without a byte-order mark.
 *
 * @group ZF2015-06
 */
class MultibyteTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Data provider for the multibyte encodings under test.
     *
     * Each data set is array(encoding name, byte-order mark, BOM length in
     * bytes).
     *
     * @return array
     */
    public function multibyteEncodings()
    {
        // The length is handed to strncmp(); it must equal the real BOM size
        // (2 bytes for UTF-16, 4 for UTF-32). A larger value makes strncmp()
        // compare past the end of the BOM, so it could never report a match.
        return array(
            'UTF-16LE' => array('UTF-16LE', pack('CC', 0xff, 0xfe), 2),
            'UTF-16BE' => array('UTF-16BE', pack('CC', 0xfe, 0xff), 2),
            'UTF-32LE' => array('UTF-32LE', pack('CCCC', 0xff, 0xfe, 0x00, 0x00), 4),
            'UTF-32BE' => array('UTF-32BE', pack('CCCC', 0x00, 0x00, 0xfe, 0xff), 4),
        );
    }

    /**
     * XML template that declares an external entity pointing at a local file
     * and references it. "{ENCODING}" is a placeholder for the declared
     * encoding.
     *
     * @return string
     */
    public function getXmlWithXXE()
    {
        return <<<XML
<?xml version="1.0" encoding="{ENCODING}"?>
<!DOCTYPE methodCall [
<!ENTITY pocdata SYSTEM "file:///etc/passwd">
]>
<methodCall>
<methodName>retrieved: &pocdata;</methodName>
</methodCall>
XML;
    }

    /**
     * Invoke ZendXml\Security::heuristicScan with the provided XML.
     *
     * The method is made accessible through reflection and called statically.
     *
     * @param string $xml
     * @return mixed Whatever heuristicScan() returns
     * @throws Exception\RuntimeException
     */
    public function invokeHeuristicScan($xml)
    {
        $r = new ReflectionMethod('ZendXml\Security', 'heuristicScan');
        $r->setAccessible(true);
        return $r->invoke(null, $xml);
    }

    /**
     * Fill in the declared encoding of an XML template and convert the
     * document from UTF-8 to the target encoding.
     *
     * @param string $template XML containing the "{ENCODING}" placeholder
     * @param string $declaredEncoding Value written into the XML declaration
     * @param string $targetEncoding Encoding the bytes are converted to
     * @return string The converted document (iconv() output, no BOM prepended here)
     */
    private function encodeXml($template, $declaredEncoding, $targetEncoding)
    {
        $xml = str_replace('{ENCODING}', $declaredEncoding, $template);
        $encoded = iconv('UTF-8', $targetEncoding, $xml);
        // iconv() returns false on failure; stop here instead of scanning a
        // boolean and reporting a misleading result.
        $this->assertInternalType(
            'string',
            $encoded,
            'iconv() could not convert the fixture to ' . $targetEncoding
        );
        return $encoded;
    }

    /**
     * @dataProvider multibyteEncodings
     * @group heuristicDetection
     */
    public function testDetectsMultibyteXXEVectorsUnderFPMWithEncodedStringMissingBOM($encoding, $bom, $bomLength)
    {
        $xml = $this->encodeXml($this->getXmlWithXXE(), $encoding, $encoding);
        // Precondition: the converted document really does not start with a BOM.
        $this->assertNotSame(0, strncmp($xml, $bom, $bomLength));
        $this->setExpectedException('ZendXml\Exception\RuntimeException', 'ENTITY');
        $this->invokeHeuristicScan($xml);
    }

    /**
     * @dataProvider multibyteEncodings
     */
    public function testDetectsMultibyteXXEVectorsUnderFPMWithEncodedStringUsingBOM($encoding, $bom)
    {
        $xml = $bom . $this->encodeXml($this->getXmlWithXXE(), $encoding, $encoding);
        $this->setExpectedException('ZendXml\Exception\RuntimeException', 'ENTITY');
        $this->invokeHeuristicScan($xml);
    }

    /**
     * XML template without any DOCTYPE or ENTITY declaration. It still
     * contains the "&pocdata;" reference and the "{ENCODING}" placeholder.
     *
     * @return string
     */
    public function getXmlWithoutXXE()
    {
        return <<<XML
<?xml version="1.0" encoding="{ENCODING}"?>
<methodCall>
<methodName>retrieved: &pocdata;</methodName>
</methodCall>
XML;
    }

    /**
     * @dataProvider multibyteEncodings
     */
    public function testDoesNotFlagValidMultibyteXmlAsInvalidUnderFPM($encoding)
    {
        $xml = $this->encodeXml($this->getXmlWithoutXXE(), $encoding, $encoding);
        try {
            $result = $this->invokeHeuristicScan($xml);
        } catch (\Exception $e) {
            $this->fail('Security scan raised exception when it should not have');
        }
        // Asserted outside the try block so that an assertion failure is
        // reported as such and is not caught by the handler above.
        $this->assertNull($result);
    }

    /**
     * The XML declaration claims UTF-8 while the bytes are in the multibyte
     * encoding supplied by the provider, prefixed with that encoding's BOM.
     *
     * @dataProvider multibyteEncodings
     * @group mixedEncoding
     */
    public function testDetectsXXEWhenXMLDocumentEncodingDiffersFromFileEncoding($encoding, $bom)
    {
        $xml = $bom . $this->encodeXml($this->getXmlWithXXE(), 'UTF-8', $encoding);
        $this->setExpectedException('ZendXml\Exception\RuntimeException', 'ENTITY');
        $this->invokeHeuristicScan($xml);
    }
}

View File

@ -0,0 +1,135 @@
<?php
/**
* Zend Framework (http://framework.zend.com/)
*
* @link http://github.com/zendframework/zf2 for the canonical source repository
* @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
namespace ZendTest\Xml;
use ZendXml\Security as XmlSecurity;
use ZendXml\Exception;
use DOMDocument;
use SimpleXMLElement;
/**
 * Tests for the public scanning entry points of ZendXml\Security:
 * scan() with and without a DOMDocument, and scanFile().
 */
class SecurityTest extends \PHPUnit_Framework_TestCase
{
    /**
     * A document declaring an internal entity must be rejected.
     */
    public function testScanForXEE()
    {
        $xml = <<<XML
<?xml version="1.0"?>
<!DOCTYPE results [<!ENTITY harmless "completely harmless">]>
<results>
<result>This result is &harmless;</result>
</results>
XML;
        $this->setExpectedException('ZendXml\Exception\RuntimeException');
        XmlSecurity::scan($xml);
    }

    /**
     * A document declaring an external (SYSTEM) entity that points at a local
     * file must be rejected.
     */
    public function testScanForXXE()
    {
        $file = tempnam(sys_get_temp_dir(), 'ZendXml_Security');
        file_put_contents($file, 'This is a remote content!');
        $xml = <<<XML
<?xml version="1.0"?>
<!DOCTYPE root
[
<!ENTITY foo SYSTEM "file://$file">
]>
<results>
<result>&foo;</result>
</results>
XML;
        // The temporary file is removed on every path (expected exception,
        // unexpected exception, no exception) so a failing run leaves nothing
        // behind in the temp directory.
        try {
            XmlSecurity::scan($xml);
        } catch (Exception\RuntimeException $e) {
            unlink($file);
            return;
        } catch (\Exception $e) {
            unlink($file);
            throw $e;
        }
        unlink($file);
        $this->fail('An expected exception has not been raised.');
    }

    /**
     * Without a DOMDocument argument, scan() yields a SimpleXMLElement.
     */
    public function testScanSimpleXmlResult()
    {
        $result = XmlSecurity::scan($this->getXml());
        $this->assertInstanceOf('SimpleXMLElement', $result);
        $this->assertEquals('test', (string) $result->result);
    }

    /**
     * When a DOMDocument is supplied, scan() yields a DOMDocument.
     */
    public function testScanDom()
    {
        $dom = new DOMDocument('1.0');
        $result = XmlSecurity::scan($this->getXml(), $dom);
        $this->assertInstanceOf('DOMDocument', $result);
        $node = $result->getElementsByTagName('result')->item(0);
        $this->assertEquals('test', $node->nodeValue);
    }

    /**
     * Malformed XML makes scan() return false.
     */
    public function testScanInvalidXml()
    {
        $xml = <<<XML
<foo>test</bar>
XML;
        $result = XmlSecurity::scan($xml);
        $this->assertFalse($result);
    }

    /**
     * Malformed XML makes scan() return false in DOM mode as well.
     */
    public function testScanInvalidXmlDom()
    {
        $xml = <<<XML
<foo>test</bar>
XML;
        $dom = new DOMDocument('1.0');
        $result = XmlSecurity::scan($xml, $dom);
        $this->assertFalse($result);
    }

    /**
     * scanFile() reads the document from disk and yields a SimpleXMLElement.
     */
    public function testScanFile()
    {
        $file = tempnam(sys_get_temp_dir(), 'ZendXml_Security');
        file_put_contents($file, $this->getXml());
        $result = XmlSecurity::scanFile($file);
        // Clean up before asserting: a failed assertion aborts the method and
        // would otherwise leave the temporary file behind.
        unlink($file);
        $this->assertInstanceOf('SimpleXMLElement', $result);
        $this->assertEquals('test', (string) $result->result);
    }

    /**
     * A DOCTYPE holding only ELEMENT declarations is accepted, and the
     * resulting document validates against it.
     */
    public function testScanXmlWithDTD()
    {
        $xml = <<<XML
<?xml version="1.0"?>
<!DOCTYPE results [
<!ELEMENT results (result+)>
<!ELEMENT result (#PCDATA)>
]>
<results>
<result>test</result>
</results>
XML;
        $dom = new DOMDocument('1.0');
        $result = XmlSecurity::scan($xml, $dom);
        $this->assertInstanceOf('DOMDocument', $result);
        $this->assertTrue($result->validate());
    }

    /**
     * Minimal well-formed document shared by the positive tests.
     *
     * @return string
     */
    protected function getXml()
    {
        return <<<XML
<?xml version="1.0"?>
<results>
<result>test</result>
</results>
XML;
    }
}

View File

@ -0,0 +1,27 @@
<!-- PHPUnit configuration for the ZendXml test suite. ./Bootstrap.php is named as the bootstrap script. -->
<phpunit bootstrap="./Bootstrap.php" colors="true">
<testsuites>
<!-- Tests are collected from ./ZendXmlTest; the TestAsset subdirectory is excluded. -->
<testsuite name="ZendXml Test Suite">
<directory>./ZendXmlTest</directory>
<exclude>./ZendXmlTest/TestAsset</exclude>
</testsuite>
</testsuites>
<groups>
<!-- Empty: no test group is excluded. -->
<exclude>
</exclude>
</groups>
<!-- Empty: no listeners are registered. -->
<listeners>
</listeners>
<filter>
<!-- Blacklisted paths: the test sources (.php files) and ../vendor. -->
<blacklist>
<directory suffix=".php">./ZendXmlTest</directory>
<directory>../vendor</directory>
</blacklist>
</filter>
<!-- Empty: no PHP settings are defined for the run. -->
<php>
</php>
</phpunit>