From 578f9cbf14d43835b48af3327ef3c05a31080904 Mon Sep 17 00:00:00 2001 From: Frederic Guillot Date: Fri, 10 Apr 2015 20:34:48 -0400 Subject: [PATCH] Add custom rules directory support --- .gitignore | 1 + common.php | 2 + docs/full-article-download.markdown | 6 +- models/auto_update.php | 1 + models/config.php | 4 + rules/.htaccess | 1 + vendor/autoload.php | 2 +- vendor/composer/autoload_real.php | 10 +- vendor/composer/installed.json | 24 ++--- vendor/fguillot/json-rpc/.gitignore | 2 + vendor/fguillot/json-rpc/phpunit.xml | 7 ++ vendor/fguillot/json-rpc/tests/ClientTest.php | 4 +- .../json-rpc/tests/ServerProcedureTest.php | 2 - .../json-rpc/tests/ServerProtocolTest.php | 4 +- vendor/fguillot/picofeed/docs/config.markdown | 10 ++ .../picofeed/lib/PicoFeed/Client/Client.php | 22 ++++- .../picofeed/lib/PicoFeed/Client/Curl.php | 24 ++--- .../picofeed/lib/PicoFeed/Client/Grabber.php | 91 +++++++++++++++---- .../picofeed/lib/PicoFeed/Client/Stream.php | 2 + .../picofeed/lib/PicoFeed/Config/Config.php | 2 + .../picofeed/lib/PicoFeed/Filter/Filter.php | 1 + .../lib/PicoFeed/Parser/XmlParser.php | 11 ++- .../lib/PicoFeed/Rules/www.lemonde.fr.php | 8 +- .../picofeed/tests/Client/GrabberTest.php | 87 ++++++++++++++++-- vendor/fguillot/simple-validator/phpunit.xml | 7 ++ .../tests/AlphaNumericTest.php | 3 - .../tests/EmailValidatorTest.php | 5 +- .../tests/ExistsValidatorTest.php | 6 +- .../tests/IntegerValidatorTest.php | 5 +- .../tests/LengthValidatorTest.php | 5 +- .../tests/NotEqualsValidator.php | 5 +- .../tests/NumericValidatorTest.php | 5 +- .../tests/RangeValidatorTest.php | 5 +- .../tests/RequiredValidatorTest.php | 5 +- .../tests/UniqueValidatorTest.php | 5 +- .../simple-validator/tests/ValidatorTest.php | 8 +- 36 files changed, 274 insertions(+), 118 deletions(-) create mode 100644 rules/.htaccess create mode 100644 vendor/fguillot/json-rpc/.gitignore create mode 100644 vendor/fguillot/json-rpc/phpunit.xml create mode 100644 vendor/fguillot/simple-validator/phpunit.xml diff --git a/.gitignore b/.gitignore index f435b4d..03db907 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,4 @@ Thumbs.db config.php !models/* !controllers/* +rules/*.php diff --git a/common.php b/common.php index 0c13b30..f58aa9a 100644 --- a/common.php +++ b/common.php @@ -31,6 +31,8 @@ defined('AUTO_UPDATE_DOWNLOAD_DIRECTORY') or define('AUTO_UPDATE_DOWNLOAD_DIRECT defined('AUTO_UPDATE_ARCHIVE_DIRECTORY') or define('AUTO_UPDATE_ARCHIVE_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'archive'); defined('AUTO_UPDATE_BACKUP_DIRECTORY') or define('AUTO_UPDATE_BACKUP_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'backup'); +defined('RULES_DIRECTORY') or define('RULES_DIRECTORY', ROOT_DIRECTORY.DIRECTORY_SEPARATOR.'rules'); + require __DIR__.'/check_setup.php'; PicoDb\Database::bootstrap('db', function() { diff --git a/docs/full-article-download.markdown b/docs/full-article-download.markdown index e3bc3a9..49aa7bd 100644 --- a/docs/full-article-download.markdown +++ b/docs/full-article-download.markdown @@ -20,7 +20,7 @@ However the content grabber doesn't work very well with all websites. How to write a grabber rules file? ---------------------------------- -Add a PHP file to the directory `vendor/fguillot/picofeed/lib/PicoFeed/Rules`, the filename must be the domain name: +Add a PHP file to the directory `rules`, the filename must be the domain name with the suffix `.php`: Example with the BBC website, `www.bbc.co.uk.php`: @@ -55,6 +55,4 @@ Don't forget to send a pull request or a ticket to share your contribution with List of content grabber rules ----------------------------- -[List of existing rules on the repository](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules) - -If you want to add new rules, just open a ticket and I will do it. +[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules). diff --git a/models/auto_update.php b/models/auto_update.php index 7cd2314..447b0b2 100644 --- a/models/auto_update.php +++ b/models/auto_update.php @@ -17,6 +17,7 @@ function get_files_list($directory) 'data', 'scripts', 'config.php', + 'rules', ); $it = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($directory), RecursiveIteratorIterator::SELF_FIRST); diff --git a/models/config.php b/models/config.php index ac360b8..6b62ee2 100644 --- a/models/config.php +++ b/models/config.php @@ -21,7 +21,11 @@ function get_reader_config() // Client $config->setClientTimeout(HTTP_TIMEOUT); $config->setClientUserAgent(HTTP_USER_AGENT); + + // Grabber + $config->setGrabberTimeout(HTTP_TIMEOUT); $config->setGrabberUserAgent(HTTP_USER_AGENT); + $config->setGrabberRulesFolder(RULES_DIRECTORY); // Proxy $config->setProxyHostname(PROXY_HOSTNAME); diff --git a/rules/.htaccess b/rules/.htaccess new file mode 100644 index 0000000..14249c5 --- /dev/null +++ b/rules/.htaccess @@ -0,0 +1 @@ +Deny from all \ No newline at end of file diff --git a/vendor/autoload.php b/vendor/autoload.php index 4e05002..1dbb1f2 100644 --- a/vendor/autoload.php +++ b/vendor/autoload.php @@ -4,4 +4,4 @@ require_once __DIR__ . '/composer' . '/autoload_real.php'; -return ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd::getLoader(); +return ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820::getLoader(); diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php index 8c64f6e..9669ef5 100644 --- a/vendor/composer/autoload_real.php +++ b/vendor/composer/autoload_real.php @@ -2,7 +2,7 @@ // autoload_real.php @generated by Composer -class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd +class ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820 { private static $loader; @@ -19,9 +19,9 @@ class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd return self::$loader; } - spl_autoload_register(array('ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd', 'loadClassLoader'), true, true); + spl_autoload_register(array('ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820', 'loadClassLoader'), true, true); self::$loader = $loader = new \Composer\Autoload\ClassLoader(); - spl_autoload_unregister(array('ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd', 'loadClassLoader')); + spl_autoload_unregister(array('ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820', 'loadClassLoader')); $map = require __DIR__ . '/autoload_namespaces.php'; foreach ($map as $namespace => $path) { @@ -42,14 +42,14 @@ class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd $includeFiles = require __DIR__ . '/autoload_files.php'; foreach ($includeFiles as $file) { - composerRequire177dcd3c68ed52652977fcc464bd77bd($file); + composerRequirefa142d3c582de229e6ed8fd200703820($file); } return $loader; } } -function composerRequire177dcd3c68ed52652977fcc464bd77bd($file) +function composerRequirefa142d3c582de229e6ed8fd200703820($file) { require $file; } diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 8a9e066..c9ad89a 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -84,18 +84,18 @@ "source": { "type": "git", "url": "https://github.com/fguillot/simpleValidator.git", - "reference": "5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7" + "reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7", - "reference": "5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7", + "url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/41655dc7b9224395f5bb3b5623f6e428fe6d64e8", + "reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2015-02-14 21:04:14", + "time": "2015-04-05 21:44:06", "type": "library", "installation-source": "dist", "autoload": { @@ -123,18 +123,18 @@ "source": { "type": "git", "url": "https://github.com/fguillot/JsonRPC.git", - "reference": "d0feab084422fa937da10e3551196b1c6fdf6918" + "reference": "29d63a09ecd450d5e29fef74f687aab221055910" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/d0feab084422fa937da10e3551196b1c6fdf6918", - "reference": "d0feab084422fa937da10e3551196b1c6fdf6918", + "url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/29d63a09ecd450d5e29fef74f687aab221055910", + "reference": "29d63a09ecd450d5e29fef74f687aab221055910", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2015-03-25 23:55:18", + "time": "2015-04-05 21:49:38", "type": "library", "installation-source": "dist", "autoload": { @@ -162,12 +162,12 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "7c28753d5936ba635435a8e0e941dcabee67b243" + "reference": "3a0dce6bd3a62566c5f8414f7884f959753762f7" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/7c28753d5936ba635435a8e0e941dcabee67b243", - "reference": "7c28753d5936ba635435a8e0e941dcabee67b243", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/3a0dce6bd3a62566c5f8414f7884f959753762f7", + "reference": "3a0dce6bd3a62566c5f8414f7884f959753762f7", "shasum": "" }, "require": { @@ -181,7 +181,7 @@ "suggest": { "ext-curl": "PicoFeed will use cURL if present" }, - "time": "2015-03-30 23:34:59", + "time": "2015-04-10 23:28:18", "bin": [ "picofeed" ], diff --git a/vendor/fguillot/json-rpc/.gitignore b/vendor/fguillot/json-rpc/.gitignore new file mode 100644 index 0000000..b0ef068 --- /dev/null +++ b/vendor/fguillot/json-rpc/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +vendor/ \ No newline at end of file diff --git a/vendor/fguillot/json-rpc/phpunit.xml b/vendor/fguillot/json-rpc/phpunit.xml new file mode 100644 index 0000000..dc36b5e --- /dev/null +++ b/vendor/fguillot/json-rpc/phpunit.xml @@ -0,0 +1,7 @@ + + + + tests + + + diff --git a/vendor/fguillot/json-rpc/tests/ClientTest.php b/vendor/fguillot/json-rpc/tests/ClientTest.php index 2837b3c..c592066 100644 --- a/vendor/fguillot/json-rpc/tests/ClientTest.php +++ b/vendor/fguillot/json-rpc/tests/ClientTest.php @@ -1,7 +1,5 @@ assertTrue($client->is_batch); $this->assertEmpty($client->batch); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/json-rpc/tests/ServerProcedureTest.php b/vendor/fguillot/json-rpc/tests/ServerProcedureTest.php index 2224dc7..5232dcd 100644 --- a/vendor/fguillot/json-rpc/tests/ServerProcedureTest.php +++ b/vendor/fguillot/json-rpc/tests/ServerProcedureTest.php @@ -1,7 +1,5 @@ assertEquals('', $server->execute()); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/picofeed/docs/config.markdown b/vendor/fguillot/picofeed/docs/config.markdown index 8b197f6..3360abf 100644 --- a/vendor/fguillot/picofeed/docs/config.markdown +++ b/vendor/fguillot/picofeed/docs/config.markdown @@ -126,6 +126,16 @@ $config->setGrabberTimeout(20); // 20 seconds $config->setGrabberUserAgent('My content scraper'); ``` +### Add a rules folder + +- Method name: `setGrabberRulesFolder()` +- Default value: `null` +- Argument value: string + +```php +$config->setGrabberRulesFolder('/path/to/my/grabber/rules'); +``` + Parser ------ diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php index 84a5cf2..ae93f3e 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php @@ -37,6 +37,14 @@ abstract class Client */ private $encoding = ''; + /** + * HTTP request headers + * + * @access protected + * @var array + */ + protected $request_headers = array(); + /** * HTTP Etag header * @@ -193,6 +201,16 @@ abstract class Client throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed'); } + /** + * Add HTTP Header to the request + * + * @access public + * @param array $headers + */ + public function setHeaders($headers) { + $this->request_headers = $headers; + } + /** * Perform the HTTP request * @@ -645,8 +663,8 @@ abstract class Client public function setConfig($config) { if ($config !== null) { - $this->setTimeout($config->getGrabberTimeout()); - $this->setUserAgent($config->getGrabberUserAgent()); + $this->setTimeout($config->getClientTimeout()); + $this->setUserAgent($config->getClientUserAgent()); $this->setMaxRedirections($config->getMaxRedirections()); $this->setMaxBodySize($config->getMaxBodySize()); $this->setProxyHostname($config->getProxyHostname()); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index 278eeb4..3dec58c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -34,7 +34,7 @@ class Curl extends Client * @access private * @var array */ - private $headers = array(); + private $response_headers = array(); /** * Counter on the number of header received @@ -42,7 +42,7 @@ class Curl extends Client * @access private * @var integer */ - private $headers_counter = 0; + private $response_headers_count = 0; /** * cURL callback to read the HTTP body @@ -81,15 +81,15 @@ class Curl extends Client $length = strlen($buffer); if ($buffer === "\r\n") { - $this->headers_counter++; + $this->response_headers_count++; } else { - if (! isset($this->headers[$this->headers_counter])) { - $this->headers[$this->headers_counter] = ''; + if (! isset($this->response_headers[$this->response_headers_count])) { + $this->response_headers[$this->response_headers_count] = ''; } - $this->headers[$this->headers_counter] .= $buffer; + $this->response_headers[$this->response_headers_count] .= $buffer; } return $length; @@ -153,6 +153,8 @@ class Curl extends Client $headers[] = 'If-Modified-Since: '.$this->last_modified; } + $headers = array_merge($headers, $this->request_headers); + return $headers; } @@ -302,7 +304,7 @@ class Curl extends Client { $this->executeContext(); - list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->headers[$this->headers_counter - 1])); + list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->response_headers[$this->response_headers_count - 1])); // When restricted with open_basedir if ($this->needToHandleRedirection($follow_location, $status)) { @@ -343,8 +345,8 @@ class Curl extends Client $this->url = Url::resolve($location, $this->url); $this->body = ''; $this->body_length = 0; - $this->headers = array(); - $this->headers_counter = 0; + $this->response_headers = array(); + $this->response_headers_count = 0; while (true) { @@ -360,8 +362,8 @@ class Curl extends Client $this->url = Url::resolve($result['headers']['Location'], $this->url); $this->body = ''; $this->body_length = 0; - $this->headers = array(); - $this->headers_counter = 0; + $this->response_headers = array(); + $this->response_headers_count = 0; } else { break; diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php index fe48904..bec8ab0 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php @@ -284,7 +284,7 @@ class Grabber Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes'); $rules = $this->getRules(); - if (is_array($rules)) { + if (! empty($rules)) { Logger::setMessage(get_called_class().': Parse content with rules'); $this->parseContentWithRules($rules); } @@ -316,7 +316,13 @@ class Grabber try { $client = Client::getInstance(); - $client->setConfig($this->config); + + if ($this->config !== null) { + $client->setConfig($this->config); + $client->setTimeout($this->config->getGrabberTimeout()); + $client->setUserAgent($this->config->getGrabberUserAgent()); + } + $client->execute($this->url); $this->url = $client->getUrl(); @@ -335,31 +341,67 @@ class Grabber * Try to find a predefined rule * * @access public - * @return mixed + * @return array */ public function getRules() { $hostname = parse_url($this->url, PHP_URL_HOST); - if ($hostname === false) { - return false; + if ($hostname !== false) { + + $files = $this->getRulesFileList($hostname); + + foreach ($this->getRulesFolders() as $folder) { + $rule = $this->loadRuleFile($folder, $files); + + if (! empty($rule)) { + return $rule; + } + } } - $files = array($hostname); + return array(); + } - if (substr($hostname, 0, 4) == 'www.') { - $files[] = substr($hostname, 4); + /** + * Get the list of possible rules file names for a given hostname + * + * @access public + * @param string $hostname Hostname + * @return array + */ + public function getRulesFileList($hostname) + { + $files = array($hostname); // subdomain.domain.tld + $parts = explode('.', $hostname); + $len = count($parts); + + if ($len > 2) { + $subdomain = array_shift($parts); + $files[] = implode('.', $parts); // domain.tld + $files[] = '.'.implode('.', $parts); // .domain.tld + $files[] = $subdomain; // subdomain + } + else if ($len === 2) { + $files[] = '.'.implode('.', $parts); // .domain.tld + $files[] = $parts[0]; // domain } - if (($pos = strpos($hostname, '.')) !== false) { - $files[] = substr($hostname, $pos); - $files[] = substr($hostname, $pos + 1); - $files[] = substr($hostname, 0, $pos); - } + return $files; + } + /** + * Load a rule file from the defined folder + * + * @access public + * @param string $folder Rule directory + * @param array $files List of possible file names + * @return array + */ + public function loadRuleFile($folder, array $files) + { foreach ($files as $file) { - - $filename = __DIR__.'/../Rules/'.$file.'.php'; + $filename = $folder.'/'.$file.'.php'; if (file_exists($filename)) { Logger::setMessage(get_called_class().' Load rule: '.$file); @@ -367,7 +409,24 @@ class Grabber } } - return false; + return array(); + } + + /** + * Get the list of folders that contains rules + * + * @access public + * @return array + */ + public function getRulesFolders() + { + $folders = array(__DIR__.'/../Rules'); + + if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) { + $folders[] = $this->config->getGrabberRulesFolder(); + } + + return $folders; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php index 1e539b1..72afe92 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php @@ -47,6 +47,8 @@ class Stream extends Client $headers[] = 'Authorization: Basic '.base64_encode($this->username.':'.$this->password); } + $headers = array_merge($headers, $this->request_headers); + return $headers; } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php index 181da03..1eaaeef 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Config/Config.php @@ -16,6 +16,7 @@ namespace PicoFeed\Config; * @method \PicoFeed\Config\Config setProxyPort(integer $value) * @method \PicoFeed\Config\Config setProxyUsername(string $value) * @method \PicoFeed\Config\Config setProxyPassword(string $value) + * @method \PicoFeed\Config\Config setGrabberRulesFolder(string $value) * @method \PicoFeed\Config\Config setGrabberTimeout(integer $value) * @method \PicoFeed\Config\Config setGrabberUserAgent(string $value) * @method \PicoFeed\Config\Config setParserHashAlgo(string $value) @@ -42,6 +43,7 @@ namespace PicoFeed\Config; * @method integer getProxyPort() * @method string getProxyUsername() * @method string getProxyPassword() + * @method string getGrabberRulesFolder() * @method integer getGrabberTimeout() * @method string getGrabberUserAgent() * @method string getParserHashAlgo() diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php index 0eb3f88..123f989 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php @@ -120,6 +120,7 @@ class Filter "\x10", "\xc3\x20", "", + "\xe2\x80\x9c\x08", ); foreach ($invalid_chars as $needle) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php index feda8c2..d0c2f8e 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/XmlParser.php @@ -55,7 +55,9 @@ class XmlParser */ private static function scanInput($input, Closure $callback) { - if (substr(php_sapi_name(), 0, 3) === 'fpm') { + $isRunningFpm = substr(php_sapi_name(), 0, 3) === 'fpm'; + + if ($isRunningFpm) { // If running with PHP-FPM and an entity is detected we refuse to parse the feed // @see https://bugs.php.net/bug.php?id=64938 @@ -64,8 +66,7 @@ class XmlParser } } else { - - libxml_disable_entity_loader(true); + $entityLoaderDisabled = libxml_disable_entity_loader(true); } libxml_use_internal_errors(true); @@ -81,6 +82,10 @@ class XmlParser } } + if ($isRunningFpm === false) { + libxml_disable_entity_loader($entityLoaderDisabled); + } + return $dom; } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php index ce2a950..125bb6a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php @@ -1,9 +1,13 @@ 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html', + 'test_url' => array( + 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html', + 'http://www.lemonde.fr/afrique/article/2015/04/06/plonge-dans-la-crise-l-angola-revele-son-vrai-visage_4610364_3212.html', + ), 'body' => array( '//div[@id="articleBody"]', + '//div[@itemprop="articleBody"]', ), 'strip' => array( ), -); \ No newline at end of file +); diff --git a/vendor/fguillot/picofeed/tests/Client/GrabberTest.php b/vendor/fguillot/picofeed/tests/Client/GrabberTest.php index 224dc14..9b057fc 100644 --- a/vendor/fguillot/picofeed/tests/Client/GrabberTest.php +++ b/vendor/fguillot/picofeed/tests/Client/GrabberTest.php @@ -4,9 +4,89 @@ namespace PicoFeed\Client; use PHPUnit_Framework_TestCase; use PicoFeed\Reader\Reader; +use PicoFeed\Config\Config; class GrabberTest extends PHPUnit_Framework_TestCase { + public function testGetRulesFolders() + { + // No custom path + $grabber = new Grabber(''); + $dirs = $grabber->getRulesFolders(); + $this->assertNotEmpty($dirs); + $this->assertCount(1, $dirs); + $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); + + // Custom path + $config = new Config; + $config->setGrabberRulesFolder('/foobar/rules'); + + $grabber = new Grabber(''); + $grabber->setConfig($config); + + $dirs = $grabber->getRulesFolders(); + + $this->assertNotEmpty($dirs); + $this->assertCount(2, $dirs); + $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); + $this->assertEquals('/foobar/rules', $dirs[1]); + + // No custom path with empty config object + $grabber = new Grabber(''); + $grabber->setConfig(new Config); + + $dirs = $grabber->getRulesFolders(); + + $this->assertNotEmpty($dirs); + $this->assertCount(1, $dirs); + $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); + } + + public function testLoadRuleFile() + { + $grabber = new Grabber(''); + $dirs = $grabber->getRulesFolders(); + + $this->assertEmpty($grabber->loadRuleFile($dirs[0], array('test'))); + $this->assertNotEmpty($grabber->loadRuleFile($dirs[0], array('test', 'xkcd.com'))); + } + + public function testGetRulesFileList() + { + $grabber = new Grabber(''); + $this->assertEquals( + array('www.google.ca', 'google.ca', '.google.ca', 'www'), + $grabber->getRulesFileList('www.google.ca') + ); + + $grabber = new Grabber(''); + $this->assertEquals( + array('google.ca', '.google.ca', 'google'), + $grabber->getRulesFileList('google.ca') + ); + + $grabber = new Grabber(''); + $this->assertEquals( + array('a.b.c.d', 'b.c.d', '.b.c.d', 'a'), + $grabber->getRulesFileList('a.b.c.d') + ); + + $grabber = new Grabber(''); + $this->assertEquals( + array('localhost'), + $grabber->getRulesFileList('localhost') + ); + } + + public function testGetRules() + { + $grabber = new Grabber('http://www.egscomics.com/index.php?id=1690'); + $this->assertNotEmpty($grabber->getRules()); + + $grabber = new Grabber('http://localhost/foobar'); + $this->assertEmpty($grabber->getRules()); + } + /** * @group online */ @@ -33,13 +113,6 @@ class GrabberTest extends PHPUnit_Framework_TestCase $this->assertTrue($grabber->parse()); } - public function testGetRules() - { - $grabber = new Grabber('http://www.egscomics.com/index.php?id=1690'); - $this->assertTrue(is_array($grabber->getRules())); - } - - // 01net.com - https://github.com/fguillot/miniflux/issues/267 /** * @group online */ diff --git a/vendor/fguillot/simple-validator/phpunit.xml b/vendor/fguillot/simple-validator/phpunit.xml new file mode 100644 index 0000000..23fb397 --- /dev/null +++ b/vendor/fguillot/simple-validator/phpunit.xml @@ -0,0 +1,7 @@ + + + + tests + + + diff --git a/vendor/fguillot/simple-validator/tests/AlphaNumericTest.php b/vendor/fguillot/simple-validator/tests/AlphaNumericTest.php index 4d076d4..8a0dca7 100644 --- a/vendor/fguillot/simple-validator/tests/AlphaNumericTest.php +++ b/vendor/fguillot/simple-validator/tests/AlphaNumericTest.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('toto' => 'toto+truc@machin.local'))); $this->assertTrue($v->execute(array('toto' => 'toto+truc@machin-bidule'))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/ExistsValidatorTest.php b/vendor/fguillot/simple-validator/tests/ExistsValidatorTest.php index 14afca1..fa1db82 100644 --- a/vendor/fguillot/simple-validator/tests/ExistsValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/ExistsValidatorTest.php @@ -1,9 +1,5 @@ assertTrue($v->execute(array('toto_id' => ''))); $this->assertTrue($v->execute(array('toto_id' => null))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/IntegerValidatorTest.php b/vendor/fguillot/simple-validator/tests/IntegerValidatorTest.php index 4a73783..05bfb3a 100644 --- a/vendor/fguillot/simple-validator/tests/IntegerValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/IntegerValidatorTest.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('toto' => 0))); $this->assertTrue($v->execute(array('toto' => '0'))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/LengthValidatorTest.php b/vendor/fguillot/simple-validator/tests/LengthValidatorTest.php index cec37a0..43920d0 100644 --- a/vendor/fguillot/simple-validator/tests/LengthValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/LengthValidatorTest.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('toto' => '-0.5'))); $this->assertTrue($v->execute(array('toto' => '12345678'))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/NotEqualsValidator.php b/vendor/fguillot/simple-validator/tests/NotEqualsValidator.php index 92a1f1e..047b299 100644 --- a/vendor/fguillot/simple-validator/tests/NotEqualsValidator.php +++ b/vendor/fguillot/simple-validator/tests/NotEqualsValidator.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('titi' => ''))); $this->assertTrue($v->execute(array('titi' => null))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/NumericValidatorTest.php b/vendor/fguillot/simple-validator/tests/NumericValidatorTest.php index fdea56b..ed5b0d1 100644 --- a/vendor/fguillot/simple-validator/tests/NumericValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/NumericValidatorTest.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('toto' => 123.4))); $this->assertTrue($v->execute(array('toto' => 0))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/RangeValidatorTest.php b/vendor/fguillot/simple-validator/tests/RangeValidatorTest.php index 358886b..c536a9b 100644 --- a/vendor/fguillot/simple-validator/tests/RangeValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/RangeValidatorTest.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('toto' => -1))); $this->assertTrue($v->execute(array('toto' => '0'))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/RequiredValidatorTest.php b/vendor/fguillot/simple-validator/tests/RequiredValidatorTest.php index 3ba5daf..d052c2f 100644 --- a/vendor/fguillot/simple-validator/tests/RequiredValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/RequiredValidatorTest.php @@ -1,8 +1,5 @@ assertTrue($v->execute(array('toto' => 0))); $this->assertTrue($v->execute(array('toto' => 'test'))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/UniqueValidatorTest.php b/vendor/fguillot/simple-validator/tests/UniqueValidatorTest.php index a5cc104..3693700 100644 --- a/vendor/fguillot/simple-validator/tests/UniqueValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/UniqueValidatorTest.php @@ -1,8 +1,5 @@ assertFalse($v->execute(array('toto' => 'truc', 'id' => '2'))); } -} \ No newline at end of file +} diff --git a/vendor/fguillot/simple-validator/tests/ValidatorTest.php b/vendor/fguillot/simple-validator/tests/ValidatorTest.php index 6765384..c9b7d4a 100644 --- a/vendor/fguillot/simple-validator/tests/ValidatorTest.php +++ b/vendor/fguillot/simple-validator/tests/ValidatorTest.php @@ -1,11 +1,5 @@ getErrors() ); } -} \ No newline at end of file +}