Add custom rules directory support
This commit is contained in:
parent
f346af2f38
commit
578f9cbf14
1
.gitignore
vendored
1
.gitignore
vendored
@ -45,3 +45,4 @@ Thumbs.db
|
||||
config.php
|
||||
!models/*
|
||||
!controllers/*
|
||||
rules/*.php
|
||||
|
@ -31,6 +31,8 @@ defined('AUTO_UPDATE_DOWNLOAD_DIRECTORY') or define('AUTO_UPDATE_DOWNLOAD_DIRECT
|
||||
defined('AUTO_UPDATE_ARCHIVE_DIRECTORY') or define('AUTO_UPDATE_ARCHIVE_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'archive');
|
||||
defined('AUTO_UPDATE_BACKUP_DIRECTORY') or define('AUTO_UPDATE_BACKUP_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'backup');
|
||||
|
||||
defined('RULES_DIRECTORY') or define('RULES_DIRECTORY', ROOT_DIRECTORY.DIRECTORY_SEPARATOR.'rules');
|
||||
|
||||
require __DIR__.'/check_setup.php';
|
||||
|
||||
PicoDb\Database::bootstrap('db', function() {
|
||||
|
@ -20,7 +20,7 @@ However the content grabber doesn't work very well with all websites.
|
||||
How to write a grabber rules file?
|
||||
----------------------------------
|
||||
|
||||
Add a PHP file to the directory `vendor/fguillot/picofeed/lib/PicoFeed/Rules`, the filename must be the domain name:
|
||||
Add a PHP file to the `rules` directory. The filename must be the domain name followed by the `.php` suffix:
|
||||
|
||||
Example with the BBC website, `www.bbc.co.uk.php`:
|
||||
|
||||
@ -55,6 +55,4 @@ Don't forget to send a pull request or a ticket to share your contribution with
|
||||
List of content grabber rules
|
||||
-----------------------------
|
||||
|
||||
[List of existing rules on the repository](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules)
|
||||
|
||||
If you want to add new rules, just open a ticket and I will do it.
|
||||
[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules).
|
||||
|
@ -17,6 +17,7 @@ function get_files_list($directory)
|
||||
'data',
|
||||
'scripts',
|
||||
'config.php',
|
||||
'rules',
|
||||
);
|
||||
|
||||
$it = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($directory), RecursiveIteratorIterator::SELF_FIRST);
|
||||
|
@ -21,7 +21,11 @@ function get_reader_config()
|
||||
// Client
|
||||
$config->setClientTimeout(HTTP_TIMEOUT);
|
||||
$config->setClientUserAgent(HTTP_USER_AGENT);
|
||||
|
||||
// Grabber
|
||||
$config->setGrabberTimeout(HTTP_TIMEOUT);
|
||||
$config->setGrabberUserAgent(HTTP_USER_AGENT);
|
||||
$config->setGrabberRulesFolder(RULES_DIRECTORY);
|
||||
|
||||
// Proxy
|
||||
$config->setProxyHostname(PROXY_HOSTNAME);
|
||||
|
1
rules/.htaccess
Normal file
1
rules/.htaccess
Normal file
@ -0,0 +1 @@
|
||||
Deny from all
|
2
vendor/autoload.php
vendored
2
vendor/autoload.php
vendored
@ -4,4 +4,4 @@
|
||||
|
||||
require_once __DIR__ . '/composer' . '/autoload_real.php';
|
||||
|
||||
return ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd::getLoader();
|
||||
return ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820::getLoader();
|
||||
|
10
vendor/composer/autoload_real.php
vendored
10
vendor/composer/autoload_real.php
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
// autoload_real.php @generated by Composer
|
||||
|
||||
class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd
|
||||
class ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820
|
||||
{
|
||||
private static $loader;
|
||||
|
||||
@ -19,9 +19,9 @@ class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd
|
||||
return self::$loader;
|
||||
}
|
||||
|
||||
spl_autoload_register(array('ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd', 'loadClassLoader'), true, true);
|
||||
spl_autoload_register(array('ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820', 'loadClassLoader'), true, true);
|
||||
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd', 'loadClassLoader'));
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820', 'loadClassLoader'));
|
||||
|
||||
$map = require __DIR__ . '/autoload_namespaces.php';
|
||||
foreach ($map as $namespace => $path) {
|
||||
@ -42,14 +42,14 @@ class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd
|
||||
|
||||
$includeFiles = require __DIR__ . '/autoload_files.php';
|
||||
foreach ($includeFiles as $file) {
|
||||
composerRequire177dcd3c68ed52652977fcc464bd77bd($file);
|
||||
composerRequirefa142d3c582de229e6ed8fd200703820($file);
|
||||
}
|
||||
|
||||
return $loader;
|
||||
}
|
||||
}
|
||||
|
||||
function composerRequire177dcd3c68ed52652977fcc464bd77bd($file)
|
||||
function composerRequirefa142d3c582de229e6ed8fd200703820($file)
|
||||
{
|
||||
require $file;
|
||||
}
|
||||
|
24
vendor/composer/installed.json
vendored
24
vendor/composer/installed.json
vendored
@ -84,18 +84,18 @@
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/simpleValidator.git",
|
||||
"reference": "5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7"
|
||||
"reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7",
|
||||
"reference": "5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7",
|
||||
"url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/41655dc7b9224395f5bb3b5623f6e428fe6d64e8",
|
||||
"reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"time": "2015-02-14 21:04:14",
|
||||
"time": "2015-04-05 21:44:06",
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
@ -123,18 +123,18 @@
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/JsonRPC.git",
|
||||
"reference": "d0feab084422fa937da10e3551196b1c6fdf6918"
|
||||
"reference": "29d63a09ecd450d5e29fef74f687aab221055910"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/d0feab084422fa937da10e3551196b1c6fdf6918",
|
||||
"reference": "d0feab084422fa937da10e3551196b1c6fdf6918",
|
||||
"url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/29d63a09ecd450d5e29fef74f687aab221055910",
|
||||
"reference": "29d63a09ecd450d5e29fef74f687aab221055910",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"php": ">=5.3.0"
|
||||
},
|
||||
"time": "2015-03-25 23:55:18",
|
||||
"time": "2015-04-05 21:49:38",
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
@ -162,12 +162,12 @@
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "7c28753d5936ba635435a8e0e941dcabee67b243"
|
||||
"reference": "3a0dce6bd3a62566c5f8414f7884f959753762f7"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/7c28753d5936ba635435a8e0e941dcabee67b243",
|
||||
"reference": "7c28753d5936ba635435a8e0e941dcabee67b243",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/3a0dce6bd3a62566c5f8414f7884f959753762f7",
|
||||
"reference": "3a0dce6bd3a62566c5f8414f7884f959753762f7",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -181,7 +181,7 @@
|
||||
"suggest": {
|
||||
"ext-curl": "PicoFeed will use cURL if present"
|
||||
},
|
||||
"time": "2015-03-30 23:34:59",
|
||||
"time": "2015-04-10 23:28:18",
|
||||
"bin": [
|
||||
"picofeed"
|
||||
],
|
||||
|
2
vendor/fguillot/json-rpc/.gitignore
vendored
Normal file
2
vendor/fguillot/json-rpc/.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.DS_Store
|
||||
vendor/
|
7
vendor/fguillot/json-rpc/phpunit.xml
vendored
Normal file
7
vendor/fguillot/json-rpc/phpunit.xml
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
<phpunit bootstrap="./vendor/autoload.php">
|
||||
<testsuites>
|
||||
<testsuite name="JsonRPC">
|
||||
<directory>tests</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
</phpunit>
|
@ -1,7 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/JsonRPC/Client.php';
|
||||
|
||||
use JsonRPC\Client;
|
||||
|
||||
class ClientTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,7 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/JsonRPC/Server.php';
|
||||
|
||||
use JsonRPC\Server;
|
||||
|
||||
class A
|
||||
|
@ -1,7 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/JsonRPC/Server.php';
|
||||
|
||||
use JsonRPC\Server;
|
||||
|
||||
class ServerProtocolTest extends PHPUnit_Framework_TestCase
|
||||
|
10
vendor/fguillot/picofeed/docs/config.markdown
vendored
10
vendor/fguillot/picofeed/docs/config.markdown
vendored
@ -126,6 +126,16 @@ $config->setGrabberTimeout(20); // 20 seconds
|
||||
$config->setGrabberUserAgent('My content scraper');
|
||||
```
|
||||
|
||||
### Add a rules folder
|
||||
|
||||
- Method name: `setGrabberRulesFolder()`
|
||||
- Default value: `null`
|
||||
- Argument value: string
|
||||
|
||||
```php
|
||||
$config->setGrabberRulesFolder('/path/to/my/grabber/rules');
|
||||
```
|
||||
|
||||
Parser
|
||||
------
|
||||
|
||||
|
@ -37,6 +37,14 @@ abstract class Client
|
||||
*/
|
||||
private $encoding = '';
|
||||
|
||||
/**
|
||||
* HTTP request headers
|
||||
*
|
||||
* @access protected
|
||||
* @var array
|
||||
*/
|
||||
protected $request_headers = array();
|
||||
|
||||
/**
|
||||
* HTTP Etag header
|
||||
*
|
||||
@ -193,6 +201,16 @@ abstract class Client
|
||||
throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
|
||||
}
|
||||
|
||||
/**
 * Define the extra HTTP headers sent with the request
 *
 * The given list replaces any headers set previously; it is not
 * appended to them.
 *
 * @access public
 * @param  array   $headers   Raw header lines (presumably "Name: value" strings - confirm against callers)
 */
public function setHeaders($headers)
{
    $this->request_headers = $headers;
}
|
||||
|
||||
/**
|
||||
* Perform the HTTP request
|
||||
*
|
||||
@ -645,8 +663,8 @@ abstract class Client
|
||||
public function setConfig($config)
|
||||
{
|
||||
if ($config !== null) {
|
||||
$this->setTimeout($config->getGrabberTimeout());
|
||||
$this->setUserAgent($config->getGrabberUserAgent());
|
||||
$this->setTimeout($config->getClientTimeout());
|
||||
$this->setUserAgent($config->getClientUserAgent());
|
||||
$this->setMaxRedirections($config->getMaxRedirections());
|
||||
$this->setMaxBodySize($config->getMaxBodySize());
|
||||
$this->setProxyHostname($config->getProxyHostname());
|
||||
|
@ -34,7 +34,7 @@ class Curl extends Client
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $headers = array();
|
||||
private $response_headers = array();
|
||||
|
||||
/**
|
||||
* Counter on the number of header received
|
||||
@ -42,7 +42,7 @@ class Curl extends Client
|
||||
* @access private
|
||||
* @var integer
|
||||
*/
|
||||
private $headers_counter = 0;
|
||||
private $response_headers_count = 0;
|
||||
|
||||
/**
|
||||
* cURL callback to read the HTTP body
|
||||
@ -81,15 +81,15 @@ class Curl extends Client
|
||||
$length = strlen($buffer);
|
||||
|
||||
if ($buffer === "\r\n") {
|
||||
$this->headers_counter++;
|
||||
$this->response_headers_count++;
|
||||
}
|
||||
else {
|
||||
|
||||
if (! isset($this->headers[$this->headers_counter])) {
|
||||
$this->headers[$this->headers_counter] = '';
|
||||
if (! isset($this->response_headers[$this->response_headers_count])) {
|
||||
$this->response_headers[$this->response_headers_count] = '';
|
||||
}
|
||||
|
||||
$this->headers[$this->headers_counter] .= $buffer;
|
||||
$this->response_headers[$this->response_headers_count] .= $buffer;
|
||||
}
|
||||
|
||||
return $length;
|
||||
@ -153,6 +153,8 @@ class Curl extends Client
|
||||
$headers[] = 'If-Modified-Since: '.$this->last_modified;
|
||||
}
|
||||
|
||||
$headers = array_merge($headers, $this->request_headers);
|
||||
|
||||
return $headers;
|
||||
}
|
||||
|
||||
@ -302,7 +304,7 @@ class Curl extends Client
|
||||
{
|
||||
$this->executeContext();
|
||||
|
||||
list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->headers[$this->headers_counter - 1]));
|
||||
list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->response_headers[$this->response_headers_count - 1]));
|
||||
|
||||
// When restricted with open_basedir
|
||||
if ($this->needToHandleRedirection($follow_location, $status)) {
|
||||
@ -343,8 +345,8 @@ class Curl extends Client
|
||||
$this->url = Url::resolve($location, $this->url);
|
||||
$this->body = '';
|
||||
$this->body_length = 0;
|
||||
$this->headers = array();
|
||||
$this->headers_counter = 0;
|
||||
$this->response_headers = array();
|
||||
$this->response_headers_count = 0;
|
||||
|
||||
while (true) {
|
||||
|
||||
@ -360,8 +362,8 @@ class Curl extends Client
|
||||
$this->url = Url::resolve($result['headers']['Location'], $this->url);
|
||||
$this->body = '';
|
||||
$this->body_length = 0;
|
||||
$this->headers = array();
|
||||
$this->headers_counter = 0;
|
||||
$this->response_headers = array();
|
||||
$this->response_headers_count = 0;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
|
@ -284,7 +284,7 @@ class Grabber
|
||||
Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes');
|
||||
$rules = $this->getRules();
|
||||
|
||||
if (is_array($rules)) {
|
||||
if (! empty($rules)) {
|
||||
Logger::setMessage(get_called_class().': Parse content with rules');
|
||||
$this->parseContentWithRules($rules);
|
||||
}
|
||||
@ -316,7 +316,13 @@ class Grabber
|
||||
try {
|
||||
|
||||
$client = Client::getInstance();
|
||||
|
||||
if ($this->config !== null) {
|
||||
$client->setConfig($this->config);
|
||||
$client->setTimeout($this->config->getGrabberTimeout());
|
||||
$client->setUserAgent($this->config->getGrabberUserAgent());
|
||||
}
|
||||
|
||||
$client->execute($this->url);
|
||||
|
||||
$this->url = $client->getUrl();
|
||||
@ -335,31 +341,67 @@ class Grabber
|
||||
* Try to find a predefined rule
|
||||
*
|
||||
* @access public
|
||||
* @return mixed
|
||||
* @return array
|
||||
*/
|
||||
public function getRules()
|
||||
{
|
||||
$hostname = parse_url($this->url, PHP_URL_HOST);
|
||||
|
||||
if ($hostname === false) {
|
||||
return false;
|
||||
if ($hostname !== false) {
|
||||
|
||||
$files = $this->getRulesFileList($hostname);
|
||||
|
||||
foreach ($this->getRulesFolders() as $folder) {
|
||||
$rule = $this->loadRuleFile($folder, $files);
|
||||
|
||||
if (! empty($rule)) {
|
||||
return $rule;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$files = array($hostname);
|
||||
|
||||
if (substr($hostname, 0, 4) == 'www.') {
|
||||
$files[] = substr($hostname, 4);
|
||||
return array();
|
||||
}
|
||||
|
||||
if (($pos = strpos($hostname, '.')) !== false) {
|
||||
$files[] = substr($hostname, $pos);
|
||||
$files[] = substr($hostname, $pos + 1);
|
||||
$files[] = substr($hostname, 0, $pos);
|
||||
/**
 * Get the list of possible rules file names for a given hostname
 *
 * Candidates are ordered from the most specific to the least specific:
 *
 * - 3 labels or more ("www.example.org"): the full hostname, the hostname
 *   without its first label, the same with a leading dot, then the first
 *   label alone
 * - 2 labels ("example.org"): the full hostname, the same with a leading
 *   dot, then the first label alone
 * - 1 label ("localhost"): the hostname only
 *
 * @access public
 * @param  string  $hostname  Hostname
 * @return array              Candidate file names without extension, empty when no hostname is given
 */
public function getRulesFileList($hostname)
{
    // parse_url() gives null when the URL has no host component; without
    // this guard an empty candidate would be returned and later looked up
    // as a file literally named ".php"
    if (! is_string($hostname) || $hostname === '') {
        return array();
    }

    $files = array($hostname);              // subdomain.domain.tld
    $parts = explode('.', $hostname);
    $len = count($parts);

    if ($len > 2) {
        $subdomain = array_shift($parts);
        $domain = implode('.', $parts);

        $files[] = $domain;                 // domain.tld
        $files[] = '.'.$domain;             // .domain.tld
        $files[] = $subdomain;              // subdomain
    }
    else if ($len === 2) {
        $files[] = '.'.$hostname;           // .domain.tld
        $files[] = $parts[0];               // domain
    }

    return $files;
}
|
||||
|
||||
/**
|
||||
* Load a rule file from the defined folder
|
||||
*
|
||||
* @access public
|
||||
* @param string $folder Rule directory
|
||||
* @param array $files List of possible file names
|
||||
* @return array
|
||||
*/
|
||||
public function loadRuleFile($folder, array $files)
|
||||
{
|
||||
foreach ($files as $file) {
|
||||
|
||||
$filename = __DIR__.'/../Rules/'.$file.'.php';
|
||||
$filename = $folder.'/'.$file.'.php';
|
||||
|
||||
if (file_exists($filename)) {
|
||||
Logger::setMessage(get_called_class().' Load rule: '.$file);
|
||||
@ -367,7 +409,24 @@ class Grabber
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return array();
|
||||
}
|
||||
|
||||
/**
 * Get the list of folders that contain rules
 *
 * The folder bundled with the library always comes first; the folder
 * defined in the config object, when there is one, is listed after it.
 *
 * @access public
 * @return array
 */
public function getRulesFolders()
{
    $bundled = __DIR__.'/../Rules';

    // No config object, or no custom folder defined in it
    if ($this->config === null || $this->config->getGrabberRulesFolder() === null) {
        return array($bundled);
    }

    return array($bundled, $this->config->getGrabberRulesFolder());
}
|
||||
|
||||
/**
|
||||
|
@ -47,6 +47,8 @@ class Stream extends Client
|
||||
$headers[] = 'Authorization: Basic '.base64_encode($this->username.':'.$this->password);
|
||||
}
|
||||
|
||||
$headers = array_merge($headers, $this->request_headers);
|
||||
|
||||
return $headers;
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ namespace PicoFeed\Config;
|
||||
* @method \PicoFeed\Config\Config setProxyPort(integer $value)
|
||||
* @method \PicoFeed\Config\Config setProxyUsername(string $value)
|
||||
* @method \PicoFeed\Config\Config setProxyPassword(string $value)
|
||||
* @method \PicoFeed\Config\Config setGrabberRulesFolder(string $value)
|
||||
* @method \PicoFeed\Config\Config setGrabberTimeout(integer $value)
|
||||
* @method \PicoFeed\Config\Config setGrabberUserAgent(string $value)
|
||||
* @method \PicoFeed\Config\Config setParserHashAlgo(string $value)
|
||||
@ -42,6 +43,7 @@ namespace PicoFeed\Config;
|
||||
* @method integer getProxyPort()
|
||||
* @method string getProxyUsername()
|
||||
* @method string getProxyPassword()
|
||||
* @method string getGrabberRulesFolder()
|
||||
* @method integer getGrabberTimeout()
|
||||
* @method string getGrabberUserAgent()
|
||||
* @method string getParserHashAlgo()
|
||||
|
@ -120,6 +120,7 @@ class Filter
|
||||
"\x10",
|
||||
"\xc3\x20",
|
||||
"",
|
||||
"\xe2\x80\x9c\x08",
|
||||
);
|
||||
|
||||
foreach ($invalid_chars as $needle) {
|
||||
|
@ -55,7 +55,9 @@ class XmlParser
|
||||
*/
|
||||
private static function scanInput($input, Closure $callback)
|
||||
{
|
||||
if (substr(php_sapi_name(), 0, 3) === 'fpm') {
|
||||
$isRunningFpm = substr(php_sapi_name(), 0, 3) === 'fpm';
|
||||
|
||||
if ($isRunningFpm) {
|
||||
|
||||
// If running with PHP-FPM and an entity is detected we refuse to parse the feed
|
||||
// @see https://bugs.php.net/bug.php?id=64938
|
||||
@ -64,8 +66,7 @@ class XmlParser
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
libxml_disable_entity_loader(true);
|
||||
$entityLoaderDisabled = libxml_disable_entity_loader(true);
|
||||
}
|
||||
|
||||
libxml_use_internal_errors(true);
|
||||
@ -81,6 +82,10 @@ class XmlParser
|
||||
}
|
||||
}
|
||||
|
||||
if ($isRunningFpm === false) {
|
||||
libxml_disable_entity_loader($entityLoaderDisabled);
|
||||
}
|
||||
|
||||
return $dom;
|
||||
}
|
||||
|
||||
|
@ -1,8 +1,12 @@
|
||||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html',
|
||||
'test_url' => array(
|
||||
'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html',
|
||||
'http://www.lemonde.fr/afrique/article/2015/04/06/plonge-dans-la-crise-l-angola-revele-son-vrai-visage_4610364_3212.html',
|
||||
),
|
||||
'body' => array(
|
||||
'//div[@id="articleBody"]',
|
||||
'//div[@itemprop="articleBody"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
|
@ -4,9 +4,89 @@ namespace PicoFeed\Client;
|
||||
|
||||
use PHPUnit_Framework_TestCase;
|
||||
use PicoFeed\Reader\Reader;
|
||||
use PicoFeed\Config\Config;
|
||||
|
||||
class GrabberTest extends PHPUnit_Framework_TestCase
|
||||
{
|
||||
public function testGetRulesFolders()
{
    $bundled_suffix = '/../Rules';

    // Grabber without any config: only the bundled folder is expected
    $grabber = new Grabber('');
    $folders = $grabber->getRulesFolders();

    $this->assertNotEmpty($folders);
    $this->assertCount(1, $folders);
    $this->assertTrue(strpos($folders[0], $bundled_suffix) !== false);

    // Config with a custom folder: it is expected after the bundled one
    $custom_config = new Config;
    $custom_config->setGrabberRulesFolder('/foobar/rules');

    $grabber = new Grabber('');
    $grabber->setConfig($custom_config);
    $folders = $grabber->getRulesFolders();

    $this->assertNotEmpty($folders);
    $this->assertCount(2, $folders);
    $this->assertTrue(strpos($folders[0], $bundled_suffix) !== false);
    $this->assertEquals('/foobar/rules', $folders[1]);

    // Config object without a custom folder: same result as no config
    $grabber = new Grabber('');
    $grabber->setConfig(new Config);
    $folders = $grabber->getRulesFolders();

    $this->assertNotEmpty($folders);
    $this->assertCount(1, $folders);
    $this->assertTrue(strpos($folders[0], $bundled_suffix) !== false);
}
|
||||
|
||||
public function testLoadRuleFile()
{
    $grabber = new Grabber('');
    $folders = $grabber->getRulesFolders();
    $bundled = $folders[0];

    // No candidate is expected to match a rules file
    $this->assertEmpty($grabber->loadRuleFile($bundled, array('test')));

    // The second candidate is expected to match a bundled rules file
    $this->assertNotEmpty($grabber->loadRuleFile($bundled, array('test', 'xkcd.com')));
}
|
||||
|
||||
public function testGetRulesFileList()
{
    // Hostname => expected candidate file names, in order
    $expectations = array(
        'www.google.ca' => array('www.google.ca', 'google.ca', '.google.ca', 'www'),
        'google.ca' => array('google.ca', '.google.ca', 'google'),
        'a.b.c.d' => array('a.b.c.d', 'b.c.d', '.b.c.d', 'a'),
        'localhost' => array('localhost'),
    );

    foreach ($expectations as $hostname => $expected_files) {
        $grabber = new Grabber('');
        $this->assertEquals($expected_files, $grabber->getRulesFileList($hostname));
    }
}
|
||||
|
||||
public function testGetRules()
{
    // Hostname for which a rules file is expected to exist
    $with_rules = new Grabber('http://www.egscomics.com/index.php?id=1690');
    $this->assertNotEmpty($with_rules->getRules());

    // Hostname for which no rules file is expected
    $without_rules = new Grabber('http://localhost/foobar');
    $this->assertEmpty($without_rules->getRules());
}
|
||||
|
||||
/**
|
||||
* @group online
|
||||
*/
|
||||
@ -33,13 +113,6 @@ class GrabberTest extends PHPUnit_Framework_TestCase
|
||||
$this->assertTrue($grabber->parse());
|
||||
}
|
||||
|
||||
public function testGetRules()
|
||||
{
|
||||
$grabber = new Grabber('http://www.egscomics.com/index.php?id=1690');
|
||||
$this->assertTrue(is_array($grabber->getRules()));
|
||||
}
|
||||
|
||||
// 01net.com - https://github.com/fguillot/miniflux/issues/267
|
||||
/**
|
||||
* @group online
|
||||
*/
|
||||
|
7
vendor/fguillot/simple-validator/phpunit.xml
vendored
Normal file
7
vendor/fguillot/simple-validator/phpunit.xml
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
<phpunit bootstrap="./vendor/autoload.php">
|
||||
<testsuites>
|
||||
<testsuite name="simpleValidator">
|
||||
<directory>tests</directory>
|
||||
</testsuite>
|
||||
</testsuites>
|
||||
</phpunit>
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/AlphaNumeric.php';
|
||||
|
||||
use SimpleValidator\Validators\AlphaNumeric;
|
||||
|
||||
class AlphaNumericValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Email.php';
|
||||
|
||||
use SimpleValidator\Validators\Email;
|
||||
|
||||
class EmailValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,9 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Unique.php';
|
||||
require_once 'src/SimpleValidator/Validators/Exists.php';
|
||||
|
||||
use SimpleValidator\Validators\Exists;
|
||||
|
||||
class ExistsValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Integer.php';
|
||||
|
||||
use SimpleValidator\Validators\Integer;
|
||||
|
||||
class IntegerValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Length.php';
|
||||
|
||||
use SimpleValidator\Validators\Length;
|
||||
|
||||
class LengthValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Email.php';
|
||||
|
||||
use SimpleValidator\Validators\NotEquals;
|
||||
|
||||
class EmailValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Numeric.php';
|
||||
|
||||
use SimpleValidator\Validators\Numeric;
|
||||
|
||||
class NumericValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Range.php';
|
||||
|
||||
use SimpleValidator\Validators\Range;
|
||||
|
||||
class RangeValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Required.php';
|
||||
|
||||
use SimpleValidator\Validators\Required;
|
||||
|
||||
class RequiredValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,8 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validators/Unique.php';
|
||||
|
||||
use SimpleValidator\Validators\Unique;
|
||||
|
||||
class UniqueValidatorTest extends PHPUnit_Framework_TestCase
|
||||
|
@ -1,11 +1,5 @@
|
||||
<?php
|
||||
|
||||
require_once 'src/SimpleValidator/Base.php';
|
||||
require_once 'src/SimpleValidator/Validator.php';
|
||||
require_once 'src/SimpleValidator/Validators/Integer.php';
|
||||
require_once 'src/SimpleValidator/Validators/Numeric.php';
|
||||
require_once 'src/SimpleValidator/Validators/Required.php';
|
||||
|
||||
use SimpleValidator\Validator;
|
||||
use SimpleValidator\Validators;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user