Update picofeed due to bug in rule file
This commit is contained in:
parent
9b08fc9160
commit
871de74279
@ -14,7 +14,7 @@
|
||||
"fguillot/simple-validator": "v1.0.0",
|
||||
"fguillot/json-rpc": "v1.0.2",
|
||||
"fguillot/picodb": "v1.0.2",
|
||||
"fguillot/picofeed": "v0.1.12"
|
||||
"fguillot/picofeed": "v0.1.13"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "4.8.3",
|
||||
|
2
vendor/autoload.php
vendored
2
vendor/autoload.php
vendored
@ -4,4 +4,4 @@
|
||||
|
||||
require_once __DIR__ . '/composer' . '/autoload_real.php';
|
||||
|
||||
return ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5::getLoader();
|
||||
return ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268::getLoader();
|
||||
|
10
vendor/composer/autoload_real.php
vendored
10
vendor/composer/autoload_real.php
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
// autoload_real.php @generated by Composer
|
||||
|
||||
class ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5
|
||||
class ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268
|
||||
{
|
||||
private static $loader;
|
||||
|
||||
@ -19,9 +19,9 @@ class ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5
|
||||
return self::$loader;
|
||||
}
|
||||
|
||||
spl_autoload_register(array('ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5', 'loadClassLoader'), true, true);
|
||||
spl_autoload_register(array('ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268', 'loadClassLoader'), true, true);
|
||||
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5', 'loadClassLoader'));
|
||||
spl_autoload_unregister(array('ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268', 'loadClassLoader'));
|
||||
|
||||
$map = require __DIR__ . '/autoload_namespaces.php';
|
||||
foreach ($map as $namespace => $path) {
|
||||
@ -42,14 +42,14 @@ class ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5
|
||||
|
||||
$includeFiles = require __DIR__ . '/autoload_files.php';
|
||||
foreach ($includeFiles as $file) {
|
||||
composerRequire8f528b09bdfd7d4a627bbcebe3b2eef5($file);
|
||||
composerRequireb324395d2df28954aca677ed0d0d2268($file);
|
||||
}
|
||||
|
||||
return $loader;
|
||||
}
|
||||
}
|
||||
|
||||
function composerRequire8f528b09bdfd7d4a627bbcebe3b2eef5($file)
|
||||
function composerRequireb324395d2df28954aca677ed0d0d2268($file)
|
||||
{
|
||||
require $file;
|
||||
}
|
||||
|
12
vendor/composer/installed.json
vendored
12
vendor/composer/installed.json
vendored
@ -116,17 +116,17 @@
|
||||
},
|
||||
{
|
||||
"name": "fguillot/picofeed",
|
||||
"version": "v0.1.12",
|
||||
"version_normalized": "0.1.12.0",
|
||||
"version": "v0.1.13",
|
||||
"version_normalized": "0.1.13.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "f7f5e792baf09e6e795f4dd9bb56d7d588d67735"
|
||||
"reference": "84d9ee64df8596153ba080bd2436b333507aadba"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/f7f5e792baf09e6e795f4dd9bb56d7d588d67735",
|
||||
"reference": "f7f5e792baf09e6e795f4dd9bb56d7d588d67735",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/84d9ee64df8596153ba080bd2436b333507aadba",
|
||||
"reference": "84d9ee64df8596153ba080bd2436b333507aadba",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
@ -140,7 +140,7 @@
|
||||
"suggest": {
|
||||
"ext-curl": "PicoFeed will use cURL if present"
|
||||
},
|
||||
"time": "2015-10-18 21:22:40",
|
||||
"time": "2015-10-20 01:48:56",
|
||||
"bin": [
|
||||
"picofeed"
|
||||
],
|
||||
|
@ -4,218 +4,197 @@ namespace PicoFeed\Client;
|
||||
|
||||
use LogicException;
|
||||
use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Config\Config;
|
||||
|
||||
/**
|
||||
* Client class
|
||||
* Client class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package client
|
||||
*/
|
||||
abstract class Client
|
||||
{
|
||||
/**
|
||||
* Flag that say if the resource have been modified
|
||||
* Flag that say if the resource have been modified.
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $is_modified = true;
|
||||
|
||||
/**
|
||||
* HTTP Content-Type
|
||||
* HTTP Content-Type.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $content_type = '';
|
||||
|
||||
/**
|
||||
* HTTP encoding
|
||||
* HTTP encoding.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $encoding = '';
|
||||
|
||||
/**
|
||||
* HTTP request headers
|
||||
* HTTP request headers.
|
||||
*
|
||||
* @access protected
|
||||
* @var array
|
||||
*/
|
||||
protected $request_headers = array();
|
||||
|
||||
/**
|
||||
* HTTP Etag header
|
||||
* HTTP Etag header.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $etag = '';
|
||||
|
||||
/**
|
||||
* HTTP Last-Modified header
|
||||
* HTTP Last-Modified header.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $last_modified = '';
|
||||
|
||||
/**
|
||||
* Proxy hostname
|
||||
* Proxy hostname.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $proxy_hostname = '';
|
||||
|
||||
/**
|
||||
* Proxy port
|
||||
* Proxy port.
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
protected $proxy_port = 3128;
|
||||
|
||||
/**
|
||||
* Proxy username
|
||||
* Proxy username.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $proxy_username = '';
|
||||
|
||||
/**
|
||||
* Proxy password
|
||||
* Proxy password.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $proxy_password = '';
|
||||
|
||||
/**
|
||||
* Basic auth username
|
||||
* Basic auth username.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $username = '';
|
||||
|
||||
/**
|
||||
* Basic auth password
|
||||
* Basic auth password.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $password = '';
|
||||
|
||||
/**
|
||||
* Client connection timeout
|
||||
* Client connection timeout.
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
protected $timeout = 10;
|
||||
|
||||
/**
|
||||
* User-agent
|
||||
* User-agent.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)';
|
||||
|
||||
/**
|
||||
* Real URL used (can be changed after a HTTP redirect)
|
||||
* Real URL used (can be changed after a HTTP redirect).
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $url = '';
|
||||
|
||||
/**
|
||||
* Page/Feed content
|
||||
* Page/Feed content.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $content = '';
|
||||
|
||||
/**
|
||||
* Number maximum of HTTP redirections to avoid infinite loops
|
||||
* Number maximum of HTTP redirections to avoid infinite loops.
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
protected $max_redirects = 5;
|
||||
|
||||
/**
|
||||
* Maximum size of the HTTP body response
|
||||
* Maximum size of the HTTP body response.
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
protected $max_body_size = 2097152; // 2MB
|
||||
|
||||
/**
|
||||
* HTTP response status code
|
||||
* HTTP response status code.
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
protected $status_code = 0;
|
||||
|
||||
/**
|
||||
* Enables direct passthrough to requesting client
|
||||
* Enables direct passthrough to requesting client.
|
||||
*
|
||||
* @access protected
|
||||
* @var bool
|
||||
*/
|
||||
protected $passthrough = false;
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
* Do the HTTP request.
|
||||
*
|
||||
* @abstract
|
||||
* @access public
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
abstract public function doRequest();
|
||||
|
||||
/**
|
||||
* Get client instance: curl or stream driver
|
||||
* Get client instance: curl or stream driver.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public static function getInstance()
|
||||
{
|
||||
if (function_exists('curl_init')) {
|
||||
return new Curl;
|
||||
}
|
||||
else if (ini_get('allow_url_fopen')) {
|
||||
return new Stream;
|
||||
return new Curl();
|
||||
} elseif (ini_get('allow_url_fopen')) {
|
||||
return new Stream();
|
||||
}
|
||||
|
||||
throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
|
||||
}
|
||||
|
||||
/**
|
||||
* Add HTTP Header to the request
|
||||
* Add HTTP Header to the request.
|
||||
*
|
||||
* @access public
|
||||
* @param array $headers
|
||||
*/
|
||||
public function setHeaders($headers) {
|
||||
public function setHeaders($headers)
|
||||
{
|
||||
$this->request_headers = $headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the HTTP request
|
||||
* Perform the HTTP request.
|
||||
*
|
||||
* @param string $url URL
|
||||
*
|
||||
* @access public
|
||||
* @param string $url URL
|
||||
* @return Client
|
||||
*/
|
||||
public function execute($url = '')
|
||||
@ -239,17 +218,15 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle not modified response
|
||||
* Handle not modified response.
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function handleNotModifiedResponse(array $response)
|
||||
{
|
||||
if ($response['status'] == 304) {
|
||||
$this->is_modified = false;
|
||||
}
|
||||
else if ($response['status'] == 200) {
|
||||
} elseif ($response['status'] == 200) {
|
||||
$this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
|
||||
$this->etag = $this->getHeader($response, 'ETag');
|
||||
$this->last_modified = $this->getHeader($response, 'Last-Modified');
|
||||
@ -261,10 +238,9 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle not found response
|
||||
* Handle not found response.
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function handleNotFoundResponse(array $response)
|
||||
{
|
||||
@ -274,10 +250,9 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle normal response
|
||||
* Handle normal response.
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function handleNormalResponse(array $response)
|
||||
{
|
||||
@ -289,19 +264,19 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a request has been modified according to the parameters
|
||||
* Check if a request has been modified according to the parameters.
|
||||
*
|
||||
* @access public
|
||||
* @param array $response
|
||||
* @param string $etag
|
||||
* @param string $lastModified
|
||||
* @return boolean
|
||||
* @param array $response
|
||||
* @param string $etag
|
||||
* @param string $lastModified
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
private function hasBeenModified($response, $etag, $lastModified)
|
||||
{
|
||||
$headers = array(
|
||||
'Etag' => $etag,
|
||||
'Last-Modified' => $lastModified
|
||||
'Last-Modified' => $lastModified,
|
||||
);
|
||||
|
||||
// Compare the values for each header that is present
|
||||
@ -311,7 +286,7 @@ abstract class Client
|
||||
if ($response['headers'][$key] !== $value) {
|
||||
return true;
|
||||
}
|
||||
$presentCacheHeaderCount++;
|
||||
++$presentCacheHeaderCount;
|
||||
}
|
||||
}
|
||||
|
||||
@ -325,10 +300,10 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Find content type from response headers
|
||||
* Find content type from response headers.
|
||||
*
|
||||
* @param array $response Client response
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
* @return string
|
||||
*/
|
||||
public function findContentType(array $response)
|
||||
@ -337,23 +312,23 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Find charset from response headers
|
||||
* Find charset from response headers.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function findCharset()
|
||||
{
|
||||
$result = explode('charset=', $this->content_type);
|
||||
|
||||
return isset($result[1]) ? $result[1] : '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get header value from a client response
|
||||
* Get header value from a client response.
|
||||
*
|
||||
* @param array $response Client response
|
||||
* @param string $header Header name
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
* @param string $header Header name
|
||||
* @return string
|
||||
*/
|
||||
public function getHeader(array $response, $header)
|
||||
@ -362,22 +337,22 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the Last-Modified HTTP header
|
||||
* Set the Last-Modified HTTP header.
|
||||
*
|
||||
* @param string $last_modified Header value
|
||||
*
|
||||
* @access public
|
||||
* @param string $last_modified Header value
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setLastModified($last_modified)
|
||||
{
|
||||
$this->last_modified = $last_modified;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value of the Last-Modified HTTP header
|
||||
* Get the value of the Last-Modified HTTP header.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getLastModified()
|
||||
@ -386,22 +361,22 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value of the Etag HTTP header
|
||||
* Set the value of the Etag HTTP header.
|
||||
*
|
||||
* @param string $etag Etag HTTP header value
|
||||
*
|
||||
* @access public
|
||||
* @param string $etag Etag HTTP header value
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setEtag($etag)
|
||||
{
|
||||
$this->etag = $etag;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Etag HTTP header value
|
||||
* Get the Etag HTTP header value.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getEtag()
|
||||
@ -410,9 +385,8 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the final url value
|
||||
* Get the final url value.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getUrl()
|
||||
@ -421,23 +395,22 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the url
|
||||
* Set the url.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setUrl($url)
|
||||
{
|
||||
$this->url = $url;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the HTTP response status code
|
||||
* Get the HTTP response status code.
|
||||
*
|
||||
* @access public
|
||||
* @return integer
|
||||
* @return int
|
||||
*/
|
||||
public function getStatusCode()
|
||||
{
|
||||
@ -445,9 +418,8 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the body of the HTTP response
|
||||
* Get the body of the HTTP response.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getContent()
|
||||
@ -456,9 +428,8 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the content type value from HTTP headers
|
||||
* Get the content type value from HTTP headers.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getContentType()
|
||||
@ -467,9 +438,8 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the encoding value from HTTP headers
|
||||
* Get the encoding value from HTTP headers.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getEncoding()
|
||||
@ -478,9 +448,8 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the remote resource has changed
|
||||
* Return true if the remote resource has changed.
|
||||
*
|
||||
* @access public
|
||||
* @return bool
|
||||
*/
|
||||
public function isModified()
|
||||
@ -489,9 +458,8 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* return true if passthrough mode is enabled
|
||||
* return true if passthrough mode is enabled.
|
||||
*
|
||||
* @access public
|
||||
* @return bool
|
||||
*/
|
||||
public function isPassthroughEnabled()
|
||||
@ -500,167 +468,177 @@ abstract class Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Set connection timeout
|
||||
* Set connection timeout.
|
||||
*
|
||||
* @param int $timeout Connection timeout
|
||||
*
|
||||
* @access public
|
||||
* @param integer $timeout Connection timeout
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setTimeout($timeout)
|
||||
{
|
||||
$this->timeout = $timeout ?: $this->timeout;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a custom user agent
|
||||
* Set a custom user agent.
|
||||
*
|
||||
* @param string $user_agent User Agent
|
||||
*
|
||||
* @access public
|
||||
* @param string $user_agent User Agent
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setUserAgent($user_agent)
|
||||
{
|
||||
$this->user_agent = $user_agent ?: $this->user_agent;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the mximum number of HTTP redirections
|
||||
* Set the mximum number of HTTP redirections.
|
||||
*
|
||||
* @param int $max Maximum
|
||||
*
|
||||
* @access public
|
||||
* @param integer $max Maximum
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setMaxRedirections($max)
|
||||
{
|
||||
$this->max_redirects = $max ?: $this->max_redirects;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the maximum size of the HTTP body
|
||||
* Set the maximum size of the HTTP body.
|
||||
*
|
||||
* @param int $max Maximum
|
||||
*
|
||||
* @access public
|
||||
* @param integer $max Maximum
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setMaxBodySize($max)
|
||||
{
|
||||
$this->max_body_size = $max ?: $this->max_body_size;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the proxy hostname
|
||||
* Set the proxy hostname.
|
||||
*
|
||||
* @param string $hostname Proxy hostname
|
||||
*
|
||||
* @access public
|
||||
* @param string $hostname Proxy hostname
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setProxyHostname($hostname)
|
||||
{
|
||||
$this->proxy_hostname = $hostname ?: $this->proxy_hostname;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the proxy port
|
||||
* Set the proxy port.
|
||||
*
|
||||
* @param int $port Proxy port
|
||||
*
|
||||
* @access public
|
||||
* @param integer $port Proxy port
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setProxyPort($port)
|
||||
{
|
||||
$this->proxy_port = $port ?: $this->proxy_port;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the proxy username
|
||||
* Set the proxy username.
|
||||
*
|
||||
* @param string $username Proxy username
|
||||
*
|
||||
* @access public
|
||||
* @param string $username Proxy username
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setProxyUsername($username)
|
||||
{
|
||||
$this->proxy_username = $username ?: $this->proxy_username;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the proxy password
|
||||
* Set the proxy password.
|
||||
*
|
||||
* @param string $password Password
|
||||
*
|
||||
* @access public
|
||||
* @param string $password Password
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setProxyPassword($password)
|
||||
{
|
||||
$this->proxy_password = $password ?: $this->proxy_password;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the username
|
||||
* Set the username.
|
||||
*
|
||||
* @param string $username Basic Auth username
|
||||
*
|
||||
* @access public
|
||||
* @param string $username Basic Auth username
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setUsername($username)
|
||||
{
|
||||
$this->username = $username ?: $this->username;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the password
|
||||
* Set the password.
|
||||
*
|
||||
* @param string $password Basic Auth Password
|
||||
*
|
||||
* @access public
|
||||
* @param string $password Basic Auth Password
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setPassword($password)
|
||||
{
|
||||
$this->password = $password ?: $this->password;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable the passthrough mode
|
||||
* Enable the passthrough mode.
|
||||
*
|
||||
* @access public
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function enablePassthroughMode()
|
||||
{
|
||||
$this->passthrough = true;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Disable the passthrough mode
|
||||
* Disable the passthrough mode.
|
||||
*
|
||||
* @access public
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function disablePassthroughMode()
|
||||
{
|
||||
$this->passthrough = false;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set config object
|
||||
* Set config object.
|
||||
*
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function setConfig($config)
|
||||
public function setConfig(Config $config)
|
||||
{
|
||||
if ($config !== null) {
|
||||
$this->setTimeout($config->getClientTimeout());
|
||||
|
@ -4,12 +4,10 @@ namespace PicoFeed\Client;
|
||||
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
|
||||
/**
|
||||
* ClientException Exception
|
||||
* ClientException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
abstract class ClientException extends PicoFeedException
|
||||
{
|
||||
|
@ -5,54 +5,49 @@ namespace PicoFeed\Client;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* cURL HTTP client
|
||||
* cURL HTTP client.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class Curl extends Client
|
||||
{
|
||||
/**
|
||||
* HTTP response body
|
||||
* HTTP response body.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $body = '';
|
||||
|
||||
/**
|
||||
* Body size
|
||||
* Body size.
|
||||
*
|
||||
* @access private
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
private $body_length = 0;
|
||||
|
||||
/**
|
||||
* HTTP response headers
|
||||
* HTTP response headers.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $response_headers = array();
|
||||
|
||||
/**
|
||||
* Counter on the number of header received
|
||||
* Counter on the number of header received.
|
||||
*
|
||||
* @access private
|
||||
* @var integer
|
||||
* @var int
|
||||
*/
|
||||
private $response_headers_count = 0;
|
||||
|
||||
/**
|
||||
* cURL callback to read the HTTP body
|
||||
* cURL callback to read the HTTP body.
|
||||
*
|
||||
* If the function return -1, curl stop to read the HTTP response
|
||||
*
|
||||
* @access public
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Chunk of data
|
||||
* @return integer Length of the buffer
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Chunk of data
|
||||
*
|
||||
* @return int Length of the buffer
|
||||
*/
|
||||
public function readBody($ch, $buffer)
|
||||
{
|
||||
@ -69,23 +64,21 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* cURL callback to read HTTP headers
|
||||
* cURL callback to read HTTP headers.
|
||||
*
|
||||
* @access public
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Header line
|
||||
* @return integer Length of the buffer
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Header line
|
||||
*
|
||||
* @return int Length of the buffer
|
||||
*/
|
||||
public function readHeaders($ch, $buffer)
|
||||
{
|
||||
$length = strlen($buffer);
|
||||
|
||||
if ($buffer === "\r\n" || $buffer === "\n") {
|
||||
$this->response_headers_count++;
|
||||
}
|
||||
else {
|
||||
|
||||
if (! isset($this->response_headers[$this->response_headers_count])) {
|
||||
++$this->response_headers_count;
|
||||
} else {
|
||||
if (!isset($this->response_headers[$this->response_headers_count])) {
|
||||
$this->response_headers[$this->response_headers_count] = '';
|
||||
}
|
||||
|
||||
@ -96,12 +89,12 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* cURL callback to passthrough the HTTP status header to the client
|
||||
* cURL callback to passthrough the HTTP status header to the client.
|
||||
*
|
||||
* @access public
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Header line
|
||||
* @return integer Length of the buffer
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Header line
|
||||
*
|
||||
* @return int Length of the buffer
|
||||
*/
|
||||
public function passthroughHeaders($ch, $buffer)
|
||||
{
|
||||
@ -109,8 +102,7 @@ class Curl extends Client
|
||||
|
||||
if ($status !== 0) {
|
||||
header(':', true, $status);
|
||||
}
|
||||
elseif (isset($headers['Content-Type'])) {
|
||||
} elseif (isset($headers['Content-Type'])) {
|
||||
header($buffer);
|
||||
}
|
||||
|
||||
@ -118,25 +110,25 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* cURL callback to passthrough the HTTP body to the client
|
||||
* cURL callback to passthrough the HTTP body to the client.
|
||||
*
|
||||
* If the function return -1, curl stop to read the HTTP response
|
||||
*
|
||||
* @access public
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Chunk of data
|
||||
* @return integer Length of the buffer
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Chunk of data
|
||||
*
|
||||
* @return int Length of the buffer
|
||||
*/
|
||||
public function passthroughBody($ch, $buffer)
|
||||
{
|
||||
echo $buffer;
|
||||
|
||||
return strlen($buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare HTTP headers
|
||||
* Prepare HTTP headers.
|
||||
*
|
||||
* @access private
|
||||
* @return string[]
|
||||
*/
|
||||
private function prepareHeaders()
|
||||
@ -159,16 +151,15 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare curl proxy context
|
||||
* Prepare curl proxy context.
|
||||
*
|
||||
* @param resource $ch
|
||||
*
|
||||
* @access private
|
||||
* @param resource $ch
|
||||
* @return resource $ch
|
||||
*/
|
||||
private function prepareProxyContext($ch)
|
||||
{
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
|
||||
@ -178,8 +169,7 @@ class Curl extends Client
|
||||
if ($this->proxy_username) {
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
@ -188,10 +178,10 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare curl auth context
|
||||
* Prepare curl auth context.
|
||||
*
|
||||
* @param resource $ch
|
||||
*
|
||||
* @access private
|
||||
* @param resource $ch
|
||||
* @return resource $ch
|
||||
*/
|
||||
private function prepareAuthContext($ch)
|
||||
@ -204,10 +194,10 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Set write/header functions
|
||||
* Set write/header functions.
|
||||
*
|
||||
* @param resource $ch
|
||||
*
|
||||
* @access private
|
||||
* @param resource $ch
|
||||
* @return resource $ch
|
||||
*/
|
||||
private function prepareDownloadMode($ch)
|
||||
@ -218,7 +208,6 @@ class Curl extends Client
|
||||
if ($this->isPassthroughEnabled()) {
|
||||
$write_function = 'passthroughBody';
|
||||
$header_function = 'passthroughHeaders';
|
||||
|
||||
}
|
||||
|
||||
curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function));
|
||||
@ -228,9 +217,8 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare curl context
|
||||
* Prepare curl context.
|
||||
*
|
||||
* @access private
|
||||
* @return resource
|
||||
*/
|
||||
private function prepareContext()
|
||||
@ -266,9 +254,7 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute curl context
|
||||
*
|
||||
* @access private
|
||||
* Execute curl context.
|
||||
*/
|
||||
private function executeContext()
|
||||
{
|
||||
@ -297,11 +283,11 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
* Do the HTTP request.
|
||||
*
|
||||
* @access public
|
||||
* @param bool $follow_location Flag used when there is an open_basedir restriction
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
* @param bool $follow_location Flag used when there is an open_basedir restriction
|
||||
*
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
*/
|
||||
public function doRequest($follow_location = true)
|
||||
{
|
||||
@ -316,15 +302,15 @@ class Curl extends Client
|
||||
return array(
|
||||
'status' => $status,
|
||||
'body' => $this->body,
|
||||
'headers' => $headers
|
||||
'headers' => $headers,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle manually redirections when there is an open base dir restriction
|
||||
* Handle manually redirections when there is an open base dir restriction.
|
||||
*
|
||||
* @param string $location Redirected URL
|
||||
*
|
||||
* @access private
|
||||
* @param string $location Redirected URL
|
||||
* @return array
|
||||
*/
|
||||
private function handleRedirection($location)
|
||||
@ -338,8 +324,7 @@ class Curl extends Client
|
||||
$this->response_headers_count = 0;
|
||||
|
||||
while (true) {
|
||||
|
||||
$nb_redirects++;
|
||||
++$nb_redirects;
|
||||
|
||||
if ($nb_redirects >= $this->max_redirects) {
|
||||
throw new MaxRedirectException('Maximum number of redirections reached');
|
||||
@ -353,8 +338,7 @@ class Curl extends Client
|
||||
$this->body_length = 0;
|
||||
$this->response_headers = array();
|
||||
$this->response_headers_count = 0;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -363,14 +347,14 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle cURL errors (throw individual exceptions)
|
||||
* Handle cURL errors (throw individual exceptions).
|
||||
*
|
||||
* We don't use constants because they are not necessary always available
|
||||
* (depends of the version of libcurl linked to php)
|
||||
*
|
||||
* @see http://curl.haxx.se/libcurl/c/libcurl-errors.html
|
||||
* @access private
|
||||
* @param integer $errno cURL error code
|
||||
*
|
||||
* @param int $errno cURL error code
|
||||
*/
|
||||
private function handleError($errno)
|
||||
{
|
||||
|
@ -6,11 +6,10 @@ use ArrayAccess;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Class to handle HTTP headers case insensitivity
|
||||
* Class to handle HTTP headers case insensitivity.
|
||||
*
|
||||
* @author Bernhard Posselt
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class HttpHeaders implements ArrayAccess
|
||||
{
|
||||
@ -44,11 +43,12 @@ class HttpHeaders implements ArrayAccess
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse HTTP headers
|
||||
* Parse HTTP headers.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param array $lines List of headers
|
||||
*
|
||||
* @param array $lines List of headers
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public static function parse(array $lines)
|
||||
@ -57,15 +57,14 @@ class HttpHeaders implements ArrayAccess
|
||||
$headers = array();
|
||||
|
||||
foreach ($lines as $line) {
|
||||
|
||||
if (strpos($line, 'HTTP/1') === 0) {
|
||||
$headers = array();
|
||||
$status = (int) substr($line, 9, 3);
|
||||
}
|
||||
else if (strpos($line, ':') !== false) {
|
||||
|
||||
@list($name, $value) = explode(': ', $line);
|
||||
if ($value) $headers[trim($name)] = trim($value);
|
||||
} elseif (strpos($line, ':') !== false) {
|
||||
list($name, $value) = explode(': ', $line);
|
||||
if ($value) {
|
||||
$headers[trim($name)] = trim($value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* InvalidCertificateException Exception
|
||||
* InvalidCertificateException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class InvalidCertificateException extends ClientException
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* InvalidUrlException Exception
|
||||
* InvalidUrlException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class InvalidUrlException extends ClientException
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* MaxRedirectException Exception
|
||||
* MaxRedirectException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class MaxRedirectException extends ClientException
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* MaxSizeException Exception
|
||||
* MaxSizeException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class MaxSizeException extends ClientException
|
||||
{
|
||||
|
@ -5,17 +5,15 @@ namespace PicoFeed\Client;
|
||||
use PicoFeed\Logging\Logger;
|
||||
|
||||
/**
|
||||
* Stream context HTTP client
|
||||
* Stream context HTTP client.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class Stream extends Client
|
||||
{
|
||||
/**
|
||||
* Prepare HTTP headers
|
||||
* Prepare HTTP headers.
|
||||
*
|
||||
* @access private
|
||||
* @return string[]
|
||||
*/
|
||||
private function prepareHeaders()
|
||||
@ -27,7 +25,7 @@ class Stream extends Client
|
||||
|
||||
// disable compression in passthrough mode. It could result in double
|
||||
// compressed content which isn't decodeable by browsers
|
||||
if (function_exists('gzdecode') && ! $this->isPassthroughEnabled()) {
|
||||
if (function_exists('gzdecode') && !$this->isPassthroughEnabled()) {
|
||||
$headers[] = 'Accept-Encoding: gzip';
|
||||
}
|
||||
|
||||
@ -53,14 +51,13 @@ class Stream extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct the final URL from location headers
|
||||
* Construct the final URL from location headers.
|
||||
*
|
||||
* @access private
|
||||
* @param array $headers List of HTTP response header
|
||||
* @param array $headers List of HTTP response header
|
||||
*/
|
||||
private function setEffectiveUrl($headers)
|
||||
{
|
||||
foreach($headers as $header) {
|
||||
foreach ($headers as $header) {
|
||||
if (stripos($header, 'Location') === 0) {
|
||||
list(, $value) = explode(': ', $header);
|
||||
|
||||
@ -70,9 +67,8 @@ class Stream extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare stream context
|
||||
* Prepare stream context.
|
||||
*
|
||||
* @access private
|
||||
* @return array
|
||||
*/
|
||||
private function prepareContext()
|
||||
@ -83,11 +79,10 @@ class Stream extends Client
|
||||
'protocol_version' => 1.1,
|
||||
'timeout' => $this->timeout,
|
||||
'max_redirects' => $this->max_redirects,
|
||||
)
|
||||
),
|
||||
);
|
||||
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
$context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
|
||||
@ -95,8 +90,7 @@ class Stream extends Client
|
||||
|
||||
if ($this->proxy_username) {
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
Logger::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
@ -107,10 +101,9 @@ class Stream extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
* Do the HTTP request.
|
||||
*
|
||||
* @access public
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
*/
|
||||
public function doRequest()
|
||||
{
|
||||
@ -121,7 +114,7 @@ class Stream extends Client
|
||||
|
||||
// Make HTTP request
|
||||
$stream = @fopen($this->url, 'r', false, $context);
|
||||
if (! is_resource($stream)) {
|
||||
if (!is_resource($stream)) {
|
||||
throw new InvalidUrlException('Unable to establish a connection');
|
||||
}
|
||||
|
||||
@ -137,8 +130,7 @@ class Stream extends Client
|
||||
}
|
||||
|
||||
fpassthru($stream);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// Get the entire body until the max size
|
||||
$body = stream_get_contents($stream, $this->max_body_size + 1);
|
||||
|
||||
@ -159,16 +151,16 @@ class Stream extends Client
|
||||
return array(
|
||||
'status' => $status,
|
||||
'body' => $this->decodeBody($body, $headers),
|
||||
'headers' => $headers
|
||||
'headers' => $headers,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode body response according to the HTTP headers
|
||||
* Decode body response according to the HTTP headers.
|
||||
*
|
||||
* @param string $body Raw body
|
||||
* @param HttpHeaders $headers HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @param string $body Raw body
|
||||
* @param HttpHeaders $headers HTTP headers
|
||||
* @return string
|
||||
*/
|
||||
public function decodeBody($body, HttpHeaders $headers)
|
||||
@ -178,22 +170,22 @@ class Stream extends Client
|
||||
}
|
||||
|
||||
if (isset($headers['Content-Encoding']) && $headers['Content-Encoding'] === 'gzip') {
|
||||
$body = @gzdecode($body);
|
||||
$body = gzdecode($body);
|
||||
}
|
||||
|
||||
return $body;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a chunked body
|
||||
* Decode a chunked body.
|
||||
*
|
||||
* @access public
|
||||
* @param string $str Raw body
|
||||
* @return string Decoded body
|
||||
* @param string $str Raw body
|
||||
*
|
||||
* @return string Decoded body
|
||||
*/
|
||||
public function decodeChunked($str)
|
||||
{
|
||||
for ($result = ''; ! empty($str); $str = trim($str)) {
|
||||
for ($result = ''; !empty($str); $str = trim($str)) {
|
||||
|
||||
// Get the chunk length
|
||||
$pos = strpos($str, "\r\n");
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* TimeoutException Exception
|
||||
* TimeoutException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class TimeoutException extends ClientException
|
||||
{
|
||||
|
136
vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php
vendored
136
vendor/fguillot/picofeed/lib/PicoFeed/Client/Url.php
vendored
@ -3,34 +3,30 @@
|
||||
namespace PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* URL class
|
||||
* URL class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Client
|
||||
*/
|
||||
class Url
|
||||
{
|
||||
/**
|
||||
* URL
|
||||
* URL.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $url = '';
|
||||
|
||||
/**
|
||||
* URL components
|
||||
* URL components.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $components = array();
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* Constructor.
|
||||
*
|
||||
* @access public
|
||||
* @param string $url URL
|
||||
* @param string $url URL
|
||||
*/
|
||||
public function __construct($url)
|
||||
{
|
||||
@ -51,28 +47,27 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Shortcut method to get an absolute url from relative url
|
||||
* Shortcut method to get an absolute url from relative url.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param mixed $item_url Unknown url (can be relative or not)
|
||||
* @param mixed $website_url Website url
|
||||
*
|
||||
* @param mixed $item_url Unknown url (can be relative or not)
|
||||
* @param mixed $website_url Website url
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function resolve($item_url, $website_url)
|
||||
{
|
||||
$link = is_string($item_url) ? new Url($item_url) : $item_url;
|
||||
$website = is_string($website_url) ? new Url($website_url) : $website_url;
|
||||
$link = is_string($item_url) ? new self($item_url) : $item_url;
|
||||
$website = is_string($website_url) ? new self($website_url) : $website_url;
|
||||
|
||||
if ($link->isRelativeUrl()) {
|
||||
|
||||
if ($link->isRelativePath()) {
|
||||
return $link->getAbsoluteUrl($website->getBaseUrl($website->getBasePath()));
|
||||
}
|
||||
|
||||
return $link->getAbsoluteUrl($website->getBaseUrl());
|
||||
}
|
||||
else if ($link->isProtocolRelative()) {
|
||||
} elseif ($link->isProtocolRelative()) {
|
||||
$link->setScheme($website->getScheme());
|
||||
}
|
||||
|
||||
@ -80,24 +75,26 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Shortcut method to get a base url
|
||||
* Shortcut method to get a base url.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $url
|
||||
*
|
||||
* @param string $url
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function base($url)
|
||||
{
|
||||
$link = new Url($url);
|
||||
$link = new self($url);
|
||||
|
||||
return $link->getBaseUrl();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base URL
|
||||
* Get the base URL.
|
||||
*
|
||||
* @param string $suffix Add a suffix to the url
|
||||
*
|
||||
* @access public
|
||||
* @param string $suffix Add a suffix to the url
|
||||
* @return string
|
||||
*/
|
||||
public function getBaseUrl($suffix = '')
|
||||
@ -106,19 +103,18 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the absolute URL
|
||||
* Get the absolute URL.
|
||||
*
|
||||
* @param string $base_url Use this url as base url
|
||||
*
|
||||
* @access public
|
||||
* @param string $base_url Use this url as base url
|
||||
* @return string
|
||||
*/
|
||||
public function getAbsoluteUrl($base_url = '')
|
||||
{
|
||||
if ($base_url) {
|
||||
$base = new Url($base_url);
|
||||
$base = new self($base_url);
|
||||
$url = $base->getAbsoluteUrl().substr($this->getFullPath(), 1);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$url = $this->hasHost() ? $this->getBaseUrl().$this->getFullPath() : '';
|
||||
}
|
||||
|
||||
@ -126,50 +122,49 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the url is relative
|
||||
* Return true if the url is relative.
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function isRelativeUrl()
|
||||
{
|
||||
return ! $this->hasScheme() && ! $this->isProtocolRelative();
|
||||
return !$this->hasScheme() && !$this->isProtocolRelative();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the path is relative
|
||||
* Return true if the path is relative.
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function isRelativePath()
|
||||
{
|
||||
$path = $this->getPath();
|
||||
return empty($path) || $path{0} !== '/';
|
||||
|
||||
return empty($path) || $path{0}
|
||||
!== '/';
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters the path of a URI
|
||||
* Filters the path of a URI.
|
||||
*
|
||||
* Imported from Guzzle library: https://github.com/guzzle/psr7/blob/master/src/Uri.php#L568-L582
|
||||
*
|
||||
* @access public
|
||||
* @param $path
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function filterPath($path, $charUnreserved = 'a-zA-Z0-9_\-\.~', $charSubDelims = '!\$&\'\(\)\*\+,;=')
|
||||
{
|
||||
return preg_replace_callback(
|
||||
'/(?:[^' . $charUnreserved . $charSubDelims . ':@\/%]+|%(?![A-Fa-f0-9]{2}))/',
|
||||
'/(?:[^'.$charUnreserved.$charSubDelims.':@\/%]+|%(?![A-Fa-f0-9]{2}))/',
|
||||
function (array $matches) { return rawurlencode($matches[0]); },
|
||||
$path
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the path
|
||||
* Get the path.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getPath()
|
||||
@ -178,9 +173,8 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base path
|
||||
* Get the base path.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getBasePath()
|
||||
@ -194,9 +188,8 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the full path (path + querystring + fragment)
|
||||
* Get the full path (path + querystring + fragment).
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getFullPath()
|
||||
@ -210,9 +203,8 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the hostname
|
||||
* Get the hostname.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getHost()
|
||||
@ -221,21 +213,20 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the url has a hostname
|
||||
* Return true if the url has a hostname.
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function hasHost()
|
||||
{
|
||||
return ! empty($this->components['host']);
|
||||
return !empty($this->components['host']);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the scheme
|
||||
* Get the scheme.
|
||||
*
|
||||
* @param string $suffix Suffix to add when there is a scheme
|
||||
*
|
||||
* @access public
|
||||
* @param string $suffix Suffix to add when there is a scheme
|
||||
* @return string
|
||||
*/
|
||||
public function getScheme($suffix = '')
|
||||
@ -244,10 +235,10 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the scheme
|
||||
* Set the scheme.
|
||||
*
|
||||
* @param string $scheme Set a scheme
|
||||
*
|
||||
* @access public
|
||||
* @param string $scheme Set a scheme
|
||||
* @return string
|
||||
*/
|
||||
public function setScheme($scheme)
|
||||
@ -256,21 +247,20 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the url has a scheme
|
||||
* Return true if the url has a scheme.
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function hasScheme()
|
||||
{
|
||||
return ! empty($this->components['scheme']);
|
||||
return !empty($this->components['scheme']);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the port
|
||||
* Get the port.
|
||||
*
|
||||
* @param string $prefix Prefix to add when there is a port
|
||||
*
|
||||
* @access public
|
||||
* @param string $prefix Prefix to add when there is a port
|
||||
* @return string
|
||||
*/
|
||||
public function getPort($prefix = '')
|
||||
@ -279,21 +269,19 @@ class Url
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the url has a port
|
||||
* Return true if the url has a port.
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function hasPort()
|
||||
{
|
||||
return ! empty($this->components['port']);
|
||||
return !empty($this->components['port']);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the url is protocol relative (start with //)
|
||||
* Return true if the url is protocol relative (start with //).
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function isProtocolRelative()
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Config;
|
||||
|
||||
/**
|
||||
* Config class
|
||||
* Config class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*
|
||||
* @method \PicoFeed\Config\Config setClientTimeout(integer $value)
|
||||
* @method \PicoFeed\Config\Config setClientUserAgent(string $value)
|
||||
@ -34,7 +33,6 @@ namespace PicoFeed\Config;
|
||||
* @method \PicoFeed\Config\Config setFilterImageProxyUrl($value)
|
||||
* @method \PicoFeed\Config\Config setFilterImageProxyCallback($closure)
|
||||
* @method \PicoFeed\Config\Config setFilterImageProxyProtocol($value)
|
||||
*
|
||||
* @method integer getClientTimeout()
|
||||
* @method string getClientUserAgent()
|
||||
* @method integer getMaxRedirections()
|
||||
@ -65,22 +63,21 @@ namespace PicoFeed\Config;
|
||||
class Config
|
||||
{
|
||||
/**
|
||||
* Contains all parameters
|
||||
* Contains all parameters.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $container = array();
|
||||
|
||||
/**
|
||||
* Magic method to have any kind of setters or getters
|
||||
* Magic method to have any kind of setters or getters.
|
||||
*
|
||||
* @param string $name Getter/Setter name
|
||||
* @param array $arguments Method arguments
|
||||
*
|
||||
* @access public
|
||||
* @param string $name Getter/Setter name
|
||||
* @param array $arguments Method arguments
|
||||
* @return mixed
|
||||
*/
|
||||
public function __call($name , array $arguments)
|
||||
public function __call($name, array $arguments)
|
||||
{
|
||||
$name = strtolower($name);
|
||||
$prefix = substr($name, 0, 3);
|
||||
@ -88,10 +85,11 @@ class Config
|
||||
|
||||
if ($prefix === 'set' && isset($arguments[0])) {
|
||||
$this->container[$parameter] = $arguments[0];
|
||||
|
||||
return $this;
|
||||
}
|
||||
else if ($prefix === 'get') {
|
||||
} elseif ($prefix === 'get') {
|
||||
$default_value = isset($arguments[0]) ? $arguments[0] : null;
|
||||
|
||||
return isset($this->container[$parameter]) ? $this->container[$parameter] : $default_value;
|
||||
}
|
||||
}
|
||||
|
@ -3,9 +3,7 @@
|
||||
namespace PicoFeed\Encoding;
|
||||
|
||||
/**
|
||||
* Encoding class
|
||||
*
|
||||
* @package Encoding
|
||||
* Encoding class.
|
||||
*/
|
||||
class Encoding
|
||||
{
|
||||
@ -17,7 +15,7 @@ class Encoding
|
||||
|
||||
// suppress all notices since it isn't possible to silence only the
|
||||
// notice "Wrong charset, conversion from $in_encoding to $out_encoding is not allowed"
|
||||
set_error_handler(function() {}, E_NOTICE);
|
||||
set_error_handler(function () {}, E_NOTICE);
|
||||
|
||||
// convert input to utf-8 and strip invalid characters
|
||||
$value = iconv($encoding, 'UTF-8//IGNORE', $input);
|
||||
|
@ -5,41 +5,36 @@ namespace PicoFeed\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
/**
|
||||
* Attribute Filter class
|
||||
* Attribute Filter class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Filter
|
||||
*/
|
||||
class Attribute
|
||||
{
|
||||
/**
|
||||
* Image proxy url
|
||||
* Image proxy url.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $image_proxy_url = '';
|
||||
|
||||
/**
|
||||
* Image proxy callback
|
||||
* Image proxy callback.
|
||||
*
|
||||
* @access private
|
||||
* @var \Closure|null
|
||||
*/
|
||||
private $image_proxy_callback = null;
|
||||
|
||||
/**
|
||||
* limits the image proxy usage to this protocol
|
||||
* limits the image proxy usage to this protocol.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $image_proxy_limit_protocol = '';
|
||||
|
||||
/**
|
||||
* Tags and attribute whitelist
|
||||
* Tags and attribute whitelist.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $attribute_whitelist = array(
|
||||
@ -80,15 +75,14 @@ class Attribute
|
||||
'time' => array('datetime'),
|
||||
'abbr' => array('title'),
|
||||
'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'),
|
||||
'q' => array('cite')
|
||||
'q' => array('cite'),
|
||||
);
|
||||
|
||||
/**
|
||||
* Scheme whitelist
|
||||
* Scheme whitelist.
|
||||
*
|
||||
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $scheme_whitelist = array(
|
||||
@ -124,9 +118,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* Iframe source whitelist, everything else is ignored
|
||||
* Iframe source whitelist, everything else is ignored.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $iframe_whitelist = array(
|
||||
@ -139,9 +132,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* Blacklisted resources
|
||||
* Blacklisted resources.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $media_blacklist = array(
|
||||
@ -172,9 +164,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes used for external resources
|
||||
* Attributes used for external resources.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $media_attributes = array(
|
||||
@ -184,9 +175,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes that must be integer
|
||||
* Attributes that must be integer.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $integer_attributes = array(
|
||||
@ -196,9 +186,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* Mandatory attributes for specified tags
|
||||
* Mandatory attributes for specified tags.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_attributes = array(
|
||||
@ -210,9 +199,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* Add attributes to specified tags
|
||||
* Add attributes to specified tags.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $add_attributes = array(
|
||||
@ -221,9 +209,8 @@ class Attribute
|
||||
);
|
||||
|
||||
/**
|
||||
* List of filters to apply
|
||||
* List of filters to apply.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $filters = array(
|
||||
@ -235,22 +222,20 @@ class Attribute
|
||||
'filterProtocolUrlAttribute',
|
||||
'rewriteImageProxyUrl',
|
||||
'secureIframeSrc',
|
||||
'removeYouTubeAutoplay'
|
||||
'removeYouTubeAutoplay',
|
||||
);
|
||||
|
||||
/**
|
||||
* Add attributes to specified tags
|
||||
* Add attributes to specified tags.
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Client\Url
|
||||
*/
|
||||
private $website;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* Constructor.
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Client\Url $website Website url instance
|
||||
* @param \PicoFeed\Client\Url $website Website url instance
|
||||
*/
|
||||
public function __construct(Url $website)
|
||||
{
|
||||
@ -258,18 +243,18 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply filters to the attributes list
|
||||
* Apply filters to the attributes list.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes dictionary
|
||||
* @return array Filtered attributes
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes dictionary
|
||||
*
|
||||
* @return array Filtered attributes
|
||||
*/
|
||||
public function filter($tag, array $attributes)
|
||||
{
|
||||
foreach ($attributes as $attribute => &$value) {
|
||||
foreach ($this->filters as $filter) {
|
||||
if (! $this->$filter($tag, $attribute, $value)) {
|
||||
if (!$this->$filter($tag, $attribute, $value)) {
|
||||
unset($attributes[$attribute]);
|
||||
break;
|
||||
}
|
||||
@ -280,13 +265,13 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the value is allowed (remove not allowed attributes)
|
||||
* Return true if the value is allowed (remove not allowed attributes).
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function filterAllowedAttribute($tag, $attribute, $value)
|
||||
{
|
||||
@ -294,13 +279,13 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the value is not integer (remove attributes that should have an integer value)
|
||||
* Return true if the value is not integer (remove attributes that should have an integer value).
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function filterIntegerAttribute($tag, $attribute, $value)
|
||||
{
|
||||
@ -312,18 +297,17 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the iframe source is allowed (remove not allowed iframe)
|
||||
* Return true if the iframe source is allowed (remove not allowed iframe).
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function filterIframeAttribute($tag, $attribute, $value)
|
||||
{
|
||||
if ($tag === 'iframe' && $attribute === 'src') {
|
||||
|
||||
foreach ($this->iframe_whitelist as $url) {
|
||||
if (strpos($value, $url) === 0) {
|
||||
return true;
|
||||
@ -337,13 +321,13 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the resource is not blacklisted (remove blacklisted resource attributes)
|
||||
* Return true if the resource is not blacklisted (remove blacklisted resource attributes).
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function filterBlacklistResourceAttribute($tag, $attribute, $value)
|
||||
{
|
||||
@ -355,13 +339,13 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert all relative links to absolute url
|
||||
* Convert all relative links to absolute url.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function rewriteAbsoluteUrl($tag, $attribute, &$value)
|
||||
{
|
||||
@ -374,13 +358,13 @@ class Attribute
|
||||
|
||||
/**
|
||||
* Turns iframes' src attribute from http to https to prevent
|
||||
* mixed active content
|
||||
* mixed active content.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attribute Atttributes name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param array $attribute Atttributes name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function secureIframeSrc($tag, $attribute, &$value)
|
||||
{
|
||||
@ -392,13 +376,13 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes YouTube autoplay from iframes
|
||||
* Removes YouTube autoplay from iframes.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attribute Atttributes name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param array $attribute Atttributes name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function removeYouTubeAutoplay($tag, $attribute, &$value)
|
||||
{
|
||||
@ -411,23 +395,21 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite image url to use with a proxy
|
||||
* Rewrite image url to use with a proxy.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function rewriteImageProxyUrl($tag, $attribute, &$value)
|
||||
{
|
||||
if ($tag === 'img' && $attribute === 'src'
|
||||
&& ! ($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) {
|
||||
|
||||
&& !($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) {
|
||||
if ($this->image_proxy_url) {
|
||||
$value = sprintf($this->image_proxy_url, rawurlencode($value));
|
||||
}
|
||||
else if (is_callable($this->image_proxy_callback)) {
|
||||
} elseif (is_callable($this->image_proxy_callback)) {
|
||||
$value = call_user_func($this->image_proxy_callback, $value);
|
||||
}
|
||||
}
|
||||
@ -436,17 +418,17 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the scheme is authorized
|
||||
* Return true if the scheme is authorized.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function filterProtocolUrlAttribute($tag, $attribute, $value)
|
||||
{
|
||||
if ($this->isResource($attribute) && ! $this->isAllowedProtocol($value)) {
|
||||
if ($this->isResource($attribute) && !$this->isAllowedProtocol($value)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -454,11 +436,11 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Automatically add/override some attributes for specific tags
|
||||
* Automatically add/override some attributes for specific tags.
|
||||
*
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes list
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes list
|
||||
* @return array
|
||||
*/
|
||||
public function addAttributes($tag, array $attributes)
|
||||
@ -471,19 +453,18 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if all required attributes are present
|
||||
* Return true if all required attributes are present.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes list
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes list
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function hasRequiredAttributes($tag, array $attributes)
|
||||
{
|
||||
if (isset($this->required_attributes[$tag])) {
|
||||
|
||||
foreach ($this->required_attributes[$tag] as $attribute) {
|
||||
if (! isset($attributes[$attribute])) {
|
||||
if (!isset($attributes[$attribute])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -493,11 +474,11 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an attribute name is an external resource
|
||||
* Check if an attribute name is an external resource.
|
||||
*
|
||||
* @access public
|
||||
* @param string $attribute Attribute name
|
||||
* @return boolean
|
||||
* @param string $attribute Attribute name
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isResource($attribute)
|
||||
{
|
||||
@ -505,16 +486,15 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if the protocol is allowed or not
|
||||
* Detect if the protocol is allowed or not.
|
||||
*
|
||||
* @access public
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
* @param string $value Attribute value
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isAllowedProtocol($value)
|
||||
{
|
||||
foreach ($this->scheme_whitelist as $protocol) {
|
||||
|
||||
if (strpos($value, $protocol) === 0) {
|
||||
return true;
|
||||
}
|
||||
@ -524,16 +504,15 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an url is blacklisted
|
||||
* Detect if an url is blacklisted.
|
||||
*
|
||||
* @access public
|
||||
* @param string $resource Attribute value (URL)
|
||||
* @return boolean
|
||||
* @param string $resource Attribute value (URL)
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isBlacklistedMedia($resource)
|
||||
{
|
||||
foreach ($this->media_blacklist as $name) {
|
||||
|
||||
if (strpos($resource, $name) !== false) {
|
||||
return true;
|
||||
}
|
||||
@ -543,10 +522,10 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the attribute list to html
|
||||
* Convert the attribute list to html.
|
||||
*
|
||||
* @param array $attributes Attributes
|
||||
*
|
||||
* @access public
|
||||
* @param array $attributes Attributes
|
||||
* @return string
|
||||
*/
|
||||
public function toHtml(array $attributes)
|
||||
@ -561,147 +540,158 @@ class Attribute
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whitelisted tags and attributes for each tag
|
||||
* Set whitelisted tags and attributes for each tag.
|
||||
*
|
||||
* @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setWhitelistedAttributes(array $values)
|
||||
{
|
||||
$this->attribute_whitelist = $values ?: $this->attribute_whitelist;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set scheme whitelist
|
||||
* Set scheme whitelist.
|
||||
*
|
||||
* @param array $values List of scheme: ['http://', 'ftp://']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of scheme: ['http://', 'ftp://']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setSchemeWhitelist(array $values)
|
||||
{
|
||||
$this->scheme_whitelist = $values ?: $this->scheme_whitelist;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set media attributes (used to load external resources)
|
||||
* Set media attributes (used to load external resources).
|
||||
*
|
||||
* @param array $values List of values: ['src', 'href']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of values: ['src', 'href']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setMediaAttributes(array $values)
|
||||
{
|
||||
$this->media_attributes = $values ?: $this->media_attributes;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set blacklisted external resources
|
||||
* Set blacklisted external resources.
|
||||
*
|
||||
* @param array $values List of tags: ['http://google.com/', '...']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['http://google.com/', '...']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setMediaBlacklist(array $values)
|
||||
{
|
||||
$this->media_blacklist = $values ?: $this->media_blacklist;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set mandatory attributes for whitelisted tags
|
||||
* Set mandatory attributes for whitelisted tags.
|
||||
*
|
||||
* @param array $values List of tags: ['img' => 'src']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['img' => 'src']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setRequiredAttributes(array $values)
|
||||
{
|
||||
$this->required_attributes = $values ?: $this->required_attributes;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set attributes to add automatically to specific tags
|
||||
* Set attributes to add automatically to specific tags.
|
||||
*
|
||||
* @param array $values List of tags: ['a' => 'target="_blank"']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['a' => 'target="_blank"']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setAttributeOverrides(array $values)
|
||||
{
|
||||
$this->add_attributes = $values ?: $this->add_attributes;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set attributes that must be an integer
|
||||
* Set attributes that must be an integer.
|
||||
*
|
||||
* @param array $values List of tags: ['width', 'height']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['width', 'height']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setIntegerAttributes(array $values)
|
||||
{
|
||||
$this->integer_attributes = $values ?: $this->integer_attributes;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set allowed iframe resources
|
||||
* Set allowed iframe resources.
|
||||
*
|
||||
* @param array $values List of tags: ['http://www.youtube.com']
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['http://www.youtube.com']
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setIframeWhitelist(array $values)
|
||||
{
|
||||
$this->iframe_whitelist = $values ?: $this->iframe_whitelist;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set image proxy URL
|
||||
* Set image proxy URL.
|
||||
*
|
||||
* The original image url will be urlencoded
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Proxy URL
|
||||
* @param string $url Proxy URL
|
||||
*
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setImageProxyUrl($url)
|
||||
{
|
||||
$this->image_proxy_url = $url ?: $this->image_proxy_url;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set image proxy callback
|
||||
* Set image proxy callback.
|
||||
*
|
||||
* @param \Closure $callback
|
||||
*
|
||||
* @access public
|
||||
* @param \Closure $callback
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setImageProxyCallback($callback)
|
||||
{
|
||||
$this->image_proxy_callback = $callback ?: $this->image_proxy_callback;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set image proxy protocol restriction
|
||||
* Set image proxy protocol restriction.
|
||||
*
|
||||
* @param string $value
|
||||
*
|
||||
* @access public
|
||||
* @param string $value
|
||||
* @return Attribute
|
||||
*/
|
||||
public function setImageProxyProtocol($value)
|
||||
{
|
||||
$this->image_proxy_limit_protocol = $value ?: $this->image_proxy_limit_protocol;
|
||||
|
||||
return $this;
|
||||
}
|
||||
}
|
||||
|
@ -3,45 +3,46 @@
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
/**
|
||||
* Filter class
|
||||
* Filter class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Filter
|
||||
*/
|
||||
class Filter
|
||||
{
|
||||
/**
|
||||
* Get the Html filter instance
|
||||
* Get the Html filter instance.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $html HTML content
|
||||
* @param string $website Site URL (used to build absolute URL)
|
||||
*
|
||||
* @param string $html HTML content
|
||||
* @param string $website Site URL (used to build absolute URL)
|
||||
*
|
||||
* @return Html
|
||||
*/
|
||||
public static function html($html, $website)
|
||||
{
|
||||
$filter = new Html($html, $website);
|
||||
|
||||
return $filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Escape HTML content
|
||||
* Escape HTML content.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function escape($content)
|
||||
{
|
||||
return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
|
||||
return htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove HTML tags
|
||||
* Remove HTML tags.
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public function removeHTMLTags($data)
|
||||
@ -50,11 +51,12 @@ class Filter
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove the XML tag from a document
|
||||
* Remove the XML tag from a document.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function stripXmlTag($data)
|
||||
@ -64,38 +66,38 @@ class Filter
|
||||
}
|
||||
|
||||
do {
|
||||
|
||||
$pos = strpos($data, '<?xml-stylesheet ');
|
||||
|
||||
if ($pos !== false) {
|
||||
$data = ltrim(substr($data, strpos($data, '?>') + 2));
|
||||
}
|
||||
|
||||
} while ($pos !== false && $pos < 200);
|
||||
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip head tag from the HTML content
|
||||
* Strip head tag from the HTML content.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function stripHeadTags($data)
|
||||
{
|
||||
return preg_replace('@<head[^>]*?>.*?</head>@siu','', $data );
|
||||
return preg_replace('@<head[^>]*?>.*?</head>@siu', '', $data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string
|
||||
* Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $value Raw data
|
||||
* @return string Normalized data
|
||||
*
|
||||
* @param string $value Raw data
|
||||
*
|
||||
* @return string Normalized data
|
||||
*/
|
||||
public static function stripWhiteSpace($value)
|
||||
{
|
||||
@ -107,12 +109,13 @@ class Filter
|
||||
}
|
||||
|
||||
/**
|
||||
* Fixes before XML parsing
|
||||
* Fixes before XML parsing.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data Raw data
|
||||
* @return string Normalized data
|
||||
*
|
||||
* @param string $data Raw data
|
||||
*
|
||||
* @return string Normalized data
|
||||
*/
|
||||
public static function normalizeData($data)
|
||||
{
|
||||
@ -122,7 +125,7 @@ class Filter
|
||||
);
|
||||
|
||||
// strip invalid XML 1.0 characters which are encoded as entities
|
||||
$data = preg_replace_callback($entities, function($matches) {
|
||||
$data = preg_replace_callback($entities, function ($matches) {
|
||||
$code_point = $matches[2];
|
||||
|
||||
// convert hex entity to decimal
|
||||
|
@ -8,88 +8,78 @@ use PicoFeed\Scraper\RuleLoader;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
|
||||
* HTML Filter class
|
||||
* HTML Filter class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Filter
|
||||
*/
|
||||
class Html
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
* Config object.
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* Unfiltered XML data
|
||||
* Unfiltered XML data.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $input = '';
|
||||
|
||||
/**
|
||||
* Filtered XML data
|
||||
* Filtered XML data.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $output = '';
|
||||
|
||||
/**
|
||||
* List of empty tags
|
||||
* List of empty tags.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $empty_tags = array();
|
||||
|
||||
/**
|
||||
* Empty flag
|
||||
* Empty flag.
|
||||
*
|
||||
* @access private
|
||||
* @var boolean
|
||||
* @var bool
|
||||
*/
|
||||
private $empty = true;
|
||||
|
||||
/**
|
||||
* Tag instance
|
||||
* Tag instance.
|
||||
*
|
||||
* @access public
|
||||
* @var \PicoFeed\Filter\Tag
|
||||
*/
|
||||
public $tag = '';
|
||||
|
||||
/**
|
||||
* Attribute instance
|
||||
* Attribute instance.
|
||||
*
|
||||
* @access public
|
||||
* @var \PicoFeed\Filter\Attribute
|
||||
*/
|
||||
public $attribute = '';
|
||||
|
||||
/**
|
||||
* The website to filter
|
||||
* The website to filter.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $website;
|
||||
|
||||
/**
|
||||
* Initialize the filter, all inputs data must be encoded in UTF-8 before
|
||||
* Initialize the filter, all inputs data must be encoded in UTF-8 before.
|
||||
*
|
||||
* @access public
|
||||
* @param string $html HTML content
|
||||
* @param string $website Site URL (used to build absolute URL)
|
||||
* @param string $html HTML content
|
||||
* @param string $website Site URL (used to build absolute URL)
|
||||
*/
|
||||
public function __construct($html, $website)
|
||||
{
|
||||
$this->config = new Config;
|
||||
$this->input = XmlParser::HtmlToXml($html);
|
||||
$this->config = new Config();
|
||||
$this->input = XmlParser::htmlToXml($html);
|
||||
$this->output = '';
|
||||
$this->tag = new Tag($this->config);
|
||||
$this->website = $website;
|
||||
@ -97,10 +87,10 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Set config object
|
||||
* Set config object.
|
||||
*
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
* @return \PicoFeed\Filter\Html
|
||||
*/
|
||||
public function setConfig($config)
|
||||
@ -126,9 +116,8 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Run tags/attributes filtering
|
||||
* Run tags/attributes filtering.
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function execute()
|
||||
@ -150,9 +139,7 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Called before XML parsing
|
||||
*
|
||||
* @access public
|
||||
* Called before XML parsing.
|
||||
*/
|
||||
public function preFilter()
|
||||
{
|
||||
@ -160,9 +147,7 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after XML parsing
|
||||
*
|
||||
* @access public
|
||||
* Called after XML parsing.
|
||||
*/
|
||||
public function postFilter()
|
||||
{
|
||||
@ -173,16 +158,15 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after XML parsing
|
||||
* @param string $content the content that should be filtered
|
||||
* Called after XML parsing.
|
||||
*
|
||||
* @access public
|
||||
* @param string $content the content that should be filtered
|
||||
*/
|
||||
public function filterRules($content)
|
||||
{
|
||||
// the constructor should require a config, then this if can be removed
|
||||
if ($this->config === null) {
|
||||
$config = new Config;
|
||||
$config = new Config();
|
||||
} else {
|
||||
$config = $this->config;
|
||||
}
|
||||
@ -196,7 +180,7 @@ class Html
|
||||
if (isset($rules['filter'])) {
|
||||
foreach ($rules['filter'] as $pattern => $rule) {
|
||||
if (preg_match($pattern, $sub_url)) {
|
||||
foreach($rule as $search => $replace) {
|
||||
foreach ($rule as $search => $replace) {
|
||||
$content = preg_replace($search, $replace, $content);
|
||||
}
|
||||
}
|
||||
@ -207,23 +191,20 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse opening tag
|
||||
* Parse opening tag.
|
||||
*
|
||||
* @access public
|
||||
* @param resource $parser XML parser
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Tag attributes
|
||||
* @param resource $parser XML parser
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Tag attributes
|
||||
*/
|
||||
public function startTag($parser, $tag, array $attributes)
|
||||
{
|
||||
$this->empty = true;
|
||||
|
||||
if ($this->tag->isAllowed($tag, $attributes)) {
|
||||
|
||||
$attributes = $this->attribute->filter($tag, $attributes);
|
||||
|
||||
if ($this->attribute->hasRequiredAttributes($tag, $attributes)) {
|
||||
|
||||
$attributes = $this->attribute->addAttributes($tag, $attributes);
|
||||
|
||||
$this->output .= $this->tag->openHtmlTag($tag, $this->attribute->toHtml($attributes));
|
||||
@ -235,25 +216,23 @@ class Html
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse closing tag
|
||||
* Parse closing tag.
|
||||
*
|
||||
* @access public
|
||||
* @param resource $parser XML parser
|
||||
* @param string $tag Tag name
|
||||
* @param resource $parser XML parser
|
||||
* @param string $tag Tag name
|
||||
*/
|
||||
public function endTag($parser, $tag)
|
||||
{
|
||||
if (! array_pop($this->empty_tags) && $this->tag->isAllowedTag($tag)) {
|
||||
if (!array_pop($this->empty_tags) && $this->tag->isAllowedTag($tag)) {
|
||||
$this->output .= $this->tag->closeHtmlTag($tag);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse tag content
|
||||
* Parse tag content.
|
||||
*
|
||||
* @access public
|
||||
* @param resource $parser XML parser
|
||||
* @param string $content Tag content
|
||||
* @param resource $parser XML parser
|
||||
* @param string $content Tag content
|
||||
*/
|
||||
public function dataTag($parser, $content)
|
||||
{
|
||||
|
@ -3,30 +3,26 @@
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
use DOMXpath;
|
||||
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
use PicoFeed\Config\Config;
|
||||
|
||||
/**
|
||||
* Tag Filter class
|
||||
* Tag Filter class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Filter
|
||||
*/
|
||||
class Tag
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
* Config object.
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* Tags blacklist (Xpath expressions)
|
||||
* Tags blacklist (Xpath expressions).
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $tag_blacklist = array(
|
||||
@ -35,9 +31,8 @@ class Tag
|
||||
);
|
||||
|
||||
/**
|
||||
* Tags whitelist
|
||||
* Tags whitelist.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $tag_whitelist = array(
|
||||
@ -87,24 +82,24 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the tag is allowed and is not a pixel tracker
|
||||
* Check if the tag is allowed and is not a pixel tracker.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes dictionary
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes dictionary
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isAllowed($tag, array $attributes)
|
||||
{
|
||||
return $this->isAllowedTag($tag) && ! $this->isPixelTracker($tag, $attributes);
|
||||
return $this->isAllowedTag($tag) && !$this->isPixelTracker($tag, $attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the HTML opening tag
|
||||
* Return the HTML opening tag.
|
||||
*
|
||||
* @param string $tag Tag name
|
||||
* @param string $attributes Attributes converted in html
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attributes Attributes converted in html
|
||||
* @return string
|
||||
*/
|
||||
public function openHtmlTag($tag, $attributes = '')
|
||||
@ -113,10 +108,10 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the HTML closing tag
|
||||
* Return the HTML closing tag.
|
||||
*
|
||||
* @param string $tag Tag name
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @return string
|
||||
*/
|
||||
public function closeHtmlTag($tag)
|
||||
@ -125,11 +120,11 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the tag is self-closing
|
||||
* Return true if the tag is self-closing.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isSelfClosingTag($tag)
|
||||
{
|
||||
@ -137,11 +132,11 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a tag is on the whitelist
|
||||
* Check if a tag is on the whitelist.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isAllowedTag($tag)
|
||||
{
|
||||
@ -152,12 +147,12 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an image tag is a pixel tracker
|
||||
* Detect if an image tag is a pixel tracker.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Tag attributes
|
||||
* @return boolean
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Tag attributes
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function isPixelTracker($tag, array $attributes)
|
||||
{
|
||||
@ -167,10 +162,10 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove script tags
|
||||
* Remove script tags.
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public function removeBlacklistedTags($data)
|
||||
@ -192,12 +187,11 @@ class Tag
|
||||
return $dom->saveXML();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Remove empty tags
|
||||
* Remove empty tags.
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public function removeEmptyTags($data)
|
||||
@ -206,27 +200,28 @@ class Tag
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace <br/><br/> by only one
|
||||
* Replace <br/><br/> by only one.
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public function removeMultipleBreakTags($data)
|
||||
{
|
||||
return preg_replace("/(<br\s*\/?>\s*)+/", "<br/>", $data);
|
||||
return preg_replace("/(<br\s*\/?>\s*)+/", '<br/>', $data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whitelisted tags and attributes for each tag
|
||||
* Set whitelisted tags and attributes for each tag.
|
||||
*
|
||||
* @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
|
||||
* @return Tag
|
||||
*/
|
||||
public function setWhitelistedTags(array $values)
|
||||
{
|
||||
$this->tag_whitelist = $values ?: $this->tag_whitelist;
|
||||
|
||||
return $this;
|
||||
}
|
||||
}
|
||||
|
@ -6,45 +6,43 @@ use DateTime;
|
||||
use DateTimeZone;
|
||||
|
||||
/**
|
||||
* Logging class
|
||||
* Logging class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Logging
|
||||
*/
|
||||
class Logger
|
||||
{
|
||||
/**
|
||||
* List of messages
|
||||
* List of messages.
|
||||
*
|
||||
* @static
|
||||
* @access private
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private static $messages = array();
|
||||
|
||||
/**
|
||||
* Default timezone
|
||||
* Default timezone.
|
||||
*
|
||||
* @static
|
||||
* @access private
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private static $timezone = 'UTC';
|
||||
|
||||
/**
|
||||
* Enable or disable logging
|
||||
* Enable or disable logging.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @var boolean
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
public static $enable = false;
|
||||
|
||||
/**
|
||||
* Enable logging
|
||||
* Enable logging.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*/
|
||||
public static function enable()
|
||||
{
|
||||
@ -52,11 +50,11 @@ class Logger
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new message
|
||||
* Add a new message.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $message Message
|
||||
*
|
||||
* @param string $message Message
|
||||
*/
|
||||
public static function setMessage($message)
|
||||
{
|
||||
@ -67,10 +65,10 @@ class Logger
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all logged messages
|
||||
* Get all logged messages.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public static function getMessages()
|
||||
@ -79,10 +77,9 @@ class Logger
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove all logged messages
|
||||
* Remove all logged messages.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*/
|
||||
public static function deleteMessages()
|
||||
{
|
||||
@ -90,12 +87,13 @@ class Logger
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a different timezone
|
||||
* Set a different timezone.
|
||||
*
|
||||
* @static
|
||||
*
|
||||
* @see http://php.net/manual/en/timezones.php
|
||||
* @access public
|
||||
* @param string $timezone Timezone
|
||||
*
|
||||
* @param string $timezone Timezone
|
||||
*/
|
||||
public static function setTimeZone($timezone)
|
||||
{
|
||||
@ -103,10 +101,10 @@ class Logger
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all messages serialized into a string
|
||||
* Get all messages serialized into a string.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function toString()
|
||||
|
@ -7,25 +7,24 @@ use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
/**
|
||||
* Atom parser
|
||||
* Atom parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Atom extends Parser
|
||||
{
|
||||
/**
|
||||
* Supported namespaces
|
||||
* Supported namespaces.
|
||||
*/
|
||||
protected $namespaces = array(
|
||||
'atom' => 'http://www.w3.org/2005/Atom',
|
||||
);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function getItemsTree(SimpleXMLElement $xml)
|
||||
@ -35,11 +34,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed url
|
||||
* Find the feed url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -47,11 +45,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the site url
|
||||
* Find the site url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -59,11 +56,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed description
|
||||
* Find the feed description.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -74,11 +70,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed logo url
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -89,11 +84,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed icon
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -104,11 +98,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed title
|
||||
* Find the feed title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -119,11 +112,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed language
|
||||
* Find the feed language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -134,11 +126,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed id
|
||||
* Find the feed id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -149,11 +140,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed date
|
||||
* Find the feed date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -164,12 +154,11 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item date
|
||||
* Find the item date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -179,26 +168,23 @@ class Atom extends Parser
|
||||
$updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'updated');
|
||||
|
||||
$published = ! empty($published) ? $this->date->getDateTime((string) current($published)) : null;
|
||||
$updated = ! empty($updated) ? $this->date->getDateTime((string) current($updated)) : null;
|
||||
$published = !empty($published) ? $this->date->getDateTime((string) current($published)) : null;
|
||||
$updated = !empty($updated) ? $this->date->getDateTime((string) current($updated)) : null;
|
||||
|
||||
if ($published === null && $updated === null) {
|
||||
$item->date = $feed->getDate(); // We use the feed date if there is no date for the item
|
||||
}
|
||||
else if ($published !== null && $updated !== null) {
|
||||
} elseif ($published !== null && $updated !== null) {
|
||||
$item->date = max($published, $updated); // We use the most recent date between published and updated
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$item->date = $updated ?: $published;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item title
|
||||
* Find the item title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -209,12 +195,11 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item author
|
||||
* Find the item author.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -227,11 +212,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item content
|
||||
* Find the item content.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -239,11 +223,10 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item URL
|
||||
* Find the item URL.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemUrl(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -251,22 +234,20 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the item id
|
||||
* Generate the item id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$id = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces)
|
||||
?: XmlParser::getXPathResult($entry, 'id');
|
||||
|
||||
if (! empty($id)) {
|
||||
if (!empty($id)) {
|
||||
$item->id = $this->generateId((string) current($id));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$item->id = $this->generateId(
|
||||
$item->getTitle(), $item->getUrl(), $item->getContent()
|
||||
);
|
||||
@ -274,12 +255,11 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item enclosure
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -292,12 +272,11 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item language
|
||||
* Find the item language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -307,11 +286,11 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the URL from a link tag
|
||||
* Get the URL from a link tag.
|
||||
*
|
||||
* @param SimpleXMLElement $xml XML tag
|
||||
* @param string $rel Link relationship: alternate, enclosure, related, self, via
|
||||
*
|
||||
* @access private
|
||||
* @param SimpleXMLElement $xml XML tag
|
||||
* @param string $rel Link relationship: alternate, enclosure, related, self, via
|
||||
* @return string
|
||||
*/
|
||||
private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false)
|
||||
@ -324,6 +303,7 @@ class Atom extends Parser
|
||||
|
||||
if ($fallback) {
|
||||
$link = $this->findLink($xml, '');
|
||||
|
||||
return $link ? (string) $link['href'] : '';
|
||||
}
|
||||
|
||||
@ -331,11 +311,11 @@ class Atom extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a link tag that matches a relationship
|
||||
* Get a link tag that matches a relationship.
|
||||
*
|
||||
* @param SimpleXMLElement $xml XML tag
|
||||
* @param string $rel Link relationship: alternate, enclosure, related, self, via
|
||||
*
|
||||
* @access private
|
||||
* @param SimpleXMLElement $xml XML tag
|
||||
* @param string $rel Link relationship: alternate, enclosure, related, self, via
|
||||
* @return SimpleXMLElement|null
|
||||
*/
|
||||
private function findLink(SimpleXMLElement $xml, $rel)
|
||||
@ -349,14 +329,14 @@ class Atom extends Parser
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the entry content
|
||||
* Get the entry content.
|
||||
*
|
||||
* @param SimpleXMLElement $entry XML Entry
|
||||
*
|
||||
* @access private
|
||||
* @param SimpleXMLElement $entry XML Entry
|
||||
* @return string
|
||||
*/
|
||||
private function getContent(SimpleXMLElement $entry)
|
||||
@ -366,16 +346,15 @@ class Atom extends Parser
|
||||
?: XmlParser::getXPathResult($entry, 'content')
|
||||
);
|
||||
|
||||
if (! empty($content) && count($content->children())) {
|
||||
if (!empty($content) && count($content->children())) {
|
||||
$xml_string = '';
|
||||
|
||||
foreach($content->children() as $child) {
|
||||
foreach ($content->children() as $child) {
|
||||
$xml_string .= $child->asXML();
|
||||
}
|
||||
|
||||
return $xml_string;
|
||||
}
|
||||
else if (trim((string) $content) !== '') {
|
||||
} elseif (trim((string) $content) !== '') {
|
||||
return (string) $content;
|
||||
}
|
||||
|
||||
|
@ -6,25 +6,22 @@ use DateTime;
|
||||
use DateTimeZone;
|
||||
|
||||
/**
|
||||
* Date Parser
|
||||
* Date Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class DateParser
|
||||
{
|
||||
/**
|
||||
* Timezone used to parse feed dates
|
||||
* Timezone used to parse feed dates.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $timezone = 'UTC';
|
||||
|
||||
/**
|
||||
* Supported formats [ 'format' => length ]
|
||||
* Supported formats [ 'format' => length ].
|
||||
*
|
||||
* @access public
|
||||
* @var array
|
||||
*/
|
||||
public $formats = array(
|
||||
@ -56,10 +53,10 @@ class DateParser
|
||||
);
|
||||
|
||||
/**
|
||||
* Try to parse all date formats for broken feeds
|
||||
* Try to parse all date formats for broken feeds.
|
||||
*
|
||||
* @param string $value Original date format
|
||||
*
|
||||
* @access public
|
||||
* @param string $value Original date format
|
||||
* @return DateTime
|
||||
*/
|
||||
public function getDateTime($value)
|
||||
@ -67,7 +64,6 @@ class DateParser
|
||||
$value = trim($value);
|
||||
|
||||
foreach ($this->formats as $format => $length) {
|
||||
|
||||
$truncated_value = $value;
|
||||
if ($length !== null) {
|
||||
$truncated_value = substr($truncated_value, 0, $length);
|
||||
@ -83,19 +79,18 @@ class DateParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a valid date from a given format
|
||||
* Get a valid date from a given format.
|
||||
*
|
||||
* @access public
|
||||
* @param string $format Date format
|
||||
* @param string $value Original date value
|
||||
* @return DateTime|boolean
|
||||
* @param string $format Date format
|
||||
* @param string $value Original date value
|
||||
*
|
||||
* @return DateTime|bool
|
||||
*/
|
||||
public function getValidDate($format, $value)
|
||||
{
|
||||
$date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone));
|
||||
|
||||
if ($date !== false) {
|
||||
|
||||
$errors = DateTime::getLastErrors();
|
||||
|
||||
if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) {
|
||||
@ -107,9 +102,8 @@ class DateParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current datetime
|
||||
* Get the current datetime.
|
||||
*
|
||||
* @access public
|
||||
* @return DateTime
|
||||
*/
|
||||
public function getCurrentDateTime()
|
||||
|
@ -3,98 +3,84 @@
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* Feed
|
||||
* Feed.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Feed
|
||||
{
|
||||
/**
|
||||
* Feed items
|
||||
* Feed items.
|
||||
*
|
||||
* @access public
|
||||
* @var array
|
||||
*/
|
||||
public $items = array();
|
||||
|
||||
/**
|
||||
* Feed id
|
||||
* Feed id.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $id = '';
|
||||
|
||||
/**
|
||||
* Feed title
|
||||
* Feed title.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $title = '';
|
||||
|
||||
/**
|
||||
* Feed description
|
||||
* Feed description.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $description = '';
|
||||
|
||||
/**
|
||||
* Feed url
|
||||
* Feed url.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $feed_url = '';
|
||||
|
||||
/**
|
||||
* Site url
|
||||
* Site url.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $site_url = '';
|
||||
|
||||
/**
|
||||
* Feed date
|
||||
* Feed date.
|
||||
*
|
||||
* @access public
|
||||
* @var \DateTime
|
||||
*/
|
||||
public $date = null;
|
||||
|
||||
/**
|
||||
* Feed language
|
||||
* Feed language.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $language = '';
|
||||
|
||||
/**
|
||||
* Feed logo URL
|
||||
* Feed logo URL.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $logo = '';
|
||||
|
||||
/**
|
||||
* Feed icon URL
|
||||
* Feed icon URL.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $icon = '';
|
||||
|
||||
/**
|
||||
* Return feed information
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Return feed information.
|
||||
*/
|
||||
public function __toString()
|
||||
{
|
||||
@ -117,10 +103,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get title
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get title.
|
||||
*/
|
||||
public function getTitle()
|
||||
{
|
||||
@ -128,10 +111,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get description
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get description.
|
||||
*/
|
||||
public function getDescription()
|
||||
{
|
||||
@ -139,10 +119,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the logo url
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get the logo url.
|
||||
*/
|
||||
public function getLogo()
|
||||
{
|
||||
@ -150,10 +127,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the icon url
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get the icon url.
|
||||
*/
|
||||
public function getIcon()
|
||||
{
|
||||
@ -161,10 +135,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get feed url
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get feed url.
|
||||
*/
|
||||
public function getFeedUrl()
|
||||
{
|
||||
@ -172,10 +143,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get site url
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get site url.
|
||||
*/
|
||||
public function getSiteUrl()
|
||||
{
|
||||
@ -183,10 +151,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get date
|
||||
*
|
||||
* @access public
|
||||
* $return integer
|
||||
* Get date.
|
||||
*/
|
||||
public function getDate()
|
||||
{
|
||||
@ -194,10 +159,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get language
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get language.
|
||||
*/
|
||||
public function getLanguage()
|
||||
{
|
||||
@ -205,10 +167,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get id
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get id.
|
||||
*/
|
||||
public function getId()
|
||||
{
|
||||
@ -216,10 +175,7 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Get feed items
|
||||
*
|
||||
* @access public
|
||||
* $return array
|
||||
* Get feed items.
|
||||
*/
|
||||
public function getItems()
|
||||
{
|
||||
@ -227,9 +183,8 @@ class Feed
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the feed is "Right to Left"
|
||||
* Return true if the feed is "Right to Left".
|
||||
*
|
||||
* @access public
|
||||
* @return bool
|
||||
*/
|
||||
public function isRTL()
|
||||
|
@ -3,17 +3,15 @@
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* Feed Item
|
||||
* Feed Item.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Item
|
||||
{
|
||||
/**
|
||||
* List of known RTL languages
|
||||
* List of known RTL languages.
|
||||
*
|
||||
* @access public
|
||||
* @var array
|
||||
*/
|
||||
public $rtl = array(
|
||||
@ -28,100 +26,89 @@ class Item
|
||||
);
|
||||
|
||||
/**
|
||||
* Item id
|
||||
* Item id.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $id = '';
|
||||
|
||||
/**
|
||||
* Item title
|
||||
* Item title.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $title = '';
|
||||
|
||||
/**
|
||||
* Item url
|
||||
* Item url.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $url = '';
|
||||
|
||||
/**
|
||||
* Item author
|
||||
* Item author.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $author= '';
|
||||
public $author = '';
|
||||
|
||||
/**
|
||||
* Item date
|
||||
* Item date.
|
||||
*
|
||||
* @access public
|
||||
* @var \DateTime
|
||||
*/
|
||||
public $date = null;
|
||||
|
||||
/**
|
||||
* Item content
|
||||
* Item content.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $content = '';
|
||||
|
||||
/**
|
||||
* Item enclosure url
|
||||
* Item enclosure url.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $enclosure_url = '';
|
||||
|
||||
/**
|
||||
* Item enclosure type
|
||||
* Item enclosure type.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $enclosure_type = '';
|
||||
|
||||
/**
|
||||
* Item language
|
||||
* Item language.
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $language = '';
|
||||
|
||||
/**
|
||||
* Raw XML
|
||||
* Raw XML.
|
||||
*
|
||||
* @access public
|
||||
* @var \SimpleXMLElement
|
||||
*/
|
||||
public $xml;
|
||||
|
||||
/**
|
||||
* List of namespaces
|
||||
* List of namespaces.
|
||||
*
|
||||
* @access public
|
||||
* @var array
|
||||
*/
|
||||
public $namespaces = array();
|
||||
|
||||
/**
|
||||
* Get specific XML tag or attribute value
|
||||
* Get specific XML tag or attribute value.
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name (examples: guid, media:content)
|
||||
* @param string $attribute Tag attribute
|
||||
* @return array|false Tag values or error
|
||||
* @param string $tag Tag name (examples: guid, media:content)
|
||||
* @param string $attribute Tag attribute
|
||||
*
|
||||
* @return array|false Tag values or error
|
||||
*/
|
||||
public function getTag($tag, $attribute = '')
|
||||
{
|
||||
@ -142,10 +129,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Return item information
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Return item information.
|
||||
*/
|
||||
public function __toString()
|
||||
{
|
||||
@ -163,10 +147,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get title
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get title.
|
||||
*/
|
||||
public function getTitle()
|
||||
{
|
||||
@ -174,10 +155,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get url
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get url.
|
||||
*/
|
||||
public function getUrl()
|
||||
{
|
||||
@ -185,10 +163,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get id
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get id.
|
||||
*/
|
||||
public function getId()
|
||||
{
|
||||
@ -196,10 +171,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get date
|
||||
*
|
||||
* @access public
|
||||
* $return \DateTime
|
||||
* Get date.
|
||||
*/
|
||||
public function getDate()
|
||||
{
|
||||
@ -207,10 +179,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get content
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get content.
|
||||
*/
|
||||
public function getContent()
|
||||
{
|
||||
@ -218,10 +187,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get enclosure url
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get enclosure url.
|
||||
*/
|
||||
public function getEnclosureUrl()
|
||||
{
|
||||
@ -229,10 +195,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get enclosure type
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get enclosure type.
|
||||
*/
|
||||
public function getEnclosureType()
|
||||
{
|
||||
@ -240,10 +203,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get language
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get language.
|
||||
*/
|
||||
public function getLanguage()
|
||||
{
|
||||
@ -251,10 +211,7 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Get author
|
||||
*
|
||||
* @access public
|
||||
* $return string
|
||||
* Get author.
|
||||
*/
|
||||
public function getAuthor()
|
||||
{
|
||||
@ -262,9 +219,8 @@ class Item
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the item is "Right to Left"
|
||||
* Return true if the item is "Right to Left".
|
||||
*
|
||||
* @access public
|
||||
* @return bool
|
||||
*/
|
||||
public function isRTL()
|
||||
|
@ -3,11 +3,10 @@
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* MalformedXmlException Exception
|
||||
* MalformedXmlException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class MalformedXmlException extends ParserException
|
||||
{
|
||||
}
|
||||
}
|
||||
|
@ -10,112 +10,99 @@ use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Scraper\Scraper;
|
||||
|
||||
/**
|
||||
* Base parser class
|
||||
* Base parser class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
abstract class Parser
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
* Config object.
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* DateParser object
|
||||
* DateParser object.
|
||||
*
|
||||
* @access protected
|
||||
* @var \PicoFeed\Parser\DateParser
|
||||
*/
|
||||
protected $date;
|
||||
|
||||
/**
|
||||
* Hash algorithm used to generate item id, any value supported by PHP, see hash_algos()
|
||||
* Hash algorithm used to generate item id, any value supported by PHP, see hash_algos().
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $hash_algo = 'sha256';
|
||||
|
||||
/**
|
||||
* Feed content (XML data)
|
||||
* Feed content (XML data).
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $content = '';
|
||||
|
||||
/**
|
||||
* Fallback url
|
||||
* Fallback url.
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $fallback_url = '';
|
||||
|
||||
/**
|
||||
* XML namespaces supported by parser
|
||||
* XML namespaces supported by parser.
|
||||
*
|
||||
* @access protected
|
||||
* @var array
|
||||
*/
|
||||
protected $namespaces = array();
|
||||
|
||||
/**
|
||||
* XML namespaces used in document
|
||||
* XML namespaces used in document.
|
||||
*
|
||||
* @access protected
|
||||
* @var array
|
||||
*/
|
||||
protected $used_namespaces = array();
|
||||
|
||||
/**
|
||||
* Enable the content filtering
|
||||
* Enable the content filtering.
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $enable_filter = true;
|
||||
|
||||
/**
|
||||
* Enable the content grabber
|
||||
* Enable the content grabber.
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $enable_grabber = false;
|
||||
|
||||
/**
|
||||
* Enable the content grabber on all pages
|
||||
* Enable the content grabber on all pages.
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $grabber_needs_rule_file = false;
|
||||
|
||||
/**
|
||||
* Ignore those urls for the content scraper
|
||||
* Ignore those urls for the content scraper.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $grabber_ignore_urls = array();
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* Constructor.
|
||||
*
|
||||
* @access public
|
||||
* @param string $content Feed content
|
||||
* @param string $http_encoding HTTP encoding (headers)
|
||||
* @param string $fallback_url Fallback url when the feed provides relative or broken url
|
||||
* @param string $content Feed content
|
||||
* @param string $http_encoding HTTP encoding (headers)
|
||||
* @param string $fallback_url Fallback url when the feed provides relative or broken url
|
||||
*/
|
||||
public function __construct($content, $http_encoding = '', $fallback_url = '')
|
||||
{
|
||||
$this->date = new DateParser;
|
||||
$this->date = new DateParser();
|
||||
$this->fallback_url = $fallback_url;
|
||||
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
|
||||
|
||||
@ -128,9 +115,8 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the document
|
||||
* Parse the document.
|
||||
*
|
||||
* @access public
|
||||
* @return \PicoFeed\Parser\Feed
|
||||
*/
|
||||
public function execute()
|
||||
@ -154,7 +140,7 @@ abstract class Parser
|
||||
$this->used_namespaces = $xml->getNamespaces(true);
|
||||
$xml = $this->registerSupportedNamespaces($xml);
|
||||
|
||||
$feed = new Feed;
|
||||
$feed = new Feed();
|
||||
|
||||
$this->findFeedUrl($xml, $feed);
|
||||
$this->checkFeedUrl($feed);
|
||||
@ -171,10 +157,9 @@ abstract class Parser
|
||||
$this->findFeedIcon($xml, $feed);
|
||||
|
||||
foreach ($this->getItemsTree($xml) as $entry) {
|
||||
|
||||
$entry = $this->registerSupportedNamespaces($entry);
|
||||
|
||||
$item = new Item;
|
||||
$item = new Item();
|
||||
$item->xml = $entry;
|
||||
$item->namespaces = $this->used_namespaces;
|
||||
|
||||
@ -206,43 +191,38 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the feed url is correct
|
||||
* Check if the feed url is correct.
|
||||
*
|
||||
* @access public
|
||||
* @param Feed $feed Feed object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function checkFeedUrl(Feed $feed)
|
||||
{
|
||||
if ($feed->getFeedUrl() === '') {
|
||||
$feed->feed_url = $this->fallback_url;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$feed->feed_url = Url::resolve($feed->getFeedUrl(), $this->fallback_url);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the site url is correct
|
||||
* Check if the site url is correct.
|
||||
*
|
||||
* @access public
|
||||
* @param Feed $feed Feed object
|
||||
* @param Feed $feed Feed object
|
||||
*/
|
||||
public function checkSiteUrl(Feed $feed)
|
||||
{
|
||||
if ($feed->getSiteUrl() === '') {
|
||||
$feed->site_url = Url::base($feed->getFeedUrl());
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$feed->site_url = Url::resolve($feed->getSiteUrl(), $this->fallback_url);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the item url is correct
|
||||
* Check if the item url is correct.
|
||||
*
|
||||
* @access public
|
||||
* @param Feed $feed Feed object
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function checkItemUrl(Feed $feed, Item $item)
|
||||
{
|
||||
@ -250,15 +230,13 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch item content with the content grabber
|
||||
* Fetch item content with the content grabber.
|
||||
*
|
||||
* @access public
|
||||
* @param Item $item Item object
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function scrapWebsite(Item $item)
|
||||
{
|
||||
if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) {
|
||||
|
||||
if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) {
|
||||
$grabber = new Scraper($this->config);
|
||||
$grabber->setUrl($item->getUrl());
|
||||
|
||||
@ -275,11 +253,10 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter HTML for entry content
|
||||
* Filter HTML for entry content.
|
||||
*
|
||||
* @access public
|
||||
* @param Feed $feed Feed object
|
||||
* @param Item $item Item object
|
||||
* @param Feed $feed Feed object
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function filterItemContent(Feed $feed, Item $item)
|
||||
{
|
||||
@ -287,16 +264,14 @@ abstract class Parser
|
||||
$filter = Filter::html($item->getContent(), $feed->getSiteUrl());
|
||||
$filter->setConfig($this->config);
|
||||
$item->content = $filter->execute();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
Logger::setMessage(get_called_class().': Content filtering disabled');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a unique id for an entry (hash all arguments)
|
||||
* Generate a unique id for an entry (hash all arguments).
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function generateId()
|
||||
@ -305,11 +280,12 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the given language is "Right to Left"
|
||||
* Return true if the given language is "Right to Left".
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $language Language: fr-FR, en-US
|
||||
*
|
||||
* @param string $language Language: fr-FR, en-US
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public static function isLanguageRTL($language)
|
||||
@ -337,24 +313,26 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Set Hash algorithm used for id generation
|
||||
* Set Hash algorithm used for id generation.
|
||||
*
|
||||
* @param string $algo Algorithm name
|
||||
*
|
||||
* @access public
|
||||
* @param string $algo Algorithm name
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setHashAlgo($algo)
|
||||
{
|
||||
$this->hash_algo = $algo ?: $this->hash_algo;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a different timezone
|
||||
* Set a different timezone.
|
||||
*
|
||||
* @see http://php.net/manual/en/timezones.php
|
||||
* @access public
|
||||
* @param string $timezone Timezone
|
||||
*
|
||||
* @param string $timezone Timezone
|
||||
*
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setTimezone($timezone)
|
||||
@ -367,22 +345,22 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Set config object
|
||||
* Set config object.
|
||||
*
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config\Config $config Config instance
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setConfig($config)
|
||||
{
|
||||
$this->config = $config;
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Disable the content filtering
|
||||
* Disable the content filtering.
|
||||
*
|
||||
* @access public
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function disableContentFiltering()
|
||||
@ -391,10 +369,9 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the content filtering is enabled
|
||||
* Return true if the content filtering is enabled.
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
* @return bool
|
||||
*/
|
||||
public function isFilteringEnabled()
|
||||
{
|
||||
@ -406,11 +383,11 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable the content grabber
|
||||
* Enable the content grabber.
|
||||
*
|
||||
* @access public
|
||||
* @param bool $needs_rule_file true if only pages with rule files should be
|
||||
* scraped
|
||||
* scraped
|
||||
*
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function enableContentGrabber($needs_rule_file = false)
|
||||
@ -420,10 +397,10 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Set ignored URLs for the content grabber
|
||||
* Set ignored URLs for the content grabber.
|
||||
*
|
||||
* @param array $urls URLs
|
||||
*
|
||||
* @access public
|
||||
* @param array $urls URLs
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function setGrabberIgnoreUrls(array $urls)
|
||||
@ -432,10 +409,10 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Register all supported namespaces to be used within an xpath query
|
||||
* Register all supported namespaces to be used within an xpath query.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function registerSupportedNamespaces(SimpleXMLElement $xml)
|
||||
@ -448,169 +425,152 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed url
|
||||
* Find the feed url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the site url
|
||||
* Find the site url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed title
|
||||
* Find the feed title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed description
|
||||
* Find the feed description.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed language
|
||||
* Find the feed language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed id
|
||||
* Find the feed id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedId(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed date
|
||||
* Find the feed date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedDate(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed logo url
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the feed icon
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
|
||||
abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public abstract function getItemsTree(SimpleXMLElement $xml);
|
||||
abstract public function getItemsTree(SimpleXMLElement $xml);
|
||||
|
||||
/**
|
||||
* Find the item author
|
||||
* Find the item author.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public abstract function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
|
||||
abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item URL
|
||||
* Find the item URL.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public abstract function findItemUrl(SimpleXMLElement $entry, Item $item);
|
||||
abstract public function findItemUrl(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item title
|
||||
* Find the item title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public abstract function findItemTitle(SimpleXMLElement $entry, Item $item);
|
||||
abstract public function findItemTitle(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Genereate the item id
|
||||
* Genereate the item id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item date
|
||||
* Find the item date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item content
|
||||
* Find the item content.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public abstract function findItemContent(SimpleXMLElement $entry, Item $item);
|
||||
abstract public function findItemContent(SimpleXMLElement $entry, Item $item);
|
||||
|
||||
/**
|
||||
* Find the item enclosure
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
|
||||
/**
|
||||
* Find the item language
|
||||
* Find the item language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public abstract function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
|
||||
}
|
||||
|
@ -4,13 +4,11 @@ namespace PicoFeed\Parser;
|
||||
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
|
||||
/**
|
||||
* ParserException Exception
|
||||
* ParserException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
abstract class ParserException extends PicoFeedException
|
||||
{
|
||||
}
|
||||
}
|
||||
|
@ -6,15 +6,14 @@ use SimpleXMLElement;
|
||||
use PicoFeed\Filter\Filter;
|
||||
|
||||
/**
|
||||
* RSS 1.0 parser
|
||||
* RSS 1.0 parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Rss10 extends Parser
|
||||
{
|
||||
/**
|
||||
* Supported namespaces
|
||||
* Supported namespaces.
|
||||
*/
|
||||
protected $namespaces = array(
|
||||
'rss' => 'http://purl.org/rss/1.0/',
|
||||
@ -24,10 +23,10 @@ class Rss10 extends Parser
|
||||
);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function getItemsTree(SimpleXMLElement $xml)
|
||||
@ -37,11 +36,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed url
|
||||
* Find the feed url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -49,11 +47,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the site url
|
||||
* Find the site url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -64,11 +61,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed description
|
||||
* Find the feed description.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -79,11 +75,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed logo url
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -94,11 +89,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed icon
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -106,11 +100,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed title
|
||||
* Find the feed title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -121,11 +114,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed language
|
||||
* Find the feed language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -136,11 +128,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed id
|
||||
* Find the feed id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -148,11 +139,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed date
|
||||
* Find the feed date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -163,12 +153,11 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item date
|
||||
* Find the item date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -178,11 +167,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item title
|
||||
* Find the item title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -193,12 +181,11 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item author
|
||||
* Find the item author.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -210,11 +197,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item content
|
||||
* Find the item content.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -229,11 +215,10 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item URL
|
||||
* Find the item URL.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemUrl(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -245,12 +230,11 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Genereate the item id
|
||||
* Genereate the item id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -260,24 +244,22 @@ class Rss10 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item enclosure
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item language
|
||||
* Find the item language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
|
@ -7,28 +7,27 @@ use PicoFeed\Filter\Filter;
|
||||
use PicoFeed\Client\Url;
|
||||
|
||||
/**
|
||||
* RSS 2.0 Parser
|
||||
* RSS 2.0 Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Rss20 extends Parser
|
||||
{
|
||||
/**
|
||||
* Supported namespaces
|
||||
* Supported namespaces.
|
||||
*/
|
||||
protected $namespaces = array(
|
||||
'dc' => 'http://purl.org/dc/elements/1.1/',
|
||||
'content' => 'http://purl.org/rss/1.0/modules/content/',
|
||||
'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
|
||||
'atom' => 'http://www.w3.org/2005/Atom'
|
||||
'atom' => 'http://www.w3.org/2005/Atom',
|
||||
);
|
||||
|
||||
/**
|
||||
* Get the path to the items XML tree
|
||||
* Get the path to the items XML tree.
|
||||
*
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function getItemsTree(SimpleXMLElement $xml)
|
||||
@ -37,11 +36,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed url
|
||||
* Find the feed url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -49,11 +47,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the site url
|
||||
* Find the site url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -62,11 +59,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed description
|
||||
* Find the feed description.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -75,11 +71,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed logo url
|
||||
* Find the feed logo url.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -88,11 +83,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed icon
|
||||
* Find the feed icon.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -100,11 +94,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed title
|
||||
* Find the feed title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -113,11 +106,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed language
|
||||
* Find the feed language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -126,11 +118,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed id
|
||||
* Find the feed id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
@ -138,38 +129,34 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed date
|
||||
* Find the feed date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$publish_date = XmlParser::getXPathResult($xml, 'channel/pubDate');
|
||||
$update_date = XmlParser::getXPathResult($xml, 'channel/lastBuildDate');
|
||||
|
||||
$published = ! empty($publish_date) ? $this->date->getDateTime((string) current($publish_date)) : null;
|
||||
$updated = ! empty($update_date) ? $this->date->getDateTime((string) current($update_date)) : null;
|
||||
$published = !empty($publish_date) ? $this->date->getDateTime((string) current($publish_date)) : null;
|
||||
$updated = !empty($update_date) ? $this->date->getDateTime((string) current($update_date)) : null;
|
||||
|
||||
if ($published === null && $updated === null) {
|
||||
$feed->date = $this->date->getCurrentDateTime(); // We use the current date if there is no date for the feed
|
||||
}
|
||||
else if ($published !== null && $updated !== null) {
|
||||
} elseif ($published !== null && $updated !== null) {
|
||||
$feed->date = max($published, $updated); // We use the most recent date between published and updated
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$feed->date = $updated ?: $published;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item date
|
||||
* Find the item date.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -179,11 +166,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item title
|
||||
* Find the item title.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -192,12 +178,11 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item author
|
||||
* Find the item author.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -210,11 +195,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item content
|
||||
* Find the item content.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -228,11 +212,10 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item URL
|
||||
* Find the item URL.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
*/
|
||||
public function findItemUrl(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
@ -240,10 +223,9 @@ class Rss20 extends Parser
|
||||
?: XmlParser::getXPathResult($entry, 'link')
|
||||
?: XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces);
|
||||
|
||||
if (! empty($link)) {
|
||||
if (!empty($link)) {
|
||||
$item->url = trim((string) current($link));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$link = XmlParser::getXPathResult($entry, 'guid');
|
||||
$link = trim((string) current($link));
|
||||
|
||||
@ -254,12 +236,11 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Genereate the item id
|
||||
* Genereate the item id.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -267,8 +248,7 @@ class Rss20 extends Parser
|
||||
|
||||
if ($id) {
|
||||
$item->id = $this->generateId($id);
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$item->id = $this->generateId(
|
||||
$item->getTitle(), $item->getUrl(), $item->getContent()
|
||||
);
|
||||
@ -276,12 +256,11 @@ class Rss20 extends Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item enclosure
|
||||
* Find the item enclosure.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
@ -291,19 +270,17 @@ class Rss20 extends Parser
|
||||
|
||||
$enclosure_type = XmlParser::getXPathResult($entry, 'enclosure/@type');
|
||||
|
||||
|
||||
$item->enclosure_url = Url::resolve((string) current($enclosure_url), $feed->getSiteUrl());
|
||||
$item->enclosure_type = (string) current($enclosure_type);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the item language
|
||||
* Find the item language.
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Parser\Item $item Item object
|
||||
* @param \PicoFeed\Parser\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* RSS 0.91 Parser
|
||||
* RSS 0.91 Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Rss91 extends Rss20
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* RSS 0.92 Parser
|
||||
* RSS 0.92 Parser.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class Rss92 extends Rss20
|
||||
{
|
||||
|
@ -4,25 +4,24 @@ namespace PicoFeed\Parser;
|
||||
|
||||
use Closure;
|
||||
use DomDocument;
|
||||
use DOMXPath;
|
||||
use SimpleXmlElement;
|
||||
|
||||
/**
|
||||
* XML parser class
|
||||
* XML parser class.
|
||||
*
|
||||
* Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Parser
|
||||
*/
|
||||
class XmlParser
|
||||
{
|
||||
/**
|
||||
* Get a SimpleXmlElement instance or return false
|
||||
* Get a SimpleXmlElement instance or return false.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $input XML content
|
||||
*
|
||||
* @param string $input XML content
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public static function getSimpleXml($input)
|
||||
@ -30,10 +29,9 @@ class XmlParser
|
||||
$dom = self::getDomDocument($input);
|
||||
|
||||
if ($dom !== false) {
|
||||
|
||||
$simplexml = simplexml_import_dom($dom);
|
||||
|
||||
if (! $simplexml instanceof SimpleXmlElement) {
|
||||
if (!$simplexml instanceof SimpleXmlElement) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -44,14 +42,14 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan the input for XXE attacks
|
||||
* Scan the input for XXE attacks.
|
||||
*
|
||||
* @param string $input Unsafe input
|
||||
* @param Closure $callback Callback called to build the dom.
|
||||
* Must be an instance of DomDocument and receives the input as argument
|
||||
* @param string $input Unsafe input
|
||||
* @param Closure $callback Callback called to build the dom.
|
||||
* Must be an instance of DomDocument and receives the input as argument
|
||||
*
|
||||
* @return bool|DomDocument False if an XXE attack was discovered,
|
||||
* otherwise the return of the callback
|
||||
* @return bool|DomDocument False if an XXE attack was discovered,
|
||||
* otherwise the return of the callback
|
||||
*/
|
||||
private static function scanInput($input, Closure $callback)
|
||||
{
|
||||
@ -64,8 +62,7 @@ class XmlParser
|
||||
if (strpos($input, '<!ENTITY') !== false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$entityLoaderDisabled = libxml_disable_entity_loader(true);
|
||||
}
|
||||
|
||||
@ -90,11 +87,12 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a DomDocument instance or return false
|
||||
* Get a DomDocument instance or return false.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $input XML content
|
||||
*
|
||||
* @param string $input XML content
|
||||
*
|
||||
* @return \DOMDocument
|
||||
*/
|
||||
public static function getDomDocument($input)
|
||||
@ -104,8 +102,9 @@ class XmlParser
|
||||
}
|
||||
|
||||
$dom = self::scanInput($input, function ($in) {
|
||||
$dom = new DomDocument;
|
||||
$dom = new DomDocument();
|
||||
$dom->loadXml($in, LIBXML_NONET);
|
||||
|
||||
return $dom;
|
||||
});
|
||||
|
||||
@ -118,30 +117,32 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Load HTML document by using a DomDocument instance or return false on failure
|
||||
* Load HTML document by using a DomDocument instance or return false on failure.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $input XML content
|
||||
*
|
||||
* @param string $input XML content
|
||||
*
|
||||
* @return \DOMDocument
|
||||
*/
|
||||
public static function getHtmlDocument($input)
|
||||
{
|
||||
if (empty($input)) {
|
||||
return new DomDocument;
|
||||
return new DomDocument();
|
||||
}
|
||||
|
||||
if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
|
||||
$callback = function ($in) {
|
||||
$dom = new DomDocument;
|
||||
$dom = new DomDocument();
|
||||
$dom->loadHTML($in, LIBXML_NONET);
|
||||
|
||||
return $dom;
|
||||
};
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
$callback = function ($in) {
|
||||
$dom = new DomDocument;
|
||||
$dom = new DomDocument();
|
||||
$dom->loadHTML($in);
|
||||
|
||||
return $dom;
|
||||
};
|
||||
}
|
||||
@ -150,32 +151,33 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a HTML document to XML
|
||||
* Convert a HTML document to XML.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $html HTML document
|
||||
*
|
||||
* @param string $html HTML document
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function HtmlToXml($html)
|
||||
public static function htmlToXml($html)
|
||||
{
|
||||
$dom = self::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);
|
||||
|
||||
return $dom->saveXML($dom->getElementsByTagName('body')->item(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get XML parser errors
|
||||
* Get XML parser errors.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function getErrors()
|
||||
{
|
||||
$errors = array();
|
||||
|
||||
foreach(libxml_get_errors() as $error) {
|
||||
|
||||
foreach (libxml_get_errors() as $error) {
|
||||
$errors[] = sprintf('XML error: %s (Line: %d - Column: %d - Code: %d)',
|
||||
$error->message,
|
||||
$error->line,
|
||||
@ -188,11 +190,12 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the encoding from a xml tag
|
||||
* Get the encoding from a xml tag.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function getEncodingFromXmlTag($data)
|
||||
@ -200,7 +203,6 @@ class XmlParser
|
||||
$encoding = '';
|
||||
|
||||
if (strpos($data, '<?xml') !== false) {
|
||||
|
||||
$data = substr($data, 0, strrpos($data, '?>'));
|
||||
$data = str_replace("'", '"', $data);
|
||||
|
||||
@ -217,11 +219,12 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the charset from a meta tag
|
||||
* Get the charset from a meta tag.
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @param string $data Input data
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function getEncodingFromMetaTag($data)
|
||||
@ -236,14 +239,16 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite XPath query to use namespace-uri and local-name derived from prefix
|
||||
* Rewrite XPath query to use namespace-uri and local-name derived from prefix.
|
||||
*
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
*
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
* @return string
|
||||
*/
|
||||
public static function replaceXPathPrefixWithNamespaceURI($query, array $ns) {
|
||||
return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', function($matches) use($ns) {
|
||||
public static function replaceXPathPrefixWithNamespaceURI($query, array $ns)
|
||||
{
|
||||
return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', function ($matches) use ($ns) {
|
||||
// don't try to map the special prefix XML
|
||||
if (strtolower($matches[1]) === 'xml') {
|
||||
return $matches[0];
|
||||
@ -255,16 +260,17 @@ class XmlParser
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the result elements of a XPath query
|
||||
* Get the result elements of a XPath query.
|
||||
*
|
||||
* @param \SimpleXMLElement $xml XML element
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
*
|
||||
* @param \SimpleXMLElement $xml XML element
|
||||
* @param string $query XPath query
|
||||
* @param array $ns Prefix to namespace URI mapping
|
||||
* @return \SimpleXMLElement
|
||||
*/
|
||||
public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
|
||||
{
|
||||
if (! empty($ns)) {
|
||||
if (!empty($ns)) {
|
||||
$query = static::replaceXPathPrefixWithNamespaceURI($query, $ns);
|
||||
}
|
||||
|
||||
|
@ -5,10 +5,9 @@ namespace PicoFeed;
|
||||
use Exception;
|
||||
|
||||
/**
|
||||
* PicoFeedException Exception
|
||||
* PicoFeedException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package exception
|
||||
*/
|
||||
abstract class PicoFeedException extends Exception
|
||||
{
|
||||
|
@ -11,19 +11,17 @@ use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
|
||||
* Favicon class
|
||||
* Favicon class.
|
||||
*
|
||||
* https://en.wikipedia.org/wiki/Favicon
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Reader
|
||||
*/
|
||||
class Favicon
|
||||
{
|
||||
/**
|
||||
* Valid types for favicon (supported by browsers)
|
||||
* Valid types for favicon (supported by browsers).
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $types = array(
|
||||
@ -35,44 +33,39 @@ class Favicon
|
||||
);
|
||||
|
||||
/**
|
||||
* Config class instance
|
||||
* Config class instance.
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* Icon binary content
|
||||
* Icon binary content.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $content = '';
|
||||
|
||||
/**
|
||||
* Icon content type
|
||||
* Icon content type.
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $content_type = '';
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* Constructor.
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config\Config $config Config class instance
|
||||
* @param \PicoFeed\Config\Config $config Config class instance
|
||||
*/
|
||||
public function __construct(Config $config = null)
|
||||
{
|
||||
$this->config = $config ?: new Config;
|
||||
$this->config = $config ?: new Config();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the icon file content (available only after the download)
|
||||
* Get the icon file content (available only after the download).
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getContent()
|
||||
@ -81,9 +74,8 @@ class Favicon
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the icon file type (available only after the download)
|
||||
* Get the icon file type (available only after the download).
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getType()
|
||||
@ -98,9 +90,8 @@ class Favicon
|
||||
}
|
||||
|
||||
/**
|
||||
* Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme)
|
||||
* Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme).
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getDataUri()
|
||||
@ -117,11 +108,11 @@ class Favicon
|
||||
}
|
||||
|
||||
/**
|
||||
* Download and check if a resource exists
|
||||
* Download and check if a resource exists.
|
||||
*
|
||||
* @access public
|
||||
* @param string $url URL
|
||||
* @return \PicoFeed\Client Client instance
|
||||
* @param string $url URL
|
||||
*
|
||||
* @return \PicoFeed\Client Client instance
|
||||
*/
|
||||
public function download($url)
|
||||
{
|
||||
@ -132,8 +123,7 @@ class Favicon
|
||||
|
||||
try {
|
||||
$client->execute($url);
|
||||
}
|
||||
catch (ClientException $e) {
|
||||
} catch (ClientException $e) {
|
||||
Logger::setMessage(get_called_class().' Download Failed => '.$e->getMessage());
|
||||
}
|
||||
|
||||
@ -141,11 +131,11 @@ class Favicon
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a remote file exists
|
||||
* Check if a remote file exists.
|
||||
*
|
||||
* @access public
|
||||
* @param string $url URL
|
||||
* @return boolean
|
||||
* @param string $url URL
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function exists($url)
|
||||
{
|
||||
@ -153,11 +143,11 @@ class Favicon
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the icon link for a website
|
||||
* Get the icon link for a website.
|
||||
*
|
||||
* @param string $website_link URL
|
||||
* @param string $favicon_link optional URL
|
||||
*
|
||||
* @access public
|
||||
* @param string $website_link URL
|
||||
* @param string $favicon_link optional URL
|
||||
* @return string
|
||||
*/
|
||||
public function find($website_link, $favicon_link = '')
|
||||
@ -188,10 +178,10 @@ class Favicon
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the icon links from the HTML
|
||||
* Extract the icon links from the HTML.
|
||||
*
|
||||
* @param string $html HTML
|
||||
*
|
||||
* @access public
|
||||
* @param string $html HTML
|
||||
* @return array
|
||||
*/
|
||||
public function extract($html)
|
||||
@ -207,7 +197,7 @@ class Favicon
|
||||
$xpath = new DOMXpath($dom);
|
||||
$elements = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]");
|
||||
|
||||
for ($i = 0; $i < $elements->length; $i++) {
|
||||
for ($i = 0; $i < $elements->length; ++$i) {
|
||||
$icons[] = $elements->item($i)->getAttribute('href');
|
||||
}
|
||||
|
||||
|
@ -10,17 +10,15 @@ use PicoFeed\Logging\Logger;
|
||||
use PicoFeed\Parser\XmlParser;
|
||||
|
||||
/**
|
||||
* Reader class
|
||||
* Reader class.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Reader
|
||||
*/
|
||||
class Reader
|
||||
{
|
||||
/**
|
||||
* Feed formats for detection
|
||||
* Feed formats for detection.
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $formats = array(
|
||||
@ -32,34 +30,32 @@ class Reader
|
||||
);
|
||||
|
||||
/**
|
||||
* Config class instance
|
||||
* Config class instance.
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config\Config
|
||||
*/
|
||||
private $config;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* Constructor.
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config\Config $config Config class instance
|
||||
* @param \PicoFeed\Config\Config $config Config class instance
|
||||
*/
|
||||
public function __construct(Config $config = null)
|
||||
{
|
||||
$this->config = $config ?: new Config;
|
||||
$this->config = $config ?: new Config();
|
||||
Logger::setTimezone($this->config->getTimezone());
|
||||
}
|
||||
|
||||
/**
|
||||
* Download a feed (no discovery)
|
||||
* Download a feed (no discovery).
|
||||
*
|
||||
* @param string $url Feed url
|
||||
* @param string $last_modified Last modified HTTP header
|
||||
* @param string $etag Etag HTTP header
|
||||
* @param string $username HTTP basic auth username
|
||||
* @param string $password HTTP basic auth password
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Feed url
|
||||
* @param string $last_modified Last modified HTTP header
|
||||
* @param string $etag Etag HTTP header
|
||||
* @param string $username HTTP basic auth username
|
||||
* @param string $password HTTP basic auth password
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function download($url, $last_modified = '', $etag = '', $username = '', $password = '')
|
||||
@ -76,14 +72,14 @@ class Reader
|
||||
}
|
||||
|
||||
/**
|
||||
* Discover and download a feed
|
||||
* Discover and download a feed.
|
||||
*
|
||||
* @param string $url Feed or website url
|
||||
* @param string $last_modified Last modified HTTP header
|
||||
* @param string $etag Etag HTTP header
|
||||
* @param string $username HTTP basic auth username
|
||||
* @param string $password HTTP basic auth password
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Feed or website url
|
||||
* @param string $last_modified Last modified HTTP header
|
||||
* @param string $etag Etag HTTP header
|
||||
* @param string $username HTTP basic auth username
|
||||
* @param string $password HTTP basic auth password
|
||||
* @return \PicoFeed\Client\Client
|
||||
*/
|
||||
public function discover($url, $last_modified = '', $etag = '', $username = '', $password = '')
|
||||
@ -91,7 +87,7 @@ class Reader
|
||||
$client = $this->download($url, $last_modified, $etag, $username, $password);
|
||||
|
||||
// It's already a feed or the feed was not modified
|
||||
if (! $client->isModified() || $this->detectFormat($client->getContent())) {
|
||||
if (!$client->isModified() || $this->detectFormat($client->getContent())) {
|
||||
return $client;
|
||||
}
|
||||
|
||||
@ -106,12 +102,12 @@ class Reader
|
||||
}
|
||||
|
||||
/**
|
||||
* Find feed urls inside a HTML document
|
||||
* Find feed urls inside a HTML document.
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Website url
|
||||
* @param string $html HTML content
|
||||
* @return array List of feed links
|
||||
* @param string $url Website url
|
||||
* @param string $html HTML content
|
||||
*
|
||||
* @return array List of feed links
|
||||
*/
|
||||
public function find($url, $html)
|
||||
{
|
||||
@ -127,15 +123,12 @@ class Reader
|
||||
);
|
||||
|
||||
foreach ($queries as $query) {
|
||||
|
||||
$nodes = $xpath->query($query);
|
||||
|
||||
foreach ($nodes as $node) {
|
||||
|
||||
$link = $node->getAttribute('href');
|
||||
|
||||
if (! empty($link)) {
|
||||
|
||||
if (!empty($link)) {
|
||||
$feedUrl = new Url($link);
|
||||
$siteUrl = new Url($url);
|
||||
|
||||
@ -150,12 +143,12 @@ class Reader
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a parser instance
|
||||
* Get a parser instance.
|
||||
*
|
||||
* @param string $url Site url
|
||||
* @param string $content Feed content
|
||||
* @param string $encoding HTTP encoding
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Site url
|
||||
* @param string $content Feed content
|
||||
* @param string $encoding HTTP encoding
|
||||
* @return \PicoFeed\Parser\Parser
|
||||
*/
|
||||
public function getParser($url, $content, $encoding)
|
||||
@ -177,10 +170,10 @@ class Reader
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect the feed format
|
||||
* Detect the feed format.
|
||||
*
|
||||
* @param string $content Feed content
|
||||
*
|
||||
* @access public
|
||||
* @param string $content Feed content
|
||||
* @return string
|
||||
*/
|
||||
public function detectFormat($content)
|
||||
@ -200,16 +193,15 @@ class Reader
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the prefix "http://" if the end-user just enter a domain name
|
||||
* Add the prefix "http://" if the end-user just enter a domain name.
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Url
|
||||
* @param string $url Url
|
||||
* @return string
|
||||
*/
|
||||
public function prependScheme($url)
|
||||
{
|
||||
if (! preg_match('%^https?://%', $url)) {
|
||||
$url = 'http://' . $url;
|
||||
if (!preg_match('%^https?://%', $url)) {
|
||||
$url = 'http://'.$url;
|
||||
}
|
||||
|
||||
return $url;
|
||||
|
@ -4,12 +4,10 @@ namespace PicoFeed\Reader;
|
||||
|
||||
use PicoFeed\PicoFeedException;
|
||||
|
||||
|
||||
/**
|
||||
* ReaderException Exception
|
||||
* ReaderException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Reader
|
||||
*/
|
||||
abstract class ReaderException extends PicoFeedException
|
||||
{
|
||||
|
@ -3,10 +3,9 @@
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
/**
|
||||
* SubscriptionNotFoundException Exception
|
||||
* SubscriptionNotFoundException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Reader
|
||||
*/
|
||||
class SubscriptionNotFoundException extends ReaderException
|
||||
{
|
||||
|
@ -3,11 +3,10 @@
|
||||
namespace PicoFeed\Reader;
|
||||
|
||||
/**
|
||||
* UnsupportedFeedFormatException Exception
|
||||
* UnsupportedFeedFormatException Exception.
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package Reader
|
||||
*/
|
||||
class UnsupportedFeedFormatException extends ReaderException
|
||||
{
|
||||
}
|
||||
}
|
||||
|
@ -11,4 +11,4 @@ return array(
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
@ -10,4 +10,4 @@ return array(
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
@ -17,4 +17,4 @@ return array(
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
@ -11,4 +11,4 @@ return array(
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
@ -22,7 +22,7 @@ return array(
|
||||
'//img[contains(@src, "1x1")]',
|
||||
'//a[contains(@href, "creativecommons")]',
|
||||
'//a[@href="#start-of-content"]',
|
||||
'//ul[@id="article-tags"],
|
||||
'//ul[@id="article-tags"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
|
@ -12,4 +12,4 @@ return array(
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -11,8 +12,8 @@ return array(
|
||||
'//*[contains(@class, "article_navigation")]',
|
||||
'//h1',
|
||||
'//*[contains(@class, "article_toolbarMain")]',
|
||||
'//*[contains(@class, "article_imagehaute_box")]'
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
'//*[contains(@class, "article_imagehaute_box")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%alt="(.+)" title="(.+)" */>%' => "/><br/>$1<br/>$2"
|
||||
)
|
||||
)
|
||||
'%alt="(.+)" title="(.+)" */>%' => '/><br/>$1<br/>$2',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,14 +1,15 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907',
|
||||
'body' => array(
|
||||
'//div[@class="news_details"]'
|
||||
'//div[@class="news_details"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="news_details"]/div/div[last()]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -13,8 +14,8 @@ return array(
|
||||
'//ul',
|
||||
'//a[@target="_self"]',
|
||||
'//div[@data-embed-type="Brightcove"]',
|
||||
'//div[@class="QuoteContainer"]'
|
||||
'//div[@class="QuoteContainer"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -14,6 +15,6 @@ return array(
|
||||
'//sharebar',
|
||||
'//related-topics',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -16,8 +17,8 @@ return array(
|
||||
'//*[@class="skip"]',
|
||||
'//*[@class="funcs"]',
|
||||
'//span[@class="nd address"]',
|
||||
'//a[contains(@href, "abo-und-services")]'
|
||||
)
|
||||
)
|
||||
)
|
||||
'//a[contains(@href, "abo-und-services")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => "/><br/>$1"
|
||||
)
|
||||
)
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,13 +1,14 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array(
|
||||
'//img[@id="comic_image"]',
|
||||
'//div[@class="comment-wrapper"][position()=1]'
|
||||
'//div[@class="comment-wrapper"][position()=1]',
|
||||
),
|
||||
'strip' => array(),
|
||||
'test_url' => 'http://www.anythingcomic.com/comics/2108929/stress-free/',
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -8,6 +9,6 @@ return array(
|
||||
'//span[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/',
|
||||
'body' => array('//div[contains(@class,"entry")]'),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -12,6 +13,6 @@ return array(
|
||||
'//aside',
|
||||
'//div[@class="article-expander"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/index.php.*comic=.*%' => array(
|
||||
'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315',
|
||||
'body' => array('//*[@id="comic"]/img'),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -15,6 +16,6 @@ return array(
|
||||
'//script',
|
||||
'//ul',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -10,6 +11,6 @@ return array(
|
||||
'strip' => array(
|
||||
'//strong',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%-150x150%' => '',
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -8,6 +9,6 @@ return array(
|
||||
'//figure/div/a/img',
|
||||
'//p[@class="content__segment"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,13 +1,14 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blog.fefe.de/?ts=ad706a73',
|
||||
'body' => array(
|
||||
'/html/body/ul'
|
||||
'/html/body/ul',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,11 +1,12 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://blog.mapillary.com/update/2015/08/26/traffic-sign-updates.html',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "blog-post__content")]'
|
||||
)
|
||||
)
|
||||
)
|
||||
'//div[contains(@class, "blog-post__content")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -13,6 +14,6 @@ return array(
|
||||
'//h1',
|
||||
'//div[@class="autor"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -8,6 +9,6 @@ return array(
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,7 +8,7 @@ return array(
|
||||
'//div[@id="comic"]',
|
||||
'//div[@class="post-comic"]',
|
||||
),
|
||||
'strip' => array()
|
||||
)
|
||||
)
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,12 +1,13 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/cad/.+%' => array(
|
||||
'test_url' => 'http://www.cad-comic.com/cad/20150417',
|
||||
'body' => array(
|
||||
'//*[@id="content"]/img'
|
||||
'//*[@id="content"]/img',
|
||||
),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%href="http://www.channelate.com/(\\d+)/(\\d+)/(\\d+)/[^"]*"%' => 'href="http://www.channelate.com/extra-panel/$1$2$3/"'
|
||||
)
|
||||
)
|
||||
'%href="http://www.channelate.com/(\\d+)/(\\d+)/(\\d+)/[^"]*"%' => 'href="http://www.channelate.com/extra-panel/$1$2$3/"',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild',
|
||||
'body' => array('//div[@id="comic"]'),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/comic.*%' => array(
|
||||
'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/',
|
||||
'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -15,6 +16,6 @@ return array(
|
||||
'//div[contains(@class,"ad-")]',
|
||||
'//div[@section="shortcodeGallery"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -8,6 +9,6 @@ return array(
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => "/><br/>$1"
|
||||
)
|
||||
)
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -13,6 +14,6 @@ return array(
|
||||
'//*[contains(@class,"promo_link")]',
|
||||
'//div[@id="story-embed-column"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -13,7 +14,7 @@ return array(
|
||||
'//*[contains(@class, "addthis_default_style")]',
|
||||
'//*[@class="navigation small"]',
|
||||
'//*[@id="related"]',
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -8,6 +9,6 @@ return array(
|
||||
'//span[@id="lblSummary"]',
|
||||
'//span[@id="lblBody"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,8 +8,8 @@ return array(
|
||||
'//div[@class="contenu"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[contains(@class, "a2a")]'
|
||||
'//div[contains(@class, "a2a")]',
|
||||
),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -9,6 +10,6 @@ return array(
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -12,6 +13,6 @@ return array(
|
||||
'//div[@class="shareIcons"]',
|
||||
'//div[@class="navigation"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,7 +8,7 @@ return array(
|
||||
'//td[@class="NewsText"][1]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -9,7 +10,7 @@ return array(
|
||||
'//div[@class="story-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,15 +1,16 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array('//img[@id="comicimage"]'),
|
||||
'strip' => array(),
|
||||
'test_url' => 'http://drawingboardcomic.com/index.php?comic=208',
|
||||
)
|
||||
),
|
||||
),
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => "/><br/>$1"
|
||||
)
|
||||
)
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%-150x150%' => '',
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated',
|
||||
'body' => array('//div[@class="article-content"]/p[not(@class="read-more")] | //div[@class="article-content"]/div[@style="text-align: center;"]'),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
);
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,47 +1,46 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/articles/view/comicsandcosplay/comics/critical-miss.*%' => array(
|
||||
'body' => array('//*[@class="body"]/span/img | //div[@class="folder_nav_links"]/following::p'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss/13776-Critical-Miss-on-Framerates?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/comicsandcosplay/comics/namegame.*%' => array(
|
||||
'body' => array('//*[@class="body"]/span/p/img[@height != "120"]'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame/9759-Leaving-the-Nest?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/comicsandcosplay/comics/stolen-pixels.*%' => array(
|
||||
'body' => array('//*[@class="body"]/span/p[2]/img'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels/8866-Stolen-Pixels-258-Where-the-Boys-Are?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/comicsandcosplay/comics/bumhugparade.*%' => array(
|
||||
'body' => array('//*[@class="body"]/span/p[2]/img'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade/8262-Bumhug-Parade-13?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/comicsandcosplay.*/comics/escapistradiotheater%' => array(
|
||||
'body' => array('//*[@class="body"]/span/p[2]/img'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater/8265-The-Escapist-Radio-Theater-13?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/comicsandcosplay/comics/paused.*%' => array(
|
||||
'body' => array('//*[@class="body"]/span/p[2]/img | //*[@class="body"]/span/div/img'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/paused/8263-Paused-16?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/comicsandcosplay/comics/fraughtwithperil.*%' => array(
|
||||
'body' => array('//*[@class="body"]'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil/12166-The-Escapist-Presents-Escapist-Comics-Critical-Miss-B-lyeh-Fhlop?utm_source=rss&utm_medium=rss&utm_campaign=articles',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
'%/articles/view/video-games/columns/.*%' => array(
|
||||
'body' => array('//*[@id="article_content"]'),
|
||||
'test_url' => 'http://www.escapistmagazine.com/articles/view/video-games/columns/experienced-points/13971-What-50-Shades-and-Batman-Have-in-Common.2',
|
||||
'strip' => array()
|
||||
'strip' => array(),
|
||||
),
|
||||
)
|
||||
),
|
||||
);
|
||||
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -6,6 +7,6 @@ return array(
|
||||
'body' => array(
|
||||
'//p',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,15 +1,16 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array('//a[@class="comic"]/img'),
|
||||
'strip' => array(),
|
||||
'test_url' => 'http://www.exocomics.com/379',
|
||||
)
|
||||
),
|
||||
),
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%title="(.+)" */>%' => "/><br/>$1"
|
||||
)
|
||||
)
|
||||
'%title="(.+)" */>%' => '/><br/>$1',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -8,6 +9,6 @@ return array(
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,8 +1,9 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'%.*%' => array(
|
||||
'%-150x150%' => '',
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,7 +8,7 @@ return array(
|
||||
'//article[contains(@class, "body prose")]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,7 +8,7 @@ return array(
|
||||
'//article[contains(@class, "body prose")]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,7 +8,7 @@ return array(
|
||||
'//article[contains(@class, "body prose")]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,7 +8,7 @@ return array(
|
||||
'//div[@class="news_body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,10 +1,11 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'body' => array('//*[@id="comic"] | //*[@class="post-image"]'),
|
||||
'strip' => array(),
|
||||
'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/'
|
||||
)
|
||||
)
|
||||
'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/',
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -11,6 +12,6 @@ return array(
|
||||
'strip' => array(
|
||||
'//span[@class="red"]',
|
||||
),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,12 +1,13 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%/comics/oots.*%' => array(
|
||||
'test_url' => 'http://www.giantitp.com/comics/oots0989.html',
|
||||
'body' => array(
|
||||
'//td[@align="center"]/img'
|
||||
),
|
||||
'strip' => array()
|
||||
)
|
||||
)
|
||||
'test_url' => 'http://www.giantitp.com/comics/oots0989.html',
|
||||
'body' => array(
|
||||
'//td[@align="center"]/img',
|
||||
),
|
||||
'strip' => array(),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,8 +8,8 @@ return array(
|
||||
'//article[contains(@class, "entry-content")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//h1'
|
||||
)
|
||||
)
|
||||
)
|
||||
'//h1',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,4 +1,5 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
@ -7,6 +8,6 @@ return array(
|
||||
'//div[1]/p[1]/a[1]/img',
|
||||
),
|
||||
'strip' => array(),
|
||||
)
|
||||
)
|
||||
),
|
||||
),
|
||||
);
|
||||
|
@ -1,12 +1,13 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
|
||||
'body' => array(
|
||||
'//header[@class="cluster-header"]',
|
||||
'//div[@class="formatted"]'
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
'//div[@class="formatted"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user