Update picofeed due to bug in rule file

This commit is contained in:
Frederic Guillot 2015-10-19 22:49:30 -04:00
parent 9b08fc9160
commit 871de74279
224 changed files with 2090 additions and 2334 deletions

View File

@ -14,7 +14,7 @@
"fguillot/simple-validator": "v1.0.0",
"fguillot/json-rpc": "v1.0.2",
"fguillot/picodb": "v1.0.2",
"fguillot/picofeed": "v0.1.12"
"fguillot/picofeed": "v0.1.13"
},
"require-dev": {
"phpunit/phpunit": "4.8.3",

2
vendor/autoload.php vendored
View File

@ -4,4 +4,4 @@
require_once __DIR__ . '/composer' . '/autoload_real.php';
return ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5::getLoader();
return ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268::getLoader();

View File

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer
class ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5
class ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268
{
private static $loader;
@ -19,9 +19,9 @@ class ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5
return self::$loader;
}
spl_autoload_register(array('ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInitb324395d2df28954aca677ed0d0d2268', 'loadClassLoader'));
$map = require __DIR__ . '/autoload_namespaces.php';
foreach ($map as $namespace => $path) {
@ -42,14 +42,14 @@ class ComposerAutoloaderInit8f528b09bdfd7d4a627bbcebe3b2eef5
$includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) {
composerRequire8f528b09bdfd7d4a627bbcebe3b2eef5($file);
composerRequireb324395d2df28954aca677ed0d0d2268($file);
}
return $loader;
}
}
function composerRequire8f528b09bdfd7d4a627bbcebe3b2eef5($file)
function composerRequireb324395d2df28954aca677ed0d0d2268($file)
{
require $file;
}

View File

@ -116,17 +116,17 @@
},
{
"name": "fguillot/picofeed",
"version": "v0.1.12",
"version_normalized": "0.1.12.0",
"version": "v0.1.13",
"version_normalized": "0.1.13.0",
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "f7f5e792baf09e6e795f4dd9bb56d7d588d67735"
"reference": "84d9ee64df8596153ba080bd2436b333507aadba"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/f7f5e792baf09e6e795f4dd9bb56d7d588d67735",
"reference": "f7f5e792baf09e6e795f4dd9bb56d7d588d67735",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/84d9ee64df8596153ba080bd2436b333507aadba",
"reference": "84d9ee64df8596153ba080bd2436b333507aadba",
"shasum": ""
},
"require": {
@ -140,7 +140,7 @@
"suggest": {
"ext-curl": "PicoFeed will use cURL if present"
},
"time": "2015-10-18 21:22:40",
"time": "2015-10-20 01:48:56",
"bin": [
"picofeed"
],

View File

@ -4,218 +4,197 @@ namespace PicoFeed\Client;
use LogicException;
use PicoFeed\Logging\Logger;
use PicoFeed\Config\Config;
/**
* Client class
* Client class.
*
* @author Frederic Guillot
* @package client
*/
abstract class Client
{
/**
* Flag that says whether the resource has been modified
* Flag that says whether the resource has been modified.
*
* @access private
* @var bool
*/
private $is_modified = true;
/**
* HTTP Content-Type
* HTTP Content-Type.
*
* @access private
* @var string
*/
private $content_type = '';
/**
* HTTP encoding
* HTTP encoding.
*
* @access private
* @var string
*/
private $encoding = '';
/**
* HTTP request headers
* HTTP request headers.
*
* @access protected
* @var array
*/
protected $request_headers = array();
/**
* HTTP Etag header
* HTTP Etag header.
*
* @access protected
* @var string
*/
protected $etag = '';
/**
* HTTP Last-Modified header
* HTTP Last-Modified header.
*
* @access protected
* @var string
*/
protected $last_modified = '';
/**
* Proxy hostname
* Proxy hostname.
*
* @access protected
* @var string
*/
protected $proxy_hostname = '';
/**
* Proxy port
* Proxy port.
*
* @access protected
* @var integer
* @var int
*/
protected $proxy_port = 3128;
/**
* Proxy username
* Proxy username.
*
* @access protected
* @var string
*/
protected $proxy_username = '';
/**
* Proxy password
* Proxy password.
*
* @access protected
* @var string
*/
protected $proxy_password = '';
/**
* Basic auth username
* Basic auth username.
*
* @access protected
* @var string
*/
protected $username = '';
/**
* Basic auth password
* Basic auth password.
*
* @access protected
* @var string
*/
protected $password = '';
/**
* Client connection timeout
* Client connection timeout.
*
* @access protected
* @var integer
* @var int
*/
protected $timeout = 10;
/**
* User-agent
* User-agent.
*
* @access protected
* @var string
*/
protected $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)';
/**
* Real URL used (can be changed after a HTTP redirect)
* Real URL used (can be changed after a HTTP redirect).
*
* @access protected
* @var string
*/
protected $url = '';
/**
* Page/Feed content
* Page/Feed content.
*
* @access protected
* @var string
*/
protected $content = '';
/**
* Maximum number of HTTP redirections, to avoid infinite loops
* Maximum number of HTTP redirections, to avoid infinite loops.
*
* @access protected
* @var integer
* @var int
*/
protected $max_redirects = 5;
/**
* Maximum size of the HTTP body response
* Maximum size of the HTTP body response.
*
* @access protected
* @var integer
* @var int
*/
protected $max_body_size = 2097152; // 2MB
/**
* HTTP response status code
* HTTP response status code.
*
* @access protected
* @var integer
* @var int
*/
protected $status_code = 0;
/**
* Enables direct passthrough to requesting client
* Enables direct passthrough to requesting client.
*
* @access protected
* @var bool
*/
protected $passthrough = false;
/**
* Do the HTTP request
* Do the HTTP request.
*
* @abstract
* @access public
*
* @return array
*/
abstract public function doRequest();
/**
* Get client instance: curl or stream driver
* Get client instance: curl or stream driver.
*
* @static
* @access public
*
* @return \PicoFeed\Client\Client
*/
public static function getInstance()
{
if (function_exists('curl_init')) {
return new Curl;
}
else if (ini_get('allow_url_fopen')) {
return new Stream;
return new Curl();
} elseif (ini_get('allow_url_fopen')) {
return new Stream();
}
throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
}
/**
* Add HTTP Header to the request
* Add HTTP Header to the request.
*
* @access public
* @param array $headers
*/
public function setHeaders($headers) {
public function setHeaders($headers)
{
$this->request_headers = $headers;
}
/**
* Perform the HTTP request
* Perform the HTTP request.
*
* @access public
* @param string $url URL
*
* @return Client
*/
public function execute($url = '')
@ -239,17 +218,15 @@ abstract class Client
}
/**
* Handle not modified response
* Handle not modified response.
*
* @access public
* @param array $response Client response
*/
public function handleNotModifiedResponse(array $response)
{
if ($response['status'] == 304) {
$this->is_modified = false;
}
else if ($response['status'] == 200) {
} elseif ($response['status'] == 200) {
$this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
$this->etag = $this->getHeader($response, 'ETag');
$this->last_modified = $this->getHeader($response, 'Last-Modified');
@ -261,9 +238,8 @@ abstract class Client
}
/**
* Handle not found response
* Handle not found response.
*
* @access public
* @param array $response Client response
*/
public function handleNotFoundResponse(array $response)
@ -274,9 +250,8 @@ abstract class Client
}
/**
* Handle normal response
* Handle normal response.
*
* @access public
* @param array $response Client response
*/
public function handleNormalResponse(array $response)
@ -289,19 +264,19 @@ abstract class Client
}
/**
* Check if a request has been modified according to the parameters
* Check if a request has been modified according to the parameters.
*
* @access public
* @param array $response
* @param string $etag
* @param string $lastModified
* @return boolean
*
* @return bool
*/
private function hasBeenModified($response, $etag, $lastModified)
{
$headers = array(
'Etag' => $etag,
'Last-Modified' => $lastModified
'Last-Modified' => $lastModified,
);
// Compare the values for each header that is present
@ -311,7 +286,7 @@ abstract class Client
if ($response['headers'][$key] !== $value) {
return true;
}
$presentCacheHeaderCount++;
++$presentCacheHeaderCount;
}
}
@ -325,10 +300,10 @@ abstract class Client
}
/**
* Find content type from response headers
* Find content type from response headers.
*
* @access public
* @param array $response Client response
*
* @return string
*/
public function findContentType(array $response)
@ -337,23 +312,23 @@ abstract class Client
}
/**
* Find charset from response headers
* Find charset from response headers.
*
* @access public
* @return string
*/
public function findCharset()
{
$result = explode('charset=', $this->content_type);
return isset($result[1]) ? $result[1] : '';
}
/**
* Get header value from a client response
* Get header value from a client response.
*
* @access public
* @param array $response Client response
* @param string $header Header name
*
* @return string
*/
public function getHeader(array $response, $header)
@ -362,22 +337,22 @@ abstract class Client
}
/**
* Set the Last-Modified HTTP header
* Set the Last-Modified HTTP header.
*
* @access public
* @param string $last_modified Header value
*
* @return \PicoFeed\Client\Client
*/
public function setLastModified($last_modified)
{
$this->last_modified = $last_modified;
return $this;
}
/**
* Get the value of the Last-Modified HTTP header
* Get the value of the Last-Modified HTTP header.
*
* @access public
* @return string
*/
public function getLastModified()
@ -386,22 +361,22 @@ abstract class Client
}
/**
* Set the value of the Etag HTTP header
* Set the value of the Etag HTTP header.
*
* @access public
* @param string $etag Etag HTTP header value
*
* @return \PicoFeed\Client\Client
*/
public function setEtag($etag)
{
$this->etag = $etag;
return $this;
}
/**
* Get the Etag HTTP header value
* Get the Etag HTTP header value.
*
* @access public
* @return string
*/
public function getEtag()
@ -410,9 +385,8 @@ abstract class Client
}
/**
* Get the final url value
* Get the final url value.
*
* @access public
* @return string
*/
public function getUrl()
@ -421,23 +395,22 @@ abstract class Client
}
/**
* Set the url
* Set the url.
*
* @access public
* @return string
* @return \PicoFeed\Client\Client
*/
public function setUrl($url)
{
$this->url = $url;
return $this;
}
/**
* Get the HTTP response status code
* Get the HTTP response status code.
*
* @access public
* @return integer
* @return int
*/
public function getStatusCode()
{
@ -445,9 +418,8 @@ abstract class Client
}
/**
* Get the body of the HTTP response
* Get the body of the HTTP response.
*
* @access public
* @return string
*/
public function getContent()
@ -456,9 +428,8 @@ abstract class Client
}
/**
* Get the content type value from HTTP headers
* Get the content type value from HTTP headers.
*
* @access public
* @return string
*/
public function getContentType()
@ -467,9 +438,8 @@ abstract class Client
}
/**
* Get the encoding value from HTTP headers
* Get the encoding value from HTTP headers.
*
* @access public
* @return string
*/
public function getEncoding()
@ -478,9 +448,8 @@ abstract class Client
}
/**
* Return true if the remote resource has changed
* Return true if the remote resource has changed.
*
* @access public
* @return bool
*/
public function isModified()
@ -489,9 +458,8 @@ abstract class Client
}
/**
* Return true if passthrough mode is enabled
* Return true if passthrough mode is enabled.
*
* @access public
* @return bool
*/
public function isPassthroughEnabled()
@ -500,167 +468,177 @@ abstract class Client
}
/**
* Set connection timeout
* Set connection timeout.
*
* @param int $timeout Connection timeout
*
* @access public
* @param integer $timeout Connection timeout
* @return \PicoFeed\Client\Client
*/
public function setTimeout($timeout)
{
$this->timeout = $timeout ?: $this->timeout;
return $this;
}
/**
* Set a custom user agent
* Set a custom user agent.
*
* @access public
* @param string $user_agent User Agent
*
* @return \PicoFeed\Client\Client
*/
public function setUserAgent($user_agent)
{
$this->user_agent = $user_agent ?: $this->user_agent;
return $this;
}
/**
* Set the maximum number of HTTP redirections
* Set the maximum number of HTTP redirections.
*
* @param int $max Maximum
*
* @access public
* @param integer $max Maximum
* @return \PicoFeed\Client\Client
*/
public function setMaxRedirections($max)
{
$this->max_redirects = $max ?: $this->max_redirects;
return $this;
}
/**
* Set the maximum size of the HTTP body
* Set the maximum size of the HTTP body.
*
* @param int $max Maximum
*
* @access public
* @param integer $max Maximum
* @return \PicoFeed\Client\Client
*/
public function setMaxBodySize($max)
{
$this->max_body_size = $max ?: $this->max_body_size;
return $this;
}
/**
* Set the proxy hostname
* Set the proxy hostname.
*
* @access public
* @param string $hostname Proxy hostname
*
* @return \PicoFeed\Client\Client
*/
public function setProxyHostname($hostname)
{
$this->proxy_hostname = $hostname ?: $this->proxy_hostname;
return $this;
}
/**
* Set the proxy port
* Set the proxy port.
*
* @param int $port Proxy port
*
* @access public
* @param integer $port Proxy port
* @return \PicoFeed\Client\Client
*/
public function setProxyPort($port)
{
$this->proxy_port = $port ?: $this->proxy_port;
return $this;
}
/**
* Set the proxy username
* Set the proxy username.
*
* @access public
* @param string $username Proxy username
*
* @return \PicoFeed\Client\Client
*/
public function setProxyUsername($username)
{
$this->proxy_username = $username ?: $this->proxy_username;
return $this;
}
/**
* Set the proxy password
* Set the proxy password.
*
* @access public
* @param string $password Password
*
* @return \PicoFeed\Client\Client
*/
public function setProxyPassword($password)
{
$this->proxy_password = $password ?: $this->proxy_password;
return $this;
}
/**
* Set the username
* Set the username.
*
* @access public
* @param string $username Basic Auth username
*
* @return \PicoFeed\Client\Client
*/
public function setUsername($username)
{
$this->username = $username ?: $this->username;
return $this;
}
/**
* Set the password
* Set the password.
*
* @access public
* @param string $password Basic Auth Password
*
* @return \PicoFeed\Client\Client
*/
public function setPassword($password)
{
$this->password = $password ?: $this->password;
return $this;
}
/**
* Enable the passthrough mode
* Enable the passthrough mode.
*
* @access public
* @return \PicoFeed\Client\Client
*/
public function enablePassthroughMode()
{
$this->passthrough = true;
return $this;
}
/**
* Disable the passthrough mode
* Disable the passthrough mode.
*
* @access public
* @return \PicoFeed\Client\Client
*/
public function disablePassthroughMode()
{
$this->passthrough = false;
return $this;
}
/**
* Set config object
* Set config object.
*
* @access public
* @param \PicoFeed\Config\Config $config Config instance
*
* @return \PicoFeed\Client\Client
*/
public function setConfig($config)
public function setConfig(Config $config)
{
if ($config !== null) {
$this->setTimeout($config->getClientTimeout());

View File

@ -4,12 +4,10 @@ namespace PicoFeed\Client;
use PicoFeed\PicoFeedException;
/**
* ClientException Exception
* ClientException Exception.
*
* @author Frederic Guillot
* @package Client
*/
abstract class ClientException extends PicoFeedException
{

View File

@ -5,54 +5,49 @@ namespace PicoFeed\Client;
use PicoFeed\Logging\Logger;
/**
* cURL HTTP client
* cURL HTTP client.
*
* @author Frederic Guillot
* @package Client
*/
class Curl extends Client
{
/**
* HTTP response body
* HTTP response body.
*
* @access private
* @var string
*/
private $body = '';
/**
* Body size
* Body size.
*
* @access private
* @var integer
* @var int
*/
private $body_length = 0;
/**
* HTTP response headers
* HTTP response headers.
*
* @access private
* @var array
*/
private $response_headers = array();
/**
* Counter of the number of header blocks received
* Counter of the number of header blocks received.
*
* @access private
* @var integer
* @var int
*/
private $response_headers_count = 0;
/**
* cURL callback to read the HTTP body
* cURL callback to read the HTTP body.
*
* If the function returns -1, cURL stops reading the HTTP response
*
* @access public
* @param resource $ch cURL handler
* @param string $buffer Chunk of data
* @return integer Length of the buffer
*
* @return int Length of the buffer
*/
public function readBody($ch, $buffer)
{
@ -69,23 +64,21 @@ class Curl extends Client
}
/**
* cURL callback to read HTTP headers
* cURL callback to read HTTP headers.
*
* @access public
* @param resource $ch cURL handler
* @param string $buffer Header line
* @return integer Length of the buffer
*
* @return int Length of the buffer
*/
public function readHeaders($ch, $buffer)
{
$length = strlen($buffer);
if ($buffer === "\r\n" || $buffer === "\n") {
$this->response_headers_count++;
}
else {
if (! isset($this->response_headers[$this->response_headers_count])) {
++$this->response_headers_count;
} else {
if (!isset($this->response_headers[$this->response_headers_count])) {
$this->response_headers[$this->response_headers_count] = '';
}
@ -96,12 +89,12 @@ class Curl extends Client
}
/**
* cURL callback to passthrough the HTTP status header to the client
* cURL callback to passthrough the HTTP status header to the client.
*
* @access public
* @param resource $ch cURL handler
* @param string $buffer Header line
* @return integer Length of the buffer
*
* @return int Length of the buffer
*/
public function passthroughHeaders($ch, $buffer)
{
@ -109,8 +102,7 @@ class Curl extends Client
if ($status !== 0) {
header(':', true, $status);
}
elseif (isset($headers['Content-Type'])) {
} elseif (isset($headers['Content-Type'])) {
header($buffer);
}
@ -118,25 +110,25 @@ class Curl extends Client
}
/**
* cURL callback to passthrough the HTTP body to the client
* cURL callback to passthrough the HTTP body to the client.
*
* If the function returns -1, cURL stops reading the HTTP response
*
* @access public
* @param resource $ch cURL handler
* @param string $buffer Chunk of data
* @return integer Length of the buffer
*
* @return int Length of the buffer
*/
public function passthroughBody($ch, $buffer)
{
echo $buffer;
return strlen($buffer);
}
/**
* Prepare HTTP headers
* Prepare HTTP headers.
*
* @access private
* @return string[]
*/
private function prepareHeaders()
@ -159,16 +151,15 @@ class Curl extends Client
}
/**
* Prepare curl proxy context
* Prepare curl proxy context.
*
* @access private
* @param resource $ch
*
* @return resource $ch
*/
private function prepareProxyContext($ch)
{
if ($this->proxy_hostname) {
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
@ -178,8 +169,7 @@ class Curl extends Client
if ($this->proxy_username) {
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
}
else {
} else {
Logger::setMessage(get_called_class().' Proxy credentials: No');
}
}
@ -188,10 +178,10 @@ class Curl extends Client
}
/**
* Prepare curl auth context
* Prepare curl auth context.
*
* @access private
* @param resource $ch
*
* @return resource $ch
*/
private function prepareAuthContext($ch)
@ -204,10 +194,10 @@ class Curl extends Client
}
/**
* Set write/header functions
* Set write/header functions.
*
* @access private
* @param resource $ch
*
* @return resource $ch
*/
private function prepareDownloadMode($ch)
@ -218,7 +208,6 @@ class Curl extends Client
if ($this->isPassthroughEnabled()) {
$write_function = 'passthroughBody';
$header_function = 'passthroughHeaders';
}
curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function));
@ -228,9 +217,8 @@ class Curl extends Client
}
/**
* Prepare curl context
* Prepare curl context.
*
* @access private
* @return resource
*/
private function prepareContext()
@ -266,9 +254,7 @@ class Curl extends Client
}
/**
* Execute curl context
*
* @access private
* Execute curl context.
*/
private function executeContext()
{
@ -297,10 +283,10 @@ class Curl extends Client
}
/**
* Do the HTTP request
* Do the HTTP request.
*
* @access public
* @param bool $follow_location Flag used when there is an open_basedir restriction
*
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
*/
public function doRequest($follow_location = true)
@ -316,15 +302,15 @@ class Curl extends Client
return array(
'status' => $status,
'body' => $this->body,
'headers' => $headers
'headers' => $headers,
);
}
/**
* Manually handle redirections when there is an open_basedir restriction
* Manually handle redirections when there is an open_basedir restriction.
*
* @access private
* @param string $location Redirected URL
*
* @return array
*/
private function handleRedirection($location)
@ -338,8 +324,7 @@ class Curl extends Client
$this->response_headers_count = 0;
while (true) {
$nb_redirects++;
++$nb_redirects;
if ($nb_redirects >= $this->max_redirects) {
throw new MaxRedirectException('Maximum number of redirections reached');
@ -353,8 +338,7 @@ class Curl extends Client
$this->body_length = 0;
$this->response_headers = array();
$this->response_headers_count = 0;
}
else {
} else {
break;
}
}
@ -363,14 +347,14 @@ class Curl extends Client
}
/**
* Handle cURL errors (throw individual exceptions)
* Handle cURL errors (throw individual exceptions).
*
* We don't use constants because they are not necessarily always available
* (depends on the version of libcurl linked to PHP)
*
* @see http://curl.haxx.se/libcurl/c/libcurl-errors.html
* @access private
* @param integer $errno cURL error code
*
* @param int $errno cURL error code
*/
private function handleError($errno)
{

View File

@ -6,11 +6,10 @@ use ArrayAccess;
use PicoFeed\Logging\Logger;
/**
* Class to handle HTTP headers case insensitivity
* Class to handle HTTP headers case insensitivity.
*
* @author Bernhard Posselt
* @author Frederic Guillot
* @package Client
*/
class HttpHeaders implements ArrayAccess
{
@ -44,11 +43,12 @@ class HttpHeaders implements ArrayAccess
}
/**
* Parse HTTP headers
* Parse HTTP headers.
*
* @static
* @access public
*
* @param array $lines List of headers
*
* @return array
*/
public static function parse(array $lines)
@ -57,15 +57,14 @@ class HttpHeaders implements ArrayAccess
$headers = array();
foreach ($lines as $line) {
if (strpos($line, 'HTTP/1') === 0) {
$headers = array();
$status = (int) substr($line, 9, 3);
} elseif (strpos($line, ':') !== false) {
list($name, $value) = explode(': ', $line);
if ($value) {
$headers[trim($name)] = trim($value);
}
else if (strpos($line, ':') !== false) {
@list($name, $value) = explode(': ', $line);
if ($value) $headers[trim($name)] = trim($value);
}
}

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Client;
/**
* InvalidCertificateException Exception
* InvalidCertificateException Exception.
*
* @author Frederic Guillot
* @package Client
*/
class InvalidCertificateException extends ClientException
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Client;
/**
* InvalidUrlException Exception
* InvalidUrlException Exception.
*
* @author Frederic Guillot
* @package Client
*/
class InvalidUrlException extends ClientException
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Client;
/**
* MaxRedirectException Exception
* MaxRedirectException Exception.
*
* @author Frederic Guillot
* @package Client
*/
class MaxRedirectException extends ClientException
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Client;
/**
* MaxSizeException Exception
* MaxSizeException Exception.
*
* @author Frederic Guillot
* @package Client
*/
class MaxSizeException extends ClientException
{

View File

@ -5,17 +5,15 @@ namespace PicoFeed\Client;
use PicoFeed\Logging\Logger;
/**
* Stream context HTTP client
* Stream context HTTP client.
*
* @author Frederic Guillot
* @package Client
*/
class Stream extends Client
{
/**
* Prepare HTTP headers
* Prepare HTTP headers.
*
* @access private
* @return string[]
*/
private function prepareHeaders()
@ -27,7 +25,7 @@ class Stream extends Client
// disable compression in passthrough mode. It could result in double
// compressed content which isn't decodeable by browsers
if (function_exists('gzdecode') && ! $this->isPassthroughEnabled()) {
if (function_exists('gzdecode') && !$this->isPassthroughEnabled()) {
$headers[] = 'Accept-Encoding: gzip';
}
@ -53,14 +51,13 @@ class Stream extends Client
}
/**
* Construct the final URL from location headers
* Construct the final URL from location headers.
*
* @access private
* @param array $headers List of HTTP response headers
*/
private function setEffectiveUrl($headers)
{
foreach($headers as $header) {
foreach ($headers as $header) {
if (stripos($header, 'Location') === 0) {
list(, $value) = explode(': ', $header);
@ -70,9 +67,8 @@ class Stream extends Client
}
/**
* Prepare stream context
* Prepare stream context.
*
* @access private
* @return array
*/
private function prepareContext()
@ -83,11 +79,10 @@ class Stream extends Client
'protocol_version' => 1.1,
'timeout' => $this->timeout,
'max_redirects' => $this->max_redirects,
)
),
);
if ($this->proxy_hostname) {
Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
$context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
@ -95,8 +90,7 @@ class Stream extends Client
if ($this->proxy_username) {
Logger::setMessage(get_called_class().' Proxy credentials: Yes');
}
else {
} else {
Logger::setMessage(get_called_class().' Proxy credentials: No');
}
}
@ -107,9 +101,8 @@ class Stream extends Client
}
/**
* Do the HTTP request
* Do the HTTP request.
*
* @access public
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
*/
public function doRequest()
@ -121,7 +114,7 @@ class Stream extends Client
// Make HTTP request
$stream = @fopen($this->url, 'r', false, $context);
if (! is_resource($stream)) {
if (!is_resource($stream)) {
throw new InvalidUrlException('Unable to establish a connection');
}
@ -137,8 +130,7 @@ class Stream extends Client
}
fpassthru($stream);
}
else {
} else {
// Get the entire body until the max size
$body = stream_get_contents($stream, $this->max_body_size + 1);
@ -159,16 +151,16 @@ class Stream extends Client
return array(
'status' => $status,
'body' => $this->decodeBody($body, $headers),
'headers' => $headers
'headers' => $headers,
);
}
/**
* Decode body response according to the HTTP headers
* Decode body response according to the HTTP headers.
*
* @access public
* @param string $body Raw body
* @param HttpHeaders $headers HTTP headers
*
* @return string
*/
public function decodeBody($body, HttpHeaders $headers)
@ -178,22 +170,22 @@ class Stream extends Client
}
if (isset($headers['Content-Encoding']) && $headers['Content-Encoding'] === 'gzip') {
$body = @gzdecode($body);
$body = gzdecode($body);
}
return $body;
}
/**
* Decode a chunked body
* Decode a chunked body.
*
* @access public
* @param string $str Raw body
*
* @return string Decoded body
*/
public function decodeChunked($str)
{
for ($result = ''; ! empty($str); $str = trim($str)) {
for ($result = ''; !empty($str); $str = trim($str)) {
// Get the chunk length
$pos = strpos($str, "\r\n");

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Client;
/**
* TimeoutException Exception
* TimeoutException Exception.
*
* @author Frederic Guillot
* @package Client
*/
class TimeoutException extends ClientException
{

View File

@ -3,33 +3,29 @@
namespace PicoFeed\Client;
/**
* URL class
* URL class.
*
* @author Frederic Guillot
* @package Client
*/
class Url
{
/**
* URL
* URL.
*
* @access private
* @var string
*/
private $url = '';
/**
* URL components
* URL components.
*
* @access private
* @var array
*/
private $components = array();
/**
* Constructor
* Constructor.
*
* @access public
* @param string $url URL
*/
public function __construct($url)
@ -51,28 +47,27 @@ class Url
}
/**
* Shortcut method to get an absolute url from a relative url
* Shortcut method to get an absolute url from a relative url.
*
* @static
* @access public
*
* @param mixed $item_url Unknown url (can be relative or not)
* @param mixed $website_url Website url
*
* @return string
*/
public static function resolve($item_url, $website_url)
{
$link = is_string($item_url) ? new Url($item_url) : $item_url;
$website = is_string($website_url) ? new Url($website_url) : $website_url;
$link = is_string($item_url) ? new self($item_url) : $item_url;
$website = is_string($website_url) ? new self($website_url) : $website_url;
if ($link->isRelativeUrl()) {
if ($link->isRelativePath()) {
return $link->getAbsoluteUrl($website->getBaseUrl($website->getBasePath()));
}
return $link->getAbsoluteUrl($website->getBaseUrl());
}
else if ($link->isProtocolRelative()) {
} elseif ($link->isProtocolRelative()) {
$link->setScheme($website->getScheme());
}
@ -80,24 +75,26 @@ class Url
}
/**
* Shortcut method to get a base url
* Shortcut method to get a base url.
*
* @static
* @access public
*
* @param string $url
*
* @return string
*/
public static function base($url)
{
$link = new Url($url);
$link = new self($url);
return $link->getBaseUrl();
}
/**
* Get the base URL
* Get the base URL.
*
* @access public
* @param string $suffix Add a suffix to the url
*
* @return string
*/
public function getBaseUrl($suffix = '')
@ -106,19 +103,18 @@ class Url
}
/**
* Get the absolute URL
* Get the absolute URL.
*
* @access public
* @param string $base_url Use this url as base url
*
* @return string
*/
public function getAbsoluteUrl($base_url = '')
{
if ($base_url) {
$base = new Url($base_url);
$base = new self($base_url);
$url = $base->getAbsoluteUrl().substr($this->getFullPath(), 1);
}
else {
} else {
$url = $this->hasHost() ? $this->getBaseUrl().$this->getFullPath() : '';
}
@ -126,50 +122,49 @@ class Url
}
/**
* Return true if the url is relative
* Return true if the url is relative.
*
* @access public
* @return boolean
* @return bool
*/
public function isRelativeUrl()
{
return ! $this->hasScheme() && ! $this->isProtocolRelative();
return !$this->hasScheme() && !$this->isProtocolRelative();
}
/**
* Return true if the path is relative
* Return true if the path is relative.
*
* @access public
* @return boolean
* @return bool
*/
public function isRelativePath()
{
    // A path counts as relative when it is empty or does not start at the
    // root, i.e. its first character is not "/".
    $path = $this->getPath();

    // Square-bracket offset access is used on purpose: the curly-brace form
    // ($path{0}) is deprecated since PHP 7.4 and a parse error in PHP 8.0.
    // The comparison is kept on one line so the expression is not split
    // in the middle of the operand.
    return empty($path) || $path[0] !== '/';
}
/**
* Filters the path of a URI
* Filters the path of a URI.
*
* Imported from Guzzle library: https://github.com/guzzle/psr7/blob/master/src/Uri.php#L568-L582
*
* @access public
* @param $path
*
* @return string
*/
public function filterPath($path, $charUnreserved = 'a-zA-Z0-9_\-\.~', $charSubDelims = '!\$&\'\(\)\*\+,;=')
{
return preg_replace_callback(
'/(?:[^' . $charUnreserved . $charSubDelims . ':@\/%]+|%(?![A-Fa-f0-9]{2}))/',
'/(?:[^'.$charUnreserved.$charSubDelims.':@\/%]+|%(?![A-Fa-f0-9]{2}))/',
function (array $matches) { return rawurlencode($matches[0]); },
$path
);
}
/**
* Get the path
* Get the path.
*
* @access public
* @return string
*/
public function getPath()
@ -178,9 +173,8 @@ class Url
}
/**
* Get the base path
* Get the base path.
*
* @access public
* @return string
*/
public function getBasePath()
@ -194,9 +188,8 @@ class Url
}
/**
* Get the full path (path + querystring + fragment)
* Get the full path (path + querystring + fragment).
*
* @access public
* @return string
*/
public function getFullPath()
@ -210,9 +203,8 @@ class Url
}
/**
* Get the hostname
* Get the hostname.
*
* @access public
* @return string
*/
public function getHost()
@ -221,21 +213,20 @@ class Url
}
/**
 * Return true if the url has a hostname.
 *
 * @return bool
 */
public function hasHost()
{
    return !empty($this->components['host']);
}
/**
* Get the scheme
* Get the scheme.
*
* @access public
* @param string $suffix Suffix to add when there is a scheme
*
* @return string
*/
public function getScheme($suffix = '')
@ -244,10 +235,10 @@ class Url
}
/**
* Set the scheme
* Set the scheme.
*
* @access public
* @param string $scheme Set a scheme
*
* @return string
*/
public function setScheme($scheme)
@ -256,21 +247,20 @@ class Url
}
/**
 * Return true if the url has a scheme.
 *
 * @return bool
 */
public function hasScheme()
{
    return !empty($this->components['scheme']);
}
/**
* Get the port
* Get the port.
*
* @access public
* @param string $prefix Prefix to add when there is a port
*
* @return string
*/
public function getPort($prefix = '')
@ -279,21 +269,19 @@ class Url
}
/**
 * Return true if the url has a port.
 *
 * @return bool
 */
public function hasPort()
{
    return !empty($this->components['port']);
}
/**
* Return true if the url is protocol relative (start with //)
* Return true if the url is protocol relative (start with //).
*
* @access public
* @return boolean
* @return bool
*/
public function isProtocolRelative()
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Config;
/**
* Config class
* Config class.
*
* @author Frederic Guillot
* @package picofeed
*
* @method \PicoFeed\Config\Config setClientTimeout(integer $value)
* @method \PicoFeed\Config\Config setClientUserAgent(string $value)
@ -34,7 +33,6 @@ namespace PicoFeed\Config;
* @method \PicoFeed\Config\Config setFilterImageProxyUrl($value)
* @method \PicoFeed\Config\Config setFilterImageProxyCallback($closure)
* @method \PicoFeed\Config\Config setFilterImageProxyProtocol($value)
*
* @method integer getClientTimeout()
* @method string getClientUserAgent()
* @method integer getMaxRedirections()
@ -65,22 +63,21 @@ namespace PicoFeed\Config;
class Config
{
/**
* Contains all parameters
* Contains all parameters.
*
* @access private
* @var array
*/
private $container = array();
/**
* Magic method to have any kind of setters or getters
* Magic method to have any kind of setters or getters.
*
* @access public
* @param string $name Getter/Setter name
* @param array $arguments Method arguments
*
* @return mixed
*/
public function __call($name , array $arguments)
public function __call($name, array $arguments)
{
$name = strtolower($name);
$prefix = substr($name, 0, 3);
@ -88,10 +85,11 @@ class Config
if ($prefix === 'set' && isset($arguments[0])) {
$this->container[$parameter] = $arguments[0];
return $this;
}
else if ($prefix === 'get') {
} elseif ($prefix === 'get') {
$default_value = isset($arguments[0]) ? $arguments[0] : null;
return isset($this->container[$parameter]) ? $this->container[$parameter] : $default_value;
}
}

View File

@ -3,9 +3,7 @@
namespace PicoFeed\Encoding;
/**
* Encoding class
*
* @package Encoding
* Encoding class.
*/
class Encoding
{
@ -17,7 +15,7 @@ class Encoding
// suppress all notices since it isn't possible to silence only the
// notice "Wrong charset, conversion from $in_encoding to $out_encoding is not allowed"
set_error_handler(function() {}, E_NOTICE);
set_error_handler(function () {}, E_NOTICE);
// convert input to utf-8 and strip invalid characters
$value = iconv($encoding, 'UTF-8//IGNORE', $input);

View File

@ -5,41 +5,36 @@ namespace PicoFeed\Filter;
use PicoFeed\Client\Url;
/**
* Attribute Filter class
* Attribute Filter class.
*
* @author Frederic Guillot
* @package Filter
*/
class Attribute
{
/**
* Image proxy url
* Image proxy url.
*
* @access private
* @var string
*/
private $image_proxy_url = '';
/**
* Image proxy callback
* Image proxy callback.
*
* @access private
* @var \Closure|null
*/
private $image_proxy_callback = null;
/**
* Limit the image proxy usage to this protocol
* Limit the image proxy usage to this protocol.
*
* @access private
* @var string
*/
private $image_proxy_limit_protocol = '';
/**
* Tags and attribute whitelist
* Tags and attribute whitelist.
*
* @access private
* @var array
*/
private $attribute_whitelist = array(
@ -80,15 +75,14 @@ class Attribute
'time' => array('datetime'),
'abbr' => array('title'),
'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'),
'q' => array('cite')
'q' => array('cite'),
);
/**
* Scheme whitelist
* Scheme whitelist.
*
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
*
* @access private
* @var array
*/
private $scheme_whitelist = array(
@ -124,9 +118,8 @@ class Attribute
);
/**
* Iframe source whitelist, everything else is ignored
* Iframe source whitelist, everything else is ignored.
*
* @access private
* @var array
*/
private $iframe_whitelist = array(
@ -139,9 +132,8 @@ class Attribute
);
/**
* Blacklisted resources
* Blacklisted resources.
*
* @access private
* @var array
*/
private $media_blacklist = array(
@ -172,9 +164,8 @@ class Attribute
);
/**
* Attributes used for external resources
* Attributes used for external resources.
*
* @access private
* @var array
*/
private $media_attributes = array(
@ -184,9 +175,8 @@ class Attribute
);
/**
* Attributes that must be integer
* Attributes that must be integer.
*
* @access private
* @var array
*/
private $integer_attributes = array(
@ -196,9 +186,8 @@ class Attribute
);
/**
* Mandatory attributes for specified tags
* Mandatory attributes for specified tags.
*
* @access private
* @var array
*/
private $required_attributes = array(
@ -210,9 +199,8 @@ class Attribute
);
/**
* Add attributes to specified tags
* Add attributes to specified tags.
*
* @access private
* @var array
*/
private $add_attributes = array(
@ -221,9 +209,8 @@ class Attribute
);
/**
* List of filters to apply
* List of filters to apply.
*
* @access private
* @var array
*/
private $filters = array(
@ -235,21 +222,19 @@ class Attribute
'filterProtocolUrlAttribute',
'rewriteImageProxyUrl',
'secureIframeSrc',
'removeYouTubeAutoplay'
'removeYouTubeAutoplay',
);
/**
* Website url instance
* Website url instance.
*
* @access private
* @var \PicoFeed\Client\Url
*/
private $website;
/**
* Constructor
* Constructor.
*
* @access public
* @param \PicoFeed\Client\Url $website Website url instance
*/
public function __construct(Url $website)
@ -258,18 +243,18 @@ class Attribute
}
/**
* Apply filters to the attributes list
* Apply filters to the attributes list.
*
* @access public
* @param string $tag Tag name
* @param array $attributes Attributes dictionary
*
* @return array Filtered attributes
*/
public function filter($tag, array $attributes)
{
foreach ($attributes as $attribute => &$value) {
foreach ($this->filters as $filter) {
if (! $this->$filter($tag, $attribute, $value)) {
if (!$this->$filter($tag, $attribute, $value)) {
unset($attributes[$attribute]);
break;
}
@ -280,13 +265,13 @@ class Attribute
}
/**
* Return true if the value is allowed (remove not allowed attributes)
* Return true if the value is allowed (remove not allowed attributes).
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function filterAllowedAttribute($tag, $attribute, $value)
{
@ -294,13 +279,13 @@ class Attribute
}
/**
* Return true if the value is not integer (remove attributes that should have an integer value)
* Return true if the value is not integer (remove attributes that should have an integer value).
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function filterIntegerAttribute($tag, $attribute, $value)
{
@ -312,18 +297,17 @@ class Attribute
}
/**
* Return true if the iframe source is allowed (remove not allowed iframe)
* Return true if the iframe source is allowed (remove not allowed iframe).
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function filterIframeAttribute($tag, $attribute, $value)
{
if ($tag === 'iframe' && $attribute === 'src') {
foreach ($this->iframe_whitelist as $url) {
if (strpos($value, $url) === 0) {
return true;
@ -337,13 +321,13 @@ class Attribute
}
/**
* Return true if the resource is not blacklisted (remove blacklisted resource attributes)
* Return true if the resource is not blacklisted (remove blacklisted resource attributes).
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function filterBlacklistResourceAttribute($tag, $attribute, $value)
{
@ -355,13 +339,13 @@ class Attribute
}
/**
* Convert all relative links to absolute url
* Convert all relative links to absolute url.
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function rewriteAbsoluteUrl($tag, $attribute, &$value)
{
@ -374,13 +358,13 @@ class Attribute
/**
* Turns iframes' src attribute from http to https to prevent
* mixed active content
* mixed active content.
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function secureIframeSrc($tag, $attribute, &$value)
{
@ -392,13 +376,13 @@ class Attribute
}
/**
* Removes YouTube autoplay from iframes
* Removes YouTube autoplay from iframes.
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function removeYouTubeAutoplay($tag, $attribute, &$value)
{
@ -411,23 +395,21 @@ class Attribute
}
/**
* Rewrite image url to use with a proxy
* Rewrite image url to use with a proxy.
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function rewriteImageProxyUrl($tag, $attribute, &$value)
{
if ($tag === 'img' && $attribute === 'src'
&& ! ($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) {
&& !($this->image_proxy_limit_protocol !== '' && stripos($value, $this->image_proxy_limit_protocol.':') !== 0)) {
if ($this->image_proxy_url) {
$value = sprintf($this->image_proxy_url, rawurlencode($value));
}
else if (is_callable($this->image_proxy_callback)) {
} elseif (is_callable($this->image_proxy_callback)) {
$value = call_user_func($this->image_proxy_callback, $value);
}
}
@ -436,17 +418,17 @@ class Attribute
}
/**
* Return true if the scheme is authorized
* Return true if the scheme is authorized.
*
* @access public
* @param string $tag Tag name
* @param string $attribute Attribute name
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function filterProtocolUrlAttribute($tag, $attribute, $value)
{
if ($this->isResource($attribute) && ! $this->isAllowedProtocol($value)) {
if ($this->isResource($attribute) && !$this->isAllowedProtocol($value)) {
return false;
}
@ -454,11 +436,11 @@ class Attribute
}
/**
* Automatically add/override some attributes for specific tags
* Automatically add/override some attributes for specific tags.
*
* @access public
* @param string $tag Tag name
* @param array $attributes Attributes list
*
* @return array
*/
public function addAttributes($tag, array $attributes)
@ -471,19 +453,18 @@ class Attribute
}
/**
* Return true if all required attributes are present
* Return true if all required attributes are present.
*
* @access public
* @param string $tag Tag name
* @param array $attributes Attributes list
* @return boolean
*
* @return bool
*/
public function hasRequiredAttributes($tag, array $attributes)
{
if (isset($this->required_attributes[$tag])) {
foreach ($this->required_attributes[$tag] as $attribute) {
if (! isset($attributes[$attribute])) {
if (!isset($attributes[$attribute])) {
return false;
}
}
@ -493,11 +474,11 @@ class Attribute
}
/**
* Check if an attribute name is an external resource
* Check if an attribute name is an external resource.
*
* @access public
* @param string $attribute Attribute name
* @return boolean
*
* @return bool
*/
public function isResource($attribute)
{
@ -505,16 +486,15 @@ class Attribute
}
/**
* Detect if the protocol is allowed or not
* Detect if the protocol is allowed or not.
*
* @access public
* @param string $value Attribute value
* @return boolean
*
* @return bool
*/
public function isAllowedProtocol($value)
{
foreach ($this->scheme_whitelist as $protocol) {
if (strpos($value, $protocol) === 0) {
return true;
}
@ -524,16 +504,15 @@ class Attribute
}
/**
* Detect if an url is blacklisted
* Detect if an url is blacklisted.
*
* @access public
* @param string $resource Attribute value (URL)
* @return boolean
*
* @return bool
*/
public function isBlacklistedMedia($resource)
{
foreach ($this->media_blacklist as $name) {
if (strpos($resource, $name) !== false) {
return true;
}
@ -543,10 +522,10 @@ class Attribute
}
/**
* Convert the attribute list to html
* Convert the attribute list to html.
*
* @access public
* @param array $attributes Attributes
*
* @return string
*/
public function toHtml(array $attributes)
@ -561,147 +540,158 @@ class Attribute
}
/**
 * Replace the whitelist of tags and of the attributes allowed on each tag.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
 *
 * @return Attribute
 */
public function setWhitelistedAttributes(array $values)
{
    if ($values) {
        $this->attribute_whitelist = $values;
    }

    return $this;
}
/**
 * Replace the list of allowed schemes.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of scheme: ['http://', 'ftp://']
 *
 * @return Attribute
 */
public function setSchemeWhitelist(array $values)
{
    if ($values) {
        $this->scheme_whitelist = $values;
    }

    return $this;
}
/**
 * Replace the list of media attributes (used to load external resources).
 *
 * An empty list is ignored and the current list is kept.
 *
 * @param array $values List of values: ['src', 'href']
 *
 * @return Attribute
 */
public function setMediaAttributes(array $values)
{
    if ($values) {
        $this->media_attributes = $values;
    }

    return $this;
}
/**
 * Replace the list of blacklisted external resources.
 *
 * An empty list is ignored and the current blacklist is kept.
 *
 * @param array $values List of tags: ['http://google.com/', '...']
 *
 * @return Attribute
 */
public function setMediaBlacklist(array $values)
{
    if ($values) {
        $this->media_blacklist = $values;
    }

    return $this;
}
/**
 * Replace the mandatory attributes of whitelisted tags.
 *
 * An empty list is ignored and the current list is kept.
 *
 * @param array $values List of tags: ['img' => 'src']
 *
 * @return Attribute
 */
public function setRequiredAttributes(array $values)
{
    if ($values) {
        $this->required_attributes = $values;
    }

    return $this;
}
/**
 * Replace the attributes that are automatically added to specific tags.
 *
 * An empty list is ignored and the current list is kept.
 *
 * @param array $values List of tags: ['a' => 'target="_blank"']
 *
 * @return Attribute
 */
public function setAttributeOverrides(array $values)
{
    if ($values) {
        $this->add_attributes = $values;
    }

    return $this;
}
/**
 * Replace the list of attributes that must hold an integer.
 *
 * An empty list is ignored and the current list is kept.
 *
 * @param array $values List of tags: ['width', 'height']
 *
 * @return Attribute
 */
public function setIntegerAttributes(array $values)
{
    if ($values) {
        $this->integer_attributes = $values;
    }

    return $this;
}
/**
 * Replace the list of allowed iframe resources.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of tags: ['http://www.youtube.com']
 *
 * @return Attribute
 */
public function setIframeWhitelist(array $values)
{
    if ($values) {
        $this->iframe_whitelist = $values;
    }

    return $this;
}
/**
 * Set the image proxy URL.
 *
 * The original image url will be urlencoded. A falsy value is ignored
 * and the current proxy URL is kept.
 *
 * @param string $url Proxy URL
 *
 * @return Attribute
 */
public function setImageProxyUrl($url)
{
    if ($url) {
        $this->image_proxy_url = $url;
    }

    return $this;
}
/**
 * Set the image proxy callback.
 *
 * A falsy value is ignored and the current callback is kept.
 *
 * @param \Closure $callback
 *
 * @return Attribute
 */
public function setImageProxyCallback($callback)
{
    if ($callback) {
        $this->image_proxy_callback = $callback;
    }

    return $this;
}
/**
 * Set the protocol the image proxy is restricted to.
 *
 * A falsy value is ignored and the current restriction is kept.
 *
 * @param string $value
 *
 * @return Attribute
 */
public function setImageProxyProtocol($value)
{
    if ($value) {
        $this->image_proxy_limit_protocol = $value;
    }

    return $this;
}
}

View File

@ -3,45 +3,46 @@
namespace PicoFeed\Filter;
/**
* Filter class
* Filter class.
*
* @author Frederic Guillot
* @package Filter
*/
class Filter
{
/**
 * Build an Html filter instance for the given content.
 *
 * @static
 *
 * @param string $html    HTML content
 * @param string $website Site URL (used to build absolute URL)
 *
 * @return Html
 */
public static function html($html, $website)
{
    return new Html($html, $website);
}
/**
 * Escape HTML content.
 *
 * Converts special characters, including single and double quotes, to
 * HTML entities. Existing entities are not encoded a second time.
 *
 * @static
 *
 * @param string $content Content to escape
 *
 * @return string
 */
public static function escape($content)
{
    // No "@" error suppression here, so a bad argument is reported instead of hidden.
    return htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
}
/**
* Remove HTML tags
* Remove HTML tags.
*
* @access public
* @param string $data Input data
*
* @return string
*/
public function removeHTMLTags($data)
@ -50,11 +51,12 @@ class Filter
}
/**
* Remove the XML tag from a document
* Remove the XML tag from a document.
*
* @static
* @access public
*
* @param string $data Input data
*
* @return string
*/
public static function stripXmlTag($data)
@ -64,37 +66,37 @@ class Filter
}
do {
$pos = strpos($data, '<?xml-stylesheet ');
if ($pos !== false) {
$data = ltrim(substr($data, strpos($data, '?>') + 2));
}
} while ($pos !== false && $pos < 200);
return $data;
}
/**
 * Strip head tag from the HTML content.
 *
 * Removes every <head>...</head> section (case-insensitive, spanning
 * multiple lines). When the regex engine fails, for instance because the
 * input is not valid UTF-8, the data is returned unchanged.
 *
 * @static
 *
 * @param string $data Input data
 *
 * @return string
 */
public static function stripHeadTags($data)
{
    $stripped = preg_replace('@<head[^>]*?>.*?</head>@siu', '', $data);

    // preg_replace() returns null on error; falling back to the input
    // avoids silently throwing the whole document away.
    return $stripped === null ? $data : $stripped;
}
/**
* Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string
* Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string.
*
* @static
* @access public
*
* @param string $value Raw data
*
* @return string Normalized data
*/
public static function stripWhiteSpace($value)
@ -107,11 +109,12 @@ class Filter
}
/**
* Fixes before XML parsing
* Fixes before XML parsing.
*
* @static
* @access public
*
* @param string $data Raw data
*
* @return string Normalized data
*/
public static function normalizeData($data)
@ -122,7 +125,7 @@ class Filter
);
// strip invalid XML 1.0 characters which are encoded as entities
$data = preg_replace_callback($entities, function($matches) {
$data = preg_replace_callback($entities, function ($matches) {
$code_point = $matches[2];
// convert hex entity to decimal

View File

@ -8,88 +8,78 @@ use PicoFeed\Scraper\RuleLoader;
use PicoFeed\Parser\XmlParser;
/**
* HTML Filter class
* HTML Filter class.
*
* @author Frederic Guillot
* @package Filter
*/
class Html
{
/**
* Config object
* Config object.
*
* @access private
* @var \PicoFeed\Config\Config
*/
private $config;
/**
* Unfiltered XML data
* Unfiltered XML data.
*
* @access private
* @var string
*/
private $input = '';
/**
* Filtered XML data
* Filtered XML data.
*
* @access private
* @var string
*/
private $output = '';
/**
* List of empty tags
* List of empty tags.
*
* @access private
* @var array
*/
private $empty_tags = array();
/**
* Empty flag
* Empty flag.
*
* @access private
* @var boolean
* @var bool
*/
private $empty = true;
/**
* Tag instance
* Tag instance.
*
* @access public
* @var \PicoFeed\Filter\Tag
*/
public $tag = '';
/**
* Attribute instance
* Attribute instance.
*
* @access public
* @var \PicoFeed\Filter\Attribute
*/
public $attribute = '';
/**
* The website to filter
* The website to filter.
*
* @access private
* @var string
*/
private $website;
/**
* Initialize the filter, all inputs data must be encoded in UTF-8 before
* Initialize the filter, all inputs data must be encoded in UTF-8 before.
*
* @access public
* @param string $html HTML content
* @param string $website Site URL (used to build absolute URL)
*/
public function __construct($html, $website)
{
$this->config = new Config;
$this->input = XmlParser::HtmlToXml($html);
$this->config = new Config();
$this->input = XmlParser::htmlToXml($html);
$this->output = '';
$this->tag = new Tag($this->config);
$this->website = $website;
@ -97,10 +87,10 @@ class Html
}
/**
* Set config object
* Set config object.
*
* @access public
* @param \PicoFeed\Config\Config $config Config instance
*
* @return \PicoFeed\Filter\Html
*/
public function setConfig($config)
@ -126,9 +116,8 @@ class Html
}
/**
* Run tags/attributes filtering
* Run tags/attributes filtering.
*
* @access public
* @return string
*/
public function execute()
@ -150,9 +139,7 @@ class Html
}
/**
* Called before XML parsing
*
* @access public
* Called before XML parsing.
*/
public function preFilter()
{
@ -160,9 +147,7 @@ class Html
}
/**
* Called after XML parsing
*
* @access public
* Called after XML parsing.
*/
public function postFilter()
{
@ -173,16 +158,15 @@ class Html
}
/**
* Called after XML parsing
* @param string $content the content that should be filtered
* Called after XML parsing.
*
* @access public
* @param string $content the content that should be filtered
*/
public function filterRules($content)
{
// the constructor should require a config, then this if can be removed
if ($this->config === null) {
$config = new Config;
$config = new Config();
} else {
$config = $this->config;
}
@ -196,7 +180,7 @@ class Html
if (isset($rules['filter'])) {
foreach ($rules['filter'] as $pattern => $rule) {
if (preg_match($pattern, $sub_url)) {
foreach($rule as $search => $replace) {
foreach ($rule as $search => $replace) {
$content = preg_replace($search, $replace, $content);
}
}
@ -207,9 +191,8 @@ class Html
}
/**
* Parse opening tag
* Parse opening tag.
*
* @access public
* @param resource $parser XML parser
* @param string $tag Tag name
* @param array $attributes Tag attributes
@ -219,11 +202,9 @@ class Html
$this->empty = true;
if ($this->tag->isAllowed($tag, $attributes)) {
$attributes = $this->attribute->filter($tag, $attributes);
if ($this->attribute->hasRequiredAttributes($tag, $attributes)) {
$attributes = $this->attribute->addAttributes($tag, $attributes);
$this->output .= $this->tag->openHtmlTag($tag, $this->attribute->toHtml($attributes));
@ -235,23 +216,21 @@ class Html
}
/**
 * Parse closing tag.
 *
 * Pops the entry recorded for this element from the empty-tags stack and
 * appends the closing tag to the output only when that entry is falsy and
 * the tag is on the whitelist.
 *
 * @param resource $parser XML parser
 * @param string   $tag    Tag name
 */
public function endTag($parser, $tag)
{
    // NOTE(review): presumably the opening-tag handler pushes a truthy flag
    // for elements that must not get a closing tag — confirm.
    $skipped = array_pop($this->empty_tags);

    if (!$skipped && $this->tag->isAllowedTag($tag)) {
        $this->output .= $this->tag->closeHtmlTag($tag);
    }
}
}
/**
* Parse tag content
* Parse tag content.
*
* @access public
* @param resource $parser XML parser
* @param string $content Tag content
*/

View File

@ -3,30 +3,26 @@
namespace PicoFeed\Filter;
use DOMXpath;
use PicoFeed\Parser\XmlParser;
use PicoFeed\Config\Config;
/**
* Tag Filter class
* Tag Filter class.
*
* @author Frederic Guillot
* @package Filter
*/
class Tag
{
/**
* Config object
* Config object.
*
* @access private
* @var \PicoFeed\Config\Config
*/
private $config;
/**
* Tags blacklist (Xpath expressions)
* Tags blacklist (Xpath expressions).
*
* @access private
* @var array
*/
private $tag_blacklist = array(
@ -35,9 +31,8 @@ class Tag
);
/**
* Tags whitelist
* Tags whitelist.
*
* @access private
* @var array
*/
private $tag_whitelist = array(
@ -87,24 +82,24 @@ class Tag
}
/**
 * Check if the tag is allowed and is not a pixel tracker.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes dictionary
 *
 * @return bool
 */
public function isAllowed($tag, array $attributes)
{
    return $this->isAllowedTag($tag) && !$this->isPixelTracker($tag, $attributes);
}
/**
* Return the HTML opening tag
* Return the HTML opening tag.
*
* @access public
* @param string $tag Tag name
* @param string $attributes Attributes converted in html
*
* @return string
*/
public function openHtmlTag($tag, $attributes = '')
@ -113,10 +108,10 @@ class Tag
}
/**
* Return the HTML closing tag
* Return the HTML closing tag.
*
* @access public
* @param string $tag Tag name
*
* @return string
*/
public function closeHtmlTag($tag)
@ -125,11 +120,11 @@ class Tag
}
/**
* Return true if the tag is self-closing
* Return true if the tag is self-closing.
*
* @access public
* @param string $tag Tag name
* @return boolean
*
* @return bool
*/
public function isSelfClosingTag($tag)
{
@ -137,11 +132,11 @@ class Tag
}
/**
* Check if a tag is on the whitelist
* Check if a tag is on the whitelist.
*
* @access public
* @param string $tag Tag name
* @return boolean
*
* @return bool
*/
public function isAllowedTag($tag)
{
@ -152,12 +147,12 @@ class Tag
}
/**
* Detect if an image tag is a pixel tracker
* Detect if an image tag is a pixel tracker.
*
* @access public
* @param string $tag Tag name
* @param array $attributes Tag attributes
* @return boolean
*
* @return bool
*/
public function isPixelTracker($tag, array $attributes)
{
@ -167,10 +162,10 @@ class Tag
}
/**
* Remove script tags
* Remove script tags.
*
* @access public
* @param string $data Input data
*
* @return string
*/
public function removeBlacklistedTags($data)
@ -192,12 +187,11 @@ class Tag
return $dom->saveXML();
}
/**
* Remove empty tags
* Remove empty tags.
*
* @access public
* @param string $data Input data
*
* @return string
*/
public function removeEmptyTags($data)
@ -206,27 +200,28 @@ class Tag
}
/**
 * Replace <br/><br/> by only one.
 *
 * Every run of one or more <br> tags (with or without the trailing
 * slash, optionally separated by whitespace) becomes a single <br/>.
 *
 * @param string $data Input data
 *
 * @return string
 */
public function removeMultipleBreakTags($data)
{
    return preg_replace('/(<br\s*\/?>\s*)+/', '<br/>', $data);
}
/**
 * Replace the whitelist of tags.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
 *
 * @return Tag
 */
public function setWhitelistedTags(array $values)
{
    if ($values) {
        $this->tag_whitelist = $values;
    }

    return $this;
}
}

View File

@ -6,45 +6,43 @@ use DateTime;
use DateTimeZone;
/**
* Logging class
* Logging class.
*
* @author Frederic Guillot
* @package Logging
*/
class Logger
{
/**
* List of messages
* List of messages.
*
* @static
* @access private
*
* @var array
*/
private static $messages = array();
/**
* Default timezone
* Default timezone.
*
* @static
* @access private
*
* @var string
*/
private static $timezone = 'UTC';
/**
* Enable or disable logging
* Enable or disable logging.
*
* @static
* @access public
* @var boolean
*
* @var bool
*/
public static $enable = false;
/**
* Enable logging
* Enable logging.
*
* @static
* @access public
*/
public static function enable()
{
@ -52,10 +50,10 @@ class Logger
}
/**
* Add a new message
* Add a new message.
*
* @static
* @access public
*
* @param string $message Message
*/
public static function setMessage($message)
@ -67,10 +65,10 @@ class Logger
}
/**
* Get all logged messages
* Get all logged messages.
*
* @static
* @access public
*
* @return array
*/
public static function getMessages()
@ -79,10 +77,9 @@ class Logger
}
/**
* Remove all logged messages
* Remove all logged messages.
*
* @static
* @access public
*/
public static function deleteMessages()
{
@ -90,11 +87,12 @@ class Logger
}
/**
* Set a different timezone
* Set a different timezone.
*
* @static
*
* @see http://php.net/manual/en/timezones.php
* @access public
*
* @param string $timezone Timezone
*/
public static function setTimeZone($timezone)
@ -103,10 +101,10 @@ class Logger
}
/**
* Get all messages serialized into a string
* Get all messages serialized into a string.
*
* @static
* @access public
*
* @return string
*/
public static function toString()

View File

@ -7,25 +7,24 @@ use PicoFeed\Filter\Filter;
use PicoFeed\Client\Url;
/**
* Atom parser
* Atom parser.
*
* @author Frederic Guillot
* @package Parser
*/
class Atom extends Parser
{
/**
* Supported namespaces
* Supported namespaces.
*/
protected $namespaces = array(
'atom' => 'http://www.w3.org/2005/Atom',
);
/**
* Get the path to the items XML tree
* Get the path to the items XML tree.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function getItemsTree(SimpleXMLElement $xml)
@ -35,9 +34,8 @@ class Atom extends Parser
}
/**
* Find the feed url
* Find the feed url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -47,9 +45,8 @@ class Atom extends Parser
}
/**
* Find the site url
* Find the site url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -59,9 +56,8 @@ class Atom extends Parser
}
/**
* Find the feed description
* Find the feed description.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -74,9 +70,8 @@ class Atom extends Parser
}
/**
* Find the feed logo url
* Find the feed logo url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -89,9 +84,8 @@ class Atom extends Parser
}
/**
* Find the feed icon
* Find the feed icon.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -104,9 +98,8 @@ class Atom extends Parser
}
/**
* Find the feed title
* Find the feed title.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -119,9 +112,8 @@ class Atom extends Parser
}
/**
* Find the feed language
* Find the feed language.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -134,9 +126,8 @@ class Atom extends Parser
}
/**
* Find the feed id
* Find the feed id.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -149,9 +140,8 @@ class Atom extends Parser
}
/**
* Find the feed date
* Find the feed date.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -164,9 +154,8 @@ class Atom extends Parser
}
/**
* Find the item date
* Find the item date.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -179,24 +168,21 @@ class Atom extends Parser
$updated = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces)
?: XmlParser::getXPathResult($entry, 'updated');
$published = ! empty($published) ? $this->date->getDateTime((string) current($published)) : null;
$updated = ! empty($updated) ? $this->date->getDateTime((string) current($updated)) : null;
$published = !empty($published) ? $this->date->getDateTime((string) current($published)) : null;
$updated = !empty($updated) ? $this->date->getDateTime((string) current($updated)) : null;
if ($published === null && $updated === null) {
$item->date = $feed->getDate(); // We use the feed date if there is no date for the item
}
else if ($published !== null && $updated !== null) {
} elseif ($published !== null && $updated !== null) {
$item->date = max($published, $updated); // We use the most recent date between published and updated
}
else {
} else {
$item->date = $updated ?: $published;
}
}
/**
* Find the item title
* Find the item title.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
*/
@ -209,9 +195,8 @@ class Atom extends Parser
}
/**
* Find the item author
* Find the item author.
*
* @access public
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
@ -227,9 +212,8 @@ class Atom extends Parser
}
/**
* Find the item content
* Find the item content.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -239,9 +223,8 @@ class Atom extends Parser
}
/**
* Find the item URL
* Find the item URL.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -251,9 +234,8 @@ class Atom extends Parser
}
/**
* Genereate the item id
* Generate the item id.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -263,10 +245,9 @@ class Atom extends Parser
$id = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces)
?: XmlParser::getXPathResult($entry, 'id');
if (! empty($id)) {
if (!empty($id)) {
$item->id = $this->generateId((string) current($id));
}
else {
} else {
$item->id = $this->generateId(
$item->getTitle(), $item->getUrl(), $item->getContent()
);
@ -274,9 +255,8 @@ class Atom extends Parser
}
/**
* Find the item enclosure
* Find the item enclosure.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -292,9 +272,8 @@ class Atom extends Parser
}
/**
* Find the item language
* Find the item language.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -307,11 +286,11 @@ class Atom extends Parser
}
/**
* Get the URL from a link tag
* Get the URL from a link tag.
*
* @access private
* @param SimpleXMLElement $xml XML tag
* @param string $rel Link relationship: alternate, enclosure, related, self, via
*
* @return string
*/
private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false)
@ -324,6 +303,7 @@ class Atom extends Parser
if ($fallback) {
$link = $this->findLink($xml, '');
return $link ? (string) $link['href'] : '';
}
@ -331,11 +311,11 @@ class Atom extends Parser
}
/**
* Get a link tag that match a relationship
* Get a link tag that matches a relationship.
*
* @access private
* @param SimpleXMLElement $xml XML tag
* @param string $rel Link relationship: alternate, enclosure, related, self, via
*
* @return SimpleXMLElement|null
*/
private function findLink(SimpleXMLElement $xml, $rel)
@ -349,14 +329,14 @@ class Atom extends Parser
}
}
return null;
return;
}
/**
* Get the entry content
* Get the entry content.
*
* @access private
* @param SimpleXMLElement $entry XML Entry
*
* @return string
*/
private function getContent(SimpleXMLElement $entry)
@ -366,16 +346,15 @@ class Atom extends Parser
?: XmlParser::getXPathResult($entry, 'content')
);
if (! empty($content) && count($content->children())) {
if (!empty($content) && count($content->children())) {
$xml_string = '';
foreach($content->children() as $child) {
foreach ($content->children() as $child) {
$xml_string .= $child->asXML();
}
return $xml_string;
}
else if (trim((string) $content) !== '') {
} elseif (trim((string) $content) !== '') {
return (string) $content;
}

View File

@ -6,25 +6,22 @@ use DateTime;
use DateTimeZone;
/**
* Date Parser
* Date Parser.
*
* @author Frederic Guillot
* @package Parser
*/
class DateParser
{
/**
* Timezone used to parse feed dates
* Timezone used to parse feed dates.
*
* @access public
* @var string
*/
public $timezone = 'UTC';
/**
* Supported formats [ 'format' => length ]
* Supported formats [ 'format' => length ].
*
* @access public
* @var array
*/
public $formats = array(
@ -56,10 +53,10 @@ class DateParser
);
/**
* Try to parse all date format for broken feeds
* Try to parse all date format for broken feeds.
*
* @access public
* @param string $value Original date format
*
* @return DateTime
*/
public function getDateTime($value)
@ -67,7 +64,6 @@ class DateParser
$value = trim($value);
foreach ($this->formats as $format => $length) {
$truncated_value = $value;
if ($length !== null) {
$truncated_value = substr($truncated_value, 0, $length);
@ -83,19 +79,18 @@ class DateParser
}
/**
* Get a valid date from a given format
* Get a valid date from a given format.
*
* @access public
* @param string $format Date format
* @param string $value Original date value
* @return DateTime|boolean
*
* @return DateTime|bool
*/
public function getValidDate($format, $value)
{
$date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone));
if ($date !== false) {
$errors = DateTime::getLastErrors();
if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) {
@ -107,9 +102,8 @@ class DateParser
}
/**
* Get the current datetime
* Get the current datetime.
*
* @access public
* @return DateTime
*/
public function getCurrentDateTime()

View File

@ -3,98 +3,84 @@
namespace PicoFeed\Parser;
/**
* Feed
* Feed.
*
* @author Frederic Guillot
* @package Parser
*/
class Feed
{
/**
* Feed items
* Feed items.
*
* @access public
* @var array
*/
public $items = array();
/**
* Feed id
* Feed id.
*
* @access public
* @var string
*/
public $id = '';
/**
* Feed title
* Feed title.
*
* @access public
* @var string
*/
public $title = '';
/**
* Feed description
* Feed description.
*
* @access public
* @var string
*/
public $description = '';
/**
* Feed url
* Feed url.
*
* @access public
* @var string
*/
public $feed_url = '';
/**
* Site url
* Site url.
*
* @access public
* @var string
*/
public $site_url = '';
/**
* Feed date
* Feed date.
*
* @access public
* @var \DateTime
*/
public $date = null;
/**
* Feed language
* Feed language.
*
* @access public
* @var string
*/
public $language = '';
/**
* Feed logo URL
* Feed logo URL.
*
* @access public
* @var string
*/
public $logo = '';
/**
* Feed icon URL
* Feed icon URL.
*
* @access public
* @var string
*/
public $icon = '';
/**
* Return feed information
*
* @access public
* $return string
* Return feed information.
*/
public function __toString()
{
@ -117,10 +103,7 @@ class Feed
}
/**
* Get title
*
* @access public
* $return string
* Get title.
*/
public function getTitle()
{
@ -128,10 +111,7 @@ class Feed
}
/**
* Get description
*
* @access public
* $return string
* Get description.
*/
public function getDescription()
{
@ -139,10 +119,7 @@ class Feed
}
/**
* Get the logo url
*
* @access public
* $return string
* Get the logo url.
*/
public function getLogo()
{
@ -150,10 +127,7 @@ class Feed
}
/**
* Get the icon url
*
* @access public
* $return string
* Get the icon url.
*/
public function getIcon()
{
@ -161,10 +135,7 @@ class Feed
}
/**
* Get feed url
*
* @access public
* $return string
* Get feed url.
*/
public function getFeedUrl()
{
@ -172,10 +143,7 @@ class Feed
}
/**
* Get site url
*
* @access public
* $return string
* Get site url.
*/
public function getSiteUrl()
{
@ -183,10 +151,7 @@ class Feed
}
/**
* Get date
*
* @access public
* $return integer
* Get date.
*/
public function getDate()
{
@ -194,10 +159,7 @@ class Feed
}
/**
* Get language
*
* @access public
* $return string
* Get language.
*/
public function getLanguage()
{
@ -205,10 +167,7 @@ class Feed
}
/**
* Get id
*
* @access public
* $return string
* Get id.
*/
public function getId()
{
@ -216,10 +175,7 @@ class Feed
}
/**
* Get feed items
*
* @access public
* $return array
* Get feed items.
*/
public function getItems()
{
@ -227,9 +183,8 @@ class Feed
}
/**
* Return true if the feed is "Right to Left"
* Return true if the feed is "Right to Left".
*
* @access public
* @return bool
*/
public function isRTL()

View File

@ -3,17 +3,15 @@
namespace PicoFeed\Parser;
/**
* Feed Item
* Feed Item.
*
* @author Frederic Guillot
* @package Parser
*/
class Item
{
/**
* List of known RTL languages
* List of known RTL languages.
*
* @access public
* @var array
*/
public $rtl = array(
@ -28,99 +26,88 @@ class Item
);
/**
* Item id
* Item id.
*
* @access public
* @var string
*/
public $id = '';
/**
* Item title
* Item title.
*
* @access public
* @var string
*/
public $title = '';
/**
* Item url
* Item url.
*
* @access public
* @var string
*/
public $url = '';
/**
* Item author
* Item author.
*
* @access public
* @var string
*/
public $author= '';
public $author = '';
/**
* Item date
* Item date.
*
* @access public
* @var \DateTime
*/
public $date = null;
/**
* Item content
* Item content.
*
* @access public
* @var string
*/
public $content = '';
/**
* Item enclosure url
* Item enclosure url.
*
* @access public
* @var string
*/
public $enclosure_url = '';
/**
* Item enclusure type
* Item enclosure type.
*
* @access public
* @var string
*/
public $enclosure_type = '';
/**
* Item language
* Item language.
*
* @access public
* @var string
*/
public $language = '';
/**
* Raw XML
* Raw XML.
*
* @access public
* @var \SimpleXMLElement
*/
public $xml;
/**
* List of namespaces
* List of namespaces.
*
* @access public
* @var array
*/
public $namespaces = array();
/**
* Get specific XML tag or attribute value
* Get specific XML tag or attribute value.
*
* @access public
* @param string $tag Tag name (examples: guid, media:content)
* @param string $attribute Tag attribute
*
* @return array|false Tag values or error
*/
public function getTag($tag, $attribute = '')
@ -142,10 +129,7 @@ class Item
}
/**
* Return item information
*
* @access public
* $return string
* Return item information.
*/
public function __toString()
{
@ -163,10 +147,7 @@ class Item
}
/**
* Get title
*
* @access public
* $return string
* Get title.
*/
public function getTitle()
{
@ -174,10 +155,7 @@ class Item
}
/**
* Get url
*
* @access public
* $return string
* Get url.
*/
public function getUrl()
{
@ -185,10 +163,7 @@ class Item
}
/**
* Get id
*
* @access public
* $return string
* Get id.
*/
public function getId()
{
@ -196,10 +171,7 @@ class Item
}
/**
* Get date
*
* @access public
* $return \DateTime
* Get date.
*/
public function getDate()
{
@ -207,10 +179,7 @@ class Item
}
/**
* Get content
*
* @access public
* $return string
* Get content.
*/
public function getContent()
{
@ -218,10 +187,7 @@ class Item
}
/**
* Get enclosure url
*
* @access public
* $return string
* Get enclosure url.
*/
public function getEnclosureUrl()
{
@ -229,10 +195,7 @@ class Item
}
/**
* Get enclosure type
*
* @access public
* $return string
* Get enclosure type.
*/
public function getEnclosureType()
{
@ -240,10 +203,7 @@ class Item
}
/**
* Get language
*
* @access public
* $return string
* Get language.
*/
public function getLanguage()
{
@ -251,10 +211,7 @@ class Item
}
/**
* Get author
*
* @access public
* $return string
* Get author.
*/
public function getAuthor()
{
@ -262,9 +219,8 @@ class Item
}
/**
* Return true if the item is "Right to Left"
* Return true if the item is "Right to Left".
*
* @access public
* @return bool
*/
public function isRTL()

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Parser;
/**
* MalformedXmlException Exception
* MalformedXmlException Exception.
*
* @author Frederic Guillot
* @package Parser
*/
class MalformedXmlException extends ParserException
{

View File

@ -10,112 +10,99 @@ use PicoFeed\Logging\Logger;
use PicoFeed\Scraper\Scraper;
/**
* Base parser class
* Base parser class.
*
* @author Frederic Guillot
* @package Parser
*/
abstract class Parser
{
/**
* Config object
* Config object.
*
* @access private
* @var \PicoFeed\Config\Config
*/
private $config;
/**
* DateParser object
* DateParser object.
*
* @access protected
* @var \PicoFeed\Parser\DateParser
*/
protected $date;
/**
* Hash algorithm used to generate item id, any value supported by PHP, see hash_algos()
* Hash algorithm used to generate item id, any value supported by PHP, see hash_algos().
*
* @access private
* @var string
*/
private $hash_algo = 'sha256';
/**
* Feed content (XML data)
* Feed content (XML data).
*
* @access protected
* @var string
*/
protected $content = '';
/**
* Fallback url
* Fallback url.
*
* @access protected
* @var string
*/
protected $fallback_url = '';
/**
* XML namespaces supported by parser
* XML namespaces supported by parser.
*
* @access protected
* @var array
*/
protected $namespaces = array();
/**
* XML namespaces used in document
* XML namespaces used in document.
*
* @access protected
* @var array
*/
protected $used_namespaces = array();
/**
* Enable the content filtering
* Enable the content filtering.
*
* @access private
* @var bool
*/
private $enable_filter = true;
/**
* Enable the content grabber
* Enable the content grabber.
*
* @access private
* @var bool
*/
private $enable_grabber = false;
/**
* Enable the content grabber on all pages
* Restrict the content grabber to pages that have a rule file.
*
* @access private
* @var bool
*/
private $grabber_needs_rule_file = false;
/**
* Ignore those urls for the content scraper
* Ignore those urls for the content scraper.
*
* @access private
* @var array
*/
private $grabber_ignore_urls = array();
/**
* Constructor
* Constructor.
*
* @access public
* @param string $content Feed content
* @param string $http_encoding HTTP encoding (headers)
* @param string $fallback_url Fallback url when the feed provides a relative or broken url
*/
public function __construct($content, $http_encoding = '', $fallback_url = '')
{
$this->date = new DateParser;
$this->date = new DateParser();
$this->fallback_url = $fallback_url;
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
@ -128,9 +115,8 @@ abstract class Parser
}
/**
* Parse the document
* Parse the document.
*
* @access public
* @return \PicoFeed\Parser\Feed
*/
public function execute()
@ -154,7 +140,7 @@ abstract class Parser
$this->used_namespaces = $xml->getNamespaces(true);
$xml = $this->registerSupportedNamespaces($xml);
$feed = new Feed;
$feed = new Feed();
$this->findFeedUrl($xml, $feed);
$this->checkFeedUrl($feed);
@ -171,10 +157,9 @@ abstract class Parser
$this->findFeedIcon($xml, $feed);
foreach ($this->getItemsTree($xml) as $entry) {
$entry = $this->registerSupportedNamespaces($entry);
$item = new Item;
$item = new Item();
$item->xml = $entry;
$item->namespaces = $this->used_namespaces;
@ -206,41 +191,36 @@ abstract class Parser
}
/**
* Check if the feed url is correct
* Check if the feed url is correct.
*
* @access public
* @param Feed $feed Feed object
*/
public function checkFeedUrl(Feed $feed)
{
if ($feed->getFeedUrl() === '') {
$feed->feed_url = $this->fallback_url;
}
else {
} else {
$feed->feed_url = Url::resolve($feed->getFeedUrl(), $this->fallback_url);
}
}
/**
* Check if the site url is correct
* Check if the site url is correct.
*
* @access public
* @param Feed $feed Feed object
*/
public function checkSiteUrl(Feed $feed)
{
if ($feed->getSiteUrl() === '') {
$feed->site_url = Url::base($feed->getFeedUrl());
}
else {
} else {
$feed->site_url = Url::resolve($feed->getSiteUrl(), $this->fallback_url);
}
}
/**
* Check if the item url is correct
* Check if the item url is correct.
*
* @access public
* @param Feed $feed Feed object
* @param Item $item Item object
*/
@ -250,15 +230,13 @@ abstract class Parser
}
/**
* Fetch item content with the content grabber
* Fetch item content with the content grabber.
*
* @access public
* @param Item $item Item object
*/
public function scrapWebsite(Item $item)
{
if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) {
if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) {
$grabber = new Scraper($this->config);
$grabber->setUrl($item->getUrl());
@ -275,9 +253,8 @@ abstract class Parser
}
/**
* Filter HTML for entry content
* Filter HTML for entry content.
*
* @access public
* @param Feed $feed Feed object
* @param Item $item Item object
*/
@ -287,16 +264,14 @@ abstract class Parser
$filter = Filter::html($item->getContent(), $feed->getSiteUrl());
$filter->setConfig($this->config);
$item->content = $filter->execute();
}
else {
} else {
Logger::setMessage(get_called_class().': Content filtering disabled');
}
}
/**
* Generate a unique id for an entry (hash all arguments)
* Generate a unique id for an entry (hash all arguments).
*
* @access public
* @return string
*/
public function generateId()
@ -305,11 +280,12 @@ abstract class Parser
}
/**
* Return true if the given language is "Right to Left"
* Return true if the given language is "Right to Left".
*
* @static
* @access public
*
* @param string $language Language: fr-FR, en-US
*
* @return bool
*/
public static function isLanguageRTL($language)
@ -337,24 +313,26 @@ abstract class Parser
}
/**
* Set Hash algorithm used for id generation
* Set Hash algorithm used for id generation.
*
* @access public
* @param string $algo Algorithm name
*
* @return \PicoFeed\Parser\Parser
*/
public function setHashAlgo($algo)
{
    // An empty/falsy value leaves the currently configured algorithm untouched.
    if ($algo) {
        $this->hash_algo = $algo;
    }

    // Hand back the parser itself so that setter calls can be chained.
    return $this;
}
/**
* Set a different timezone
* Set a different timezone.
*
* @see http://php.net/manual/en/timezones.php
* @access public
*
* @param string $timezone Timezone
*
* @return \PicoFeed\Parser\Parser
*/
public function setTimezone($timezone)
@ -367,22 +345,22 @@ abstract class Parser
}
/**
* Set config object
* Set config object.
*
* @access public
* @param \PicoFeed\Config\Config $config Config instance
*
* @return \PicoFeed\Parser\Parser
*/
public function setConfig($config)
{
// Keep the config object on the parser; it is later passed to the Scraper and to the HTML filter.
$this->config = $config;
// Return the current instance so that setter calls can be chained.
return $this;
}
/**
* Enable the content grabber
* Disable the content filtering.
*
* @access public
* @return \PicoFeed\Parser\Parser
*/
public function disableContentFiltering()
@ -391,10 +369,9 @@ abstract class Parser
}
/**
* Return true if the content filtering is enabled
* Return true if the content filtering is enabled.
*
* @access public
* @return boolean
* @return bool
*/
public function isFilteringEnabled()
{
@ -406,11 +383,11 @@ abstract class Parser
}
/**
* Enable the content grabber
* Enable the content grabber.
*
* @access public
* @param bool $needs_rule_file true if only pages with rule files should be
* scraped
*
* @return \PicoFeed\Parser\Parser
*/
public function enableContentGrabber($needs_rule_file = false)
@ -420,10 +397,10 @@ abstract class Parser
}
/**
* Set ignored URLs for the content grabber
* Set ignored URLs for the content grabber.
*
* @access public
* @param array $urls URLs
*
* @return \PicoFeed\Parser\Parser
*/
public function setGrabberIgnoreUrls(array $urls)
@ -432,10 +409,10 @@ abstract class Parser
}
/**
* Register all supported namespaces to be used within an xpath query
* Register all supported namespaces to be used within an xpath query.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function registerSupportedNamespaces(SimpleXMLElement $xml)
@ -448,169 +425,152 @@ abstract class Parser
}
/**
* Find the feed url
* Find the feed url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
/**
* Find the site url
* Find the site url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed title
* Find the feed title.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed description
* Find the feed description.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed language
* Find the feed language.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed id
* Find the feed id.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedId(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed date
* Find the feed date.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedDate(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed logo url
* Find the feed logo url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed icon
* Find the feed icon.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
/**
* Get the path to the items XML tree
* Get the path to the items XML tree.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public abstract function getItemsTree(SimpleXMLElement $xml);
abstract public function getItemsTree(SimpleXMLElement $xml);
/**
* Find the item author
* Find the item author.
*
* @access public
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public abstract function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
/**
* Find the item URL
* Find the item URL.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public abstract function findItemUrl(SimpleXMLElement $entry, Item $item);
abstract public function findItemUrl(SimpleXMLElement $entry, Item $item);
/**
* Find the item title
* Find the item title.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public abstract function findItemTitle(SimpleXMLElement $entry, Item $item);
abstract public function findItemTitle(SimpleXMLElement $entry, Item $item);
/**
* Genereate the item id
* Generate the item id.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
* Find the item date
* Find the item date.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
* Find the item content
* Find the item content.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public abstract function findItemContent(SimpleXMLElement $entry, Item $item);
abstract public function findItemContent(SimpleXMLElement $entry, Item $item);
/**
* Find the item enclosure
* Find the item enclosure.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
* Find the item language
* Find the item language.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
}

View File

@ -4,12 +4,10 @@ namespace PicoFeed\Parser;
use PicoFeed\PicoFeedException;
/**
* ParserException Exception
* ParserException Exception.
*
* @author Frederic Guillot
* @package Parser
*/
abstract class ParserException extends PicoFeedException
{

View File

@ -6,15 +6,14 @@ use SimpleXMLElement;
use PicoFeed\Filter\Filter;
/**
* RSS 1.0 parser
* RSS 1.0 parser.
*
* @author Frederic Guillot
* @package Parser
*/
class Rss10 extends Parser
{
/**
* Supported namespaces
* Supported namespaces.
*/
protected $namespaces = array(
'rss' => 'http://purl.org/rss/1.0/',
@ -24,10 +23,10 @@ class Rss10 extends Parser
);
/**
* Get the path to the items XML tree
* Get the path to the items XML tree.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function getItemsTree(SimpleXMLElement $xml)
@ -37,9 +36,8 @@ class Rss10 extends Parser
}
/**
* Find the feed url
* Find the feed url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -49,9 +47,8 @@ class Rss10 extends Parser
}
/**
* Find the site url
* Find the site url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -64,9 +61,8 @@ class Rss10 extends Parser
}
/**
* Find the feed description
* Find the feed description.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -79,9 +75,8 @@ class Rss10 extends Parser
}
/**
* Find the feed logo url
* Find the feed logo url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -94,9 +89,8 @@ class Rss10 extends Parser
}
/**
* Find the feed icon
* Find the feed icon.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -106,9 +100,8 @@ class Rss10 extends Parser
}
/**
* Find the feed title
* Find the feed title.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -121,9 +114,8 @@ class Rss10 extends Parser
}
/**
* Find the feed language
* Find the feed language.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -136,9 +128,8 @@ class Rss10 extends Parser
}
/**
* Find the feed id
* Find the feed id.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -148,9 +139,8 @@ class Rss10 extends Parser
}
/**
* Find the feed date
* Find the feed date.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -163,9 +153,8 @@ class Rss10 extends Parser
}
/**
* Find the item date
* Find the item date.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -178,9 +167,8 @@ class Rss10 extends Parser
}
/**
* Find the item title
* Find the item title.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -193,9 +181,8 @@ class Rss10 extends Parser
}
/**
* Find the item author
* Find the item author.
*
* @access public
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
@ -210,9 +197,8 @@ class Rss10 extends Parser
}
/**
* Find the item content
* Find the item content.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -229,9 +215,8 @@ class Rss10 extends Parser
}
/**
* Find the item URL
* Find the item URL.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -245,9 +230,8 @@ class Rss10 extends Parser
}
/**
* Genereate the item id
* Generate the item id.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -260,9 +244,8 @@ class Rss10 extends Parser
}
/**
* Find the item enclosure
* Find the item enclosure.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -272,9 +255,8 @@ class Rss10 extends Parser
}
/**
* Find the item language
* Find the item language.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object

View File

@ -7,28 +7,27 @@ use PicoFeed\Filter\Filter;
use PicoFeed\Client\Url;
/**
* RSS 2.0 Parser
* RSS 2.0 Parser.
*
* @author Frederic Guillot
* @package Parser
*/
class Rss20 extends Parser
{
/**
* Supported namespaces
* Supported namespaces.
*/
protected $namespaces = array(
'dc' => 'http://purl.org/dc/elements/1.1/',
'content' => 'http://purl.org/rss/1.0/modules/content/',
'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
'atom' => 'http://www.w3.org/2005/Atom'
'atom' => 'http://www.w3.org/2005/Atom',
);
/**
* Get the path to the items XML tree
* Get the path to the items XML tree.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function getItemsTree(SimpleXMLElement $xml)
@ -37,9 +36,8 @@ class Rss20 extends Parser
}
/**
* Find the feed url
* Find the feed url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -49,9 +47,8 @@ class Rss20 extends Parser
}
/**
* Find the site url
* Find the site url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -62,9 +59,8 @@ class Rss20 extends Parser
}
/**
* Find the feed description
* Find the feed description.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -75,9 +71,8 @@ class Rss20 extends Parser
}
/**
* Find the feed logo url
* Find the feed logo url.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -88,9 +83,8 @@ class Rss20 extends Parser
}
/**
* Find the feed icon
* Find the feed icon.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -100,9 +94,8 @@ class Rss20 extends Parser
}
/**
* Find the feed title
* Find the feed title.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -113,9 +106,8 @@ class Rss20 extends Parser
}
/**
* Find the feed language
* Find the feed language.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -126,9 +118,8 @@ class Rss20 extends Parser
}
/**
* Find the feed id
* Find the feed id.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -138,9 +129,8 @@ class Rss20 extends Parser
}
/**
* Find the feed date
* Find the feed date.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
@ -149,24 +139,21 @@ class Rss20 extends Parser
$publish_date = XmlParser::getXPathResult($xml, 'channel/pubDate');
$update_date = XmlParser::getXPathResult($xml, 'channel/lastBuildDate');
$published = ! empty($publish_date) ? $this->date->getDateTime((string) current($publish_date)) : null;
$updated = ! empty($update_date) ? $this->date->getDateTime((string) current($update_date)) : null;
$published = !empty($publish_date) ? $this->date->getDateTime((string) current($publish_date)) : null;
$updated = !empty($update_date) ? $this->date->getDateTime((string) current($update_date)) : null;
if ($published === null && $updated === null) {
$feed->date = $this->date->getCurrentDateTime(); // We use the current date if there is no date for the feed
}
else if ($published !== null && $updated !== null) {
} elseif ($published !== null && $updated !== null) {
$feed->date = max($published, $updated); // We use the most recent date between published and updated
}
else {
} else {
$feed->date = $updated ?: $published;
}
}
/**
* Find the item date
* Find the item date.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -179,9 +166,8 @@ class Rss20 extends Parser
}
/**
* Find the item title
* Find the item title.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -192,9 +178,8 @@ class Rss20 extends Parser
}
/**
* Find the item author
* Find the item author.
*
* @access public
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
@ -210,9 +195,8 @@ class Rss20 extends Parser
}
/**
* Find the item content
* Find the item content.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -228,9 +212,8 @@ class Rss20 extends Parser
}
/**
* Find the item URL
* Find the item URL.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
@ -240,10 +223,9 @@ class Rss20 extends Parser
?: XmlParser::getXPathResult($entry, 'link')
?: XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces);
if (! empty($link)) {
if (!empty($link)) {
$item->url = trim((string) current($link));
}
else {
} else {
$link = XmlParser::getXPathResult($entry, 'guid');
$link = trim((string) current($link));
@ -254,9 +236,8 @@ class Rss20 extends Parser
}
/**
* Generate the item id
* Generate the item id.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -267,8 +248,7 @@ class Rss20 extends Parser
if ($id) {
$item->id = $this->generateId($id);
}
else {
} else {
$item->id = $this->generateId(
$item->getTitle(), $item->getUrl(), $item->getContent()
);
@ -276,9 +256,8 @@ class Rss20 extends Parser
}
/**
* Find the item enclosure
* Find the item enclosure.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
@ -291,16 +270,14 @@ class Rss20 extends Parser
$enclosure_type = XmlParser::getXPathResult($entry, 'enclosure/@type');
$item->enclosure_url = Url::resolve((string) current($enclosure_url), $feed->getSiteUrl());
$item->enclosure_type = (string) current($enclosure_type);
}
}
/**
* Find the item language
* Find the item language.
*
* @access public
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Parser;
/**
* RSS 0.91 Parser
* RSS 0.91 Parser.
*
* @author Frederic Guillot
* @package Parser
*/
class Rss91 extends Rss20
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Parser;
/**
* RSS 0.92 Parser
* RSS 0.92 Parser.
*
* @author Frederic Guillot
* @package Parser
*/
class Rss92 extends Rss20
{

View File

@ -4,25 +4,24 @@ namespace PicoFeed\Parser;
use Closure;
use DomDocument;
use DOMXPath;
use SimpleXmlElement;
/**
* XML parser class
* XML parser class.
*
* Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
*
* @author Frederic Guillot
* @package Parser
*/
class XmlParser
{
/**
* Get a SimpleXmlElement instance or return false
* Get a SimpleXmlElement instance or return false.
*
* @static
* @access public
*
* @param string $input XML content
*
* @return mixed
*/
public static function getSimpleXml($input)
@ -30,10 +29,9 @@ class XmlParser
$dom = self::getDomDocument($input);
if ($dom !== false) {
$simplexml = simplexml_import_dom($dom);
if (! $simplexml instanceof SimpleXmlElement) {
if (!$simplexml instanceof SimpleXmlElement) {
return false;
}
@ -44,7 +42,7 @@ class XmlParser
}
/**
* Scan the input for XXE attacks
* Scan the input for XXE attacks.
*
* @param string $input Unsafe input
* @param Closure $callback Callback called to build the dom.
@ -64,8 +62,7 @@ class XmlParser
if (strpos($input, '<!ENTITY') !== false) {
return false;
}
}
else {
} else {
$entityLoaderDisabled = libxml_disable_entity_loader(true);
}
@ -90,11 +87,12 @@ class XmlParser
}
/**
* Get a DomDocument instance or return false
* Get a DomDocument instance or return false.
*
* @static
* @access public
*
* @param string $input XML content
*
* @return \DOMDocument
*/
public static function getDomDocument($input)
@ -104,8 +102,9 @@ class XmlParser
}
$dom = self::scanInput($input, function ($in) {
$dom = new DomDocument;
$dom = new DomDocument();
$dom->loadXml($in, LIBXML_NONET);
return $dom;
});
@ -118,30 +117,32 @@ class XmlParser
}
/**
* Load HTML document by using a DomDocument instance or return false on failure
* Load HTML document by using a DomDocument instance or return false on failure.
*
* @static
* @access public
*
* @param string $input HTML content
*
* @return \DOMDocument
*/
public static function getHtmlDocument($input)
{
if (empty($input)) {
return new DomDocument;
return new DomDocument();
}
if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
$callback = function ($in) {
$dom = new DomDocument;
$dom = new DomDocument();
$dom->loadHTML($in, LIBXML_NONET);
return $dom;
};
}
else {
} else {
$callback = function ($in) {
$dom = new DomDocument;
$dom = new DomDocument();
$dom->loadHTML($in);
return $dom;
};
}
@ -150,32 +151,33 @@ class XmlParser
}
/**
* Convert a HTML document to XML
* Convert a HTML document to XML.
*
* @static
* @access public
*
* @param string $html HTML document
*
* @return string
*/
public static function HtmlToXml($html)
public static function htmlToXml($html)
{
$dom = self::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);
return $dom->saveXML($dom->getElementsByTagName('body')->item(0));
}
/**
* Get XML parser errors
* Get XML parser errors.
*
* @static
* @access public
*
* @return string
*/
public static function getErrors()
{
$errors = array();
foreach(libxml_get_errors() as $error) {
foreach (libxml_get_errors() as $error) {
$errors[] = sprintf('XML error: %s (Line: %d - Column: %d - Code: %d)',
$error->message,
$error->line,
@ -188,11 +190,12 @@ class XmlParser
}
/**
* Get the encoding from a xml tag
* Get the encoding from a xml tag.
*
* @static
* @access public
*
* @param string $data Input data
*
* @return string
*/
public static function getEncodingFromXmlTag($data)
@ -200,7 +203,6 @@ class XmlParser
$encoding = '';
if (strpos($data, '<?xml') !== false) {
$data = substr($data, 0, strrpos($data, '?>'));
$data = str_replace("'", '"', $data);
@ -217,11 +219,12 @@ class XmlParser
}
/**
* Get the charset from a meta tag
* Get the charset from a meta tag.
*
* @static
* @access public
*
* @param string $data Input data
*
* @return string
*/
public static function getEncodingFromMetaTag($data)
@ -236,14 +239,16 @@ class XmlParser
}
/**
* Rewrite XPath query to use namespace-uri and local-name derived from prefix
* Rewrite XPath query to use namespace-uri and local-name derived from prefix.
*
* @param string $query XPath query
* @param array $ns Prefix to namespace URI mapping
*
* @return string
*/
public static function replaceXPathPrefixWithNamespaceURI($query, array $ns) {
return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', function($matches) use($ns) {
public static function replaceXPathPrefixWithNamespaceURI($query, array $ns)
{
return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', function ($matches) use ($ns) {
// don't try to map the special prefix XML
if (strtolower($matches[1]) === 'xml') {
return $matches[0];
@ -255,16 +260,17 @@ class XmlParser
}
/**
* Get the result elements of a XPath query
* Get the result elements of a XPath query.
*
* @param \SimpleXMLElement $xml XML element
* @param string $query XPath query
* @param array $ns Prefix to namespace URI mapping
*
* @return \SimpleXMLElement
*/
public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
{
if (! empty($ns)) {
if (!empty($ns)) {
$query = static::replaceXPathPrefixWithNamespaceURI($query, $ns);
}

View File

@ -5,10 +5,9 @@ namespace PicoFeed;
use Exception;
/**
* PicoFeedException Exception
* PicoFeedException Exception.
*
* @author Frederic Guillot
* @package exception
*/
abstract class PicoFeedException extends Exception
{

View File

@ -11,19 +11,17 @@ use PicoFeed\Logging\Logger;
use PicoFeed\Parser\XmlParser;
/**
* Favicon class
* Favicon class.
*
* https://en.wikipedia.org/wiki/Favicon
*
* @author Frederic Guillot
* @package Reader
*/
class Favicon
{
/**
* Valid types for favicon (supported by browsers)
* Valid types for favicon (supported by browsers).
*
* @access private
* @var array
*/
private $types = array(
@ -35,44 +33,39 @@ class Favicon
);
/**
* Config class instance
* Config class instance.
*
* @access private
* @var \PicoFeed\Config\Config
*/
private $config;
/**
* Icon binary content
* Icon binary content.
*
* @access private
* @var string
*/
private $content = '';
/**
* Icon content type
* Icon content type.
*
* @access private
* @var string
*/
private $content_type = '';
/**
* Constructor
* Constructor.
*
* @access public
* @param \PicoFeed\Config\Config $config Config class instance
*/
public function __construct(Config $config = null)
{
$this->config = $config ?: new Config;
$this->config = $config ?: new Config();
}
/**
* Get the icon file content (available only after the download)
* Get the icon file content (available only after the download).
*
* @access public
* @return string
*/
public function getContent()
@ -81,9 +74,8 @@ class Favicon
}
/**
* Get the icon file type (available only after the download)
* Get the icon file type (available only after the download).
*
* @access public
* @return string
*/
public function getType()
@ -98,9 +90,8 @@ class Favicon
}
/**
* Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme)
* Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme).
*
* @access public
* @return string
*/
public function getDataUri()
@ -117,10 +108,10 @@ class Favicon
}
/**
* Download and check if a resource exists
* Download and check if a resource exists.
*
* @access public
* @param string $url URL
*
* @return \PicoFeed\Client Client instance
*/
public function download($url)
@ -132,8 +123,7 @@ class Favicon
try {
$client->execute($url);
}
catch (ClientException $e) {
} catch (ClientException $e) {
Logger::setMessage(get_called_class().' Download Failed => '.$e->getMessage());
}
@ -141,11 +131,11 @@ class Favicon
}
/**
* Check if a remote file exists
* Check if a remote file exists.
*
* @access public
* @param string $url URL
* @return boolean
*
* @return bool
*/
public function exists($url)
{
@ -153,11 +143,11 @@ class Favicon
}
/**
* Get the icon link for a website
* Get the icon link for a website.
*
* @access public
* @param string $website_link URL
* @param string $favicon_link optional URL
*
* @return string
*/
public function find($website_link, $favicon_link = '')
@ -188,10 +178,10 @@ class Favicon
}
/**
* Extract the icon links from the HTML
* Extract the icon links from the HTML.
*
* @access public
* @param string $html HTML
*
* @return array
*/
public function extract($html)
@ -207,7 +197,7 @@ class Favicon
$xpath = new DOMXpath($dom);
$elements = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]");
for ($i = 0; $i < $elements->length; $i++) {
for ($i = 0; $i < $elements->length; ++$i) {
$icons[] = $elements->item($i)->getAttribute('href');
}

View File

@ -10,17 +10,15 @@ use PicoFeed\Logging\Logger;
use PicoFeed\Parser\XmlParser;
/**
* Reader class
* Reader class.
*
* @author Frederic Guillot
* @package Reader
*/
class Reader
{
/**
* Feed formats for detection
* Feed formats for detection.
*
* @access private
* @var array
*/
private $formats = array(
@ -32,34 +30,32 @@ class Reader
);
/**
* Config class instance
* Config class instance.
*
* @access private
* @var \PicoFeed\Config\Config
*/
private $config;
/**
* Constructor
* Constructor.
*
* @access public
* @param \PicoFeed\Config\Config $config Config class instance
*/
public function __construct(Config $config = null)
{
$this->config = $config ?: new Config;
$this->config = $config ?: new Config();
Logger::setTimezone($this->config->getTimezone());
}
/**
* Download a feed (no discovery)
* Download a feed (no discovery).
*
* @access public
* @param string $url Feed url
* @param string $last_modified Last modified HTTP header
* @param string $etag Etag HTTP header
* @param string $username HTTP basic auth username
* @param string $password HTTP basic auth password
*
* @return \PicoFeed\Client\Client
*/
public function download($url, $last_modified = '', $etag = '', $username = '', $password = '')
@ -76,14 +72,14 @@ class Reader
}
/**
* Discover and download a feed
* Discover and download a feed.
*
* @access public
* @param string $url Feed or website url
* @param string $last_modified Last modified HTTP header
* @param string $etag Etag HTTP header
* @param string $username HTTP basic auth username
* @param string $password HTTP basic auth password
*
* @return \PicoFeed\Client\Client
*/
public function discover($url, $last_modified = '', $etag = '', $username = '', $password = '')
@ -91,7 +87,7 @@ class Reader
$client = $this->download($url, $last_modified, $etag, $username, $password);
// It's already a feed or the feed was not modified
if (! $client->isModified() || $this->detectFormat($client->getContent())) {
if (!$client->isModified() || $this->detectFormat($client->getContent())) {
return $client;
}
@ -106,11 +102,11 @@ class Reader
}
/**
* Find feed urls inside a HTML document
* Find feed urls inside a HTML document.
*
* @access public
* @param string $url Website url
* @param string $html HTML content
*
* @return array List of feed links
*/
public function find($url, $html)
@ -127,15 +123,12 @@ class Reader
);
foreach ($queries as $query) {
$nodes = $xpath->query($query);
foreach ($nodes as $node) {
$link = $node->getAttribute('href');
if (! empty($link)) {
if (!empty($link)) {
$feedUrl = new Url($link);
$siteUrl = new Url($url);
@ -150,12 +143,12 @@ class Reader
}
/**
* Get a parser instance
* Get a parser instance.
*
* @access public
* @param string $url Site url
* @param string $content Feed content
* @param string $encoding HTTP encoding
*
* @return \PicoFeed\Parser\Parser
*/
public function getParser($url, $content, $encoding)
@ -177,10 +170,10 @@ class Reader
}
/**
* Detect the feed format
* Detect the feed format.
*
* @access public
* @param string $content Feed content
*
* @return string
*/
public function detectFormat($content)
@ -200,16 +193,15 @@ class Reader
}
/**
* Add the prefix "http://" if the end-user just enters a domain name
* Add the prefix "http://" if the end-user just enters a domain name.
*
* @access public
* @param string $url Url
* @return string
*/
public function prependScheme($url)
{
if (! preg_match('%^https?://%', $url)) {
$url = 'http://' . $url;
if (!preg_match('%^https?://%', $url)) {
$url = 'http://'.$url;
}
return $url;

View File

@ -4,12 +4,10 @@ namespace PicoFeed\Reader;
use PicoFeed\PicoFeedException;
/**
* ReaderException Exception
* ReaderException Exception.
*
* @author Frederic Guillot
* @package Reader
*/
abstract class ReaderException extends PicoFeedException
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Reader;
/**
* SubscriptionNotFoundException Exception
* SubscriptionNotFoundException Exception.
*
* @author Frederic Guillot
* @package Reader
*/
class SubscriptionNotFoundException extends ReaderException
{

View File

@ -3,10 +3,9 @@
namespace PicoFeed\Reader;
/**
* UnsupportedFeedFormatException Exception
* UnsupportedFeedFormatException Exception.
*
* @author Frederic Guillot
* @package Reader
*/
class UnsupportedFeedFormatException extends ReaderException
{

View File

@ -22,7 +22,7 @@ return array(
'//img[contains(@src, "1x1")]',
'//a[contains(@href, "creativecommons")]',
'//a[@href="#start-of-content"]',
'//ul[@id="article-tags"],
'//ul[@id="article-tags"]',
),
)
)

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -11,8 +12,8 @@ return array(
'//*[contains(@class, "article_navigation")]',
'//h1',
'//*[contains(@class, "article_toolbarMain")]',
'//*[contains(@class, "article_imagehaute_box")]'
)
)
)
'//*[contains(@class, "article_imagehaute_box")]',
),
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%alt="(.+)" title="(.+)" */>%' => "/><br/>$1<br/>$2"
)
)
'%alt="(.+)" title="(.+)" */>%' => '/><br/>$1<br/>$2',
),
),
);

View File

@ -1,14 +1,15 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907',
'body' => array(
'//div[@class="news_details"]'
'//div[@class="news_details"]',
),
'strip' => array(
'//div[@class="news_details"]/div/div[last()]',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -13,8 +14,8 @@ return array(
'//ul',
'//a[@target="_self"]',
'//div[@data-embed-type="Brightcove"]',
'//div[@class="QuoteContainer"]'
'//div[@class="QuoteContainer"]',
),
),
),
)
)
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -14,6 +15,6 @@ return array(
'//sharebar',
'//related-topics',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -16,8 +17,8 @@ return array(
'//*[@class="skip"]',
'//*[@class="funcs"]',
'//span[@class="nd address"]',
'//a[contains(@href, "abo-und-services")]'
)
)
)
'//a[contains(@href, "abo-und-services")]',
),
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => "/><br/>$1"
)
)
'%title="(.+)" */>%' => '/><br/>$1',
),
),
);

View File

@ -1,13 +1,14 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'body' => array(
'//img[@id="comic_image"]',
'//div[@class="comment-wrapper"][position()=1]'
'//div[@class="comment-wrapper"][position()=1]',
),
'strip' => array(),
'test_url' => 'http://www.anythingcomic.com/comics/2108929/stress-free/',
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -8,6 +9,6 @@ return array(
'//span[@class="entry-content"]',
),
'strip' => array(),
)
)
),
),
);

View File

@ -1,10 +1,11 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/',
'body' => array('//div[contains(@class,"entry")]'),
'strip' => array(),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -12,6 +13,6 @@ return array(
'//aside',
'//div[@class="article-expander"]',
),
)
)
),
),
);

View File

@ -1,10 +1,11 @@
<?php
return array(
'grabber' => array(
'%/index.php.*comic=.*%' => array(
'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315',
'body' => array('//*[@id="comic"]/img'),
'strip' => array(),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -15,6 +16,6 @@ return array(
'//script',
'//ul',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -10,6 +11,6 @@ return array(
'strip' => array(
'//strong',
),
)
)
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%-150x150%' => '',
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -8,6 +9,6 @@ return array(
'//figure/div/a/img',
'//p[@class="content__segment"]',
),
)
)
),
),
);

View File

@ -1,13 +1,14 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://blog.fefe.de/?ts=ad706a73',
'body' => array(
'/html/body/ul'
'/html/body/ul',
),
'strip' => array(
),
)
)
),
),
);

View File

@ -1,11 +1,12 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://blog.mapillary.com/update/2015/08/26/traffic-sign-updates.html',
'body' => array(
'//div[contains(@class, "blog-post__content")]'
)
)
)
'//div[contains(@class, "blog-post__content")]',
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -13,6 +14,6 @@ return array(
'//h1',
'//div[@class="autor"]',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -8,6 +9,6 @@ return array(
),
'strip' => array(
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//div[@id="comic"]',
'//div[@class="post-comic"]',
),
'strip' => array()
)
)
'strip' => array(),
),
),
);

View File

@ -1,12 +1,13 @@
<?php
return array(
'grabber' => array(
'%/cad/.+%' => array(
'test_url' => 'http://www.cad-comic.com/cad/20150417',
'body' => array(
'//*[@id="content"]/img'
'//*[@id="content"]/img',
),
'strip' => array(),
)
)
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%href="http://www.channelate.com/(\\d+)/(\\d+)/(\\d+)/[^"]*"%' => 'href="http://www.channelate.com/extra-panel/$1$2$3/"'
)
)
'%href="http://www.channelate.com/(\\d+)/(\\d+)/(\\d+)/[^"]*"%' => 'href="http://www.channelate.com/extra-panel/$1$2$3/"',
),
),
);

View File

@ -1,10 +1,11 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild',
'body' => array('//div[@id="comic"]'),
'strip' => array(),
)
)
),
),
);

View File

@ -1,10 +1,11 @@
<?php
return array(
'grabber' => array(
'%/comic.*%' => array(
'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/',
'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'),
'strip' => array(),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -15,6 +16,6 @@ return array(
'//div[contains(@class,"ad-")]',
'//div[@section="shortcodeGallery"]',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -8,6 +9,6 @@ return array(
),
'strip' => array(
),
)
)
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => "/><br/>$1"
)
)
'%title="(.+)" */>%' => '/><br/>$1',
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -13,6 +14,6 @@ return array(
'//*[contains(@class,"promo_link")]',
'//div[@id="story-embed-column"]',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -13,7 +14,7 @@ return array(
'//*[contains(@class, "addthis_default_style")]',
'//*[@class="navigation small"]',
'//*[@id="related"]',
)
)
)
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -8,6 +9,6 @@ return array(
'//span[@id="lblSummary"]',
'//span[@id="lblBody"]',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,8 +8,8 @@ return array(
'//div[@class="contenu"]',
),
'strip' => array(
'//div[contains(@class, "a2a")]'
'//div[contains(@class, "a2a")]',
),
),
),
)
)
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -9,6 +10,6 @@ return array(
),
'strip' => array(
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -12,6 +13,6 @@ return array(
'//div[@class="shareIcons"]',
'//div[@class="navigation"]',
),
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//td[@class="NewsText"][1]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -9,7 +10,7 @@ return array(
'//div[@class="story-content"]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,15 +1,16 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'body' => array('//img[@id="comicimage"]'),
'strip' => array(),
'test_url' => 'http://drawingboardcomic.com/index.php?comic=208',
)
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => "/><br/>$1"
)
)
'%title="(.+)" */>%' => '/><br/>$1',
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%-150x150%' => '',
)
)
),
),
);

View File

@ -1,10 +1,11 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated',
'body' => array('//div[@class="article-content"]/p[not(@class="read-more")] | //div[@class="article-content"]/div[@style="text-align: center;"]'),
'strip' => array(),
)
)
),
),
);

View File

@ -1,47 +1,46 @@
<?php
return array(
'grabber' => array(
'%/articles/view/comicsandcosplay/comics/critical-miss.*%' => array(
'body' => array('//*[@class="body"]/span/img | //div[@class="folder_nav_links"]/following::p'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss/13776-Critical-Miss-on-Framerates?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/namegame.*%' => array(
'body' => array('//*[@class="body"]/span/p/img[@height != "120"]'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame/9759-Leaving-the-Nest?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/stolen-pixels.*%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels/8866-Stolen-Pixels-258-Where-the-Boys-Are?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/bumhugparade.*%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade/8262-Bumhug-Parade-13?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/comicsandcosplay.*/comics/escapistradiotheater%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater/8265-The-Escapist-Radio-Theater-13?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/paused.*%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img | //*[@class="body"]/span/div/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/paused/8263-Paused-16?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/fraughtwithperil.*%' => array(
'body' => array('//*[@class="body"]'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil/12166-The-Escapist-Presents-Escapist-Comics-Critical-Miss-B-lyeh-Fhlop?utm_source=rss&amp;utm_medium=rss&amp;utm_campaign=articles',
'strip' => array()
'strip' => array(),
),
'%/articles/view/video-games/columns/.*%' => array(
'body' => array('//*[@id="article_content"]'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/video-games/columns/experienced-points/13971-What-50-Shades-and-Batman-Have-in-Common.2',
'strip' => array()
'strip' => array(),
),
),
)
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -6,6 +7,6 @@ return array(
'body' => array(
'//p',
),
)
)
),
),
);

View File

@ -1,15 +1,16 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'body' => array('//a[@class="comic"]/img'),
'strip' => array(),
'test_url' => 'http://www.exocomics.com/379',
)
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => "/><br/>$1"
)
)
'%title="(.+)" */>%' => '/><br/>$1',
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -8,6 +9,6 @@ return array(
),
'strip' => array(
),
)
)
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%-150x150%' => '',
)
)
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//article[contains(@class, "body prose")]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//article[contains(@class, "body prose")]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//article[contains(@class, "body prose")]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//div[@class="news_body"]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,10 +1,11 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'body' => array('//*[@id="comic"] | //*[@class="post-image"]'),
'strip' => array(),
'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/'
)
)
'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/',
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -11,6 +12,6 @@ return array(
'strip' => array(
'//span[@class="red"]',
),
)
)
),
),
);

View File

@ -1,12 +1,13 @@
<?php
return array(
'grabber' => array(
'%/comics/oots.*%' => array(
'test_url' => 'http://www.giantitp.com/comics/oots0989.html',
'body' => array(
'//td[@align="center"]/img'
'//td[@align="center"]/img',
),
'strip' => array(),
),
),
'strip' => array()
)
)
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,8 +8,8 @@ return array(
'//article[contains(@class, "entry-content")]',
),
'strip' => array(
'//h1'
)
)
)
'//h1',
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,6 +8,6 @@ return array(
'//div[1]/p[1]/a[1]/img',
),
'strip' => array(),
)
)
),
),
);

View File

@ -1,12 +1,13 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
'body' => array(
'//header[@class="cluster-header"]',
'//div[@class="formatted"]'
)
)
)
'//div[@class="formatted"]',
),
),
),
);

View File

@ -1,18 +1,19 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'body' => array(
'//div[@id="comic"]',
'//div[@class="entry"]'
'//div[@class="entry"]',
),
'strip' => array('//div[@class="ssba"]'),
'test_url' => 'http://www.happletea.com/comic/mans-best-friend/',
)
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => "/><br/>$1"
)
)
'%title="(.+)" */>%' => '/><br/>$1',
),
),
);

View File

@ -1,11 +1,12 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html',
'body' => array(
'//div[@class="meldung_wrapper"]'
)
)
)
'//div[@class="meldung_wrapper"]',
),
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -7,7 +8,7 @@ return array(
'//article[@class="content")]',
),
'strip' => array(
)
)
)
),
),
),
);

View File

@ -1,8 +1,9 @@
<?php
return array(
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => "/><br/>$1"
)
)
'%title="(.+)" */>%' => '/><br/>$1',
),
),
);

View File

@ -1,4 +1,5 @@
<?php
return array(
'grabber' => array(
'%.*%' => array(
@ -6,7 +7,7 @@ return array(
'body' => array(
'//section[contains(@class, "teaser")]',
'//section[contains(@class, "body")]',
)
)
)
),
),
),
);

Some files were not shown because too many files have changed in this diff Show More