Update PicoFeed and PicoDb
This commit is contained in:
parent
e1b22f2d35
commit
e2280f1b7b
@ -3,7 +3,6 @@
|
||||
require __DIR__.'/lib/Translator.php';
|
||||
require __DIR__.'/vendor/PicoDb/Database.php';
|
||||
require __DIR__.'/vendor/PicoFeed/PicoFeed.php';
|
||||
require __DIR__.'/vendor/Readability/Readability.php';
|
||||
|
||||
require __DIR__.'/vendor/SimpleValidator/Validator.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Base.php';
|
||||
|
@ -8,8 +8,7 @@ How the content grabber works?
|
||||
|
||||
1. Try with rules first (xpath patterns) for the domain name (see `PicoFeed\Rules\`)
|
||||
2. Try to find the text content by using common attributes for class and id
|
||||
3. Fallback to Readability if no content is found
|
||||
4. Finally, if nothing is found, the feed content is displayed
|
||||
3. Finally, if nothing is found, the feed content is displayed
|
||||
|
||||
The content downloader uses a fake user agent, currently Google Chrome on Mac OS X.
|
||||
|
||||
|
@ -8,7 +8,6 @@ use PicoFeed\Logging;
|
||||
use PicoFeed\Grabber;
|
||||
use PicoFeed\Client;
|
||||
use PicoFeed\Filter;
|
||||
use Readability;
|
||||
|
||||
// Get all items without filtering
|
||||
function get_everything()
|
||||
@ -535,12 +534,9 @@ function download_content_url($url)
|
||||
if ($grabber->parse()) {
|
||||
$content = $grabber->getcontent();
|
||||
}
|
||||
else {
|
||||
$content = download_content_readability($grabber->getRawContent(), $url);
|
||||
}
|
||||
|
||||
if (! empty($content)) {
|
||||
$filter = new Filter($content, $url);
|
||||
$filter = Filter::html($content, $url);
|
||||
$filter->setConfig(Config\get_reader_config());
|
||||
$content = $filter->execute();
|
||||
}
|
||||
@ -580,18 +576,3 @@ function download_content_id($item_id)
|
||||
'content' => ''
|
||||
);
|
||||
}
|
||||
|
||||
// Download content with Readability PHP port
|
||||
function download_content_readability($content, $url)
|
||||
{
|
||||
if (! empty($content)) {
|
||||
|
||||
$readability = new Readability($content, $url);
|
||||
|
||||
if ($readability->init()) {
|
||||
return $readability->getContent()->innerHTML;
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
5
vendor/PicoDb/Database.php
vendored
5
vendor/PicoDb/Database.php
vendored
@ -86,6 +86,11 @@ class Database
|
||||
|
||||
public function escapeIdentifier($value)
{
    // Values containing a dot or a space are treated as custom SQL
    // and are handed back untouched instead of being quoted.
    $is_custom_query = strpos($value, '.') !== false || strpos($value, ' ') !== false;

    // Everything else is quoted by the driver behind $this->pdo
    return $is_custom_query ? $value : $this->pdo->escapeIdentifier($value);
}
|
||||
|
||||
|
1
vendor/PicoDb/Drivers/Mysql.php
vendored
1
vendor/PicoDb/Drivers/Mysql.php
vendored
@ -70,7 +70,6 @@ class Mysql extends \PDO {
|
||||
|
||||
public function escapeIdentifier($value)
|
||||
{
|
||||
if (strpos($value, '.') !== false) return $value;
|
||||
return '`'.$value.'`';
|
||||
}
|
||||
}
|
1
vendor/PicoDb/Drivers/Sqlite.php
vendored
1
vendor/PicoDb/Drivers/Sqlite.php
vendored
@ -51,7 +51,6 @@ class Sqlite extends \PDO {
|
||||
|
||||
public function escapeIdentifier($value)
|
||||
{
|
||||
if (strpos($value, '.') !== false) return $value;
|
||||
return '"'.$value.'"';
|
||||
}
|
||||
}
|
8
vendor/PicoDb/Table.php
vendored
8
vendor/PicoDb/Table.php
vendored
@ -173,6 +173,10 @@ class Table
|
||||
|
||||
public function buildSelectQuery()
|
||||
{
|
||||
foreach ($this->columns as $key => $value) {
|
||||
$this->columns[$key] = $this->db->escapeIdentifier($value);
|
||||
}
|
||||
|
||||
return sprintf(
|
||||
'SELECT %s %s FROM %s %s %s %s %s %s %s',
|
||||
$this->distinct ? 'DISTINCT' : '',
|
||||
@ -350,7 +354,7 @@ class Table
|
||||
switch (strtolower($name)) {
|
||||
|
||||
case 'in':
|
||||
if (isset($arguments[1]) && is_array($arguments[1])) {
|
||||
if (isset($arguments[1]) && is_array($arguments[1]) && ! empty($arguments[1])) {
|
||||
|
||||
$sql = sprintf(
|
||||
'%s IN (%s)',
|
||||
@ -361,7 +365,7 @@ class Table
|
||||
break;
|
||||
|
||||
case 'notin':
|
||||
if (isset($arguments[1]) && is_array($arguments[1])) {
|
||||
if (isset($arguments[1]) && is_array($arguments[1]) && ! empty($arguments[1])) {
|
||||
|
||||
$sql = sprintf(
|
||||
'%s NOT IN (%s)',
|
||||
|
124
vendor/PicoFeed/Client.php
vendored
124
vendor/PicoFeed/Client.php
vendored
@ -5,7 +5,6 @@ namespace PicoFeed;
|
||||
use LogicException;
|
||||
use Clients\Curl;
|
||||
use Clients\Stream;
|
||||
use PicoFeed\Logging;
|
||||
|
||||
/**
|
||||
* Client class
|
||||
@ -23,6 +22,14 @@ abstract class Client
|
||||
*/
|
||||
private $is_modified = true;
|
||||
|
||||
/**
|
||||
* Flag that says whether the resource was not found (HTTP 404)
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $is_not_found = false;
|
||||
|
||||
/**
|
||||
* HTTP encoding
|
||||
*
|
||||
@ -170,25 +177,32 @@ abstract class Client
|
||||
$response = $this->doRequest();
|
||||
|
||||
if (is_array($response)) {
|
||||
$this->handleNotModifiedResponse($response);
|
||||
$this->handleNotFoundResponse($response);
|
||||
$this->handleNormalResponse($response);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle not modified response
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function handleNotModifiedResponse(array $response)
|
||||
{
|
||||
if ($response['status'] == 304) {
|
||||
$this->is_modified = false;
|
||||
Logging::setMessage(get_called_class().' Resource not modified');
|
||||
}
|
||||
else if ($response['status'] == 404) {
|
||||
Logging::setMessage(get_called_class().' Resource not found');
|
||||
}
|
||||
else {
|
||||
$etag = isset($response['headers']['ETag']) ? $response['headers']['ETag'] : '';
|
||||
$last_modified = isset($response['headers']['Last-Modified']) ? $response['headers']['Last-Modified'] : '';
|
||||
$this->content = $response['body'];
|
||||
else if ($response['status'] == 200) {
|
||||
|
||||
if (isset($response['headers']['Content-Type'])) {
|
||||
$result = explode('charset=', strtolower($response['headers']['Content-Type']));
|
||||
$this->encoding = isset($result[1]) ? $result[1] : '';
|
||||
}
|
||||
$etag = $this->getHeader($response, 'ETag');
|
||||
$last_modified = $this->getHeader($response, 'Last-Modified');
|
||||
|
||||
if (($this->etag && $this->etag === $etag) || ($this->last_modified && $last_modified === $this->last_modified)) {
|
||||
if ($this->isPropertyEquals('etag', $etag) || $this->isPropertyEquals('last_modified', $last_modified)) {
|
||||
$this->is_modified = false;
|
||||
}
|
||||
|
||||
@ -196,10 +210,75 @@ abstract class Client
|
||||
$this->last_modified = $last_modified;
|
||||
}
|
||||
|
||||
return true;
|
||||
if ($this->is_modified === false) {
|
||||
Logging::setMessage(get_called_class().' Resource not modified');
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
/**
|
||||
* Handle not found response
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function handleNotFoundResponse(array $response)
{
    // Any status other than 404 leaves the not-found flag untouched
    if ($response['status'] != 404) {
        return;
    }

    // Remember the 404 so isNotFound() can report it, then log it
    $this->is_not_found = true;
    Logging::setMessage(get_called_class().' Resource not found');
}
|
||||
|
||||
/**
|
||||
* Handle normal response
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function handleNormalResponse(array $response)
{
    $is_success = $response['status'] == 200;

    // Only a 200 response carries a body worth storing
    if (! $is_success) {
        return;
    }

    // Keep the body and the charset announced in the Content-Type header
    $this->content = $response['body'];
    $this->encoding = $this->findCharset($response);
}
|
||||
|
||||
/**
|
||||
* Check if a class property equals to a value
|
||||
*
|
||||
* @access private
|
||||
* @param string $property Class property
|
||||
* @param string $value Value
|
||||
* @return boolean
|
||||
*/
|
||||
private function isPropertyEquals($property, $value)
{
    $current = $this->$property;

    // An empty/falsy property never counts as a match
    if (! $current) {
        return false;
    }

    // Strict comparison: type and value must both be identical
    return $current === $value;
}
|
||||
|
||||
/**
|
||||
* Find charset from response headers
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
*/
|
||||
public function findCharset(array $response)
{
    // Work on a lowercased copy of the Content-Type header ('' when absent)
    $content_type = strtolower($this->getHeader($response, 'Content-Type'));
    $parts = explode('charset=', $content_type);

    // No charset parameter announced by the server
    if (! isset($parts[1])) {
        return '';
    }

    // The charset may be followed by other parameters and may be quoted:
    //   text/html; charset="utf-8"; boundary=x  =>  utf-8
    $charset = explode(';', $parts[1]);

    return trim($charset[0], " \t\"'");
}
|
||||
|
||||
/**
|
||||
* Get header value from a client response
|
||||
*
|
||||
* @access public
|
||||
* @param array $response Client response
|
||||
* @param string $header Header name
|
||||
* @return string
|
||||
*/
|
||||
public function getHeader(array $response, $header)
{
    // Responses without a header list yield an empty value
    if (! isset($response['headers']) || ! is_array($response['headers'])) {
        return '';
    }

    // Fast path: the header is stored with exactly the requested case
    if (isset($response['headers'][$header])) {
        return $response['headers'][$header];
    }

    // HTTP header names are case-insensitive, so a server may send "Etag"
    // or "content-type". NOTE(review): assumes the header array keeps the
    // names as sent by the server - confirm against parseHeaders().
    foreach ($response['headers'] as $name => $value) {
        if ($value !== null && strcasecmp((string) $name, (string) $header) === 0) {
            return $value;
        }
    }

    return '';
}
|
||||
|
||||
/**
|
||||
@ -340,6 +419,17 @@ abstract class Client
|
||||
return $this->is_modified;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the remote resource is not found
|
||||
*
|
||||
* @access public
|
||||
* @return bool
|
||||
*/
|
||||
public function isNotFound()
|
||||
{
|
||||
return $this->is_not_found;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set connection timeout
|
||||
*
|
||||
@ -453,6 +543,7 @@ abstract class Client
|
||||
*/
|
||||
public function setConfig($config)
|
||||
{
|
||||
if ($config !== null) {
|
||||
$this->setTimeout($config->getGrabberTimeout());
|
||||
$this->setUserAgent($config->getGrabberUserAgent());
|
||||
$this->setMaxRedirections($config->getMaxRedirections());
|
||||
@ -461,6 +552,7 @@ abstract class Client
|
||||
$this->setProxyPort($config->getProxyPort());
|
||||
$this->setProxyUsername($config->getProxyUsername());
|
||||
$this->setProxyPassword($config->getProxyPassword());
|
||||
}
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
154
vendor/PicoFeed/Clients/Curl.php
vendored
154
vendor/PicoFeed/Clients/Curl.php
vendored
@ -97,36 +97,37 @@ class Curl extends Client
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
* Prepare HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @param bool $follow_location Flag used when there is an open_basedir restriction
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
* @access private
|
||||
* @return array
|
||||
*/
|
||||
public function doRequest($follow_location = true)
|
||||
private function prepareHeaders()
|
||||
{
|
||||
$request_headers = array('Connection: close');
|
||||
$headers = array(
|
||||
'Connection: close',
|
||||
'User-Agent: '.$this->user_agent,
|
||||
);
|
||||
|
||||
if ($this->etag) $request_headers[] = 'If-None-Match: '.$this->etag;
|
||||
if ($this->last_modified) $request_headers[] = 'If-Modified-Since: '.$this->last_modified;
|
||||
if ($this->etag) {
|
||||
$headers[] = 'If-None-Match: '.$this->etag;
|
||||
}
|
||||
|
||||
$ch = curl_init();
|
||||
if ($this->last_modified) {
|
||||
$headers[] = 'If-Modified-Since: '.$this->last_modified;
|
||||
}
|
||||
|
||||
curl_setopt($ch, CURLOPT_URL, $this->url);
|
||||
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
|
||||
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
|
||||
curl_setopt($ch, CURLOPT_HTTPHEADER, $request_headers);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ini_get('open_basedir') === '');
|
||||
curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects);
|
||||
curl_setopt($ch, CURLOPT_ENCODING, '');
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // For auto-signed certificates...
|
||||
curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, 'readBody'));
|
||||
curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders'));
|
||||
curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory');
|
||||
curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory');
|
||||
return $headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare curl proxy context
|
||||
*
|
||||
* @access private
|
||||
* @return resource
|
||||
*/
|
||||
private function prepareProxyContext($ch)
|
||||
{
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
@ -144,6 +145,47 @@ class Curl extends Client
|
||||
}
|
||||
}
|
||||
|
||||
return $ch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare curl context
|
||||
*
|
||||
* @access private
|
||||
* @return resource
|
||||
*/
|
||||
private function prepareContext()
{
    $handle = curl_init();

    // Options are applied one by one, in the order listed here
    $options = array(
        CURLOPT_URL => $this->url,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CONNECTTIMEOUT => $this->timeout,
        CURLOPT_TIMEOUT => $this->timeout,
        CURLOPT_HTTPHEADER => $this->prepareHeaders(),
        // Automatic redirects are enabled only without an open_basedir restriction
        CURLOPT_FOLLOWLOCATION => ini_get('open_basedir') === '',
        CURLOPT_MAXREDIRS => $this->max_redirects,
        CURLOPT_ENCODING => '',
        CURLOPT_SSL_VERIFYPEER => false, // For auto-signed certificates...
        // Body and headers are collected through the class callbacks
        CURLOPT_WRITEFUNCTION => array($this, 'readBody'),
        CURLOPT_HEADERFUNCTION => array($this, 'readHeaders'),
        CURLOPT_COOKIEJAR => 'php://memory',
        CURLOPT_COOKIEFILE => 'php://memory',
    );

    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    // Proxy settings are added last by the dedicated helper
    return $this->prepareProxyContext($handle);
}
|
||||
|
||||
/**
|
||||
* Execute curl context
|
||||
*
|
||||
* @access private
|
||||
* @return resource
|
||||
*/
|
||||
private function executeContext()
|
||||
{
|
||||
$ch = $this->prepareContext();
|
||||
curl_exec($ch);
|
||||
|
||||
Logging::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
|
||||
@ -153,21 +195,67 @@ class Curl extends Client
|
||||
Logging::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
|
||||
|
||||
if (curl_errno($ch)) {
|
||||
|
||||
Logging::setMessage(get_called_class().' cURL error: '.curl_error($ch));
|
||||
|
||||
curl_close($ch);
|
||||
return false;
|
||||
}
|
||||
|
||||
curl_close($ch);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
*
|
||||
* @access public
|
||||
* @param bool $follow_location Flag used when there is an open_basedir restriction
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
*/
|
||||
public function doRequest($follow_location = true)
|
||||
{
|
||||
if (! $this->executeContext()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
list($status, $headers) = $this->parseHeaders(explode("\r\n", $this->headers[$this->headers_counter - 1]));
|
||||
|
||||
if ($follow_location && ini_get('open_basedir') !== '' && ($status == 301 || $status == 302)) {
|
||||
// When restricted with open_basedir
|
||||
if ($this->needToHandleRedirection($follow_location, $status)) {
|
||||
return $this->handleRedirection($headers['Location']);
|
||||
}
|
||||
|
||||
return array(
|
||||
'status' => $status,
|
||||
'body' => $this->body,
|
||||
'headers' => $headers
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the redirection have to be handled manually
|
||||
*
|
||||
* @access private
|
||||
* @param boolean $follow_location Flag
|
||||
* @param integer $status HTTP status code
|
||||
* @return boolean
|
||||
*/
|
||||
private function needToHandleRedirection($follow_location, $status)
{
    // Manual handling is only needed when the caller allows redirects and
    // an open_basedir restriction is set (automatic following is then off).
    if (! $follow_location || ini_get('open_basedir') === '') {
        return false;
    }

    // Every redirect status that carries a Location header has to be
    // followed, not only 301/302: 303, 307 and 308 are included as well.
    return in_array((int) $status, array(301, 302, 303, 307, 308), true);
}
|
||||
|
||||
/**
|
||||
* Handle manually redirections when there is an open base dir restriction
|
||||
*
|
||||
* @access private
|
||||
* @param string $location Redirected URL
|
||||
* @return boolean|array
|
||||
*/
|
||||
private function handleRedirection($location)
|
||||
{
|
||||
$nb_redirects = 0;
|
||||
$this->url = $headers['Location'];
|
||||
$this->url = $location;
|
||||
$this->body = '';
|
||||
$this->body_length = 0;
|
||||
$this->headers = array();
|
||||
@ -176,7 +264,10 @@ class Curl extends Client
|
||||
while (true) {
|
||||
|
||||
$nb_redirects++;
|
||||
if ($nb_redirects >= $this->max_redirects) return false;
|
||||
|
||||
if ($nb_redirects >= $this->max_redirects) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$result = $this->doRequest(false);
|
||||
|
||||
@ -191,12 +282,7 @@ class Curl extends Client
|
||||
return $result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return array(
|
||||
'status' => $status,
|
||||
'body' => $this->body,
|
||||
'headers' => $headers
|
||||
);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
82
vendor/PicoFeed/Clients/Stream.php
vendored
82
vendor/PicoFeed/Clients/Stream.php
vendored
@ -14,14 +14,13 @@ use \PicoFeed\Client;
|
||||
class Stream extends Client
|
||||
{
|
||||
/**
|
||||
* Do the HTTP request
|
||||
* Prepare HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
* @access private
|
||||
* @return array
|
||||
*/
|
||||
public function doRequest()
|
||||
private function prepareHeaders()
|
||||
{
|
||||
// Prepare HTTP headers for the request
|
||||
$headers = array(
|
||||
'Connection: close',
|
||||
'User-Agent: '.$this->user_agent,
|
||||
@ -39,14 +38,27 @@ class Stream extends Client
|
||||
$headers[] = 'If-Modified-Since: '.$this->last_modified;
|
||||
}
|
||||
|
||||
// Create context
|
||||
$context_options = array(
|
||||
if ($this->proxy_username) {
|
||||
$headers[] = 'Proxy-Authorization: Basic '.base64_encode($this->proxy_username.':'.$this->proxy_password);
|
||||
}
|
||||
|
||||
return $headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare stream context
|
||||
*
|
||||
* @access private
|
||||
* @return array
|
||||
*/
|
||||
private function prepareContext()
|
||||
{
|
||||
$context = array(
|
||||
'http' => array(
|
||||
'method' => 'GET',
|
||||
'protocol_version' => 1.1,
|
||||
'timeout' => $this->timeout,
|
||||
'max_redirects' => $this->max_redirects,
|
||||
'header' => implode("\r\n", $headers)
|
||||
)
|
||||
);
|
||||
|
||||
@ -54,31 +66,46 @@ class Stream extends Client
|
||||
|
||||
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
$context_options['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
|
||||
$context_options['http']['request_fulluri'] = true;
|
||||
$context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
|
||||
$context['http']['request_fulluri'] = true;
|
||||
|
||||
if ($this->proxy_username) {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
|
||||
$headers[] = 'Proxy-Authorization: Basic '.base64_encode($this->proxy_username.':'.$this->proxy_password);
|
||||
$context_options['http']['header'] = implode("\r\n", $headers);
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
|
||||
$context = stream_context_create($context_options);
|
||||
$context['http']['header'] = implode("\r\n", $this->prepareHeaders());
|
||||
|
||||
return $context;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
*
|
||||
* @access public
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
*/
|
||||
public function doRequest()
|
||||
{
|
||||
// Create context
|
||||
$context = stream_context_create($this->prepareContext());
|
||||
|
||||
// Make HTTP request
|
||||
$stream = @fopen($this->url, 'r', false, $context);
|
||||
if (! is_resource($stream)) return false;
|
||||
if (! is_resource($stream)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the entire body until the max size
|
||||
$body = stream_get_contents($stream, $this->max_body_size + 1);
|
||||
|
||||
// If the body size is too large abort everything
|
||||
if (strlen($body) > $this->max_body_size) return false;
|
||||
if (strlen($body) > $this->max_body_size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get HTTP headers response
|
||||
$metadata = stream_get_meta_data($stream);
|
||||
@ -87,6 +114,23 @@ class Stream extends Client
|
||||
|
||||
fclose($stream);
|
||||
|
||||
return array(
|
||||
'status' => $status,
|
||||
'body' => $this->decodeBody($body, $headers),
|
||||
'headers' => $headers
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode body response according to the HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @param string $body Raw body
|
||||
* @param array $headers HTTP headers
|
||||
* @return string
|
||||
*/
|
||||
public function decodeBody($body, array $headers)
|
||||
{
|
||||
if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') {
|
||||
$body = $this->decodeChunked($body);
|
||||
}
|
||||
@ -95,11 +139,7 @@ class Stream extends Client
|
||||
$body = @gzdecode($body);
|
||||
}
|
||||
|
||||
return array(
|
||||
'status' => $status,
|
||||
'body' => $body,
|
||||
'headers' => $headers
|
||||
);
|
||||
return $body;
|
||||
}
|
||||
|
||||
/**
|
||||
|
46
vendor/PicoFeed/Config.php
vendored
46
vendor/PicoFeed/Config.php
vendored
@ -7,6 +7,52 @@ namespace PicoFeed;
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*
|
||||
* @method \PicoFeed\Config setClientTimeout(integer $value)
|
||||
* @method \PicoFeed\Config setClientUserAgent(string $value)
|
||||
* @method \PicoFeed\Config setMaxRedirections(integer $value)
|
||||
* @method \PicoFeed\Config setMaxBodySize(integer $value)
|
||||
* @method \PicoFeed\Config setProxyHostname(string $value)
|
||||
* @method \PicoFeed\Config setProxyPort(integer $value)
|
||||
* @method \PicoFeed\Config setProxyUsername(string $value)
|
||||
* @method \PicoFeed\Config setProxyPassword(string $value)
|
||||
* @method \PicoFeed\Config setGrabberTimeout(integer $value)
|
||||
* @method \PicoFeed\Config setGrabberUserAgent(string $value)
|
||||
* @method \PicoFeed\Config setParserHashAlgo(string $value)
|
||||
* @method \PicoFeed\Config setContentFiltering(boolean $value)
|
||||
* @method \PicoFeed\Config setTimezone(string $value)
|
||||
* @method \PicoFeed\Config setFilterIframeWhitelist(array $value)
|
||||
* @method \PicoFeed\Config setFilterIntegerAttributes(array $value)
|
||||
* @method \PicoFeed\Config setFilterAttributeOverrides(array $value)
|
||||
* @method \PicoFeed\Config setFilterRequiredAttributes(array $value)
|
||||
* @method \PicoFeed\Config setFilterMediaBlacklist(array $value)
|
||||
* @method \PicoFeed\Config setFilterMediaAttributes(array $value)
|
||||
* @method \PicoFeed\Config setFilterSchemeWhitelist(array $value)
|
||||
* @method \PicoFeed\Config setFilterWhitelistedTags(array $value)
|
||||
* @method \PicoFeed\Config setFilterBlacklistedTags(array $value)
|
||||
*
|
||||
* @method integer getClientTimeout()
|
||||
* @method string getClientUserAgent()
|
||||
* @method integer getMaxRedirections()
|
||||
* @method integer getMaxBodySize()
|
||||
* @method string getProxyHostname()
|
||||
* @method integer getProxyPort()
|
||||
* @method string getProxyUsername()
|
||||
* @method string getProxyPassword()
|
||||
* @method integer getGrabberTimeout()
|
||||
* @method string getGrabberUserAgent()
|
||||
* @method string getParserHashAlgo()
|
||||
* @method boolean getContentFiltering(bool $default_value)
|
||||
* @method string getTimezone()
|
||||
* @method array getFilterIframeWhitelist(array $default_value)
|
||||
* @method array getFilterIntegerAttributes(array $default_value)
|
||||
* @method array getFilterAttributeOverrides(array $default_value)
|
||||
* @method array getFilterRequiredAttributes(array $default_value)
|
||||
* @method array getFilterMediaBlacklist(array $default_value)
|
||||
* @method array getFilterMediaAttributes(array $default_value)
|
||||
* @method array getFilterSchemeWhitelist(array $default_value)
|
||||
* @method array getFilterWhitelistedTags(array $default_value)
|
||||
* @method array getFilterBlacklistedTags(array $default_value)
|
||||
*/
|
||||
class Config
|
||||
{
|
||||
|
59
vendor/PicoFeed/Export.php
vendored
59
vendor/PicoFeed/Export.php
vendored
@ -58,23 +58,39 @@ class Export
|
||||
|
||||
$body = $xml->addChild('body');
|
||||
|
||||
foreach ($this->content as $feed) {
|
||||
foreach ($this->content as $category => $values) {
|
||||
|
||||
if (is_string($category)) {
|
||||
$this->createCategory($body, $category, $values);
|
||||
}
|
||||
else {
|
||||
$this->createEntry($body, $values);
|
||||
}
|
||||
}
|
||||
|
||||
return $xml->asXML();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a feed entry
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $parent Parent Element
|
||||
* @param array $feed Feed properties
|
||||
*/
|
||||
public function createEntry(SimpleXMLElement $parent, array $feed)
|
||||
{
|
||||
$valid = true;
|
||||
|
||||
foreach ($this->required_fields as $field) {
|
||||
|
||||
if (! isset($feed[$field])) {
|
||||
$valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (! $valid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$outline = $body->addChild('outline');
|
||||
if ($valid) {
|
||||
$outline = $parent->addChild('outline');
|
||||
$outline->addAttribute('xmlUrl', $feed['feed_url']);
|
||||
$outline->addAttribute('htmlUrl', $feed['site_url']);
|
||||
$outline->addAttribute('title', $feed['title']);
|
||||
@ -83,7 +99,34 @@ class Export
|
||||
$outline->addAttribute('type', 'rss');
|
||||
$outline->addAttribute('version', 'RSS');
|
||||
}
|
||||
}
|
||||
|
||||
return $xml->asXML();
|
||||
/**
|
||||
* Create entries for a feed list
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $parent Parent Element
|
||||
* @param array $feeds Feed list
|
||||
*/
|
||||
public function createEntries(SimpleXMLElement $parent, array $feeds)
{
    // Walk the list by position and append one entry per feed to the parent
    $list = array_values($feeds);
    $total = count($list);

    for ($index = 0; $index < $total; $index++) {
        $this->createEntry($parent, $list[$index]);
    }
}
|
||||
|
||||
/**
|
||||
* Create a category entry
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $parent Parent Element
|
||||
* @param string $category Category
|
||||
* @param array $feeds Feed list
|
||||
*/
|
||||
public function createCategory(SimpleXMLElement $parent, $category, array $feeds)
{
    // A category is an <outline> node labelled with the category name...
    $category_node = $parent->addChild('outline');
    $category_node->addAttribute('text', $category);

    // ...whose children are the entries of the feeds it groups
    $this->createEntries($category_node, $feeds);
}
|
||||
}
|
||||
|
163
vendor/PicoFeed/Favicon.php
vendored
Normal file
163
vendor/PicoFeed/Favicon.php
vendored
Normal file
@ -0,0 +1,163 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use DOMXpath;
|
||||
|
||||
/**
|
||||
* Favicon class
|
||||
*
|
||||
* https://en.wikipedia.org/wiki/Favicon
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Favicon
{
    /**
     * Config class instance
     *
     * @access private
     * @var \PicoFeed\Config
     */
    private $config = null;

    /**
     * Icon content (filled by find() when an icon has been downloaded)
     *
     * @access private
     * @var string
     */
    private $content = '';

    /**
     * Constructor
     *
     * @access public
     * @param  \PicoFeed\Config   $config   Config class instance (a default one is created when omitted)
     */
    public function __construct(Config $config = null)
    {
        $this->config = $config ?: new Config;
    }

    /**
     * Get the icon file content (available only after a successful find())
     *
     * @access public
     * @return string
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Download a resource
     *
     * @access public
     * @param  string    $url    URL
     * @return string            Resource content, or an empty string when the
     *                           request fails or the resource is a 404
     */
    public function download($url)
    {
        Logging::setMessage(get_called_class().' Download => '.$url);

        $client = Client::getInstance();
        $client->setConfig($this->config);

        if ($client->execute($url) && ! $client->isNotFound()) {
            return $client->getContent();
        }

        return '';
    }

    /**
     * Check if a remote file exists (the resource is downloaded to find out)
     *
     * @access public
     * @param  string    $url    URL
     * @return boolean
     */
    public function exists($url)
    {
        return $this->download($url) !== '';
    }

    /**
     * Get the icon link for a website
     *
     * Candidates are the icon links declared in the homepage followed by the
     * conventional /favicon.ico. The first one that can be downloaded wins
     * and its content is kept for getContent().
     *
     * @access public
     * @param  string    $website_link    URL
     * @return string                     Absolute icon URL, or an empty string when none is found
     */
    public function find($website_link)
    {
        $website = new Url($website_link);

        $icons = $this->extract($this->download($website->getBaseUrl('/')));
        $icons[] = $website->getBaseUrl('/favicon.ico');

        // Absolute links already tried, to avoid downloading the same icon twice
        // (typically when the page itself declares /favicon.ico)
        $tried = array();

        foreach ($icons as $icon_link) {

            $icon_link = $this->convertLink($website, new Url($icon_link));

            if (isset($tried[$icon_link])) {
                continue;
            }

            $tried[$icon_link] = true;
            $this->content = $this->download($icon_link);

            if ($this->content !== '') {
                return $icon_link;
            }
        }

        return '';
    }

    /**
     * Convert icon links to absolute url
     *
     * @access public
     * @param  \PicoFeed\Url     $website     Website url
     * @param  \PicoFeed\Url     $icon        Icon url
     * @return string
     */
    public function convertLink(Url $website, Url $icon)
    {
        $base_url = '';

        if ($icon->isRelativeUrl()) {
            // Relative links are resolved against the website base url
            $base_url = $website->getBaseUrl();
        }
        else if ($icon->isProtocolRelative()) {
            // Links like //example.org/icon.png reuse the website scheme
            $icon->setScheme($website->getScheme());
        }

        return $icon->getAbsoluteUrl($base_url);
    }

    /**
     * Extract the icon links from the HTML
     *
     * Only <link> elements whose rel attribute contains "icon" but not
     * "apple" are considered. Elements without a usable href are ignored.
     *
     * @access public
     * @param  string     $html     HTML
     * @return array                List of href values, in document order
     */
    public function extract($html)
    {
        $icons = array();

        if (empty($html)) {
            return $icons;
        }

        $dom = XmlParser::getHtmlDocument($html);

        $xpath = new DOMXpath($dom);
        $elements = $xpath->query("//link[contains(@rel, 'icon') and not(contains(@rel, 'apple'))]");

        for ($i = 0; $i < $elements->length; $i++) {

            $href = trim($elements->item($i)->getAttribute('href'));

            // An empty href would later resolve to the website url itself,
            // and the homepage HTML would be accepted as icon content
            if ($href !== '') {
                $icons[] = $href;
            }
        }

        return $icons;
    }
}
|
46
vendor/PicoFeed/Feed.php
vendored
46
vendor/PicoFeed/Feed.php
vendored
@ -35,7 +35,15 @@ class Feed
|
||||
public $title = '';
|
||||
|
||||
/**
|
||||
* Item url
|
||||
* Feed description
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $description = '';
|
||||
|
||||
/**
|
||||
* Feed url
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
@ -43,7 +51,7 @@ class Feed
|
||||
public $url = '';
|
||||
|
||||
/**
|
||||
* Item date
|
||||
* Feed date
|
||||
*
|
||||
* @access public
|
||||
* @var integer
|
||||
@ -51,13 +59,21 @@ class Feed
|
||||
public $date = 0;
|
||||
|
||||
/**
|
||||
* Item language
|
||||
* Feed language
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $language = '';
|
||||
|
||||
/**
|
||||
* Feed logo URL (not the same as icon)
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $logo = '';
|
||||
|
||||
/**
|
||||
* Return feed information
|
||||
*
|
||||
@ -68,7 +84,7 @@ class Feed
|
||||
{
|
||||
$output = '';
|
||||
|
||||
foreach (array('id', 'title', 'url', 'date', 'language') as $property) {
|
||||
foreach (array('id', 'title', 'url', 'date', 'language', 'description', 'logo') as $property) {
|
||||
$output .= 'Feed::'.$property.' = '.$this->$property.PHP_EOL;
|
||||
}
|
||||
|
||||
@ -93,6 +109,28 @@ class Feed
|
||||
return $this->title;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get description
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getDescription()
{
    // Plain accessor for the public $description property
    $description = $this->description;

    return $description;
}
|
||||
|
||||
/**
|
||||
* Get the logo url
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getLogo()
{
    // Plain accessor for the public $logo property
    $logo = $this->logo;

    return $logo;
}
|
||||
|
||||
/**
|
||||
* Get url
|
||||
*
|
||||
|
803
vendor/PicoFeed/Filter.php
vendored
803
vendor/PicoFeed/Filter.php
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use DOMDocument;
|
||||
use PicoFeed\Filter\Html;
|
||||
|
||||
/**
|
||||
* Filter class
|
||||
@ -13,436 +13,18 @@ use DOMDocument;
|
||||
class Filter
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config
|
||||
*/
|
||||
private $config = null;
|
||||
|
||||
/**
|
||||
* Filtered XML data
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $data = '';
|
||||
|
||||
/**
|
||||
* Site URL (used to build absolute URL)
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $url = '';
|
||||
|
||||
/**
|
||||
* Unfiltered XML data
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $input = '';
|
||||
|
||||
/**
|
||||
* List of empty tags
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $empty_tags = array();
|
||||
|
||||
/**
|
||||
* Flag to remove the content of a tag
|
||||
*
|
||||
* @access private
|
||||
* @var boolean
|
||||
*/
|
||||
private $strip_content = false;
|
||||
|
||||
/**
|
||||
* Flag to remember if the current payload is a source code <pre/>
|
||||
*
|
||||
* @access private
|
||||
* @var boolean
|
||||
*/
|
||||
private $is_code = false;
|
||||
|
||||
/**
|
||||
* Tags and attribute whitelist
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $whitelist_tags = array(
|
||||
'audio' => array('controls', 'src'),
|
||||
'video' => array('poster', 'controls', 'height', 'width', 'src'),
|
||||
'source' => array('src', 'type'),
|
||||
'dt' => array(),
|
||||
'dd' => array(),
|
||||
'dl' => array(),
|
||||
'table' => array(),
|
||||
'caption' => array(),
|
||||
'tr' => array(),
|
||||
'th' => array(),
|
||||
'td' => array(),
|
||||
'tbody' => array(),
|
||||
'thead' => array(),
|
||||
'h2' => array(),
|
||||
'h3' => array(),
|
||||
'h4' => array(),
|
||||
'h5' => array(),
|
||||
'h6' => array(),
|
||||
'strong' => array(),
|
||||
'em' => array(),
|
||||
'code' => array(),
|
||||
'pre' => array(),
|
||||
'blockquote' => array(),
|
||||
'p' => array(),
|
||||
'ul' => array(),
|
||||
'li' => array(),
|
||||
'ol' => array(),
|
||||
'br' => array(),
|
||||
'del' => array(),
|
||||
'a' => array('href'),
|
||||
'img' => array('src', 'title', 'alt'),
|
||||
'figure' => array(),
|
||||
'figcaption' => array(),
|
||||
'cite' => array(),
|
||||
'time' => array('datetime'),
|
||||
'abbr' => array('title'),
|
||||
'iframe' => array('width', 'height', 'frameborder', 'src'),
|
||||
'q' => array('cite')
|
||||
);
|
||||
|
||||
/**
|
||||
* Tags blacklist, strip the content of those tags
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $blacklisted_tags = array(
|
||||
'script'
|
||||
);
|
||||
|
||||
/**
|
||||
* Scheme whitelist
|
||||
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $scheme_whitelist = array(
|
||||
'//',
|
||||
'data:image/png;base64,',
|
||||
'data:image/gif;base64,',
|
||||
'data:image/jpg;base64,',
|
||||
'bitcoin:',
|
||||
'callto:',
|
||||
'ed2k://',
|
||||
'facetime://',
|
||||
'feed:',
|
||||
'ftp://',
|
||||
'geo:',
|
||||
'git://',
|
||||
'http://',
|
||||
'https://',
|
||||
'irc://',
|
||||
'irc6://',
|
||||
'ircs://',
|
||||
'jabber:',
|
||||
'magnet:',
|
||||
'mailto:',
|
||||
'nntp://',
|
||||
'rtmp://',
|
||||
'sftp://',
|
||||
'sip:',
|
||||
'sips:',
|
||||
'skype:',
|
||||
'smb://',
|
||||
'sms:',
|
||||
'spotify:',
|
||||
'ssh:',
|
||||
'steam:',
|
||||
'svn://',
|
||||
'tel:',
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes used for external resources
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $media_attributes = array(
|
||||
'src',
|
||||
'href',
|
||||
'poster',
|
||||
);
|
||||
|
||||
/**
|
||||
* Blacklisted resources
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $media_blacklist = array(
|
||||
'feeds.feedburner.com',
|
||||
'share.feedsportal.com',
|
||||
'da.feedsportal.com',
|
||||
'rss.feedsportal.com',
|
||||
'res.feedsportal.com',
|
||||
'res1.feedsportal.com',
|
||||
'res2.feedsportal.com',
|
||||
'res3.feedsportal.com',
|
||||
'pi.feedsportal.com',
|
||||
'rss.nytimes.com',
|
||||
'feeds.wordpress.com',
|
||||
'stats.wordpress.com',
|
||||
'rss.cnn.com',
|
||||
'twitter.com/home?status=',
|
||||
'twitter.com/share',
|
||||
'twitter_icon_large.png',
|
||||
'www.facebook.com/sharer.php',
|
||||
'facebook_icon_large.png',
|
||||
'plus.google.com/share',
|
||||
'www.gstatic.com/images/icons/gplus-16.png',
|
||||
'www.gstatic.com/images/icons/gplus-32.png',
|
||||
'www.gstatic.com/images/icons/gplus-64.png',
|
||||
);
|
||||
|
||||
/**
|
||||
* Mandatory attributes for specified tags
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_attributes = array(
|
||||
'a' => array('href'),
|
||||
'img' => array('src'),
|
||||
'iframe' => array('src'),
|
||||
'audio' => array('src'),
|
||||
'source' => array('src'),
|
||||
);
|
||||
|
||||
/**
|
||||
* Add attributes to specified tags
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $add_attributes = array(
|
||||
'a' => 'rel="noreferrer" target="_blank"'
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes that must be integer
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $integer_attributes = array(
|
||||
'width',
|
||||
'height',
|
||||
'frameborder',
|
||||
);
|
||||
|
||||
/**
|
||||
* Iframe source whitelist, everything else is ignored
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $iframe_whitelist = array(
|
||||
'//www.youtube.com',
|
||||
'http://www.youtube.com',
|
||||
'https://www.youtube.com',
|
||||
'http://player.vimeo.com',
|
||||
'https://player.vimeo.com',
|
||||
'http://www.dailymotion.com',
|
||||
'https://www.dailymotion.com',
|
||||
);
|
||||
|
||||
/**
|
||||
* Initialize the filter, all inputs data must be encoded in UTF-8 before
|
||||
* Get the Html filter instance
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data XML content
|
||||
* @param string $site_url Site URL (used to build absolute URL)
|
||||
* @param string $html HTML content
|
||||
* @param string $website Site URL (used to build absolute URL)
|
||||
* @return PicoFeed\Filter\Html
|
||||
*/
|
||||
public function __construct($data, $site_url)
|
||||
public static function html($html, $website)
|
||||
{
|
||||
$this->url = $site_url;
|
||||
|
||||
libxml_use_internal_errors(true);
|
||||
|
||||
// Convert bad formatted documents to XML
|
||||
$dom = new DOMDocument;
|
||||
$dom->loadHTML('<?xml version="1.0" encoding="UTF-8">'.$data);
|
||||
$this->input = $dom->saveXML($dom->getElementsByTagName('body')->item(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Run tags/attributes filtering
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function execute()
|
||||
{
|
||||
$parser = xml_parser_create();
|
||||
xml_set_object($parser, $this);
|
||||
xml_set_element_handler($parser, 'startTag', 'endTag');
|
||||
xml_set_character_data_handler($parser, 'dataTag');
|
||||
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
|
||||
xml_parse($parser, $this->input, true); // We ignore parsing error (for old libxml)
|
||||
xml_parser_free($parser);
|
||||
|
||||
$this->data = $this->removeEmptyTags($this->data);
|
||||
$this->data = $this->removeMultipleTags($this->data);
|
||||
|
||||
return trim($this->data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse opening tag
|
||||
*
|
||||
* @access public
|
||||
* @param resource $parser XML parser
|
||||
* @param string $name Tag name
|
||||
* @param array $attributes Tag attributes
|
||||
*/
|
||||
public function startTag($parser, $name, $attributes)
|
||||
{
|
||||
$empty_tag = false;
|
||||
$this->strip_content = false;
|
||||
|
||||
if ($this->is_code === false && $name === 'pre') $this->is_code = true;
|
||||
|
||||
if ($this->isPixelTracker($name, $attributes)) {
|
||||
|
||||
$empty_tag = true;
|
||||
}
|
||||
else if ($this->isAllowedTag($name)) {
|
||||
|
||||
$attr_data = '';
|
||||
$used_attributes = array();
|
||||
|
||||
foreach ($attributes as $attribute => $value) {
|
||||
|
||||
if ($value != '' && $this->isAllowedAttribute($name, $attribute)) {
|
||||
|
||||
if ($this->isResource($attribute)) {
|
||||
|
||||
if ($name === 'iframe') {
|
||||
|
||||
if ($this->isAllowedIframeResource($value)) {
|
||||
|
||||
$attr_data .= ' '.$attribute.'="'.$this->escape($value).'"';
|
||||
$used_attributes[] = $attribute;
|
||||
}
|
||||
}
|
||||
else if ($this->isRelativePath($value)) {
|
||||
|
||||
$attr_data .= ' '.$attribute.'="'.$this->escape($this->getAbsoluteUrl($value, $this->url)).'"';
|
||||
$used_attributes[] = $attribute;
|
||||
}
|
||||
else if ($this->isAllowedProtocol($value) && ! $this->isBlacklistedMedia($value)) {
|
||||
|
||||
if ($attribute == 'src' &&
|
||||
isset($attributes['data-src']) &&
|
||||
$this->isAllowedProtocol($attributes['data-src']) &&
|
||||
! $this->isBlacklistedMedia($attributes['data-src'])) {
|
||||
|
||||
$value = $attributes['data-src'];
|
||||
}
|
||||
|
||||
// Replace protocol-relative url // by http://
|
||||
if (substr($value, 0, 2) === '//') $value = 'http:'.$value;
|
||||
|
||||
$attr_data .= ' '.$attribute.'="'.$this->escape($value).'"';
|
||||
$used_attributes[] = $attribute;
|
||||
}
|
||||
}
|
||||
else if ($this->validateAttributeValue($attribute, $value)) {
|
||||
|
||||
$attr_data .= ' '.$attribute.'="'.$this->escape($value).'"';
|
||||
$used_attributes[] = $attribute;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check for required attributes
|
||||
if (isset($this->required_attributes[$name])) {
|
||||
|
||||
foreach ($this->required_attributes[$name] as $required_attribute) {
|
||||
|
||||
if (! in_array($required_attribute, $used_attributes)) {
|
||||
|
||||
$empty_tag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (! $empty_tag) {
|
||||
|
||||
$this->data .= '<'.$name.$attr_data;
|
||||
|
||||
// Add custom attributes
|
||||
if (isset($this->add_attributes[$name])) {
|
||||
|
||||
$this->data .= ' '.$this->add_attributes[$name].' ';
|
||||
}
|
||||
|
||||
// If img or br, we don't close it here
|
||||
if ($name !== 'img' && $name !== 'br') $this->data .= '>';
|
||||
}
|
||||
}
|
||||
|
||||
if (in_array($name, $this->blacklisted_tags)) {
|
||||
$this->strip_content = true;
|
||||
}
|
||||
|
||||
$this->empty_tags[] = $empty_tag;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse closing tag
|
||||
*
|
||||
* @access public
|
||||
* @param resource $parser XML parser
|
||||
* @param string $name Tag name
|
||||
*/
|
||||
public function endTag($parser, $name)
|
||||
{
|
||||
if (! array_pop($this->empty_tags) && $this->isAllowedTag($name)) {
|
||||
$this->data .= $name !== 'img' && $name !== 'br' ? '</'.$name.'>' : '/>';
|
||||
}
|
||||
|
||||
if ($this->is_code && $name === 'pre') $this->is_code = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse tag content
|
||||
*
|
||||
* @access public
|
||||
* @param resource $parser XML parser
|
||||
* @param string $content Tag content
|
||||
*/
|
||||
public function dataTag($parser, $content)
|
||||
{
|
||||
$content = str_replace("\xc2\xa0", ' ', $content); // Replace with normal space
|
||||
|
||||
// Issue with Cyrillic characters
|
||||
// Replace mutliple space by a single one
|
||||
// if (! $this->is_code) {
|
||||
// $content = preg_replace('!\s+!', ' ', $content);
|
||||
// }
|
||||
|
||||
if (! $this->strip_content) {
|
||||
$this->data .= $this->escape($content);
|
||||
}
|
||||
$filter = new Html($html, $website);
|
||||
return $filter;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -454,222 +36,7 @@ class Filter
|
||||
*/
|
||||
public static function escape($content)
|
||||
{
|
||||
return htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the absolute url for a relative link
|
||||
*
|
||||
* @access public
|
||||
* @param string $path Relative path
|
||||
* @param string $url Site base url
|
||||
* @return string
|
||||
*/
|
||||
public static function getAbsoluteUrl($path, $url)
|
||||
{
|
||||
$components = parse_url($url);
|
||||
|
||||
if (! isset($components['scheme'])) $components['scheme'] = 'http';
|
||||
|
||||
if (! isset($components['host'])) {
|
||||
|
||||
if ($url) {
|
||||
|
||||
$components['host'] = $url;
|
||||
$components['path'] = '/';
|
||||
}
|
||||
else {
|
||||
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
if (! strlen($path)) return $url;
|
||||
|
||||
if ($path{0} === '/') {
|
||||
|
||||
// Absolute path
|
||||
return $components['scheme'].'://'.$components['host'].$path;
|
||||
}
|
||||
else {
|
||||
|
||||
// Relative path
|
||||
$url_path = isset($components['path']) && ! empty($components['path']) ? $components['path'] : '/';
|
||||
$length = strlen($url_path);
|
||||
|
||||
if ($length > 1 && $url_path{$length - 1} !== '/') {
|
||||
$url_path = dirname($url_path).'/';
|
||||
}
|
||||
|
||||
if (substr($path, 0, 2) === './') {
|
||||
$path = substr($path, 2);
|
||||
}
|
||||
|
||||
return $components['scheme'].'://'.$components['host'].$url_path.$path;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an url is relative
|
||||
*
|
||||
* @access public
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public static function isRelativePath($value)
|
||||
{
|
||||
if (strpos($value, 'data:') === 0) return false;
|
||||
return strpos($value, '://') === false && strpos($value, '//') !== 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a tag is on the whitelist
|
||||
*
|
||||
* @access public
|
||||
* @param string $name Tag name
|
||||
* @return boolean
|
||||
*/
|
||||
public function isAllowedTag($name)
|
||||
{
|
||||
return isset($this->whitelist_tags[$name]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an attribute is allowed for a given tag
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attribute Attribute name
|
||||
* @return boolean
|
||||
*/
|
||||
public function isAllowedAttribute($tag, $attribute)
|
||||
{
|
||||
return in_array($attribute, $this->whitelist_tags[$tag]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an attribute name is an external resource
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Attribute name
|
||||
* @return boolean
|
||||
*/
|
||||
public function isResource($attribute)
|
||||
{
|
||||
return in_array($attribute, $this->media_attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an iframe url is allowed
|
||||
*
|
||||
* @access public
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function isAllowedIframeResource($value)
|
||||
{
|
||||
foreach ($this->iframe_whitelist as $url) {
|
||||
|
||||
if (strpos($value, $url) === 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if the protocol is allowed or not
|
||||
*
|
||||
* @access public
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function isAllowedProtocol($value)
|
||||
{
|
||||
foreach ($this->scheme_whitelist as $protocol) {
|
||||
|
||||
if (strpos($value, $protocol) === 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an url is blacklisted
|
||||
*
|
||||
* @access public
|
||||
* @param string $resouce Attribute value (URL)
|
||||
* @return boolean
|
||||
*/
|
||||
public function isBlacklistedMedia($resource)
|
||||
{
|
||||
foreach ($this->media_blacklist as $name) {
|
||||
|
||||
if (strpos($resource, $name) !== false) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if an image tag is a pixel tracker
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Tag attributes
|
||||
* @return boolean
|
||||
*/
|
||||
public function isPixelTracker($tag, array $attributes)
|
||||
{
|
||||
return $tag === 'img' &&
|
||||
isset($attributes['height']) && isset($attributes['width']) &&
|
||||
$attributes['height'] == 1 && $attributes['width'] == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an attribute value is integer
|
||||
*
|
||||
* @access public
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function validateAttributeValue($attribute, $value)
|
||||
{
|
||||
if (in_array($attribute, $this->integer_attributes)) {
|
||||
return ctype_digit($value);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace <br/><br/> by only one
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public function removeMultipleTags($data)
|
||||
{
|
||||
return preg_replace("/(<br\s*\/?>\s*)+/", "<br/>", $data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove empty tags
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public function removeEmptyTags($data)
|
||||
{
|
||||
return preg_replace('/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU', '', $data);
|
||||
return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -734,145 +101,41 @@ class Filter
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whitelisted tags adn attributes for each tag
|
||||
* Trim whitespace from the beginning, the end and inside a string and don't break utf-8 string
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
|
||||
* @return \PicoFeed\Filter
|
||||
* @param string $value Raw data
|
||||
* @return string Normalized data
|
||||
*/
|
||||
public function setWhitelistedTags(array $values)
|
||||
public static function stripWhiteSpace($value)
|
||||
{
|
||||
$this->whitelist_tags = $values ?: $this->whitelist_tags;
|
||||
return $this;
|
||||
$value = str_replace("\r", "", $value);
|
||||
$value = str_replace("\t", "", $value);
|
||||
$value = str_replace("\n", "", $value);
|
||||
return trim($value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set blacklisted tags
|
||||
* Dirty quickfixes before XML parsing
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param array $values List of tags: ['video', 'img']
|
||||
* @return \PicoFeed\Filter
|
||||
* @param string $data Raw data
|
||||
* @return string Normalized data
|
||||
*/
|
||||
public function setBlacklistedTags(array $values)
|
||||
public static function normalizeData($data)
|
||||
{
|
||||
$this->blacklisted_tags = $values ?: $this->blacklisted_tags;
|
||||
return $this;
|
||||
$invalid_chars = array(
|
||||
"\x10",
|
||||
"\xc3\x20",
|
||||
"",
|
||||
);
|
||||
|
||||
foreach ($invalid_chars as $needle) {
|
||||
$data = str_replace($needle, '', $data);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set scheme whitelist
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of scheme: ['http://', 'ftp://']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setSchemeWhitelist(array $values)
|
||||
{
|
||||
$this->scheme_whitelist = $values ?: $this->scheme_whitelist;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set media attributes (used to load external resources)
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of values: ['src', 'href']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setMediaAttributes(array $values)
|
||||
{
|
||||
$this->media_attributes = $values ?: $this->media_attributes;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set blacklisted external resources
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['http://google.com/', '...']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setMediaBlacklist(array $values)
|
||||
{
|
||||
$this->media_blacklist = $values ?: $this->media_blacklist;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set mandatory attributes for whitelisted tags
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['img' => 'src']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setRequiredAttributes(array $values)
|
||||
{
|
||||
$this->required_attributes = $values ?: $this->required_attributes;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set attributes to automatically to specific tags
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['a' => 'target="_blank"']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setAttributeOverrides(array $values)
|
||||
{
|
||||
$this->add_attributes = $values ?: $this->add_attributes;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set attributes that must be an integer
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['width', 'height']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setIntegerAttributes(array $values)
|
||||
{
|
||||
$this->integer_attributes = $values ?: $this->integer_attributes;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set allowed iframe resources
|
||||
*
|
||||
* @access public
|
||||
* @param array $values List of tags: ['http://www.youtube.com']
|
||||
* @return \PicoFeed\Filter
|
||||
*/
|
||||
public function setIframeWhitelist(array $values)
|
||||
{
|
||||
$this->iframe_whitelist = $values ?: $this->iframe_whitelist;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set config object
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Config $config Config instance
|
||||
* @return \PicoFeed\Parse
|
||||
*/
|
||||
public function setConfig($config)
|
||||
{
|
||||
$this->config = $config;
|
||||
|
||||
if ($this->config !== null) {
|
||||
$this->setIframeWhitelist($this->config->getFilterIframeWhitelist(array()));
|
||||
$this->setIntegerAttributes($this->config->getFilterIntegerAttributes(array()));
|
||||
$this->setAttributeOverrides($this->config->getFilterAttributeOverrides(array()));
|
||||
$this->setRequiredAttributes($this->config->getFilterRequiredAttributes(array()));
|
||||
$this->setMediaBlacklist($this->config->getFilterMediaBlacklist(array()));
|
||||
$this->setMediaAttributes($this->config->getFilterMediaAttributes(array()));
|
||||
$this->setSchemeWhitelist($this->config->getFilterSchemeWhitelist(array()));
|
||||
$this->setBlacklistedTags($this->config->getFilterBlacklistedTags(array()));
|
||||
$this->setWhitelistedTags($this->config->getFilterWhitelistedTags(array()));
|
||||
}
|
||||
|
||||
return $this;
|
||||
return $data;
|
||||
}
|
||||
}
|
||||
|
590
vendor/PicoFeed/Filter/Attribute.php
vendored
Normal file
590
vendor/PicoFeed/Filter/Attribute.php
vendored
Normal file
@ -0,0 +1,590 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
use \PicoFeed\Url;
|
||||
use \PicoFeed\Filter;
|
||||
|
||||
/**
|
||||
* Attribute Filter class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package filter
|
||||
*/
|
||||
class Attribute
|
||||
{
|
||||
/**
|
||||
* Tags and attribute whitelist
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $attribute_whitelist = array(
|
||||
'audio' => array('controls', 'src'),
|
||||
'video' => array('poster', 'controls', 'height', 'width', 'src'),
|
||||
'source' => array('src', 'type'),
|
||||
'dt' => array(),
|
||||
'dd' => array(),
|
||||
'dl' => array(),
|
||||
'table' => array(),
|
||||
'caption' => array(),
|
||||
'tr' => array(),
|
||||
'th' => array(),
|
||||
'td' => array(),
|
||||
'tbody' => array(),
|
||||
'thead' => array(),
|
||||
'h2' => array(),
|
||||
'h3' => array(),
|
||||
'h4' => array(),
|
||||
'h5' => array(),
|
||||
'h6' => array(),
|
||||
'strong' => array(),
|
||||
'em' => array(),
|
||||
'code' => array(),
|
||||
'pre' => array(),
|
||||
'blockquote' => array(),
|
||||
'p' => array(),
|
||||
'ul' => array(),
|
||||
'li' => array(),
|
||||
'ol' => array(),
|
||||
'br' => array(),
|
||||
'del' => array(),
|
||||
'a' => array('href'),
|
||||
'img' => array('src', 'title', 'alt'),
|
||||
'figure' => array(),
|
||||
'figcaption' => array(),
|
||||
'cite' => array(),
|
||||
'time' => array('datetime'),
|
||||
'abbr' => array('title'),
|
||||
'iframe' => array('width', 'height', 'frameborder', 'src'),
|
||||
'q' => array('cite')
|
||||
);
|
||||
|
||||
/**
|
||||
* Scheme whitelist
|
||||
*
|
||||
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $scheme_whitelist = array(
|
||||
'bitcoin:',
|
||||
'callto:',
|
||||
'ed2k://',
|
||||
'facetime://',
|
||||
'feed:',
|
||||
'ftp://',
|
||||
'geo:',
|
||||
'git://',
|
||||
'http://',
|
||||
'https://',
|
||||
'irc://',
|
||||
'irc6://',
|
||||
'ircs://',
|
||||
'jabber:',
|
||||
'magnet:',
|
||||
'mailto:',
|
||||
'nntp://',
|
||||
'rtmp://',
|
||||
'sftp://',
|
||||
'sip:',
|
||||
'sips:',
|
||||
'skype:',
|
||||
'smb://',
|
||||
'sms:',
|
||||
'spotify:',
|
||||
'ssh:',
|
||||
'steam:',
|
||||
'svn://',
|
||||
'tel:',
|
||||
);
|
||||
|
||||
/**
|
||||
* Iframe source whitelist, everything else is ignored
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $iframe_whitelist = array(
|
||||
'http://www.youtube.com',
|
||||
'https://www.youtube.com',
|
||||
'http://player.vimeo.com',
|
||||
'https://player.vimeo.com',
|
||||
'http://www.dailymotion.com',
|
||||
'https://www.dailymotion.com',
|
||||
);
|
||||
|
||||
/**
|
||||
* Blacklisted resources
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $media_blacklist = array(
|
||||
'api.flattr.com',
|
||||
'feeds.feedburner.com',
|
||||
'share.feedsportal.com',
|
||||
'da.feedsportal.com',
|
||||
'rss.feedsportal.com',
|
||||
'res.feedsportal.com',
|
||||
'res1.feedsportal.com',
|
||||
'res2.feedsportal.com',
|
||||
'res3.feedsportal.com',
|
||||
'pi.feedsportal.com',
|
||||
'rss.nytimes.com',
|
||||
'feeds.wordpress.com',
|
||||
'stats.wordpress.com',
|
||||
'rss.cnn.com',
|
||||
'twitter.com/home?status=',
|
||||
'twitter.com/share',
|
||||
'twitter_icon_large.png',
|
||||
'www.facebook.com/sharer.php',
|
||||
'facebook_icon_large.png',
|
||||
'plus.google.com/share',
|
||||
'www.gstatic.com/images/icons/gplus-16.png',
|
||||
'www.gstatic.com/images/icons/gplus-32.png',
|
||||
'www.gstatic.com/images/icons/gplus-64.png',
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes used for external resources
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $media_attributes = array(
|
||||
'src',
|
||||
'href',
|
||||
'poster',
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes that must be integer
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $integer_attributes = array(
|
||||
'width',
|
||||
'height',
|
||||
'frameborder',
|
||||
);
|
||||
|
||||
/**
|
||||
* Mandatory attributes for specified tags
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_attributes = array(
|
||||
'a' => array('href'),
|
||||
'img' => array('src'),
|
||||
'iframe' => array('src'),
|
||||
'audio' => array('src'),
|
||||
'source' => array('src'),
|
||||
);
|
||||
|
||||
/**
|
||||
* Add attributes to specified tags
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $add_attributes = array(
|
||||
'a' => array('rel' => 'noreferrer', 'target' => '_blank')
|
||||
);
|
||||
|
||||
/**
|
||||
* List of filters to apply
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $filters = array(
|
||||
'filterEmptyAttribute',
|
||||
'filterAllowedAttribute',
|
||||
'filterIntegerAttribute',
|
||||
'filterAbsoluteUrlAttribute',
|
||||
'filterIframeAttribute',
|
||||
'filterBlacklistResourceAttribute',
|
||||
'filterProtocolUrlAttribute',
|
||||
);
|
||||
|
||||
/**
|
||||
* Website url instance (used to resolve resource attributes to absolute urls)
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Url
|
||||
*/
|
||||
private $website = null;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @access public
|
||||
* @param \PicoFeed\Url $website Website url instance
|
||||
*/
|
||||
public function __construct(Url $website)
{
    // Kept for later use by filterAbsoluteUrlAttribute()
    $this->website = $website;
}
|
||||
|
||||
/**
|
||||
* Apply filters to the attributes list
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param array $attributes Attributes dictionary
|
||||
* @return array Filtered attributes
|
||||
*/
|
||||
public function filter($tag, array $attributes)
{
    // Iterate by reference: filterAbsoluteUrlAttribute() takes $value by
    // reference and rewrites it in place, and that change must land in the
    // array that is returned.
    foreach ($attributes as $attribute => &$value) {

        // Filters run in the order declared in $this->filters. The first one
        // that rejects the attribute removes it and the remaining filters are
        // skipped for that attribute.
        foreach ($this->filters as $filter) {

            if (! $this->$filter($tag, $attribute, $value)) {
                unset($attributes[$attribute]);
                break;
            }
        }
    }

    // Break the reference left behind by the by-reference foreach, otherwise
    // $value stays bound to the last slot of $attributes.
    unset($value);

    return $attributes;
}
|
||||
|
||||
/**
|
||||
* Return true if the value is not empty (remove empty attributes)
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function filterEmptyAttribute($tag, $attribute, $value)
{
    // Only the empty string counts as "empty"; "0" is kept
    $is_blank = ($value === '');

    return ! $is_blank;
}
|
||||
|
||||
/**
|
||||
* Return true if the value is allowed (remove not allowed attributes)
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function filterAllowedAttribute($tag, $attribute, $value)
{
    // Unknown tag: no attribute can be allowed
    if (! isset($this->attribute_whitelist[$tag])) {
        return false;
    }

    $allowed = $this->attribute_whitelist[$tag];

    return in_array($attribute, $allowed);
}
|
||||
|
||||
/**
|
||||
* Return true if the value is an integer, or if the attribute is not an integer attribute (remove integer attributes that have a non-integer value)
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function filterIntegerAttribute($tag, $attribute, $value)
{
    // Attributes outside the integer list pass untouched; the listed ones
    // must contain digits only.
    $must_be_integer = in_array($attribute, $this->integer_attributes);

    return ! $must_be_integer || ctype_digit($value);
}
|
||||
|
||||
/**
|
||||
* Return true if the iframe source is allowed (remove not allowed iframe)
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function filterIframeAttribute($tag, $attribute, $value)
{
    // Only the source of an iframe is restricted; everything else passes
    if ($tag !== 'iframe' || $attribute !== 'src') {
        return true;
    }

    foreach ($this->iframe_whitelist as $url) {

        if (strpos($value, $url) !== 0) {
            continue;
        }

        // A whitelist entry that already carries a path (e.g. ".../embed")
        // delimits the host by itself, so a plain prefix match is enough.
        $path = parse_url($url, PHP_URL_PATH);

        if (is_string($path) && $path !== '') {
            return true;
        }

        // Host-only entry: the host must end right after the prefix.
        // Without this check "http://www.youtube.com.evil.example/" or
        // "http://www.youtube.com@evil.example/" would be accepted.
        // Allowed continuations: nothing, an optional ":port", then "/", "?" or "#".
        $remainder = (string) substr($value, strlen($url));

        if (preg_match('~^(?::\d+)?(?:[/?#]|\z)~', $remainder) === 1) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
|
||||
* Return true if the resource is not blacklisted (remove blacklisted resource attributes)
|
||||
*
|
||||
* @access public
|
||||
* @param string $tag Tag name
|
||||
* @param string $attribute Attribute name
|
||||
* @param string $value Attribute value
|
||||
* @return boolean
|
||||
*/
|
||||
public function filterBlacklistResourceAttribute($tag, $attribute, $value)
|
||||
{
|
||||
if ($this->isResource($attribute) && $this->isBlacklistedMedia($value)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Resolve the value of resource attributes against the website url
 *
 * For a resource attribute (see isResource()), $value is replaced in place by
 * the result of Url::resolve($value, $this->website). Other attributes are
 * left untouched. This filter never rejects an attribute.
 *
 * @access public
 * @param  string    $tag          Tag name (not used)
 * @param  string    $attribute    Attribute name
 * @param  string    $value        Attribute value, passed by reference
 * @return boolean                 Always true
 */
public function filterAbsoluteUrlAttribute($tag, $attribute, &$value)
{
    if (! $this->isResource($attribute)) {
        return true;
    }

    $value = Url::resolve($value, $this->website);

    return true;
}
|
||||
|
||||
/**
 * Reject resource attributes whose value does not use an authorized scheme
 *
 * Attributes that are not resource attributes (see isResource()) are always
 * accepted; for the others the result of isAllowedProtocol() decides.
 *
 * @access public
 * @param  string    $tag          Tag name (not used by this check)
 * @param  string    $attribute    Attribute name
 * @param  string    $value        Attribute value
 * @return boolean
 */
public function filterProtocolUrlAttribute($tag, $attribute, $value)
{
    return ! $this->isResource($attribute) || $this->isAllowedProtocol($value);
}
|
||||
|
||||
/**
 * Automatically add/override some attributes for specific tags
 *
 * The attributes configured for the tag always win: a value already present
 * in $attributes is replaced, a missing one is appended. The previous
 * implementation used the array union operator, which keeps the existing
 * value and therefore never overrode anything.
 *
 * @access public
 * @param  string    $tag            Tag name
 * @param  array     $attributes     Attributes list (name => value)
 * @return array                     Attributes list including the forced attributes
 */
public function addAttributes($tag, array $attributes)
{
    if (isset($this->add_attributes[$tag])) {
        // array_merge() keeps the position of existing keys and lets the later array win
        $attributes = array_merge($attributes, $this->add_attributes[$tag]);
    }

    return $attributes;
}
|
||||
|
||||
/**
 * Check that every attribute required for a tag is present
 *
 * Tags without an entry in the required attributes are always accepted.
 *
 * @access public
 * @param  string    $tag            Tag name
 * @param  array     $attributes     Attributes list (name => value)
 * @return boolean                   False as soon as one required attribute is missing
 */
public function hasRequiredAttributes($tag, array $attributes)
{
    if (! isset($this->required_attributes[$tag])) {
        return true;
    }

    foreach ($this->required_attributes[$tag] as $required) {
        if (! isset($attributes[$required])) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
/**
 * Check if an attribute name is listed in the media attributes
 * (attributes used to load external resources)
 *
 * @access public
 * @param  string    $attribute    Attribute name
 * @return boolean
 */
public function isResource($attribute)
{
    $resource_attributes = $this->media_attributes;

    return in_array($attribute, $resource_attributes);
}
|
||||
|
||||
/**
 * Detect if the value starts with one of the whitelisted schemes
 *
 * The comparison is a case-sensitive prefix match against each entry of
 * the scheme whitelist.
 *
 * @access public
 * @param  string    $value    Attribute value
 * @return boolean
 */
public function isAllowedProtocol($value)
{
    $allowed = false;

    foreach ($this->scheme_whitelist as $scheme) {
        if (strpos($value, $scheme) === 0) {
            $allowed = true;
            break;
        }
    }

    return $allowed;
}
|
||||
|
||||
/**
 * Detect if an url is blacklisted
 *
 * The url is blacklisted when it contains, at any position, one of the
 * entries of the media blacklist.
 *
 * @access public
 * @param  string    $resource    Attribute value (URL)
 * @return boolean
 */
public function isBlacklistedMedia($resource)
{
    $blacklisted = false;

    foreach ($this->media_blacklist as $pattern) {
        if (strpos($resource, $pattern) !== false) {
            $blacklisted = true;
            break;
        }
    }

    return $blacklisted;
}
|
||||
|
||||
/**
 * Convert the attribute list to html
 *
 * Each pair is rendered as name="value", the value going through
 * Filter::escape(); pairs are separated by a single space.
 *
 * @access public
 * @param  array     $attributes    Attributes (name => value)
 * @return string                   Empty string when there is no attribute
 */
public function toHtml(array $attributes)
{
    $pairs = array();

    foreach ($attributes as $name => $raw_value) {
        $pairs[] = $name.'="'.Filter::escape($raw_value).'"';
    }

    return implode(' ', $pairs);
}
|
||||
|
||||
/**
 * Set whitelisted tags and attributes for each tag
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @access public
 * @param  array   $values   List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
 * @return $this             Fluent interface
 */
public function setWhitelistedAttributes(array $values)
{
    if (! empty($values)) {
        $this->attribute_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set scheme whitelist
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @access public
 * @param  array   $values   List of scheme: ['http://', 'ftp://']
 * @return $this             Fluent interface
 */
public function setSchemeWhitelist(array $values)
{
    if (! empty($values)) {
        $this->scheme_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set media attributes (used to load external resources)
 *
 * An empty list is ignored and the current list is kept.
 *
 * @access public
 * @param  array   $values   List of values: ['src', 'href']
 * @return $this             Fluent interface
 */
public function setMediaAttributes(array $values)
{
    if (! empty($values)) {
        $this->media_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set blacklisted external resources
 *
 * An empty list is ignored and the current blacklist is kept.
 *
 * @access public
 * @param  array   $values   List of urls or url fragments: ['http://google.com/', '...']
 * @return $this             Fluent interface
 */
public function setMediaBlacklist(array $values)
{
    if (! empty($values)) {
        $this->media_blacklist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set mandatory attributes for whitelisted tags
 *
 * An empty list is ignored and the current list is kept.
 *
 * @access public
 * @param  array   $values   List of attribute names per tag: ['img' => ['src']]
 * @return $this             Fluent interface
 */
public function setRequiredAttributes(array $values)
{
    if (! empty($values)) {
        $this->required_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set attributes to add automatically to specific tags
 *
 * An empty list is ignored and the current list is kept.
 *
 * @access public
 * @param  array   $values   List of attributes per tag: ['a' => ['target' => '_blank']]
 * @return $this             Fluent interface
 */
public function setAttributeOverrides(array $values)
{
    if (! empty($values)) {
        $this->add_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set attributes that must be an integer
 *
 * An empty list is ignored and the current list is kept.
 *
 * @access public
 * @param  array   $values   List of attribute names: ['width', 'height']
 * @return $this             Fluent interface
 */
public function setIntegerAttributes(array $values)
{
    if (! empty($values)) {
        $this->integer_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set allowed iframe resources
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @access public
 * @param  array   $values   List of url prefixes: ['http://www.youtube.com']
 * @return $this             Fluent interface
 */
public function setIframeWhitelist(array $values)
{
    if (! empty($values)) {
        $this->iframe_whitelist = $values;
    }

    return $this;
}
|
||||
}
|
197
vendor/PicoFeed/Filter/Html.php
vendored
Normal file
197
vendor/PicoFeed/Filter/Html.php
vendored
Normal file
@ -0,0 +1,197 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
use \PicoFeed\Url;
|
||||
use \PicoFeed\Filter;
|
||||
use \PicoFeed\XmlParser;
|
||||
|
||||
/**
 * HTML Filter class
 *
 * The input is converted with XmlParser::HtmlToXml() and walked with the
 * event based XML parser; only the tags accepted by the Tag filter and the
 * attributes accepted by the Attribute filter are written to the output.
 *
 * @author  Frederic Guillot
 * @package filter
 */
class Html
{
    /**
     * Config object
     *
     * @access private
     * @var \PicoFeed\Config
     */
    private $config = null;

    /**
     * Unfiltered XML data
     *
     * @access private
     * @var string
     */
    private $input = '';

    /**
     * Filtered XML data
     *
     * @access private
     * @var string
     */
    private $output = '';

    /**
     * Stack of "empty" flags, one entry per tag currently open
     * (pushed by startTag(), popped by endTag())
     *
     * @access private
     * @var array
     */
    private $empty_tags = array();

    /**
     * Empty flag: true when the last opening tag was not written to the output
     *
     * @access private
     * @var boolean
     */
    private $empty = true;

    /**
     * Tag instance (assigned by the constructor)
     *
     * @access public
     * @var \PicoFeed\Filter\Tag
     */
    public $tag = '';

    /**
     * Attribute instance (assigned by the constructor)
     *
     * @access public
     * @var \PicoFeed\Filter\Attribute
     */
    public $attribute = '';

    /**
     * Initialize the filter, all inputs data must be encoded in UTF-8 before
     *
     * @access public
     * @param  string  $html      HTML content
     * @param  string  $website   Site URL (used to build absolute URL)
     */
    public function __construct($html, $website)
    {
        $this->input = XmlParser::HtmlToXml($html);
        $this->output = '';
        $this->tag = new Tag;
        $this->attribute = new Attribute(new Url($website));
    }

    /**
     * Set config object and forward the filter settings to the tag and attribute filters
     *
     * Nothing is forwarded when $config is null.
     *
     * @access public
     * @param  \PicoFeed\Config  $config   Config instance
     * @return \PicoFeed\Filter\Html
     */
    public function setConfig($config)
    {
        $this->config = $config;

        if ($this->config !== null) {
            $this->attribute->setIframeWhitelist($this->config->getFilterIframeWhitelist(array()));
            $this->attribute->setIntegerAttributes($this->config->getFilterIntegerAttributes(array()));
            $this->attribute->setAttributeOverrides($this->config->getFilterAttributeOverrides(array()));
            $this->attribute->setRequiredAttributes($this->config->getFilterRequiredAttributes(array()));
            $this->attribute->setMediaBlacklist($this->config->getFilterMediaBlacklist(array()));
            $this->attribute->setMediaAttributes($this->config->getFilterMediaAttributes(array()));
            $this->attribute->setSchemeWhitelist($this->config->getFilterSchemeWhitelist(array()));

            // The same setting feeds both filters: keys are the tags, values their attributes
            $whitelisted_tags = $this->config->getFilterWhitelistedTags(array());
            $this->attribute->setWhitelistedAttributes($whitelisted_tags);
            $this->tag->setWhitelistedTags(array_keys($whitelisted_tags));
        }

        return $this;
    }

    /**
     * Run tags/attributes filtering
     *
     * The parsing state is reset first, so calling this method several times
     * returns the same result instead of appending to the previous output.
     *
     * @access public
     * @return string   Filtered content
     */
    public function execute()
    {
        $this->output = '';
        $this->empty_tags = array();
        $this->empty = true;

        $parser = xml_parser_create();

        xml_set_object($parser, $this);
        xml_set_element_handler($parser, 'startTag', 'endTag');
        xml_set_character_data_handler($parser, 'dataTag');

        // Keep the tag names as they are written (no upper-casing)
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
        xml_parse($parser, $this->input, true);
        xml_parser_free($parser);

        $this->postFilter();

        return $this->output;
    }

    /**
     * Clean up the generated output: remove empty tags and surrounding whitespace
     *
     * @access public
     */
    public function postFilter()
    {
        $this->output = $this->tag->removeEmptyTags($this->output);
        $this->output = trim($this->output);
    }

    /**
     * Parse opening tag
     *
     * The tag is written only when it is allowed and, after attribute
     * filtering, still has all its required attributes. The outcome is pushed
     * on the stack so that endTag() knows whether a closing tag is needed.
     *
     * @access public
     * @param  resource  $parser       XML parser
     * @param  string    $tag          Tag name
     * @param  array     $attributes   Tag attributes
     */
    public function startTag($parser, $tag, array $attributes)
    {
        $this->empty = true;

        if ($this->tag->isAllowed($tag, $attributes)) {

            $attributes = $this->attribute->filter($tag, $attributes);

            if ($this->attribute->hasRequiredAttributes($tag, $attributes)) {

                $attributes = $this->attribute->addAttributes($tag, $attributes);

                $this->output .= $this->tag->openHtmlTag($tag, $this->attribute->toHtml($attributes));
                $this->empty = false;
            }
        }

        $this->empty_tags[] = $this->empty;
    }

    /**
     * Parse closing tag
     *
     * A closing tag is written only when the matching opening tag was written.
     *
     * @access public
     * @param  resource  $parser    XML parser
     * @param  string    $tag       Tag name
     */
    public function endTag($parser, $tag)
    {
        if (! array_pop($this->empty_tags) && $this->tag->isAllowedTag($tag)) {
            $this->output .= $this->tag->closeHtmlTag($tag);
        }
    }

    /**
     * Parse tag content
     *
     * @access public
     * @param  resource  $parser    XML parser
     * @param  string    $content   Tag content
     */
    public function dataTag($parser, $content)
    {
        // Replace the non-breaking space (UTF-8 bytes C2 A0) with a normal space
        $content = str_replace("\xc2\xa0", ' ', $content);
        $this->output .= Filter::escape($content);
    }
}
|
173
vendor/PicoFeed/Filter/Tag.php
vendored
Normal file
173
vendor/PicoFeed/Filter/Tag.php
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed\Filter;
|
||||
|
||||
/**
 * Tag Filter class
 *
 * Decides which tags are kept and renders their opening/closing markup.
 *
 * @author  Frederic Guillot
 * @package filter
 */
class Tag
{
    /**
     * Tags whitelist
     *
     * @access private
     * @var array
     */
    private $tag_whitelist = array(
        'audio',
        'video',
        'source',
        'dt',
        'dd',
        'dl',
        'table',
        'caption',
        'tr',
        'th',
        'td',
        'tbody',
        'thead',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'strong',
        'em',
        'code',
        'pre',
        'blockquote',
        'p',
        'ul',
        'li',
        'ol',
        'br',
        'del',
        'a',
        'img',
        'figure',
        'figcaption',
        'cite',
        'time',
        'abbr',
        'iframe',
        'q',
    );

    /**
     * Check if the tag is allowed and is not a pixel tracker
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  array     $attributes    Attributes dictionary
     * @return boolean
     */
    public function isAllowed($tag, array $attributes)
    {
        return $this->isAllowedTag($tag) && ! $this->isPixelTracker($tag, $attributes);
    }

    /**
     * Return the HTML opening tag
     *
     * Self-closing tags (see isSelfClosingTag()) are terminated with "/>".
     *
     * @access public
     * @param  string    $tag           Tag name
     * @param  string    $attributes    Attributes converted in html
     * @return string
     */
    public function openHtmlTag($tag, $attributes = '')
    {
        return '<'.$tag.(empty($attributes) ? '' : ' '.$attributes).($this->isSelfClosingTag($tag) ? '/>' : '>');
    }

    /**
     * Return the HTML closing tag (empty string for a self-closing tag)
     *
     * @access public
     * @param  string    $tag           Tag name
     * @return string
     */
    public function closeHtmlTag($tag)
    {
        return $this->isSelfClosingTag($tag) ? '' : '</'.$tag.'>';
    }

    /**
     * Return true if the tag is self-closing ("br" and "img")
     *
     * @access public
     * @param  string    $tag           Tag name
     * @return boolean
     */
    public function isSelfClosingTag($tag)
    {
        return in_array($tag, array('br', 'img'));
    }

    /**
     * Check if a tag is on the whitelist
     *
     * @access public
     * @param  string    $tag           Tag name
     * @return boolean
     */
    public function isAllowedTag($tag)
    {
        return in_array($tag, $this->tag_whitelist);
    }

    /**
     * Detect if an image tag is a pixel tracker (img with height and width equal to 1)
     *
     * @access public
     * @param  string  $tag         Tag name
     * @param  array   $attributes  Tag attributes
     * @return boolean
     */
    public function isPixelTracker($tag, array $attributes)
    {
        return $tag === 'img' &&
                isset($attributes['height']) && isset($attributes['width']) &&
                $attributes['height'] == 1 && $attributes['width'] == 1;
    }

    /**
     * Remove empty tags
     *
     * Removes elements without attributes whose content is only whitespace
     * or other such empty elements. When the regular expression engine fails
     * (preg_replace() returns null, for instance when the backtrack limit is
     * reached), the input is returned untouched instead of losing the content.
     *
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public function removeEmptyTags($data)
    {
        $filtered = preg_replace('/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU', '', $data);

        return $filtered === null ? $data : $filtered;
    }

    /**
     * Replace <br/><br/> by only one
     *
     * The input is returned untouched when the regular expression engine fails.
     *
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public function removeMultipleTags($data)
    {
        $filtered = preg_replace("/(<br\s*\/?>\s*)+/", "<br/>", $data);

        return $filtered === null ? $data : $filtered;
    }

    /**
     * Set whitelisted tags
     *
     * An empty list is ignored and the current whitelist is kept.
     *
     * @access public
     * @param  array   $values   List of tag names: ['video', 'img']
     * @return \PicoFeed\Filter\Tag
     */
    public function setWhitelistedTags(array $values)
    {
        $this->tag_whitelist = $values ?: $this->tag_whitelist;
        return $this;
    }
}
|
24
vendor/PicoFeed/Grabber.php
vendored
24
vendor/PicoFeed/Grabber.php
vendored
@ -3,10 +3,6 @@
|
||||
namespace PicoFeed;
|
||||
|
||||
use DOMXPath;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Client;
|
||||
use PicoFeed\Encoding;
|
||||
use PicoFeed\Filter;
|
||||
|
||||
/**
|
||||
* Grabber class
|
||||
@ -224,20 +220,9 @@ class Grabber
|
||||
public function download()
|
||||
{
|
||||
$client = Client::getInstance();
|
||||
|
||||
if ($this->config !== null) {
|
||||
|
||||
$client->setTimeout($this->config->getGrabberTimeout())
|
||||
->setUserAgent($this->config->getGrabberUserAgent())
|
||||
->setMaxRedirections($this->config->getMaxRedirections())
|
||||
->setMaxBodySize($this->config->getMaxBodySize())
|
||||
->setProxyHostname($this->config->getProxyHostname())
|
||||
->setProxyPort($this->config->getProxyPort())
|
||||
->setProxyUsername($this->config->getProxyUsername())
|
||||
->setProxyPassword($this->config->getProxyPassword());
|
||||
}
|
||||
|
||||
$client->setConfig($this->config);
|
||||
$client->execute($this->url);
|
||||
|
||||
$this->html = $client->getContent();
|
||||
$this->encoding = $client->getEncoding();
|
||||
|
||||
@ -253,6 +238,11 @@ class Grabber
|
||||
public function getRules()
|
||||
{
|
||||
$hostname = parse_url($this->url, PHP_URL_HOST);
|
||||
|
||||
if ($hostname === false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$files = array($hostname);
|
||||
|
||||
if (substr($hostname, 0, 4) == 'www.') {
|
||||
|
96
vendor/PicoFeed/Import.php
vendored
96
vendor/PicoFeed/Import.php
vendored
@ -2,11 +2,8 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
require_once __DIR__.'/Logging.php';
|
||||
require_once __DIR__.'/XmlParser.php';
|
||||
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\XmlParser;
|
||||
use SimpleXmlElement;
|
||||
use StdClass;
|
||||
|
||||
/**
|
||||
* OPML Import
|
||||
@ -79,21 +76,94 @@ class Import
|
||||
foreach ($tree->outline as $item) {
|
||||
|
||||
if (isset($item->outline)) {
|
||||
|
||||
$this->parseEntries($item);
|
||||
}
|
||||
else if ((isset($item['text']) || isset($item['title'])) && isset($item['xmlUrl'])) {
|
||||
|
||||
$entry = new \StdClass;
|
||||
$entry->category = isset($tree['title']) ? (string) $tree['title'] : (string) $tree['text'];
|
||||
$entry->title = isset($item['title']) ? (string) $item['title'] : (string) $item['text'];
|
||||
$entry->feed_url = (string) $item['xmlUrl'];
|
||||
$entry->site_url = isset($item['htmlUrl']) ? (string) $item['htmlUrl'] : $entry->feed_url;
|
||||
$entry->type = isset($item['version']) ? (string) $item['version'] : isset($item['type']) ? (string) $item['type'] : 'rss';
|
||||
$entry->description = isset($item['description']) ? (string) $item['description'] : $entry->title;
|
||||
$entry = new StdClass;
|
||||
$entry->category = $this->findCategory($tree);
|
||||
$entry->title = $this->findTitle($item);
|
||||
$entry->feed_url = $this->findFeedUrl($item);
|
||||
$entry->site_url = $this->findSiteUrl($item, $entry);
|
||||
$entry->type = $this->findType($item);
|
||||
$entry->description = $this->findDescription($item, $entry);
|
||||
$this->items[] = $entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Find category
 *
 * Uses the "title" attribute of the parent outline, or its "text"
 * attribute when there is no title.
 *
 * @access public
 * @param  SimpleXmlElement    $tree    XML tree
 * @return string
 */
public function findCategory(SimpleXmlElement $tree)
{
    if (isset($tree['title'])) {
        return (string) $tree['title'];
    }

    return (string) $tree['text'];
}
|
||||
|
||||
/**
 * Find title
 *
 * Uses the "title" attribute of the outline, or its "text" attribute
 * when there is no title.
 *
 * @access public
 * @param  SimpleXmlElement    $item    XML tree
 * @return string
 */
public function findTitle(SimpleXmlElement $item)
{
    if (isset($item['title'])) {
        return (string) $item['title'];
    }

    return (string) $item['text'];
}
|
||||
|
||||
/**
 * Find feed url ("xmlUrl" attribute of the outline)
 *
 * @access public
 * @param  SimpleXmlElement    $item    XML tree
 * @return string                       Empty string when the attribute is missing
 */
public function findFeedUrl(SimpleXmlElement $item)
{
    $feed_url = $item['xmlUrl'];

    return (string) $feed_url;
}
|
||||
|
||||
/**
 * Find site url
 *
 * Uses the "htmlUrl" attribute of the outline and falls back to the
 * feed url already stored in the entry.
 *
 * @access public
 * @param  SimpleXmlElement    $item     XML tree
 * @param  StdClass            $entry    Feed entry (its feed_url property is the fallback)
 * @return string
 */
public function findSiteUrl(SimpleXmlElement $item, StdClass $entry)
{
    if (isset($item['htmlUrl'])) {
        return (string) $item['htmlUrl'];
    }

    return $entry->feed_url;
}
|
||||
|
||||
/**
 * Find type
 *
 * Uses the "version" attribute of the outline, then its "type" attribute,
 * and defaults to "rss".
 *
 * The previous one-line nested ternary had no parentheses: before PHP 8 it
 * is evaluated left to right (so "type", or an empty string, was returned
 * whenever "version" was set) and since PHP 8 it does not compile at all.
 *
 * @access public
 * @param  SimpleXmlElement    $item    XML tree
 * @return string
 */
public function findType(SimpleXmlElement $item)
{
    if (isset($item['version'])) {
        return (string) $item['version'];
    }

    if (isset($item['type'])) {
        return (string) $item['type'];
    }

    return 'rss';
}
|
||||
|
||||
/**
 * Find description
 *
 * Uses the "description" attribute of the outline and falls back to the
 * title already stored in the entry.
 *
 * @access public
 * @param  SimpleXmlElement    $item     XML tree
 * @param  StdClass            $entry    Feed entry (its title property is the fallback)
 * @return string
 */
public function findDescription(SimpleXmlElement $item, StdClass $entry)
{
    if (isset($item['description'])) {
        return (string) $item['description'];
    }

    return $entry->title;
}
|
||||
}
|
||||
|
2
vendor/PicoFeed/Logging.php
vendored
2
vendor/PicoFeed/Logging.php
vendored
@ -27,7 +27,7 @@ class Logging
|
||||
*
|
||||
* @static
|
||||
* @access private
|
||||
* @var array
|
||||
* @var string
|
||||
*/
|
||||
private static $timezone = 'UTC';
|
||||
|
||||
|
207
vendor/PicoFeed/Parser.php
vendored
207
vendor/PicoFeed/Parser.php
vendored
@ -4,14 +4,6 @@ namespace PicoFeed;
|
||||
|
||||
use DateTime;
|
||||
use DateTimeZone;
|
||||
use DOMXPath;
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Config;
|
||||
use PicoFeed\Encoding;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Grabber;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\XmlParser;
|
||||
|
||||
/**
|
||||
* Base parser class
|
||||
@ -61,13 +53,21 @@ abstract class Parser
|
||||
*/
|
||||
protected $namespaces = array();
|
||||
|
||||
/**
|
||||
* Enable the content filtering
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $enable_filter = true;
|
||||
|
||||
/**
|
||||
* Enable the content grabber
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
public $enable_grabber = false;
|
||||
private $enable_grabber = false;
|
||||
|
||||
/**
|
||||
* Ignore those urls for the content scraper
|
||||
@ -96,7 +96,7 @@ abstract class Parser
|
||||
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
|
||||
|
||||
// Workarounds
|
||||
$this->content = $this->normalizeData($this->content);
|
||||
$this->content = Filter::normalizeData($this->content);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -122,9 +122,11 @@ abstract class Parser
|
||||
$feed = new Feed;
|
||||
$this->findFeedUrl($xml, $feed);
|
||||
$this->findFeedTitle($xml, $feed);
|
||||
$this->findFeedDescription($xml, $feed);
|
||||
$this->findFeedLanguage($xml, $feed);
|
||||
$this->findFeedId($xml, $feed);
|
||||
$this->findFeedDate($xml, $feed);
|
||||
$this->findFeedLogo($xml, $feed);
|
||||
|
||||
foreach ($this->getItemsTree($xml) as $entry) {
|
||||
|
||||
@ -137,6 +139,10 @@ abstract class Parser
|
||||
$this->findItemContent($entry, $item);
|
||||
$this->findItemEnclosure($entry, $item, $feed);
|
||||
$this->findItemLanguage($entry, $item, $feed);
|
||||
|
||||
$this->scrapWebsite($item);
|
||||
$this->filterItemContent($feed, $item);
|
||||
|
||||
$feed->items[] = $item;
|
||||
}
|
||||
|
||||
@ -146,103 +152,42 @@ abstract class Parser
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter HTML for entry content
|
||||
* Fetch item content with the content grabber
|
||||
*
|
||||
* @access public
|
||||
* @param string $item_content Item content
|
||||
* @param string $item_url Item URL
|
||||
* @return string Filtered content
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function filterHtml($item_content, $item_url)
|
||||
public function scrapWebsite(Item $item)
|
||||
{
|
||||
$content = '';
|
||||
if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) {
|
||||
|
||||
// Setup the content scraper
|
||||
if ($this->enable_grabber && ! in_array($item_url, $this->grabber_ignore_urls)) {
|
||||
|
||||
$grabber = new Grabber($item_url);
|
||||
$grabber = new Grabber($item->getUrl());
|
||||
$grabber->setConfig($this->config);
|
||||
$grabber->download();
|
||||
|
||||
if ($grabber->parse()) {
|
||||
$item_content = $grabber->getContent();
|
||||
$item->content = $grabber->getContent() ?: $item->content;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Content filtering
|
||||
if ($item_content) {
|
||||
|
||||
if ($this->config !== null) {
|
||||
|
||||
$callback = $this->config->getContentFilteringCallback();
|
||||
|
||||
if (is_callable($callback)) {
|
||||
$content = $callback($item_content, $item_url);
|
||||
}
|
||||
}
|
||||
|
||||
if (! $content) {
|
||||
$filter = new Filter($item_content, $item_url);
|
||||
/**
|
||||
* Filter HTML for entry content
|
||||
*
|
||||
* @access public
|
||||
* @param Feed $feed Feed object
|
||||
* @param Item $item Item object
|
||||
*/
|
||||
public function filterItemContent(Feed $feed, Item $item)
|
||||
{
|
||||
if ($this->isFilteringEnabled()) {
|
||||
$filter = Filter::html($item->getContent(), $feed->getUrl());
|
||||
$filter->setConfig($this->config);
|
||||
$content = $filter->execute();
|
||||
$item->content = $filter->execute();
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().': Content filtering disabled');
|
||||
}
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dirty quickfixes applied to the raw feed data before XML parsing
|
||||
*
|
||||
* Removes every occurrence of the byte sequences listed in $invalid_chars,
|
||||
* then passes the result through replaceEntityAttribute().
|
||||
*
|
||||
* @access public
|
||||
* @param string $data Raw data
|
||||
* @return string Normalized data
|
||||
*/
|
||||
public function normalizeData($data)
|
||||
{
|
||||
$invalid_chars = array(
|
||||
"\x10",
|
||||
"\xc3\x20",
|
||||
// NOTE(review): as written this is an empty needle, for which str_replace() changes nothing;
// presumably it held an unprintable character that got lost - confirm against the original file
"",
|
||||
);
|
||||
|
||||
foreach ($invalid_chars as $needle) {
|
||||
$data = str_replace($needle, '', $data);
|
||||
}
|
||||
|
||||
$data = $this->replaceEntityAttribute($data);
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
 * Replace & by &amp; inside each href attribute (Fix broken feeds)
 *
 * Every href="..." occurrence goes through htmlspecialchars() with double
 * encoding disabled, so entities that are already encoded are left as they are.
 *
 * @access public
 * @param  string  $content   Raw data
 * @return string             Normalized data
 */
public function replaceEntityAttribute($content)
{
    $escape_href = function(array $matches) {
        return htmlspecialchars($matches[0], ENT_NOQUOTES, 'UTF-8', false);
    };

    return preg_replace_callback('/href="[^"]+"/', $escape_href, $content);
}
|
||||
|
||||
/**
 * Remove carriage returns, tabs and line feeds everywhere in the string,
 * then trim both ends (works on bytes that are never part of a multi-byte
 * UTF-8 sequence, so UTF-8 strings are not broken)
 *
 * @access public
 * @param  string  $value  Raw data
 * @return string          Normalized data
 */
public function stripWhiteSpace($value)
{
    $without_breaks = str_replace(array("\r", "\t", "\n"), "", $value);

    return trim($without_breaks);
}
|
||||
|
||||
/**
|
||||
@ -355,25 +300,6 @@ abstract class Parser
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Get xml:lang value
 *
 * The document is built from $this->content; the $xml argument is not
 * read by this method.
 *
 * @access public
 * @param  string  $xml  XML string (unused)
 * @return string        Language, or an empty string when the document
 *                       cannot be loaded or has no xml:lang attribute
 */
public function getXmlLang($xml)
{
    $document = XmlParser::getDomDocument($this->content);

    if ($document !== false) {

        $query = new DOMXPath($document);
        $language = $query->evaluate('string(//@xml:lang[1])');

        if ($language) {
            return $language;
        }
    }

    return '';
}
|
||||
|
||||
/**
|
||||
* Return true if the given language is "Right to Left"
|
||||
*
|
||||
@ -446,6 +372,32 @@ abstract class Parser
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Disable the content filtering
 *
 * Only changes the local flag; isFilteringEnabled() still asks the config
 * object, when there is one, using this flag as default value.
 *
 * @access public
 * @return \PicoFeed\Parser
 */
public function disableContentFiltering()
{
    $this->enable_filter = false;

    // Fluent interface, as announced by the return type
    return $this;
}
|
||||
|
||||
/**
 * Return true if the content filtering is enabled
 *
 * Without config object the local flag decides; otherwise the flag is
 * passed to the config object's getContentFiltering() and its answer is returned.
 *
 * @access public
 * @return boolean
 */
public function isFilteringEnabled()
{
    $default = $this->enable_filter;

    return $this->config === null ? $default : $this->config->getContentFiltering($default);
}
|
||||
|
||||
/**
|
||||
* Enable the content grabber
|
||||
*
|
||||
@ -468,37 +420,4 @@ abstract class Parser
|
||||
{
|
||||
$this->grabber_ignore_urls = $urls;
|
||||
}
|
||||
|
||||
/**
 * Get a value from a XML namespace
 *
 * The namespaces are tried in order and the first one that contains the
 * property wins. When an attribute name is given and that attribute has a
 * non-empty value, the attribute value is returned; in every other case the
 * text of the property itself is returned.
 *
 * @access public
 * @param  SimpleXMLElement   $xml           XML element
 * @param  array              $namespaces    XML namespaces (name => url)
 * @param  string             $property      XML tag name
 * @param  string             $attribute     XML attribute name
 * @return string                            Empty string when no namespace contains the property
 */
public function getNamespaceValue(SimpleXMLElement $xml, array $namespaces, $property, $attribute = '')
{
    foreach ($namespaces as $url) {

        $children = $xml->children($url);

        if ($children->$property->count() <= 0) {
            continue;
        }

        if ($attribute) {
            foreach ($children->$property->attributes() as $name => $content) {
                if ($name === $attribute && $content) {
                    return (string) $content;
                }
            }
        }

        return (string) $children->$property;
    }

    return '';
}
|
||||
}
|
||||
|
42
vendor/PicoFeed/Parsers/Atom.php
vendored
42
vendor/PicoFeed/Parsers/Atom.php
vendored
@ -6,9 +6,10 @@ use SimpleXMLElement;
|
||||
use PicoFeed\Parser;
|
||||
use PicoFeed\XmlParser;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Feed;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Item;
|
||||
use PicoFeed\Url;
|
||||
|
||||
/**
|
||||
* Atom parser
|
||||
@ -42,6 +43,30 @@ class Atom extends Parser
|
||||
$feed->url = $this->getLink($xml);
|
||||
}
|
||||
|
||||
/**
 * Find the feed description (text of the "subtitle" element)
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object, its description property is set
 */
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
    $subtitle = (string) $xml->subtitle;

    $feed->description = $subtitle;
}
|
||||
|
||||
/**
 * Find the feed logo url (text of the "logo" element)
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object, its logo property is set
 */
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
{
    $logo = (string) $xml->logo;

    $feed->logo = $logo;
}
|
||||
|
||||
/**
|
||||
* Find the feed title
|
||||
*
|
||||
@ -51,7 +76,7 @@ class Atom extends Parser
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$feed->title = $this->stripWhiteSpace((string) $xml->title) ?: $feed->url;
|
||||
$feed->title = Filter::stripWhiteSpace((string) $xml->title) ?: $feed->url;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -63,7 +88,7 @@ class Atom extends Parser
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$feed->language = $this->getXmlLang($this->content);
|
||||
$feed->language = XmlParser::getXmlLang($this->content);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -111,7 +136,7 @@ class Atom extends Parser
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->title = $this->stripWhiteSpace((string) $entry->title);
|
||||
$item->title = Filter::stripWhiteSpace((string) $entry->title);
|
||||
|
||||
if (empty($item->title)) {
|
||||
$item->title = $item->url;
|
||||
@ -145,7 +170,7 @@ class Atom extends Parser
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->content = $this->filterHtml($this->getContent($entry), $item->url);
|
||||
$item->content = $this->getContent($entry);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -202,13 +227,8 @@ class Atom extends Parser
|
||||
foreach ($entry->link as $link) {
|
||||
if ((string) $link['rel'] === 'enclosure') {
|
||||
|
||||
$item->enclosure_url = (string) $link['href'];
|
||||
$item->enclosure_url = Url::resolve((string) $link['href'], $feed->url);
|
||||
$item->enclosure_type = (string) $link['type'];
|
||||
|
||||
if (Filter::isRelativePath($item->enclosure_url)) {
|
||||
$item->enclosure_url = Filter::getAbsoluteUrl($item->enclosure_url, $feed->url);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
5
vendor/PicoFeed/Parsers/Rss10.php
vendored
5
vendor/PicoFeed/Parsers/Rss10.php
vendored
@ -7,6 +7,7 @@ require_once __DIR__.'/Rss20.php';
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Feed;
|
||||
use PicoFeed\Item;
|
||||
use PicoFeed\XmlParser;
|
||||
use PicoFeed\Parsers\Rss20;
|
||||
|
||||
/**
|
||||
@ -38,7 +39,7 @@ class Rss10 extends Rss20
|
||||
*/
|
||||
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$feed->date = $this->parseDate($this->getNamespaceValue($xml->channel, $this->namespaces, 'date'));
|
||||
$feed->date = $this->parseDate(XmlParser::getNamespaceValue($xml->channel, $this->namespaces, 'date'));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -50,7 +51,7 @@ class Rss10 extends Rss20
|
||||
*/
|
||||
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$feed->language = $this->getNamespaceValue($xml->channel, $this->namespaces, 'language');
|
||||
$feed->language = XmlParser::getNamespaceValue($xml->channel, $this->namespaces, 'language');
|
||||
}
|
||||
|
||||
/**
|
||||
|
54
vendor/PicoFeed/Parsers/Rss20.php
vendored
54
vendor/PicoFeed/Parsers/Rss20.php
vendored
@ -6,9 +6,10 @@ use SimpleXMLElement;
|
||||
use PicoFeed\Parser;
|
||||
use PicoFeed\XmlParser;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Feed;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Item;
|
||||
use PicoFeed\Url;
|
||||
|
||||
/**
|
||||
* RSS 2.0 Parser
|
||||
@ -57,6 +58,32 @@ class Rss20 extends Parser
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the feed description
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
    // The description is read from <channel><description>;
    // the string cast yields '' when the element is absent.
    $description = $xml->channel->description;

    $feed->description = (string) $description;
}
|
||||
|
||||
/**
|
||||
* Find the feed logo url
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
{
    // Without a <channel><image><url> element the logo is left untouched
    if (! isset($xml->channel->image->url)) {
        return;
    }

    $feed->logo = (string) $xml->channel->image->url;
}
|
||||
|
||||
/**
|
||||
* Find the feed title
|
||||
*
|
||||
@ -66,7 +93,7 @@ class Rss20 extends Parser
|
||||
*/
|
||||
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
$feed->title = $this->stripWhiteSpace((string) $xml->channel->title) ?: $feed->url;
|
||||
$feed->title = Filter::stripWhiteSpace((string) $xml->channel->title) ?: $feed->url;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -115,10 +142,10 @@ class Rss20 extends Parser
|
||||
*/
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$date = $this->getNamespaceValue($entry, $this->namespaces, 'date');
|
||||
$date = XmlParser::getNamespaceValue($entry, $this->namespaces, 'date');
|
||||
|
||||
if (empty($date)) {
|
||||
$date = $this->getNamespaceValue($entry, $this->namespaces, 'updated');
|
||||
$date = XmlParser::getNamespaceValue($entry, $this->namespaces, 'updated');
|
||||
}
|
||||
|
||||
if (empty($date)) {
|
||||
@ -137,7 +164,7 @@ class Rss20 extends Parser
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->title = $this->stripWhiteSpace((string) $entry->title);
|
||||
$item->title = Filter::stripWhiteSpace((string) $entry->title);
|
||||
|
||||
if (empty($item->title)) {
|
||||
$item->title = $item->url;
|
||||
@ -154,7 +181,7 @@ class Rss20 extends Parser
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->author = $this->getNamespaceValue($entry, $this->namespaces, 'creator');
|
||||
$item->author = XmlParser::getNamespaceValue($entry, $this->namespaces, 'creator');
|
||||
|
||||
if (empty($item->author)) {
|
||||
if (isset($entry->author)) {
|
||||
@ -175,13 +202,13 @@ class Rss20 extends Parser
|
||||
*/
|
||||
public function findItemContent(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$content = $this->getNamespaceValue($entry, $this->namespaces, 'encoded');
|
||||
$content = XmlParser::getNamespaceValue($entry, $this->namespaces, 'encoded');
|
||||
|
||||
if (empty($content) && $entry->description->count() > 0) {
|
||||
$content = (string) $entry->description;
|
||||
}
|
||||
|
||||
$item->content = $this->filterHtml($content, $item->url);
|
||||
$item->content = $content;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -194,9 +221,9 @@ class Rss20 extends Parser
|
||||
public function findItemUrl(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$links = array(
|
||||
$this->getNamespaceValue($entry, $this->namespaces, 'origLink'),
|
||||
XmlParser::getNamespaceValue($entry, $this->namespaces, 'origLink'),
|
||||
isset($entry->link) ? (string) $entry->link : '',
|
||||
$this->getNamespaceValue($entry, $this->namespaces, 'link', 'href'),
|
||||
XmlParser::getNamespaceValue($entry, $this->namespaces, 'link', 'href'),
|
||||
isset($entry->guid) ? (string) $entry->guid : '',
|
||||
);
|
||||
|
||||
@ -247,17 +274,14 @@ class Rss20 extends Parser
|
||||
{
|
||||
if (isset($entry->enclosure)) {
|
||||
|
||||
$item->enclosure_url = $this->getNamespaceValue($entry->enclosure, $this->namespaces, 'origEnclosureLink');
|
||||
$item->enclosure_url = XmlParser::getNamespaceValue($entry->enclosure, $this->namespaces, 'origEnclosureLink');
|
||||
|
||||
if (empty($item->enclosure_url)) {
|
||||
$item->enclosure_url = isset($entry->enclosure['url']) ? (string) $entry->enclosure['url'] : '';
|
||||
}
|
||||
|
||||
$item->enclosure_type = isset($entry->enclosure['type']) ? (string) $entry->enclosure['type'] : '';
|
||||
|
||||
if (Filter::isRelativePath($item->enclosure_url)) {
|
||||
$item->enclosure_url = Filter::getAbsoluteUrl($item->enclosure_url, $feed->url);
|
||||
}
|
||||
$item->enclosure_url = Url::resolve($item->enclosure_url, $feed->url);
|
||||
}
|
||||
}
|
||||
|
||||
|
5
vendor/PicoFeed/PicoFeed.php
vendored
5
vendor/PicoFeed/PicoFeed.php
vendored
@ -4,10 +4,14 @@
|
||||
|
||||
require __DIR__.'/Config.php';
|
||||
require __DIR__.'/Logging.php';
|
||||
require __DIR__.'/Url.php';
|
||||
require __DIR__.'/Item.php';
|
||||
require __DIR__.'/Feed.php';
|
||||
require __DIR__.'/Client.php';
|
||||
require __DIR__.'/Filter.php';
|
||||
require __DIR__.'/Filter/Attribute.php';
|
||||
require __DIR__.'/Filter/Tag.php';
|
||||
require __DIR__.'/Filter/Html.php';
|
||||
require __DIR__.'/XmlParser.php';
|
||||
require __DIR__.'/Encoding.php';
|
||||
require __DIR__.'/Grabber.php';
|
||||
@ -18,3 +22,4 @@ require __DIR__.'/Writer.php';
|
||||
require __DIR__.'/Writers/Rss20.php';
|
||||
require __DIR__.'/Writers/Atom.php';
|
||||
require __DIR__.'/Parser.php';
|
||||
require __DIR__.'/Favicon.php';
|
||||
|
21
vendor/PicoFeed/Reader.php
vendored
21
vendor/PicoFeed/Reader.php
vendored
@ -9,6 +9,7 @@ use PicoFeed\Logging;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Client;
|
||||
use PicoFeed\Parser;
|
||||
use PicoFeed\Url;
|
||||
|
||||
/**
|
||||
* Reader class
|
||||
@ -78,14 +79,7 @@ class Reader
|
||||
}
|
||||
|
||||
$client = Client::getInstance();
|
||||
$client->setTimeout($this->config->getClientTimeout())
|
||||
->setUserAgent($this->config->getClientUserAgent())
|
||||
->setMaxRedirections($this->config->getMaxRedirections())
|
||||
->setMaxBodySize($this->config->getMaxBodySize())
|
||||
->setProxyHostname($this->config->getProxyHostname())
|
||||
->setProxyPort($this->config->getProxyPort())
|
||||
->setProxyUsername($this->config->getProxyUsername())
|
||||
->setProxyPassword($this->config->getProxyPassword())
|
||||
$client->setConfig($this->config)
|
||||
->setLastModified($last_modified)
|
||||
->setEtag($etag);
|
||||
|
||||
@ -249,16 +243,13 @@ class Reader
|
||||
|
||||
if (! empty($link)) {
|
||||
|
||||
// Relative links
|
||||
if (strpos($link, 'http') !== 0) {
|
||||
$feedUrl = new Url($link);
|
||||
$siteUrl = new Url($this->url);
|
||||
|
||||
if ($link{0} === '/') $link = substr($link, 1);
|
||||
if ($this->url{strlen($this->url) - 1} !== '/') $this->url .= '/';
|
||||
|
||||
$link = $this->url.$link;
|
||||
}
|
||||
$link = $feedUrl->getAbsoluteUrl($feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : '');
|
||||
|
||||
Logging::setMessage(get_called_class().': Find subscription link: '.$link);
|
||||
|
||||
$this->download($link);
|
||||
|
||||
return true;
|
||||
|
254
vendor/PicoFeed/Url.php
vendored
Normal file
254
vendor/PicoFeed/Url.php
vendored
Normal file
@ -0,0 +1,254 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
/**
|
||||
* URL class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Url
{
    /**
     * URL
     *
     * @access private
     * @var string
     */
    private $url = '';

    /**
     * URL components (as returned by parse_url())
     *
     * @access private
     * @var array
     */
    private $components = array();

    /**
     * Constructor
     *
     * @access public
     * @param  string   $url    URL
     */
    public function __construct($url)
    {
        $this->url = $url;

        // parse_url() returns false on seriously malformed urls
        $this->components = parse_url($url) ?: array();

        // Issue with PHP < 5.4.7 and protocol relative url:
        // the host is reported as the beginning of the path
        if (version_compare(PHP_VERSION, '5.4.7', '<') && $this->isProtocolRelative() && isset($this->components['path'])) {

            $pos = strpos($this->components['path'], '/', 2);

            if ($pos === false) {
                $pos = strlen($this->components['path']);
            }

            $this->components['host'] = substr($this->components['path'], 2, $pos - 2);
            $this->components['path'] = substr($this->components['path'], $pos);
        }
    }

    /**
     * Shortcut method to get an absolute url from relative url
     *
     * A relative path (no leading slash) is resolved against the directory
     * of the website url, a relative url with an absolute path is resolved
     * against the website root, and a protocol relative url inherits the
     * scheme of the website.
     *
     * @static
     * @access public
     * @param  string   $item_url      Unknown url (can be relative or not)
     * @param  mixed    $website_url   Website url (string or Url instance)
     * @return string                  Absolute url, or '' when it cannot be built
     */
    public static function resolve($item_url, $website_url)
    {
        $link = new self($item_url);
        $website = is_string($website_url) ? new self($website_url) : $website_url;

        if ($link->isRelativeUrl()) {

            if ($link->isRelativePath()) {
                // Use the directory of the website url as base, never the
                // document itself (otherwise "feed.xml" + "a.png" gives "feed.xmla.png")
                return $link->getAbsoluteUrl($website->getBaseUrl($website->getBasePath()));
            }

            return $link->getAbsoluteUrl($website->getBaseUrl());
        }
        else if ($link->isProtocolRelative()) {
            $link->setScheme($website->getScheme());
        }

        return $link->getAbsoluteUrl();
    }

    /**
     * Get the base URL
     *
     * @access public
     * @param  string   $suffix    Add a suffix to the url
     * @return string              scheme://host[:port] + suffix, or '' when there is no host
     */
    public function getBaseUrl($suffix = '')
    {
        return $this->hasHost() ? $this->getScheme('://').$this->getHost().$this->getPort(':').$suffix : '';
    }

    /**
     * Get the absolute URL
     *
     * @access public
     * @param  string   $base_url    Use this url as base url
     * @return string                Absolute url, or '' when there is no host and no base url
     */
    public function getAbsoluteUrl($base_url = '')
    {
        if ($base_url) {
            $base = new self($base_url);

            // The full path always starts with a slash, the base url always ends
            // with its own full path: drop one of them before the concatenation
            $url = $base->getAbsoluteUrl().substr($this->getFullPath(), 1);
        }
        else {
            $url = $this->hasHost() ? $this->getBaseUrl().$this->getFullPath() : '';
        }

        return $url;
    }

    /**
     * Return true if the url is relative
     *
     * @access public
     * @return boolean
     */
    public function isRelativeUrl()
    {
        return ! $this->hasScheme() && ! $this->isProtocolRelative();
    }

    /**
     * Return true if the path is relative
     *
     * @access public
     * @return boolean
     */
    public function isRelativePath()
    {
        $path = $this->getPath();

        // Square brackets: the curly brace offset syntax is a parse error since PHP 8
        return $path === '' || $path[0] !== '/';
    }

    /**
     * Get the path
     *
     * @access public
     * @return string
     */
    public function getPath()
    {
        return empty($this->components['path']) ? '' : $this->components['path'];
    }

    /**
     * Get the full path (path + querystring + fragment)
     *
     * @access public
     * @return string      Always starts with a slash
     */
    public function getFullPath()
    {
        $path = $this->isRelativePath() ? '/' : '';
        $path .= $this->getPath();
        $path .= empty($this->components['query']) ? '' : '?'.$this->components['query'];
        $path .= empty($this->components['fragment']) ? '' : '#'.$this->components['fragment'];

        return $path;
    }

    /**
     * Get the hostname
     *
     * @access public
     * @return string
     */
    public function getHost()
    {
        return empty($this->components['host']) ? '' : $this->components['host'];
    }

    /**
     * Return true if the url has a hostname
     *
     * @access public
     * @return boolean
     */
    public function hasHost()
    {
        return ! empty($this->components['host']);
    }

    /**
     * Get the scheme
     *
     * @access public
     * @param  string   $suffix   Suffix to add to the scheme
     * @return string             Scheme of the url ("http" when there is none) followed by the suffix
     */
    public function getScheme($suffix = '')
    {
        return ($this->hasScheme() ? $this->components['scheme'] : 'http').$suffix;
    }

    /**
     * Set the scheme
     *
     * @access public
     * @param  string   $scheme    Set a scheme
     */
    public function setScheme($scheme)
    {
        $this->components['scheme'] = $scheme;
    }

    /**
     * Return true if the url has a scheme
     *
     * @access public
     * @return boolean
     */
    public function hasScheme()
    {
        return ! empty($this->components['scheme']);
    }

    /**
     * Get the port
     *
     * @access public
     * @param  string   $prefix   Prefix to add when there is a port
     * @return string
     */
    public function getPort($prefix = '')
    {
        return $this->hasPort() ? $prefix.$this->components['port'] : '';
    }

    /**
     * Return true if the url has a port
     *
     * @access public
     * @return boolean
     */
    public function hasPort()
    {
        return ! empty($this->components['port']);
    }

    /**
     * Return true if the url is protocol relative (start with //)
     *
     * @access public
     * @return boolean
     */
    public function isProtocolRelative()
    {
        return strpos($this->url, '//') === 0;
    }

    /**
     * Get the directory part of the path, with a trailing slash
     *
     * "/blog/feed.xml" gives "/blog/", "/blog/" gives "/blog/" and an
     * empty path gives "/".
     *
     * @access private
     * @return string
     */
    private function getBasePath()
    {
        $path = $this->getPath();
        $pos = strrpos($path, '/');

        // No slash at all: the document is located at the root
        return $pos === false ? '/' : substr($path, 0, $pos + 1);
    }
}
|
43
vendor/PicoFeed/Writer.php
vendored
43
vendor/PicoFeed/Writer.php
vendored
@ -9,6 +9,7 @@ use RuntimeException;
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
* @property string $description Feed description
|
||||
*/
|
||||
abstract class Writer
|
||||
{
|
||||
@ -16,7 +17,7 @@ abstract class Writer
|
||||
* Dom object
|
||||
*
|
||||
* @access protected
|
||||
* @var DomDocument
|
||||
* @var \DomDocument
|
||||
*/
|
||||
protected $dom;
|
||||
|
||||
@ -28,6 +29,46 @@ abstract class Writer
|
||||
*/
|
||||
public $items = array();
|
||||
|
||||
/**
|
||||
* Author
|
||||
*
|
||||
* @access public
|
||||
* @var array
|
||||
*/
|
||||
public $author = array();
|
||||
|
||||
/**
|
||||
* Feed URL
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $feed_url = '';
|
||||
|
||||
/**
|
||||
* Website URL
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $site_url = '';
|
||||
|
||||
/**
|
||||
* Feed title
|
||||
*
|
||||
* @access public
|
||||
* @var string
|
||||
*/
|
||||
public $title = '';
|
||||
|
||||
/**
|
||||
* Feed modification date (timestamp)
|
||||
*
|
||||
* @access public
|
||||
* @var integer
|
||||
*/
|
||||
public $updated = 0;
|
||||
|
||||
/**
|
||||
* Generate the XML document
|
||||
*
|
||||
|
42
vendor/PicoFeed/Writers/Atom.php
vendored
42
vendor/PicoFeed/Writers/Atom.php
vendored
@ -72,7 +72,7 @@ class Atom extends Writer
|
||||
$feed->appendChild($id);
|
||||
|
||||
// <updated/>
|
||||
$this->addUpdated($feed, isset($this->updated) ? $this->updated : '');
|
||||
$this->addUpdated($feed, $this->updated);
|
||||
|
||||
// <link rel="alternate" type="text/html" href="http://example.org/"/>
|
||||
$this->addLink($feed, $this->site_url);
|
||||
@ -85,9 +85,29 @@ class Atom extends Writer
|
||||
|
||||
// <entry/>
|
||||
foreach ($this->items as $item) {
|
||||
|
||||
$this->checkRequiredProperties($this->required_item_properties, $item);
|
||||
$feed->appendChild($this->createEntry($item));
|
||||
}
|
||||
|
||||
$this->dom->appendChild($feed);
|
||||
|
||||
if ($filename) {
|
||||
$this->dom->save($filename);
|
||||
}
|
||||
else {
|
||||
return $this->dom->saveXML();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create item entry
|
||||
*
|
||||
* @access public
|
||||
* @param arrray $item Item properties
|
||||
* @return DomElement
|
||||
*/
|
||||
public function createEntry(array $item)
|
||||
{
|
||||
$entry = $this->dom->createElement('entry');
|
||||
|
||||
// <title/>
|
||||
@ -127,19 +147,11 @@ class Atom extends Writer
|
||||
}
|
||||
|
||||
// <author/>
|
||||
if (isset($item['author'])) $this->addAuthor($entry, $item['author']);
|
||||
|
||||
$feed->appendChild($entry);
|
||||
if (isset($item['author'])) {
|
||||
$this->addAuthor($entry, $item['author']);
|
||||
}
|
||||
|
||||
$this->dom->appendChild($feed);
|
||||
|
||||
if ($filename) {
|
||||
$this->dom->save($filename);
|
||||
}
|
||||
else {
|
||||
return $this->dom->saveXML();
|
||||
}
|
||||
return $entry;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -165,9 +177,9 @@ class Atom extends Writer
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param string $value Timestamp
|
||||
* @param integer $value Timestamp
|
||||
*/
|
||||
public function addUpdated(DomElement $xml, $value = '')
|
||||
public function addUpdated(DomElement $xml, $value = 0)
|
||||
{
|
||||
$xml->appendChild($this->dom->createElement(
|
||||
'updated',
|
||||
|
44
vendor/PicoFeed/Writers/Rss20.php
vendored
44
vendor/PicoFeed/Writers/Rss20.php
vendored
@ -75,7 +75,7 @@ class Rss20 extends Writer
|
||||
$channel->appendChild($description);
|
||||
|
||||
// <pubDate/>
|
||||
$this->addPubDate($channel, isset($this->updated) ? $this->updated : '');
|
||||
$this->addPubDate($channel, $this->updated);
|
||||
|
||||
// <atom:link/>
|
||||
$link = $this->dom->createElement('atom:link');
|
||||
@ -94,9 +94,30 @@ class Rss20 extends Writer
|
||||
|
||||
// <item/>
|
||||
foreach ($this->items as $item) {
|
||||
|
||||
$this->checkRequiredProperties($this->required_item_properties, $item);
|
||||
$channel->appendChild($this->createEntry($item));
|
||||
}
|
||||
|
||||
$rss->appendChild($channel);
|
||||
$this->dom->appendChild($rss);
|
||||
|
||||
if ($filename) {
|
||||
$this->dom->save($filename);
|
||||
}
|
||||
else {
|
||||
return $this->dom->saveXML();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create item entry
|
||||
*
|
||||
* @access public
|
||||
* @param arrray $item Item properties
|
||||
* @return DomElement
|
||||
*/
|
||||
public function createEntry(array $item)
|
||||
{
|
||||
$entry = $this->dom->createElement('item');
|
||||
|
||||
// <title/>
|
||||
@ -141,20 +162,11 @@ class Rss20 extends Writer
|
||||
}
|
||||
|
||||
// <author/>
|
||||
if (isset($item['author'])) $this->addAuthor($entry, 'author', $item['author']);
|
||||
|
||||
$channel->appendChild($entry);
|
||||
if (isset($item['author'])) {
|
||||
$this->addAuthor($entry, 'author', $item['author']);
|
||||
}
|
||||
|
||||
$rss->appendChild($channel);
|
||||
$this->dom->appendChild($rss);
|
||||
|
||||
if ($filename) {
|
||||
$this->dom->save($filename);
|
||||
}
|
||||
else {
|
||||
return $this->dom->saveXML();
|
||||
}
|
||||
return $entry;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -162,9 +174,9 @@ class Rss20 extends Writer
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param string $value Timestamp
|
||||
* @param integer $value Timestamp
|
||||
*/
|
||||
public function addPubDate(DomElement $xml, $value = '')
|
||||
public function addPubDate(DomElement $xml, $value = 0)
|
||||
{
|
||||
$xml->appendChild($this->dom->createElement(
|
||||
'pubDate',
|
||||
|
69
vendor/PicoFeed/XmlParser.php
vendored
69
vendor/PicoFeed/XmlParser.php
vendored
@ -3,6 +3,7 @@
|
||||
namespace PicoFeed;
|
||||
|
||||
use DomDocument;
|
||||
use DOMXPath;
|
||||
use SimpleXmlElement;
|
||||
|
||||
/**
|
||||
@ -110,6 +111,20 @@ class XmlParser
|
||||
return $dom;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a HTML document to XML
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $html HTML document
|
||||
* @return string
|
||||
*/
|
||||
public static function HtmlToXml($html)
{
    // The markup is prefixed with a UTF-8 XML declaration before being
    // handed to getHtmlDocument()
    $prolog = '<?xml version="1.0" encoding="UTF-8">';
    $document = self::getHtmlDocument($prolog.$html);

    // Only the first <body> element is serialized
    $body = $document->getElementsByTagName('body')->item(0);

    return $document->saveXML($body);
}
|
||||
|
||||
/**
|
||||
* Get XML parser errors
|
||||
*
|
||||
@ -160,4 +175,58 @@ class XmlParser
|
||||
|
||||
return $encoding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get xml:lang value
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $xml XML string
|
||||
* @return string Language
|
||||
*/
|
||||
public static function getXmlLang($xml)
{
    $document = self::getDomDocument($xml);

    // getDomDocument() returned false: no language can be read
    if ($document !== false) {
        $query = new DOMXPath($document);

        // String value of the xml:lang attribute matched by the expression
        $language = $query->evaluate('string(//@xml:lang[1])');

        return $language ?: '';
    }

    return '';
}
|
||||
|
||||
/**
|
||||
* Get a value from a XML namespace
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml XML element
|
||||
* @param array $namespaces XML namespaces
|
||||
* @param string $property XML tag name
|
||||
* @param string $attribute XML attribute name
|
||||
* @return string
|
||||
*/
|
||||
public static function getNamespaceValue(SimpleXMLElement $xml, array $namespaces, $property, $attribute = '')
{
    // Namespaces are tried in the given order, the first one that
    // contains the requested tag wins
    foreach ($namespaces as $uri) {

        $node = $xml->children($uri)->$property;

        if ($node->count() === 0) {
            continue;
        }

        if ($attribute) {

            foreach ($node->attributes() as $attribute_name => $attribute_value) {

                $matches = $attribute_name === $attribute;

                if ($matches && $attribute_value) {
                    return (string) $attribute_value;
                }
            }
        }

        // No attribute requested (or attribute not found): use the tag content
        return (string) $node;
    }

    return '';
}
|
||||
}
|
||||
|
109
vendor/Readability/JSLikeHTMLElement.php
vendored
109
vendor/Readability/JSLikeHTMLElement.php
vendored
@ -1,109 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* JavaScript-like HTML DOM Element
|
||||
*
|
||||
* This class extends PHP's DOMElement to allow
|
||||
* users to get and set the innerHTML property of
|
||||
* HTML elements in the same way it's done in
|
||||
* JavaScript.
|
||||
*
|
||||
* Example usage:
|
||||
* @code
|
||||
* require_once 'JSLikeHTMLElement.php';
|
||||
* header('Content-Type: text/plain');
|
||||
* $doc = new DOMDocument();
|
||||
* $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
|
||||
* $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>');
|
||||
* $elem = $doc->getElementsByTagName('div')->item(0);
|
||||
*
|
||||
* // print innerHTML
|
||||
* echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>'
|
||||
* echo "\n\n";
|
||||
*
|
||||
* // set innerHTML
|
||||
* $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>';
|
||||
* echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>'
|
||||
* echo "\n\n";
|
||||
*
|
||||
* // print document (with our changes)
|
||||
* echo $doc->saveXML();
|
||||
* @endcode
|
||||
*
|
||||
* @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net
|
||||
* @see http://fivefilters.org (the project this was written for)
|
||||
*/
|
||||
class JSLikeHTMLElement extends DOMElement
|
||||
{
|
||||
/**
|
||||
* Used for setting innerHTML like it's done in JavaScript:
|
||||
* @code
|
||||
* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
|
||||
* @endcode
|
||||
*/
|
||||
public function __set($name, $value) {
|
||||
if ($name == 'innerHTML') {
|
||||
// first, empty the element
|
||||
for ($x=$this->childNodes->length-1; $x>=0; $x--) {
|
||||
$this->removeChild($this->childNodes->item($x));
|
||||
}
|
||||
// $value holds our new inner HTML
|
||||
if ($value != '') {
|
||||
$f = $this->ownerDocument->createDocumentFragment();
|
||||
// appendXML() expects well-formed markup (XHTML)
|
||||
$result = @$f->appendXML($value); // @ to suppress PHP warnings
|
||||
if ($result) {
|
||||
if ($f->hasChildNodes()) $this->appendChild($f);
|
||||
} else {
|
||||
// $value is probably ill-formed
|
||||
$f = new DOMDocument();
|
||||
$value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
|
||||
// Using <htmlfragment> will generate a warning, but so will bad HTML
|
||||
// (and by this point, bad HTML is what we've got).
|
||||
// We use it (and suppress the warning) because an HTML fragment will
|
||||
// be wrapped around <html><body> tags which we don't really want to keep.
|
||||
// Note: despite the warning, if loadHTML succeeds it will return true.
|
||||
$result = @$f->loadHTML('<htmlfragment>'.$value.'</htmlfragment>');
|
||||
if ($result) {
|
||||
$import = $f->getElementsByTagName('htmlfragment')->item(0);
|
||||
foreach ($import->childNodes as $child) {
|
||||
$importedNode = $this->ownerDocument->importNode($child, true);
|
||||
$this->appendChild($importedNode);
|
||||
}
|
||||
} else {
|
||||
// oh well, we tried, we really did. :(
|
||||
// this element is now empty
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$trace = debug_backtrace();
|
||||
trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for getting innerHTML like it's done in JavaScript:
|
||||
* @code
|
||||
* $string = $div->innerHTML;
|
||||
* @endcode
|
||||
*/
|
||||
public function __get($name)
|
||||
{
|
||||
if ($name == 'innerHTML') {
|
||||
$inner = '';
|
||||
foreach ($this->childNodes as $child) {
|
||||
$inner .= $this->ownerDocument->saveXML($child);
|
||||
}
|
||||
return $inner;
|
||||
}
|
||||
|
||||
$trace = debug_backtrace();
|
||||
trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
|
||||
return null;
|
||||
}
|
||||
|
||||
public function __toString()
|
||||
{
|
||||
return '['.$this->tagName.']';
|
||||
}
|
||||
}
|
1137
vendor/Readability/Readability.php
vendored
1137
vendor/Readability/Readability.php
vendored
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user