Update to the last version of PicoFeed
This commit is contained in:
parent
58cb6979a8
commit
3840a87128
17
common.php
17
common.php
@ -3,8 +3,19 @@
|
||||
require __DIR__.'/check_setup.php';
|
||||
require __DIR__.'/lib/Translator.php';
|
||||
require __DIR__.'/vendor/PicoDb/Database.php';
|
||||
require __DIR__.'/vendor/PicoFeed/Client.php';
|
||||
require __DIR__.'/vendor/PicoFeed/Parser.php';
|
||||
require __DIR__.'/vendor/PicoFeed/PicoFeed.php';
|
||||
require __DIR__.'/vendor/Readability/Readability.php';
|
||||
|
||||
require __DIR__.'/vendor/SimpleValidator/Validator.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Base.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/Required.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/Unique.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/MaxLength.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/MinLength.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/Integer.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/Equals.php';
|
||||
require __DIR__.'/vendor/SimpleValidator/Validators/AlphaNumeric.php';
|
||||
|
||||
require __DIR__.'/models/config.php';
|
||||
require __DIR__.'/models/user.php';
|
||||
require __DIR__.'/models/feed.php';
|
||||
@ -40,8 +51,6 @@ defined('AUTO_UPDATE_DOWNLOAD_DIRECTORY') or define('AUTO_UPDATE_DOWNLOAD_DIRECT
|
||||
defined('AUTO_UPDATE_ARCHIVE_DIRECTORY') or define('AUTO_UPDATE_ARCHIVE_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'archive');
|
||||
defined('AUTO_UPDATE_BACKUP_DIRECTORY') or define('AUTO_UPDATE_BACKUP_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'backup');
|
||||
|
||||
PicoFeed\Client::proxy(PROXY_HOSTNAME, PROXY_PORT, PROXY_USERNAME, PROXY_PASSWORD);
|
||||
|
||||
PicoDb\Database::bootstrap('db', function() {
|
||||
|
||||
$db = new PicoDb\Database(array(
|
||||
|
@ -1,7 +1,5 @@
|
||||
<?php
|
||||
|
||||
require __DIR__.'/../vendor/PicoFeed/Writers/Atom.php';
|
||||
|
||||
use PicoFarad\Router;
|
||||
use PicoFarad\Response;
|
||||
use PicoFarad\Request;
|
||||
|
@ -31,7 +31,7 @@ Router\before(function($action) {
|
||||
date_default_timezone_set(Model\Config\get('timezone') ?: 'UTC');
|
||||
|
||||
// HTTP secure headers
|
||||
$frame_src = \PicoFeed\Filter::$iframe_whitelist;
|
||||
// Start from the application's iframe whitelist (single statement terminator)
$frame_src = Model\Config\get_iframe_whitelist();
|
||||
$frame_src[] = 'https://login.persona.org';
|
||||
|
||||
Response\csp(array(
|
||||
|
@ -17,7 +17,7 @@ else {
|
||||
}
|
||||
|
||||
if (! empty($options['database'])) {
|
||||
\Model\Database\select($options['database']);
|
||||
Model\Database\select($options['database']);
|
||||
}
|
||||
|
||||
$limit = ! empty($options['limit']) && ctype_digit($options['limit']) ? (int) $options['limit'] : Model\Feed\LIMIT_ALL;
|
||||
|
@ -2,29 +2,52 @@
|
||||
|
||||
namespace Model\Config;
|
||||
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validator.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Base.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Required.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Unique.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/MaxLength.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/MinLength.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Integer.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Equals.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Integer.php';
|
||||
|
||||
use SimpleValidator\Validator;
|
||||
use SimpleValidator\Validators;
|
||||
use PicoDb\Database;
|
||||
use PicoFeed\Config as ReaderConfig;
|
||||
use PicoFeed\Logging;
|
||||
|
||||
const DB_VERSION = 24;
|
||||
const HTTP_USERAGENT = 'Miniflux - http://miniflux.net';
|
||||
const HTTP_FAKE_USERAGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36';
|
||||
const HTTP_USER_AGENT = 'Miniflux (http://miniflux.net)';
|
||||
|
||||
// Get PicoFeed config
|
||||
// Build and return a new PicoFeed Config object:
// - timezone read from the stored settings via get('timezone')
// - client timeout from HTTP_TIMEOUT; HTTP_USER_AGENT as both client and grabber user agent
// - proxy hostname/port/username/password from the PROXY_* constants
// - iframe whitelist for the content filter from get_iframe_whitelist()
function get_reader_config()
|
||||
{
|
||||
$config = new ReaderConfig;
|
||||
$config->setTimezone(get('timezone'));
|
||||
|
||||
$config->setClientTimeout(HTTP_TIMEOUT);
|
||||
$config->setClientUserAgent(HTTP_USER_AGENT);
|
||||
$config->setGrabberUserAgent(HTTP_USER_AGENT);
|
||||
|
||||
$config->setProxyHostname(PROXY_HOSTNAME);
|
||||
$config->setProxyPort(PROXY_PORT);
|
||||
$config->setProxyUsername(PROXY_USERNAME);
|
||||
$config->setProxyPassword(PROXY_PASSWORD);
|
||||
|
||||
$config->setFilterIframeWhitelist(get_iframe_whitelist());
|
||||
|
||||
return $config;
|
||||
}
|
||||
|
||||
// Return the fixed list of URL prefixes (YouTube, Vimeo, Dailymotion) allowed as iframe sources.
// The list is hard-coded: both http and https forms are listed for each site,
// plus the protocol-relative '//www.youtube.com' form.
function get_iframe_whitelist()
|
||||
{
|
||||
return array(
|
||||
'//www.youtube.com',
|
||||
'http://www.youtube.com',
|
||||
'https://www.youtube.com',
|
||||
'http://player.vimeo.com',
|
||||
'https://player.vimeo.com',
|
||||
'http://www.dailymotion.com',
|
||||
'https://www.dailymotion.com',
|
||||
);
|
||||
}
|
||||
|
||||
// Send a debug message to the console
|
||||
function debug($line)
|
||||
{
|
||||
\PicoFeed\Logging::log($line);
|
||||
Logging::setMessage($line);
|
||||
write_debug();
|
||||
}
|
||||
|
||||
@ -32,14 +55,7 @@ function debug($line)
|
||||
function write_debug()
|
||||
{
|
||||
if (DEBUG) {
|
||||
|
||||
$data = '';
|
||||
|
||||
foreach (\PicoFeed\Logging::$messages as $line) {
|
||||
$data .= $line.PHP_EOL;
|
||||
}
|
||||
|
||||
file_put_contents(DEBUG_FILENAME, $data);
|
||||
file_put_contents(DEBUG_FILENAME, implode(PHP_EOL, Logging::getMessages()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,14 +2,6 @@
|
||||
|
||||
namespace Model\Database;
|
||||
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validator.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Base.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Required.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/MaxLength.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/MinLength.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Equals.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/AlphaNumeric.php';
|
||||
|
||||
use SimpleValidator\Validator;
|
||||
use SimpleValidator\Validators;
|
||||
|
||||
|
@ -2,17 +2,15 @@
|
||||
|
||||
namespace Model\Feed;
|
||||
|
||||
require_once __DIR__.'/../vendor/PicoFeed/Filter.php';
|
||||
require_once __DIR__.'/../vendor/PicoFeed/Export.php';
|
||||
require_once __DIR__.'/../vendor/PicoFeed/Import.php';
|
||||
require_once __DIR__.'/../vendor/PicoFeed/Reader.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validator.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Base.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Required.php';
|
||||
|
||||
use SimpleValidator\Validator;
|
||||
use SimpleValidator\Validators;
|
||||
use PicoDb\Database;
|
||||
use PicoFeed\Export;
|
||||
use PicoFeed\Import;
|
||||
use PicoFeed\Reader;
|
||||
use PicoFeed\Logging;
|
||||
use Model\Config;
|
||||
use Model\Item;
|
||||
|
||||
const LIMIT_ALL = -1;
|
||||
|
||||
@ -32,14 +30,15 @@ function update(array $values)
|
||||
// Export all feeds
|
||||
function export_opml()
|
||||
{
|
||||
$opml = new \PicoFeed\Export(get_all());
|
||||
$opml = new Export(get_all());
|
||||
return $opml->execute();
|
||||
}
|
||||
|
||||
// Import OPML file
|
||||
function import_opml($content)
|
||||
{
|
||||
$import = new \PicoFeed\Import($content);
|
||||
Logging::setTimezone(Config\get('timezone'));
|
||||
$import = new Import($content);
|
||||
$feeds = $import->execute();
|
||||
|
||||
if ($feeds) {
|
||||
@ -61,65 +60,71 @@ function import_opml($content)
|
||||
|
||||
$db->closeTransaction();
|
||||
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add a new feed from an URL
|
||||
function create($url, $grabber = false)
|
||||
function create($url, $enable_grabber = false)
|
||||
{
|
||||
$reader = new \PicoFeed\Reader;
|
||||
$resource = $reader->download($url, '', '', HTTP_TIMEOUT, \Model\Config\HTTP_USERAGENT);
|
||||
$reader = new Reader(Config\get_reader_config());
|
||||
$resource = $reader->download($url);
|
||||
|
||||
$parser = $reader->getParser();
|
||||
|
||||
if ($parser !== false) {
|
||||
|
||||
$parser->grabber = $grabber;
|
||||
if ($enable_grabber) {
|
||||
$parser->enableContentGrabber();
|
||||
}
|
||||
|
||||
$feed = $parser->execute();
|
||||
|
||||
if ($feed === false) {
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
return false;
|
||||
}
|
||||
|
||||
if (! $feed->url) $feed->url = $reader->getUrl();
|
||||
if (! $feed->getUrl()) {
|
||||
$feed->url = $reader->getUrl();
|
||||
}
|
||||
|
||||
if (! $feed->title) {
|
||||
\Model\Config\write_debug();
|
||||
if (! $feed->getTitle()) {
|
||||
Config\write_debug();
|
||||
return false;
|
||||
}
|
||||
|
||||
$db = Database::get('db');
|
||||
|
||||
// Check if the feed is already there
|
||||
if (! $db->table('feeds')->eq('feed_url', $reader->getUrl())->count()) {
|
||||
|
||||
// Etag and LastModified are added the next update
|
||||
$rs = $db->table('feeds')->save(array(
|
||||
'title' => $feed->title,
|
||||
'site_url' => $feed->url,
|
||||
'title' => $feed->getTitle(),
|
||||
'site_url' => $feed->getUrl(),
|
||||
'feed_url' => $reader->getUrl(),
|
||||
'download_content' => $grabber ? 1 : 0
|
||||
'download_content' => $enable_grabber ? 1 : 0
|
||||
));
|
||||
|
||||
if ($rs) {
|
||||
|
||||
$feed_id = $db->getConnection()->getLastId();
|
||||
\Model\Item\update_all($feed_id, $feed->items, $grabber);
|
||||
\Model\Config\write_debug();
|
||||
Item\update_all($feed_id, $feed->getItems(), $enable_grabber);
|
||||
Config\write_debug();
|
||||
|
||||
return (int) $feed_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -143,16 +148,17 @@ function refresh_all($limit = LIMIT_ALL)
|
||||
function refresh($feed_id)
|
||||
{
|
||||
$feed = get($feed_id);
|
||||
if (empty($feed)) return false;
|
||||
|
||||
$reader = new \PicoFeed\Reader;
|
||||
if (empty($feed)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$reader = new Reader(Config\get_reader_config());
|
||||
|
||||
$resource = $reader->download(
|
||||
$feed['feed_url'],
|
||||
$feed['last_modified'],
|
||||
$feed['etag'],
|
||||
HTTP_TIMEOUT,
|
||||
\Model\Config\HTTP_USERAGENT
|
||||
$feed['etag']
|
||||
);
|
||||
|
||||
// Update the `last_checked` column each time, HTTP cache or not
|
||||
@ -160,7 +166,7 @@ function refresh($feed_id)
|
||||
|
||||
if (! $resource->isModified()) {
|
||||
update_parsing_error($feed_id, 0);
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -171,14 +177,8 @@ function refresh($feed_id)
|
||||
if ($feed['download_content']) {
|
||||
|
||||
// Don't fetch previous items, only new one
|
||||
$parser->grabber_ignore_urls = Database::get('db')
|
||||
->table('items')
|
||||
->eq('feed_id', $feed_id)
|
||||
->findAllByColumn('url');
|
||||
|
||||
$parser->grabber = true;
|
||||
$parser->grabber_timeout = HTTP_TIMEOUT;
|
||||
$parser->grabber_user_agent = \Model\Config\HTTP_FAKE_USERAGENT;
|
||||
$parser->enableContentGrabber();
|
||||
$parser->setGrabberIgnoreUrls(Database::get('db')->table('items')->eq('feed_id', $feed_id)->findAllByColumn('url'));
|
||||
}
|
||||
|
||||
$result = $parser->execute();
|
||||
@ -187,15 +187,16 @@ function refresh($feed_id)
|
||||
|
||||
update_parsing_error($feed_id, 0);
|
||||
update_cache($feed_id, $resource->getLastModified(), $resource->getEtag());
|
||||
\Model\Item\update_all($feed_id, $result->items, $parser->grabber);
|
||||
\Model\Config\write_debug();
|
||||
|
||||
Item\update_all($feed_id, $result->getItems(), $feed['download_content']);
|
||||
Config\write_debug();
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
update_parsing_error($feed_id, 1);
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
|
||||
return false;
|
||||
}
|
||||
|
127
models/item.php
127
models/item.php
@ -2,11 +2,13 @@
|
||||
|
||||
namespace Model\Item;
|
||||
|
||||
require_once __DIR__.'/../vendor/Readability/Readability.php';
|
||||
require_once __DIR__.'/../vendor/PicoFeed/Grabber.php';
|
||||
require_once __DIR__.'/../vendor/PicoFeed/Filter.php';
|
||||
|
||||
use Model\Config;
|
||||
use PicoDb\Database;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Grabber;
|
||||
use PicoFeed\Client;
|
||||
use PicoFeed\Filter;
|
||||
use Readability;
|
||||
|
||||
// Get all items without filtering
|
||||
function get_everything()
|
||||
@ -141,7 +143,7 @@ function get_bookmarks($offset = null, $limit = null)
|
||||
->join('feeds', 'id', 'feed_id')
|
||||
->in('status', array('read', 'unread'))
|
||||
->eq('bookmark', 1)
|
||||
->orderBy('updated', \Model\Config\get('items_sorting_direction'))
|
||||
->orderBy('updated', Config\get('items_sorting_direction'))
|
||||
->offset($offset)
|
||||
->limit($limit)
|
||||
->findAll();
|
||||
@ -201,7 +203,7 @@ function get_nav($item, $status = array('unread'), $bookmark = array(1, 0), $fee
|
||||
->table('items')
|
||||
->columns('id', 'status', 'title', 'bookmark')
|
||||
->neq('status', 'removed')
|
||||
->orderBy('updated', \Model\Config\get('items_sorting_direction'));
|
||||
->orderBy('updated', Config\get('items_sorting_direction'));
|
||||
|
||||
if ($feed_id) $query->eq('feed_id', $feed_id);
|
||||
|
||||
@ -377,7 +379,7 @@ function mark_feed_as_read($feed_id)
|
||||
// Mark all read items to removed after X days
|
||||
function autoflush()
|
||||
{
|
||||
$autoflush = (int) \Model\Config\get('autoflush');
|
||||
$autoflush = (int) Config\get('autoflush');
|
||||
|
||||
if ($autoflush > 0) {
|
||||
|
||||
@ -401,9 +403,9 @@ function autoflush()
|
||||
}
|
||||
|
||||
// Update all items
|
||||
function update_all($feed_id, array $items, $grabber = false)
|
||||
function update_all($feed_id, array $items, $enable_grabber = false)
|
||||
{
|
||||
$nocontent = (bool) \Model\Config\get('nocontent');
|
||||
$nocontent = (bool) Config\get('nocontent');
|
||||
|
||||
$items_in_feed = array();
|
||||
|
||||
@ -412,54 +414,55 @@ function update_all($feed_id, array $items, $grabber = false)
|
||||
|
||||
foreach ($items as $item) {
|
||||
|
||||
\PicoFeed\Logging::log('Item => '.$item->id.' '.$item->url);
|
||||
Logging::setMessage('Item => '.$item->getId().' '.$item->getUrl());
|
||||
|
||||
// Item parsed correctly?
|
||||
if ($item->id && $item->url) {
|
||||
if ($item->getId() && $item->getUrl()) {
|
||||
|
||||
\PicoFeed\Logging::log('Item parsed correctly');
|
||||
Logging::setMessage('Item parsed correctly');
|
||||
|
||||
// Get item record in database, if any
|
||||
$itemrec = $db
|
||||
->table('items')
|
||||
->columns('enclosure')
|
||||
->eq('id', $item->id)->findOne();
|
||||
->eq('id', $item->getId())
|
||||
->findOne();
|
||||
|
||||
// Insert a new item
|
||||
if ($itemrec === null) {
|
||||
|
||||
\PicoFeed\Logging::log('Item added to the database');
|
||||
Logging::setMessage('Item added to the database');
|
||||
|
||||
if (! $item->content && ! $nocontent && $grabber) {
|
||||
$item->content = download_content_url($item->url);
|
||||
if ($enable_grabber && ! $nocontent && ! $item->getContent()) {
|
||||
$item->content = download_content_url($item->getUrl());
|
||||
}
|
||||
|
||||
$db->table('items')->save(array(
|
||||
'id' => $item->id,
|
||||
'title' => $item->title,
|
||||
'url' => $item->url,
|
||||
'updated' => $item->updated,
|
||||
'author' => $item->author,
|
||||
'content' => $nocontent ? '' : $item->content,
|
||||
'id' => $item->getId(),
|
||||
'title' => $item->getTitle(),
|
||||
'url' => $item->getUrl(),
|
||||
'updated' => $item->getDate(),
|
||||
'author' => $item->getAuthor(),
|
||||
'content' => $nocontent ? '' : $item->getContent(),
|
||||
'status' => 'unread',
|
||||
'feed_id' => $feed_id,
|
||||
'enclosure' => isset($item->enclosure) ? $item->enclosure : null,
|
||||
'enclosure_type' => isset($item->enclosure_type) ? $item->enclosure_type : null,
|
||||
'language' => $item->language,
|
||||
'enclosure' => $item->getEnclosureUrl(),
|
||||
'enclosure_type' => $item->getEnclosureType(),
|
||||
'language' => $item->getLanguage(),
|
||||
));
|
||||
}
|
||||
else if (isset($item->enclosure) && $item->enclosure && !$itemrec['enclosure']) {
|
||||
else if (! $itemrec['enclosure'] && $item->getEnclosureUrl()) {
|
||||
|
||||
\PicoFeed\Logging::log('Update item enclosure');
|
||||
Logging::setMessage('Update item enclosure');
|
||||
|
||||
$db->table('items')->eq('id', $item->id)->save(array(
|
||||
$db->table('items')->eq('id', $item->getId())->save(array(
|
||||
'status' => 'unread',
|
||||
'enclosure' => $item->enclosure,
|
||||
'enclosure_type' => isset($item->enclosure_type) ? $item->enclosure_type : null,
|
||||
'enclosure' => $item->getEnclosureUrl(),
|
||||
'enclosure_type' => $item->getEnclosureType(),
|
||||
));
|
||||
}
|
||||
else {
|
||||
\PicoFeed\Logging::log('Item already in the database');
|
||||
Logging::setMessage('Item already in the database');
|
||||
}
|
||||
|
||||
// Items inside this feed
|
||||
@ -467,10 +470,20 @@ function update_all($feed_id, array $items, $grabber = false)
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from the database items marked as "removed"
|
||||
// and not present inside the feed
|
||||
// Cleanup old items
|
||||
cleanup($feed_id, $items_in_feed);
|
||||
|
||||
$db->closeTransaction();
|
||||
}
|
||||
|
||||
// Remove from the database items marked as "removed"
|
||||
// and not present inside the feed
|
||||
function cleanup($feed_id, array $items_in_feed)
|
||||
{
|
||||
if (! empty($items_in_feed)) {
|
||||
|
||||
$db = Database::get('db');
|
||||
|
||||
$removed_items = $db
|
||||
->table('items')
|
||||
->columns('id')
|
||||
@ -489,7 +502,7 @@ function update_all($feed_id, array $items, $grabber = false)
|
||||
if (! empty($items_to_remove)) {
|
||||
|
||||
$nb_items = count($items_to_remove);
|
||||
\PicoFeed\Logging::log('There is '.$nb_items.' items to remove');
|
||||
Logging::setMessage('There is '.$nb_items.' items to remove');
|
||||
|
||||
// Handle the case when there is a huge number of items to remove
|
||||
// Sqlite have a limit of 1000 sql variables by default
|
||||
@ -508,43 +521,31 @@ function update_all($feed_id, array $items, $grabber = false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
\PicoFeed\Logging::log('Db transaction => '.($db->getConnection()->inTransaction() ? 'ok' : 'rollback'));
|
||||
|
||||
$db->closeTransaction();
|
||||
}
|
||||
|
||||
// Download content from an URL
|
||||
function download_content_url($url)
|
||||
{
|
||||
$client = \PicoFeed\Client::create();
|
||||
$client->url = $url;
|
||||
$client->timeout = HTTP_TIMEOUT;
|
||||
$client->user_agent = \Model\Config\HTTP_FAKE_USERAGENT;
|
||||
$client->execute();
|
||||
|
||||
$html = $client->getContent();
|
||||
|
||||
if (! empty($html)) {
|
||||
|
||||
// Try first with PicoFeed grabber and with Readability after
|
||||
$grabber = new \PicoFeed\Grabber($url, $html, $client->getEncoding());
|
||||
$content = '';
|
||||
|
||||
$grabber = new Grabber($url);
|
||||
$grabber->setConfig(Config\get_reader_config());
|
||||
$grabber->download();
|
||||
|
||||
if ($grabber->parse()) {
|
||||
$content = $grabber->content;
|
||||
// parse() succeeded: take the content extracted by the grabber
$content = $grabber->getContent();
|
||||
}
|
||||
else {
|
||||
$content = download_content_readability($grabber->getRawContent(), $url);
|
||||
}
|
||||
|
||||
if (empty($content)) {
|
||||
$content = download_content_readability($grabber->html, $url);
|
||||
if (! empty($content)) {
|
||||
$filter = new Filter($content, $url);
|
||||
$filter->setConfig(Config\get_reader_config());
|
||||
$content = $filter->execute();
|
||||
}
|
||||
|
||||
// Filter content
|
||||
$filter = new \PicoFeed\Filter($content, $url);
|
||||
return $filter->execute();
|
||||
}
|
||||
|
||||
return '';
|
||||
return $content;
|
||||
}
|
||||
|
||||
// Download content from item ID
|
||||
@ -555,7 +556,7 @@ function download_content_id($item_id)
|
||||
|
||||
if (! empty($content)) {
|
||||
|
||||
if (! \Model\Config\get('nocontent')) {
|
||||
if (! Config\get('nocontent')) {
|
||||
|
||||
// Save content
|
||||
Database::get('db')
|
||||
@ -564,7 +565,7 @@ function download_content_id($item_id)
|
||||
->save(array('content' => $content));
|
||||
}
|
||||
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
|
||||
return array(
|
||||
'result' => true,
|
||||
@ -572,7 +573,7 @@ function download_content_id($item_id)
|
||||
);
|
||||
}
|
||||
|
||||
\Model\Config\write_debug();
|
||||
Config\write_debug();
|
||||
|
||||
return array(
|
||||
'result' => false,
|
||||
@ -585,7 +586,7 @@ function download_content_readability($content, $url)
|
||||
{
|
||||
if (! empty($content)) {
|
||||
|
||||
$readability = new \Readability($content, $url);
|
||||
$readability = new Readability($content, $url);
|
||||
|
||||
if ($readability->init()) {
|
||||
return $readability->getContent()->innerHTML;
|
||||
|
@ -2,11 +2,6 @@
|
||||
|
||||
namespace Model\User;
|
||||
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validator.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Base.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/Required.php';
|
||||
require_once __DIR__.'/../vendor/SimpleValidator/Validators/MaxLength.php';
|
||||
|
||||
use SimpleValidator\Validator;
|
||||
use SimpleValidator\Validators;
|
||||
use PicoDb\Database;
|
||||
|
409
vendor/PicoFeed/Client.php
vendored
409
vendor/PicoFeed/Client.php
vendored
@ -2,59 +2,170 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
require_once __DIR__.'/Logging.php';
|
||||
use LogicException;
|
||||
use Clients\Curl;
|
||||
use Clients\Stream;
|
||||
use PicoFeed\Logging;
|
||||
|
||||
/**
|
||||
* Client class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package client
|
||||
*/
|
||||
abstract class Client
|
||||
{
|
||||
protected static $proxy_hostname = null;
|
||||
protected static $proxy_port = null;
|
||||
protected static $proxy_username = null;
|
||||
protected static $proxy_password = null;
|
||||
/**
|
||||
* Flag that say if the resource have been modified
|
||||
*
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
private $is_modified = true;
|
||||
|
||||
public $encoding = '';
|
||||
public $etag = '';
|
||||
public $last_modified = '';
|
||||
public $is_modified = true;
|
||||
public $content = '';
|
||||
public $url = '';
|
||||
public $timeout = 10;
|
||||
public $max_redirects = 5;
|
||||
public $max_body_size = 2097152; // 2MB
|
||||
public $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)';
|
||||
/**
|
||||
* HTTP encoding
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $encoding = '';
|
||||
|
||||
/**
|
||||
* HTTP Etag header
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $etag = '';
|
||||
|
||||
public static function create($adapter = null)
|
||||
{
|
||||
return $adapter ?: self::chooseAdapter();
|
||||
}
|
||||
/**
|
||||
* HTTP Last-Modified header
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $last_modified = '';
|
||||
|
||||
/**
|
||||
* Proxy hostname
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $proxy_hostname = '';
|
||||
|
||||
public static function chooseAdapter()
|
||||
/**
|
||||
* Proxy port
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
*/
|
||||
protected $proxy_port = 3128;
|
||||
|
||||
/**
|
||||
* Proxy username
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $proxy_username = '';
|
||||
|
||||
/**
|
||||
* Proxy password
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $proxy_password = '';
|
||||
|
||||
/**
|
||||
* Client connection timeout
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
*/
|
||||
protected $timeout = 10;
|
||||
|
||||
/**
|
||||
* User-agent
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)';
|
||||
|
||||
/**
|
||||
* Real URL used (can be changed after a HTTP redirect)
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $url = '';
|
||||
|
||||
/**
|
||||
* Page/Feed content
|
||||
*
|
||||
* @access protected
|
||||
* @var string
|
||||
*/
|
||||
protected $content = '';
|
||||
|
||||
/**
|
||||
* Number maximum of HTTP redirections to avoid infinite loops
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
*/
|
||||
protected $max_redirects = 5;
|
||||
|
||||
/**
|
||||
* Maximum size of the HTTP body response
|
||||
*
|
||||
* @access protected
|
||||
* @var integer
|
||||
*/
|
||||
protected $max_body_size = 2097152; // 2MB
|
||||
|
||||
/**
|
||||
* Get client instance: curl or stream driver
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @return \PicoFeed\Client
|
||||
*/
|
||||
public static function getInstance()
|
||||
{
|
||||
if (function_exists('curl_init')) {
|
||||
|
||||
require_once __DIR__.'/Clients/Curl.php';
|
||||
return new Clients\Curl;
|
||||
|
||||
} else if (ini_get('allow_url_fopen')) {
|
||||
}
|
||||
else if (ini_get('allow_url_fopen')) {
|
||||
|
||||
require_once __DIR__.'/Clients/Stream.php';
|
||||
return new Clients\Stream;
|
||||
}
|
||||
|
||||
throw new \LogicException('You must have "allow_url_fopen=1" or curl extension installed');
|
||||
throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
|
||||
}
|
||||
|
||||
|
||||
public function execute()
|
||||
/**
|
||||
* Perform the HTTP request
|
||||
*
|
||||
* @access public
|
||||
* @param string $url URL
|
||||
* @return bool
|
||||
*/
|
||||
public function execute($url = '')
|
||||
{
|
||||
if ($this->url === '') {
|
||||
throw new \LogicException('The URL is missing');
|
||||
if ($url !== '') {
|
||||
$this->url = $url;
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().' Fetch URL: '.$this->url);
|
||||
Logging::log(\get_called_class().' Etag provided: '.$this->etag);
|
||||
Logging::log(\get_called_class().' Last-Modified provided: '.$this->last_modified);
|
||||
Logging::setMessage(get_called_class().' Fetch URL: '.$this->url);
|
||||
Logging::setMessage(get_called_class().' Etag provided: '.$this->etag);
|
||||
Logging::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified);
|
||||
|
||||
$response = $this->doRequest();
|
||||
|
||||
@ -62,25 +173,42 @@ abstract class Client
|
||||
|
||||
if ($response['status'] == 304) {
|
||||
$this->is_modified = false;
|
||||
Logging::log(\get_called_class().' Resource not modified');
|
||||
Logging::setMessage(get_called_class().' Resource not modified');
|
||||
}
|
||||
else if ($response['status'] == 404) {
|
||||
Logging::log(\get_called_class().' Resource not found');
|
||||
Logging::setMessage(get_called_class().' Resource not found');
|
||||
}
|
||||
else {
|
||||
$this->etag = isset($response['headers']['ETag']) ? $response['headers']['ETag'] : '';
|
||||
$this->last_modified = isset($response['headers']['Last-Modified']) ? $response['headers']['Last-Modified'] : '';
|
||||
$etag = isset($response['headers']['ETag']) ? $response['headers']['ETag'] : '';
|
||||
$last_modified = isset($response['headers']['Last-Modified']) ? $response['headers']['Last-Modified'] : '';
|
||||
$this->content = $response['body'];
|
||||
|
||||
if (isset($response['headers']['Content-Type'])) {
|
||||
$result = explode('charset=', strtolower($response['headers']['Content-Type']));
|
||||
$this->encoding = isset($result[1]) ? $result[1] : '';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (($this->etag && $this->etag === $etag) || ($this->last_modified && $last_modified === $this->last_modified)) {
|
||||
$this->is_modified = false;
|
||||
}
|
||||
|
||||
$this->etag = $etag;
|
||||
$this->last_modified = $last_modified;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @param array $lines List of headers
|
||||
* @return array
|
||||
*/
|
||||
public function parseHeaders(array $lines)
|
||||
{
|
||||
$status = 200;
|
||||
@ -88,7 +216,7 @@ abstract class Client
|
||||
|
||||
foreach ($lines as $line) {
|
||||
|
||||
if (strpos($line, 'HTTP') === 0/* && strpos($line, '301') === false && strpos($line, '302') === false*/) {
|
||||
if (strpos($line, 'HTTP') === 0) {
|
||||
$status = (int) substr($line, 9, 3);
|
||||
}
|
||||
else if (strpos($line, ':') !== false) {
|
||||
@ -98,71 +226,242 @@ abstract class Client
|
||||
}
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().' HTTP status code: '.$status);
|
||||
Logging::setMessage(get_called_class().' HTTP status code: '.$status);
|
||||
|
||||
foreach ($headers as $name => $value) {
|
||||
Logging::log(\get_called_class().' HTTP header: '.$name.' => '.$value);
|
||||
Logging::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
|
||||
}
|
||||
|
||||
return array($status, $headers);
|
||||
}
|
||||
|
||||
|
||||
public static function proxy($hostname, $port = 3128, $username = '', $password = '')
|
||||
{
|
||||
self::$proxy_hostname = $hostname;
|
||||
self::$proxy_port = $port;
|
||||
self::$proxy_username = $username;
|
||||
self::$proxy_password = $password;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set the Last-Modified HTTP header
|
||||
*
|
||||
* @access public
|
||||
* @param string $last_modified Header value
|
||||
* @return \PicoFeed\Client
|
||||
*/
|
||||
public function setLastModified($last_modified)
|
||||
{
|
||||
$this->last_modified = $last_modified;
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the value of the Last-Modified HTTP header
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getLastModified()
|
||||
{
|
||||
return $this->last_modified;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set the value of the Etag HTTP header
|
||||
*
|
||||
* @access public
|
||||
* @param string $etag Etag HTTP header value
|
||||
* @return \PicoFeed\Client
|
||||
*/
|
||||
public function setEtag($etag)
|
||||
{
|
||||
$this->etag = $etag;
|
||||
return $this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the Etag HTTP header value
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getEtag()
|
||||
{
|
||||
return $this->etag;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the final url value
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getUrl()
|
||||
{
|
||||
return $this->url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the url
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
* @return \PicoFeed\Client
|
||||
*/
|
||||
public function setUrl($url)
|
||||
{
|
||||
$this->url = $url;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the body of the HTTP response
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getContent()
|
||||
{
|
||||
return $this->content;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the encoding value from HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getEncoding()
|
||||
{
|
||||
return $this->encoding;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Tell whether the remote resource has changed
 *
 * @access  public
 * @return  bool   Current value of the is_modified flag
 */
public function isModified()
{
    return $this->is_modified;
}
|
||||
|
||||
/**
 * Set connection timeout
 *
 * A falsy value (0, null, '') is ignored and the current timeout is kept.
 *
 * @access  public
 * @param   integer   $timeout   Connection timeout
 * @return  \PicoFeed\Client     The client itself, for chaining
 */
public function setTimeout($timeout)
{
    if ($timeout) {
        $this->timeout = $timeout;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set a custom user agent
 *
 * A falsy value is ignored and the current user agent is kept.
 *
 * @access  public
 * @param   string   $user_agent   User Agent
 * @return  \PicoFeed\Client       The client itself, for chaining
 */
public function setUserAgent($user_agent)
{
    if ($user_agent) {
        $this->user_agent = $user_agent;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the maximum number of HTTP redirections
 *
 * A falsy value (including 0) is ignored and the current limit is kept.
 *
 * @access  public
 * @param   integer   $max   Maximum
 * @return  \PicoFeed\Client   The client itself, for chaining
 */
public function setMaxRedirections($max)
{
    if ($max) {
        $this->max_redirects = $max;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the maximum size of the HTTP body
 *
 * A falsy value (including 0) is ignored and the current limit is kept.
 *
 * @access  public
 * @param   integer   $max   Maximum
 * @return  \PicoFeed\Client   The client itself, for chaining
 */
public function setMaxBodySize($max)
{
    if ($max) {
        $this->max_body_size = $max;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the proxy hostname
 *
 * A falsy value is ignored and the current hostname is kept.
 *
 * @access  public
 * @param   string   $hostname   Proxy hostname
 * @return  \PicoFeed\Client     The client itself, for chaining
 */
public function setProxyHostname($hostname)
{
    if ($hostname) {
        $this->proxy_hostname = $hostname;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the proxy port
 *
 * A falsy value (including 0) is ignored and the current port is kept.
 *
 * @access  public
 * @param   integer   $port   Proxy port
 * @return  \PicoFeed\Client  The client itself, for chaining
 */
public function setProxyPort($port)
{
    if ($port) {
        $this->proxy_port = $port;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the proxy username
 *
 * A falsy value is ignored and the current username is kept.
 *
 * @access  public
 * @param   string   $username   Proxy username
 * @return  \PicoFeed\Client     The client itself, for chaining
 */
public function setProxyUsername($username)
{
    if ($username) {
        $this->proxy_username = $username;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the proxy password
 *
 * A falsy value is ignored and the current password is kept.
 *
 * @access  public
 * @param   string   $password   Password
 * @return  \PicoFeed\Client     The client itself, for chaining
 */
public function setProxyPassword($password)
{
    if ($password) {
        $this->proxy_password = $password;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set config object
 *
 * Copies the grabber timeout, grabber user agent, redirection limit,
 * body size limit and proxy settings from the config into this client.
 * Every value goes through its own setter, and those setters keep the
 * current setting when the config value is falsy.
 *
 * A null config is ignored, so the client keeps its current settings
 * instead of failing on a method call on null.
 *
 * @access  public
 * @param   \PicoFeed\Config|null   $config   Config instance
 * @return  \PicoFeed\Client                  The client itself, for chaining
 */
public function setConfig($config)
{
    if ($config !== null) {
        $this->setTimeout($config->getGrabberTimeout());
        $this->setUserAgent($config->getGrabberUserAgent());
        $this->setMaxRedirections($config->getMaxRedirections());
        $this->setMaxBodySize($config->getMaxBodySize());
        $this->setProxyHostname($config->getProxyHostname());
        $this->setProxyPort($config->getProxyPort());
        $this->setProxyUsername($config->getProxyUsername());
        $this->setProxyPassword($config->getProxyPassword());
    }

    return $this;
}
|
||||
}
|
98
vendor/PicoFeed/Clients/Curl.php
vendored
98
vendor/PicoFeed/Clients/Curl.php
vendored
@ -3,27 +3,80 @@
|
||||
namespace PicoFeed\Clients;
|
||||
|
||||
use \PicoFeed\Logging;
|
||||
use \PicoFeed\Client;
|
||||
|
||||
class Curl extends \PicoFeed\Client
|
||||
/**
|
||||
* cURL HTTP client
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package client
|
||||
*/
|
||||
class Curl extends Client
|
||||
{
|
||||
/**
|
||||
* HTTP response body
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $body = '';
|
||||
|
||||
/**
|
||||
* Body size
|
||||
*
|
||||
* @access private
|
||||
* @var integer
|
||||
*/
|
||||
private $body_length = 0;
|
||||
|
||||
/**
|
||||
* HTTP response headers
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $headers = array();
|
||||
|
||||
/**
|
||||
* Counter of the number of headers received
|
||||
*
|
||||
* @access private
|
||||
* @var integer
|
||||
*/
|
||||
private $headers_counter = 0;
|
||||
|
||||
|
||||
/**
 * cURL callback to read the HTTP body
 *
 * Each chunk is appended to the body buffer while a running byte count
 * is kept. When the function returns -1, cURL stops reading the HTTP
 * response; this happens as soon as the accumulated size exceeds the
 * maximum body size.
 *
 * @access  public
 * @param   resource   $ch       cURL handler
 * @param   string     $buffer   Chunk of data
 * @return  integer              Length of the buffer, or -1 to abort
 */
public function readBody($ch, $buffer)
{
    $chunk_size = strlen($buffer);
    $this->body_length += $chunk_size;

    // The counter is updated before the check, so the chunk that crosses the limit is never stored
    if ($this->body_length > $this->max_body_size) {
        return -1;
    }

    $this->body .= $buffer;

    return $chunk_size;
}
|
||||
|
||||
|
||||
/**
|
||||
* cURL callback to read HTTP headers
|
||||
*
|
||||
* @access public
|
||||
* @param resource $ch cURL handler
|
||||
* @param string $buffer Header line
|
||||
* @return integer Length of the buffer
|
||||
*/
|
||||
public function readHeaders($ch, $buffer)
|
||||
{
|
||||
$length = strlen($buffer);
|
||||
@ -43,7 +96,13 @@ class Curl extends \PicoFeed\Client
|
||||
return $length;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Do the HTTP request
|
||||
*
|
||||
* @access public
|
||||
* @param bool $follow_location Flag used when there is an open_basedir restriction
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
*/
|
||||
public function doRequest($follow_location = true)
|
||||
{
|
||||
$request_headers = array('Connection: close');
|
||||
@ -54,6 +113,7 @@ class Curl extends \PicoFeed\Client
|
||||
$ch = curl_init();
|
||||
|
||||
curl_setopt($ch, CURLOPT_URL, $this->url);
|
||||
curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1);
|
||||
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent);
|
||||
@ -67,28 +127,34 @@ class Curl extends \PicoFeed\Client
|
||||
curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory');
|
||||
curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory');
|
||||
|
||||
if (parent::$proxy_hostname) {
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
curl_setopt($ch, CURLOPT_PROXYPORT, parent::$proxy_port);
|
||||
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
|
||||
curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP');
|
||||
curl_setopt($ch, CURLOPT_PROXY, parent::$proxy_hostname);
|
||||
curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname);
|
||||
|
||||
if (parent::$proxy_username) {
|
||||
curl_setopt($ch, CURLOPT_PROXYUSERPWD, parent::$proxy_username.':'.parent::$proxy_password);
|
||||
if ($this->proxy_username) {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
|
||||
curl_exec($ch);
|
||||
|
||||
Logging::log(\get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
|
||||
Logging::log(\get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
|
||||
Logging::log(\get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
|
||||
Logging::log(\get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
|
||||
Logging::log(\get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
|
||||
Logging::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME));
|
||||
Logging::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME));
|
||||
Logging::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME));
|
||||
Logging::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD));
|
||||
Logging::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
|
||||
|
||||
if (curl_errno($ch)) {
|
||||
|
||||
Logging::log(\get_called_class().' cURL error: '.curl_error($ch));
|
||||
Logging::setMessage(get_called_class().' cURL error: '.curl_error($ch));
|
||||
|
||||
curl_close($ch);
|
||||
return false;
|
||||
|
33
vendor/PicoFeed/Clients/Stream.php
vendored
33
vendor/PicoFeed/Clients/Stream.php
vendored
@ -3,6 +3,7 @@
|
||||
namespace PicoFeed\Clients;
|
||||
|
||||
use \PicoFeed\Logging;
|
||||
use \PicoFeed\Client;
|
||||
|
||||
/**
|
||||
* Stream context HTTP client
|
||||
@ -10,7 +11,7 @@ use \PicoFeed\Logging;
|
||||
* @author Frederic Guillot
|
||||
* @package client
|
||||
*/
|
||||
class Stream extends \PicoFeed\Client
|
||||
class Stream extends Client
|
||||
{
|
||||
/**
|
||||
* Do the HTTP request
|
||||
@ -24,11 +25,19 @@ class Stream extends \PicoFeed\Client
|
||||
$headers = array(
|
||||
'Connection: close',
|
||||
'User-Agent: '.$this->user_agent,
|
||||
'Accept-Encoding: gzip',
|
||||
);
|
||||
|
||||
if ($this->etag) $headers[] = 'If-None-Match: '.$this->etag;
|
||||
if ($this->last_modified) $headers[] = 'If-Modified-Since: '.$this->last_modified;
|
||||
if (function_exists('gzdecode')) {
|
||||
$headers[] = 'Accept-Encoding: gzip';
|
||||
}
|
||||
|
||||
if ($this->etag) {
|
||||
$headers[] = 'If-None-Match: '.$this->etag;
|
||||
}
|
||||
|
||||
if ($this->last_modified) {
|
||||
$headers[] = 'If-Modified-Since: '.$this->last_modified;
|
||||
}
|
||||
|
||||
// Create context
|
||||
$context_options = array(
|
||||
@ -41,14 +50,22 @@ class Stream extends \PicoFeed\Client
|
||||
)
|
||||
);
|
||||
|
||||
if (parent::$proxy_hostname) {
|
||||
$context_options['http']['proxy'] = 'tcp://'.parent::$proxy_hostname.':'.parent::$proxy_port;
|
||||
if ($this->proxy_hostname) {
|
||||
|
||||
Logging::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);
|
||||
|
||||
$context_options['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port;
|
||||
$context_options['http']['request_fulluri'] = true;
|
||||
|
||||
if (parent::$proxy_username) {
|
||||
$headers[] = 'Proxy-Authorization: Basic '.base64_encode(parent::$proxy_username.':'.parent::$proxy_password);
|
||||
if ($this->proxy_username) {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: Yes');
|
||||
|
||||
$headers[] = 'Proxy-Authorization: Basic '.base64_encode($this->proxy_username.':'.$this->proxy_password);
|
||||
$context_options['http']['header'] = implode("\r\n", $headers);
|
||||
}
|
||||
else {
|
||||
Logging::setMessage(get_called_class().' Proxy credentials: No');
|
||||
}
|
||||
}
|
||||
|
||||
$context = stream_context_create($context_options);
|
||||
|
230
vendor/PicoFeed/Encoding.php
vendored
230
vendor/PicoFeed/Encoding.php
vendored
@ -1,32 +1,6 @@
|
||||
<?php
|
||||
/*
|
||||
Copyright (c) 2008 Sebastián Grignoli
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of copyright holders nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
namespace PicoFeed;
|
||||
|
||||
/**
|
||||
* @author "Sebastián Grignoli" <grignoli@framework2.com.ar>
|
||||
@ -36,14 +10,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
* @example https://github.com/neitanod/forceutf8
|
||||
* @license Revised BSD
|
||||
*/
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
class Encoding {
|
||||
|
||||
class Encoding
|
||||
{
|
||||
protected static $win1252ToUtf8 = array(
|
||||
128 => "\xe2\x82\xac",
|
||||
|
||||
130 => "\xe2\x80\x9a",
|
||||
131 => "\xc6\x92",
|
||||
132 => "\xe2\x80\x9e",
|
||||
@ -55,10 +25,7 @@ class Encoding {
|
||||
138 => "\xc5\xa0",
|
||||
139 => "\xe2\x80\xb9",
|
||||
140 => "\xc5\x92",
|
||||
|
||||
142 => "\xc5\xbd",
|
||||
|
||||
|
||||
145 => "\xe2\x80\x98",
|
||||
146 => "\xe2\x80\x99",
|
||||
147 => "\xe2\x80\x9c",
|
||||
@ -71,49 +38,12 @@ class Encoding {
|
||||
154 => "\xc5\xa1",
|
||||
155 => "\xe2\x80\xba",
|
||||
156 => "\xc5\x93",
|
||||
|
||||
158 => "\xc5\xbe",
|
||||
159 => "\xc5\xb8"
|
||||
);
|
||||
|
||||
protected static $brokenUtf8ToUtf8 = array(
|
||||
"\xc2\x80" => "\xe2\x82\xac",
|
||||
|
||||
"\xc2\x82" => "\xe2\x80\x9a",
|
||||
"\xc2\x83" => "\xc6\x92",
|
||||
"\xc2\x84" => "\xe2\x80\x9e",
|
||||
"\xc2\x85" => "\xe2\x80\xa6",
|
||||
"\xc2\x86" => "\xe2\x80\xa0",
|
||||
"\xc2\x87" => "\xe2\x80\xa1",
|
||||
"\xc2\x88" => "\xcb\x86",
|
||||
"\xc2\x89" => "\xe2\x80\xb0",
|
||||
"\xc2\x8a" => "\xc5\xa0",
|
||||
"\xc2\x8b" => "\xe2\x80\xb9",
|
||||
"\xc2\x8c" => "\xc5\x92",
|
||||
|
||||
"\xc2\x8e" => "\xc5\xbd",
|
||||
|
||||
|
||||
"\xc2\x91" => "\xe2\x80\x98",
|
||||
"\xc2\x92" => "\xe2\x80\x99",
|
||||
"\xc2\x93" => "\xe2\x80\x9c",
|
||||
"\xc2\x94" => "\xe2\x80\x9d",
|
||||
"\xc2\x95" => "\xe2\x80\xa2",
|
||||
"\xc2\x96" => "\xe2\x80\x93",
|
||||
"\xc2\x97" => "\xe2\x80\x94",
|
||||
"\xc2\x98" => "\xcb\x9c",
|
||||
"\xc2\x99" => "\xe2\x84\xa2",
|
||||
"\xc2\x9a" => "\xc5\xa1",
|
||||
"\xc2\x9b" => "\xe2\x80\xba",
|
||||
"\xc2\x9c" => "\xc5\x93",
|
||||
|
||||
"\xc2\x9e" => "\xc5\xbe",
|
||||
"\xc2\x9f" => "\xc5\xb8"
|
||||
);
|
||||
|
||||
protected static $utf8ToWin1252 = array(
|
||||
"\xe2\x82\xac" => "\x80",
|
||||
|
||||
"\xe2\x80\x9a" => "\x82",
|
||||
"\xc6\x92" => "\x83",
|
||||
"\xe2\x80\x9e" => "\x84",
|
||||
@ -125,10 +55,7 @@ class Encoding {
|
||||
"\xc5\xa0" => "\x8a",
|
||||
"\xe2\x80\xb9" => "\x8b",
|
||||
"\xc5\x92" => "\x8c",
|
||||
|
||||
"\xc5\xbd" => "\x8e",
|
||||
|
||||
|
||||
"\xe2\x80\x98" => "\x91",
|
||||
"\xe2\x80\x99" => "\x92",
|
||||
"\xe2\x80\x9c" => "\x93",
|
||||
@ -141,12 +68,10 @@ class Encoding {
|
||||
"\xc5\xa1" => "\x9a",
|
||||
"\xe2\x80\xba" => "\x9b",
|
||||
"\xc5\x93" => "\x9c",
|
||||
|
||||
"\xc5\xbe" => "\x9e",
|
||||
"\xc5\xb8" => "\x9f"
|
||||
);
|
||||
|
||||
/**
 * Convert a string (or every value of an array, recursively) to UTF-8
 *
 * Bytes that already form a well-formed 2, 3 or 4 byte UTF-8 sequence are
 * copied untouched. Any other byte >= 0x80 is treated as Windows-1252 /
 * ISO-8859-1 and re-encoded, so input mixing both encodings is tolerated.
 *
 * @static
 * @access  public
 * @param   mixed   $text   String, array of values, or any other value
 * @return  mixed           UTF-8 string, converted array, or the value untouched when it is neither
 */
public static function toUTF8($text)
{
    if (is_array($text)) {

        foreach ($text as $k => $v) {
            $text[$k] = self::toUTF8($v);
        }

        return $text;
    }

    if (! is_string($text)) {
        return $text;
    }

    $max = strlen($text);
    $buf = '';

    for ($i = 0; $i < $max; $i++) {

        // Square brackets: the {} string offset syntax is not available on PHP 8
        $c1 = $text[$i];
        $code = ord($c1);

        if ($code < 0x80) {
            // Plain ASCII, it doesn't need conversion
            $buf .= $c1;
            continue;
        }

        // Number of continuation bytes announced by a UTF-8 lead byte (0 = not a lead byte)
        if ($code >= 0xc0 && $code <= 0xdf) {
            $expected = 1;
        }
        else if ($code >= 0xe0 && $code <= 0xef) {
            $expected = 2;
        }
        else if ($code >= 0xf0 && $code <= 0xf7) {
            $expected = 3;
        }
        else {
            $expected = 0;
        }

        // Collect the continuation bytes (0x80-0xbf); a missing or invalid one empties $sequence
        $sequence = $c1;

        for ($j = 1; $j <= $expected; $j++) {

            $next = $i + $j < $max ? ord($text[$i + $j]) : 0;

            if ($next < 0x80 || $next > 0xbf) {
                $sequence = '';
                break;
            }

            $sequence .= $text[$i + $j];
        }

        if ($expected > 0 && $sequence !== '') {
            // Already UTF-8: copy the whole sequence (all 4 bytes for 4-byte sequences) and skip it
            $buf .= $sequence;
            $i += $expected;
        }
        else if (isset(self::$win1252ToUtf8[$code])) {
            // Found in Windows-1252 special cases (0x80-0x9f)
            $buf .= self::$win1252ToUtf8[$code];
        }
        else {
            // Not valid UTF-8: encode the single byte as a 2-byte UTF-8 sequence
            $buf .= chr(0xc0 | ($code >> 6)).chr(0x80 | ($code & 0x3f));
        }
    }

    return $buf;
}
|
||||
|
||||
/**
 * Convert a string (or every value of an array, recursively) to Windows-1252
 *
 * The text is first normalised with toUTF8(), then the UTF-8 sequences listed
 * in $utf8ToWin1252 are replaced by their single-byte form before utf8_decode()
 * handles the remaining characters.
 *
 * @static
 * @access  public
 * @param   mixed   $text   String, array of values, or any other value
 * @return  mixed           Converted string or array, or the value untouched when it is neither
 */
public static function toWin1252($text)
{
    if (is_array($text)) {

        foreach ($text as $key => $value) {
            $text[$key] = self::toWin1252($value);
        }

        return $text;
    }

    if (! is_string($text)) {
        return $text;
    }

    $search = array_keys(self::$utf8ToWin1252);
    $replace = array_values(self::$utf8ToWin1252);

    return utf8_decode(str_replace($search, $replace, self::toUTF8($text)));
}
|
||||
|
||||
/**
 * Alias of toWin1252()
 *
 * @static
 * @access  public
 * @param   mixed   $text   Value passed through to toWin1252()
 * @return  mixed
 */
public static function toISO8859($text)
{
    $converted = self::toWin1252($text);

    return $converted;
}
|
||||
|
||||
/**
 * Alias of toWin1252()
 *
 * @static
 * @access  public
 * @param   mixed   $text   Value passed through to toWin1252()
 * @return  mixed
 */
public static function toLatin1($text)
{
    $converted = self::toWin1252($text);

    return $converted;
}
|
||||
|
||||
/**
 * Repair text that was UTF-8 encoded more than once
 *
 * The decode/re-encode round trip is repeated until the text stops changing,
 * then applied one final time. Arrays are processed value by value.
 *
 * @static
 * @access  public
 * @param   mixed   $text   String or array of strings
 * @return  mixed
 */
public static function fixUTF8($text)
{
    if (is_array($text)) {

        foreach ($text as $key => $value) {
            $text[$key] = self::fixUTF8($value);
        }

        return $text;
    }

    $search = array_keys(self::$utf8ToWin1252);
    $replace = array_values(self::$utf8ToWin1252);
    $previous = "";

    // Loose comparison kept on purpose: it is what the original loop condition used
    while ($previous != $text) {
        $previous = $text;
        $text = self::toUTF8(utf8_decode(str_replace($search, $replace, $text)));
    }

    return self::toUTF8(utf8_decode(str_replace($search, $replace, $text)));
}
|
||||
|
||||
/**
 * Fix UTF-8 text that was converted from Windows-1252 as if it were ISO-8859-1
 *
 * Such a conversion ignores the Windows-1252 characters from 80 to 9F; every
 * sequence listed in $brokenUtf8ToUtf8 is replaced by the intended character.
 * See: http://en.wikipedia.org/wiki/Windows-1252
 *
 * @static
 * @access  public
 * @param   string   $text   UTF-8 text
 * @return  string
 */
public static function UTF8FixWin1252Chars($text)
{
    $broken = array_keys(self::$brokenUtf8ToUtf8);
    $fixed = array_values(self::$brokenUtf8ToUtf8);

    return str_replace($broken, $fixed, $text);
}
|
||||
|
||||
/**
 * Strip a leading UTF-8 byte order mark (EF BB BF)
 *
 * @static
 * @access  public
 * @param   string   $str   Input text
 * @return  string          Text without the leading BOM, or the input unchanged when there is none
 */
public static function removeBOM($str = "")
{
    $bom = "\xef\xbb\xbf";

    return substr($str, 0, 3) == $bom ? substr($str, 3) : $str;
}
|
||||
|
||||
/**
 * Map an encoding label to one of the two supported encodings
 *
 * The label is upper-cased and stripped of every character that is not a
 * letter, a digit or whitespace, so "iso-8859-1", "ISO_8859-1" and "ISO88591"
 * are all recognised. Unknown labels fall back to UTF-8.
 *
 * @static
 * @access  public
 * @param   string   $encodingLabel   Encoding name, in any common spelling
 * @return  string                    'UTF-8' or 'ISO-8859-1'
 */
public static function normalizeEncoding($encodingLabel)
{
    $encoding = strtoupper($encodingLabel);

    // The lookup must use the sanitised label, otherwise labels containing "-" or "_" never match
    $encoding = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = array(
        'ISO88591' => 'ISO-8859-1',
        'ISO8859' => 'ISO-8859-1',
        'ISO' => 'ISO-8859-1',
        'LATIN1' => 'ISO-8859-1',
        'LATIN' => 'ISO-8859-1',
        'UTF8' => 'UTF-8',
        'UTF' => 'UTF-8',
        'WIN1252' => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1'
    );

    if (empty($equivalences[$encoding])) {
        return 'UTF-8';
    }

    return $equivalences[$encoding];
}
|
||||
|
||||
/**
 * Convert text to the encoding designated by a label
 *
 * The label is resolved with normalizeEncoding(); the text is then converted
 * with toUTF8() or toLatin1() accordingly.
 *
 * @static
 * @access  public
 * @param   string   $encodingLabel   Encoding name
 * @param   mixed    $text            Text to convert
 * @return  mixed
 */
public static function encode($encodingLabel, $text)
{
    $target = self::normalizeEncoding($encodingLabel);

    if ($target == 'UTF-8') {
        return self::toUTF8($text);
    }

    if ($target == 'ISO-8859-1') {
        return self::toLatin1($text);
    }
}
|
||||
|
||||
|
||||
public static function cp1251ToUtf8($input)
|
||||
{
|
||||
return iconv('CP1251', 'UTF-8//TRANSLIT', $input);
|
||||
|
45
vendor/PicoFeed/Export.php
vendored
45
vendor/PicoFeed/Export.php
vendored
@ -2,26 +2,56 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use SimpleXMLElement;
|
||||
|
||||
/**
|
||||
* OPML export class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Export
|
||||
{
|
||||
/**
|
||||
* List of feeds to export
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $content = array();
|
||||
|
||||
public $required_fields = array(
|
||||
/**
|
||||
* List of required properties for each feed
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_fields = array(
|
||||
'title',
|
||||
'site_url',
|
||||
'feed_url'
|
||||
'feed_url',
|
||||
);
|
||||
|
||||
|
||||
/**
 * Constructor
 *
 * Keeps the list of feeds that will be exported.
 *
 * @access  public
 * @param   array   $content   List of feeds
 */
public function __construct(array $content)
{
    $this->content = $content;
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the OPML document
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function execute()
|
||||
{
|
||||
$xml = new \SimpleXMLElement('<?xml version="1.0" encoding="utf-8"?><opml/>');
|
||||
$xml = new SimpleXMLElement('<?xml version="1.0" encoding="utf-8"?><opml/>');
|
||||
|
||||
$head = $xml->addChild('head');
|
||||
$head->addChild('title', 'OPML Export');
|
||||
@ -35,13 +65,14 @@ class Export
|
||||
foreach ($this->required_fields as $field) {
|
||||
|
||||
if (! isset($feed[$field])) {
|
||||
|
||||
$valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (! $valid) continue;
|
||||
if (! $valid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$outline = $body->addChild('outline');
|
||||
$outline->addAttribute('xmlUrl', $feed['feed_url']);
|
||||
|
150
vendor/PicoFeed/Feed.php
vendored
Normal file
150
vendor/PicoFeed/Feed.php
vendored
Normal file
@ -0,0 +1,150 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
/**
 * Feed
 *
 * Plain container for the properties of a feed and its list of items.
 *
 * @author  Frederic Guillot
 * @package picofeed
 */
class Feed
{
    /**
     * Feed items
     *
     * @access public
     * @var array
     */
    public $items = array();

    /**
     * Feed id
     *
     * @access public
     * @var string
     */
    public $id = '';

    /**
     * Feed title
     *
     * @access public
     * @var string
     */
    public $title = '';

    /**
     * Feed url
     *
     * @access public
     * @var string
     */
    public $url = '';

    /**
     * Feed date
     *
     * @access public
     * @var integer
     */
    public $date = 0;

    /**
     * Feed language
     *
     * @access public
     * @var string
     */
    public $language = '';

    /**
     * Return feed information
     *
     * One "Feed::<property> = <value>" line per scalar property, then the
     * number of items, then each item converted to a string and preceded
     * by a "----" separator line.
     *
     * @access public
     * @return string
     */
    public function __toString()
    {
        $parts = array();

        foreach (array('id', 'title', 'url', 'date', 'language') as $name) {
            $parts[] = 'Feed::'.$name.' = '.$this->$name.PHP_EOL;
        }

        $parts[] = 'Feed::items = '.count($this->items).' items'.PHP_EOL;

        foreach ($this->items as $entry) {
            $parts[] = '----'.PHP_EOL.$entry;
        }

        return implode('', $parts);
    }

    /**
     * Get title
     *
     * @access public
     * @return string
     */
    public function getTitle()
    {
        return $this->title;
    }

    /**
     * Get url
     *
     * @access public
     * @return string
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Get date
     *
     * @access public
     * @return integer
     */
    public function getDate()
    {
        return $this->date;
    }

    /**
     * Get language
     *
     * @access public
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }

    /**
     * Get id
     *
     * @access public
     * @return string
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * Get feed items
     *
     * @access public
     * @return array
     */
    public function getItems()
    {
        return $this->items;
    }
}
|
230
vendor/PicoFeed/Filter.php
vendored
230
vendor/PicoFeed/Filter.php
vendored
@ -2,14 +2,24 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use DOMDocument;
|
||||
|
||||
/**
|
||||
* Filter class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
* @package picofeed
|
||||
*/
|
||||
class Filter
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config
|
||||
*/
|
||||
private $config = null;
|
||||
|
||||
/**
|
||||
* Filtered XML data
|
||||
*
|
||||
@ -61,11 +71,10 @@ class Filter
|
||||
/**
|
||||
* Tags and attribute whitelist
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $whitelist_tags = array(
|
||||
private $whitelist_tags = array(
|
||||
'audio' => array('controls', 'src'),
|
||||
'video' => array('poster', 'controls', 'height', 'width', 'src'),
|
||||
'source' => array('src', 'type'),
|
||||
@ -109,11 +118,10 @@ class Filter
|
||||
/**
|
||||
* Tags blacklist, strip the content of those tags
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $blacklist_tags = array(
|
||||
private $blacklisted_tags = array(
|
||||
'script'
|
||||
);
|
||||
|
||||
@ -121,11 +129,10 @@ class Filter
|
||||
* Scheme whitelist
|
||||
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $scheme_whitelist = array(
|
||||
private $scheme_whitelist = array(
|
||||
'//',
|
||||
'data:image/png;base64,',
|
||||
'data:image/gif;base64,',
|
||||
@ -164,11 +171,10 @@ class Filter
|
||||
/**
|
||||
* Attributes used for external resources
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $media_attributes = array(
|
||||
private $media_attributes = array(
|
||||
'src',
|
||||
'href',
|
||||
'poster',
|
||||
@ -177,11 +183,10 @@ class Filter
|
||||
/**
|
||||
* Blacklisted resources
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $media_blacklist = array(
|
||||
private $media_blacklist = array(
|
||||
'feeds.feedburner.com',
|
||||
'share.feedsportal.com',
|
||||
'da.feedsportal.com',
|
||||
@ -209,11 +214,10 @@ class Filter
|
||||
/**
|
||||
* Mandatory attributes for specified tags
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $required_attributes = array(
|
||||
private $required_attributes = array(
|
||||
'a' => array('href'),
|
||||
'img' => array('src'),
|
||||
'iframe' => array('src'),
|
||||
@ -224,22 +228,20 @@ class Filter
|
||||
/**
|
||||
* Add attributes to specified tags
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $add_attributes = array(
|
||||
private $add_attributes = array(
|
||||
'a' => 'rel="noreferrer" target="_blank"'
|
||||
);
|
||||
|
||||
/**
|
||||
* Attributes that must be integer
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $integer_attributes = array(
|
||||
private $integer_attributes = array(
|
||||
'width',
|
||||
'height',
|
||||
'frameborder',
|
||||
@ -248,11 +250,10 @@ class Filter
|
||||
/**
|
||||
* Iframe source whitelist, everything else is ignored
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
public static $iframe_whitelist = array(
|
||||
private $iframe_whitelist = array(
|
||||
'//www.youtube.com',
|
||||
'http://www.youtube.com',
|
||||
'https://www.youtube.com',
|
||||
@ -273,10 +274,10 @@ class Filter
|
||||
{
|
||||
$this->url = $site_url;
|
||||
|
||||
\libxml_use_internal_errors(true);
|
||||
libxml_use_internal_errors(true);
|
||||
|
||||
// Convert bad formatted documents to XML
|
||||
$dom = new \DOMDocument;
|
||||
$dom = new DOMDocument;
|
||||
$dom->loadHTML('<?xml version="1.0" encoding="UTF-8">'.$data);
|
||||
$this->input = $dom->saveXML($dom->getElementsByTagName('body')->item(0));
|
||||
}
|
||||
@ -300,7 +301,7 @@ class Filter
|
||||
$this->data = $this->removeEmptyTags($this->data);
|
||||
$this->data = $this->removeMultipleTags($this->data);
|
||||
|
||||
return $this->data;
|
||||
return trim($this->data);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -372,9 +373,9 @@ class Filter
|
||||
}
|
||||
|
||||
// Check for required attributes
|
||||
if (isset(self::$required_attributes[$name])) {
|
||||
if (isset($this->required_attributes[$name])) {
|
||||
|
||||
foreach (self::$required_attributes[$name] as $required_attribute) {
|
||||
foreach ($this->required_attributes[$name] as $required_attribute) {
|
||||
|
||||
if (! in_array($required_attribute, $used_attributes)) {
|
||||
|
||||
@ -389,9 +390,9 @@ class Filter
|
||||
$this->data .= '<'.$name.$attr_data;
|
||||
|
||||
// Add custom attributes
|
||||
if (isset(self::$add_attributes[$name])) {
|
||||
if (isset($this->add_attributes[$name])) {
|
||||
|
||||
$this->data .= ' '.self::$add_attributes[$name].' ';
|
||||
$this->data .= ' '.$this->add_attributes[$name].' ';
|
||||
}
|
||||
|
||||
// If img or br, we don't close it here
|
||||
@ -399,7 +400,7 @@ class Filter
|
||||
}
|
||||
}
|
||||
|
||||
if (in_array($name, self::$blacklist_tags)) {
|
||||
if (in_array($name, $this->blacklisted_tags)) {
|
||||
$this->strip_content = true;
|
||||
}
|
||||
|
||||
@ -530,7 +531,7 @@ class Filter
|
||||
*/
|
||||
public function isAllowedTag($name)
{
    // The whitelist is a per-instance map (tag name => allowed attributes),
    // so it is read through $this and not as a static class member.
    return isset($this->whitelist_tags[$name]);
}
|
||||
|
||||
/**
|
||||
@ -543,7 +544,7 @@ class Filter
|
||||
*/
|
||||
public function isAllowedAttribute($tag, $attribute)
{
    // A tag that is not whitelisted has no attribute list at all: answer
    // "not allowed" instead of indexing a missing key.
    if (! isset($this->whitelist_tags[$tag])) {
        return false;
    }

    // The whitelist is an instance property (tag name => allowed attributes).
    return in_array($attribute, $this->whitelist_tags[$tag]);
}
|
||||
|
||||
/**
|
||||
@ -555,7 +556,7 @@ class Filter
|
||||
*/
|
||||
public function isResource($attribute)
{
    // Media attributes are configured per instance (see setMediaAttributes()),
    // so the list is read through $this and not as a static class member.
    return in_array($attribute, $this->media_attributes);
}
|
||||
|
||||
/**
|
||||
@ -567,7 +568,7 @@ class Filter
|
||||
*/
|
||||
public function isAllowedIframeResource($value)
|
||||
{
|
||||
foreach (self::$iframe_whitelist as $url) {
|
||||
foreach ($this->iframe_whitelist as $url) {
|
||||
|
||||
if (strpos($value, $url) === 0) {
|
||||
return true;
|
||||
@ -586,7 +587,7 @@ class Filter
|
||||
*/
|
||||
public function isAllowedProtocol($value)
|
||||
{
|
||||
foreach (self::$scheme_whitelist as $protocol) {
|
||||
foreach ($this->scheme_whitelist as $protocol) {
|
||||
|
||||
if (strpos($value, $protocol) === 0) {
|
||||
return true;
|
||||
@ -605,7 +606,7 @@ class Filter
|
||||
*/
|
||||
public function isBlacklistedMedia($resource)
|
||||
{
|
||||
foreach (self::$media_blacklist as $name) {
|
||||
foreach ($this->media_blacklist as $name) {
|
||||
|
||||
if (strpos($resource, $name) !== false) {
|
||||
return true;
|
||||
@ -640,7 +641,7 @@ class Filter
|
||||
*/
|
||||
public function validateAttributeValue($attribute, $value)
|
||||
{
|
||||
if (in_array($attribute, self::$integer_attributes)) {
|
||||
if (in_array($attribute, $this->integer_attributes)) {
|
||||
return ctype_digit($value);
|
||||
}
|
||||
|
||||
@ -758,4 +759,147 @@ class Filter
|
||||
|
||||
return $encoding;
|
||||
}
|
||||
|
||||
/**
 * Set whitelisted tags and the attributes allowed for each tag
 *
 * An empty list is ignored: the current whitelist is kept.
 *
 * @access  public
 * @param   array   $values   List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
 * @return  \PicoFeed\Filter
 */
public function setWhitelistedTags(array $values)
{
    if (! empty($values)) {
        $this->whitelist_tags = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set blacklisted tags
 *
 * An empty list is ignored: the current blacklist is kept.
 *
 * @access  public
 * @param   array   $values   List of tags: ['video', 'img']
 * @return  \PicoFeed\Filter
 */
public function setBlacklistedTags(array $values)
{
    if (! empty($values)) {
        $this->blacklisted_tags = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the scheme whitelist
 *
 * An empty list is ignored: the current whitelist is kept.
 *
 * @access  public
 * @param   array   $values   List of schemes: ['http://', 'ftp://']
 * @return  \PicoFeed\Filter
 */
public function setSchemeWhitelist(array $values)
{
    if (! empty($values)) {
        $this->scheme_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set media attributes (used to load external resources)
 *
 * An empty list is ignored: the current list is kept.
 *
 * @access  public
 * @param   array   $values   List of attributes: ['src', 'href']
 * @return  \PicoFeed\Filter
 */
public function setMediaAttributes(array $values)
{
    if (! empty($values)) {
        $this->media_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set blacklisted external resources
 *
 * An empty list is ignored: the current blacklist is kept.
 *
 * @access  public
 * @param   array   $values   List of resources: ['http://google.com/', '...']
 * @return  \PicoFeed\Filter
 */
public function setMediaBlacklist(array $values)
{
    if (! empty($values)) {
        $this->media_blacklist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set mandatory attributes for whitelisted tags
 *
 * An empty list is ignored: the current list is kept.
 *
 * @access  public
 * @param   array   $values   Required attributes per tag: ['img' => ['src']]
 * @return  \PicoFeed\Filter
 */
public function setRequiredAttributes(array $values)
{
    if (! empty($values)) {
        $this->required_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the attributes automatically added to specific tags
 *
 * An empty list is ignored: the current list is kept.
 *
 * @access  public
 * @param   array   $values   Attribute string per tag: ['a' => 'target="_blank"']
 * @return  \PicoFeed\Filter
 */
public function setAttributeOverrides(array $values)
{
    if (! empty($values)) {
        $this->add_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the attributes whose value must be an integer
 *
 * An empty list is ignored: the current list is kept.
 *
 * @access  public
 * @param   array   $values   List of attributes: ['width', 'height']
 * @return  \PicoFeed\Filter
 */
public function setIntegerAttributes(array $values)
{
    if (! empty($values)) {
        $this->integer_attributes = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set allowed iframe resources
 *
 * An empty list is ignored: the current whitelist is kept.
 *
 * @access  public
 * @param   array   $values   List of URLs: ['http://www.youtube.com']
 * @return  \PicoFeed\Filter
 */
public function setIframeWhitelist(array $values)
{
    if (! empty($values)) {
        $this->iframe_whitelist = $values;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set the config object and apply its filter settings
 *
 * Every filter list is fetched from the config with an empty array as
 * argument (presumably the fallback value) and handed to the matching
 * setter; the setters ignore empty lists, so the current values are kept
 * in that case.
 *
 * @access  public
 * @param   \PicoFeed\Config  $config   Config instance, or null to skip the settings
 * @return  \PicoFeed\Filter
 */
public function setConfig($config)
{
    $this->config = $config;

    // Nothing to apply without a config object
    if ($config === null) {
        return $this;
    }

    $this->setIframeWhitelist($config->getFilterIframeWhitelist(array()));
    $this->setIntegerAttributes($config->getFilterIntegerAttributes(array()));
    $this->setAttributeOverrides($config->getFilterAttributeOverrides(array()));
    $this->setRequiredAttributes($config->getFilterRequiredAttributes(array()));
    $this->setMediaBlacklist($config->getFilterMediaBlacklist(array()));
    $this->setMediaAttributes($config->getFilterMediaAttributes(array()));
    $this->setSchemeWhitelist($config->getFilterSchemeWhitelist(array()));
    $this->setBlacklistedTags($config->getFilterBlacklistedTags(array()));
    $this->setWhitelistedTags($config->getFilterWhitelistedTags(array()));

    return $this;
}
|
||||
}
|
||||
|
239
vendor/PicoFeed/Grabber.php
vendored
239
vendor/PicoFeed/Grabber.php
vendored
@ -2,19 +2,59 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
require_once __DIR__.'/Client.php';
|
||||
require_once __DIR__.'/Encoding.php';
|
||||
require_once __DIR__.'/Logging.php';
|
||||
require_once __DIR__.'/Filter.php';
|
||||
use DOMXPath;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Client;
|
||||
use PicoFeed\Encoding;
|
||||
use PicoFeed\Filter;
|
||||
|
||||
/**
|
||||
* Grabber class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Grabber
|
||||
{
|
||||
public $content = '';
|
||||
public $html = '';
|
||||
public $encoding = '';
|
||||
/**
|
||||
* URL
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $url = '';
|
||||
|
||||
// Order is important, generic terms at the end
|
||||
public $candidatesAttributes = array(
|
||||
/**
|
||||
* Relevant content
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $content = '';
|
||||
|
||||
/**
|
||||
* HTML content
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $html = '';
|
||||
|
||||
/**
|
||||
* HTML content encoding
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $encoding = '';
|
||||
|
||||
/**
|
||||
* List of attributes to try to get the content, order is important, generic terms at the end
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $candidatesAttributes = array(
|
||||
'articleBody',
|
||||
'articlebody',
|
||||
'article-body',
|
||||
@ -37,7 +77,13 @@ class Grabber
|
||||
'main',
|
||||
);
|
||||
|
||||
public $stripAttributes = array(
|
||||
/**
|
||||
* List of attributes to strip
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $stripAttributes = array(
|
||||
'comment',
|
||||
'share',
|
||||
'links',
|
||||
@ -57,7 +103,13 @@ class Grabber
|
||||
'categories',
|
||||
);
|
||||
|
||||
public $stripTags = array(
|
||||
/**
|
||||
* Tags to remove
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $stripTags = array(
|
||||
'script',
|
||||
'style',
|
||||
'nav',
|
||||
@ -67,7 +119,22 @@ class Grabber
|
||||
'form',
|
||||
);
|
||||
|
||||
/**
|
||||
* Config object
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config
|
||||
*/
|
||||
private $config = null;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @access public
|
||||
* @param string $url Url
|
||||
* @param string $html HTML content
|
||||
* @param string $encoding Charset
|
||||
*/
|
||||
public function __construct($url, $html = '', $encoding = 'utf-8')
|
||||
{
|
||||
$this->url = $url;
|
||||
@ -75,13 +142,53 @@ class Grabber
|
||||
$this->encoding = $encoding;
|
||||
}
|
||||
|
||||
/**
 * Attach the config object read by download()
 *
 * @access  public
 * @param   \PicoFeed\Config  $config   Config instance
 * @return  \PicoFeed\Grabber
 */
public function setConfig($config)
{
    $this->config = $config;

    return $this;
}
|
||||
|
||||
/**
 * Get the relevant content (the $content property)
 *
 * @access  public
 * @return  string
 */
public function getContent()
{
    return $this->content;
}
|
||||
|
||||
/**
 * Get the raw, unfiltered content (the $html property)
 *
 * @access  public
 * @return  string
 */
public function getRawContent()
{
    return $this->html;
}
|
||||
|
||||
/**
|
||||
* Parse the HTML content
|
||||
*
|
||||
* @access public
|
||||
* @return bool
|
||||
*/
|
||||
public function parse()
|
||||
{
|
||||
if ($this->html) {
|
||||
|
||||
Logging::log(\get_called_class().' Fix encoding');
|
||||
Logging::log(\get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
||||
Logging::setMessage(get_called_class().' Fix encoding');
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
||||
|
||||
$this->html = Filter::stripHeadTags($this->html);
|
||||
|
||||
@ -92,42 +199,63 @@ class Grabber
|
||||
$this->html = Encoding::toUTF8($this->html);
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().' Content length: '.strlen($this->html).' bytes');
|
||||
Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
|
||||
$rules = $this->getRules();
|
||||
|
||||
if (is_array($rules)) {
|
||||
Logging::log(\get_called_class().' Parse content with rules');
|
||||
Logging::setMessage(get_called_class().' Parse content with rules');
|
||||
$this->parseContentWithRules($rules);
|
||||
}
|
||||
else {
|
||||
Logging::log(\get_called_class().' Parse content with candidates');
|
||||
Logging::setMessage(get_called_class().' Parse content with candidates');
|
||||
$this->parseContentWithCandidates();
|
||||
}
|
||||
}
|
||||
else {
|
||||
Logging::log(\get_called_class().' No content fetched');
|
||||
Logging::setMessage(get_called_class().' No content fetched');
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().' Content length: '.strlen($this->content).' bytes');
|
||||
Logging::log(\get_called_class().' Grabber done');
|
||||
Logging::setMessage(get_called_class().' Content length: '.strlen($this->content).' bytes');
|
||||
Logging::setMessage(get_called_class().' Grabber done');
|
||||
|
||||
return $this->content !== '';
|
||||
}
|
||||
|
||||
|
||||
/**
 * Download the HTML content
 *
 * When a config object has been set, its grabber timeout, user agent,
 * redirection and body size limits and proxy settings are applied to the
 * HTTP client before the request. The downloaded body and its encoding
 * are stored on the instance.
 *
 * Timeout and user agent are no longer method parameters: they are read
 * from the config object.
 *
 * @access  public
 * @return  string   HTML content
 */
public function download()
{
    $client = Client::getInstance();

    if ($this->config !== null) {
        $client->setTimeout($this->config->getGrabberTimeout())
               ->setUserAgent($this->config->getGrabberUserAgent())
               ->setMaxRedirections($this->config->getMaxRedirections())
               ->setMaxBodySize($this->config->getMaxBodySize())
               ->setProxyHostname($this->config->getProxyHostname())
               ->setProxyPort($this->config->getProxyPort())
               ->setProxyUsername($this->config->getProxyUsername())
               ->setProxyPassword($this->config->getProxyPassword());
    }

    $client->execute($this->url);

    $this->html = $client->getContent();
    $this->encoding = $client->getEncoding();

    return $this->html;
}
|
||||
|
||||
|
||||
/**
|
||||
* Try to find a predefined rule
|
||||
*
|
||||
* @access public
|
||||
* @return mixed
|
||||
*/
|
||||
public function getRules()
|
||||
{
|
||||
$hostname = parse_url($this->url, PHP_URL_HOST);
|
||||
@ -147,7 +275,7 @@ class Grabber
|
||||
$filename = __DIR__.'/Rules/'.$file.'.php';
|
||||
|
||||
if (file_exists($filename)) {
|
||||
Logging::log(\get_called_class().' Load rule: '.$file);
|
||||
Logging::setMessage(get_called_class().' Load rule: '.$file);
|
||||
return include $filename;
|
||||
}
|
||||
}
|
||||
@ -155,13 +283,16 @@ class Grabber
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the relevant content with predefined rules
|
||||
*
|
||||
* @access public
|
||||
* @param array $rules Rules
|
||||
*/
|
||||
public function parseContentWithRules(array $rules)
|
||||
{
|
||||
\libxml_use_internal_errors(true);
|
||||
$dom = new \DOMDocument;
|
||||
$dom->loadHTML('<?xml version="1.0" encoding="UTF-8">'.$this->html);
|
||||
$xpath = new \DOMXPath($dom);
|
||||
$dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$this->html);
|
||||
$xpath = new DOMXPath($dom);
|
||||
|
||||
if (isset($rules['strip']) && is_array($rules['strip'])) {
|
||||
|
||||
@ -192,24 +323,26 @@ class Grabber
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the relevant content with the list of potential attributes
|
||||
*
|
||||
* @access public
|
||||
*/
|
||||
public function parseContentWithCandidates()
|
||||
{
|
||||
\libxml_use_internal_errors(true);
|
||||
$dom = new \DOMDocument;
|
||||
$dom->loadHTML('<?xml version="1.0" encoding="UTF-8">'.$this->html);
|
||||
$xpath = new \DOMXPath($dom);
|
||||
$dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$this->html);
|
||||
$xpath = new DOMXPath($dom);
|
||||
|
||||
// Try to lookup in each tag
|
||||
foreach ($this->candidatesAttributes as $candidate) {
|
||||
|
||||
Logging::log(\get_called_class().' Try this candidate: "'.$candidate.'"');
|
||||
Logging::setMessage(get_called_class().' Try this candidate: "'.$candidate.'"');
|
||||
|
||||
$nodes = $xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]');
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
$this->content = $dom->saveXML($nodes->item(0));
|
||||
Logging::log(\get_called_class().' Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
|
||||
Logging::setMessage(get_called_class().' Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -221,33 +354,38 @@ class Grabber
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
$this->content = $dom->saveXML($nodes->item(0));
|
||||
Logging::log(\get_called_class().' Find <article/> tag ('.strlen($this->content).' bytes)');
|
||||
Logging::setMessage(get_called_class().' Find <article/> tag ('.strlen($this->content).' bytes)');
|
||||
}
|
||||
}
|
||||
|
||||
if (strlen($this->content) < 50) {
|
||||
Logging::log(\get_called_class().' No enought content fetched, get the full body');
|
||||
Logging::setMessage(get_called_class().' No enought content fetched, get the full body');
|
||||
$this->content = $dom->saveXML($dom->firstChild);
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().' Strip garbage');
|
||||
Logging::setMessage(get_called_class().' Strip garbage');
|
||||
$this->stripGarbage();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Strip useless tags
|
||||
*
|
||||
* @access public
|
||||
*/
|
||||
public function stripGarbage()
|
||||
{
|
||||
\libxml_use_internal_errors(true);
|
||||
$dom = new \DOMDocument;
|
||||
$dom->loadXML($this->content);
|
||||
$xpath = new \DOMXPath($dom);
|
||||
$dom = XmlParser::getDomDocument($this->content);
|
||||
|
||||
if ($dom !== false) {
|
||||
|
||||
$xpath = new DOMXPath($dom);
|
||||
|
||||
foreach ($this->stripTags as $tag) {
|
||||
|
||||
$nodes = $xpath->query('//'.$tag);
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
Logging::log(\get_called_class().' Strip tag: "'.$tag.'"');
|
||||
Logging::setMessage(get_called_class().' Strip tag: "'.$tag.'"');
|
||||
foreach ($nodes as $node) {
|
||||
$node->parentNode->removeChild($node);
|
||||
}
|
||||
@ -259,7 +397,7 @@ class Grabber
|
||||
$nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]');
|
||||
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
Logging::log(\get_called_class().' Strip attribute: "'.$tag.'"');
|
||||
Logging::setMessage(get_called_class().' Strip attribute: "'.$attribute.'"');
|
||||
foreach ($nodes as $node) {
|
||||
$node->parentNode->removeChild($node);
|
||||
}
|
||||
@ -268,4 +406,5 @@ class Grabber
|
||||
|
||||
$this->content = $dom->saveXML($dom->documentElement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
64
vendor/PicoFeed/Import.php
vendored
64
vendor/PicoFeed/Import.php
vendored
@ -3,47 +3,75 @@
|
||||
namespace PicoFeed;
|
||||
|
||||
require_once __DIR__.'/Logging.php';
|
||||
require_once __DIR__.'/XmlParser.php';
|
||||
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\XmlParser;
|
||||
|
||||
/**
|
||||
* OPML Import
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Import
|
||||
{
|
||||
/**
|
||||
* OPML file content
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $content = '';
|
||||
|
||||
/**
|
||||
* Subscriptions
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $items = array();
|
||||
|
||||
|
||||
/**
 * Constructor
 *
 * Stores the OPML document; nothing is parsed until execute() is called.
 *
 * @access  public
 * @param   string  $content   OPML file content
 */
public function __construct($content)
{
    $this->content = $content;
}
|
||||
|
||||
|
||||
/**
 * Parse the OPML file
 *
 * Returns false when the content is not well-formed XML, when the root
 * element is not <opml>, or when there is no <body> element.
 *
 * @access  public
 * @return  array|false   Subscriptions, or false on failure
 */
public function execute()
{
    Logging::setMessage(get_called_class().': start importation');

    // getSimpleXml() is checked against false for malformed documents
    $xml = XmlParser::getSimpleXml(trim($this->content));

    if ($xml === false || $xml->getName() !== 'opml' || ! isset($xml->body)) {
        Logging::setMessage(get_called_class().': OPML tag not found or malformed XML document');
        return false;
    }

    $this->parseEntries($xml->body);

    Logging::setMessage(get_called_class().': '.count($this->items).' subscriptions found');

    return $this->items;
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse each entries of the subscription list
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $tree XML node
|
||||
*/
|
||||
public function parseEntries($tree)
|
||||
{
|
||||
if (isset($tree->outline)) {
|
||||
|
202
vendor/PicoFeed/Item.php
vendored
Normal file
202
vendor/PicoFeed/Item.php
vendored
Normal file
@ -0,0 +1,202 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
/**
|
||||
* Feed Item
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Item
{
    /**
     * Item id
     *
     * @access public
     * @var string
     */
    public $id = '';

    /**
     * Item title
     *
     * @access public
     * @var string
     */
    public $title = '';

    /**
     * Item url
     *
     * @access public
     * @var string
     */
    public $url = '';

    /**
     * Item author
     *
     * @access public
     * @var string
     */
    public $author = '';

    /**
     * Item date
     *
     * @access public
     * @var integer
     */
    public $date = 0;

    /**
     * Item content
     *
     * @access public
     * @var string
     */
    public $content = '';

    /**
     * Item enclosure url
     *
     * @access public
     * @var string
     */
    public $enclosure_url = '';

    /**
     * Item enclosure type
     *
     * @access public
     * @var string
     */
    public $enclosure_type = '';

    /**
     * Item language
     *
     * @access public
     * @var string
     */
    public $language = '';

    /**
     * Return item information
     *
     * One "Item::<property> = <value>" line per property; the content is
     * summarised by its length in bytes instead of being dumped.
     *
     * @access public
     * @return string
     */
    public function __toString()
    {
        $fields = array('id', 'title', 'url', 'date', 'language', 'author', 'enclosure_url', 'enclosure_type');
        $lines = array();

        foreach ($fields as $field) {
            $lines[] = 'Item::'.$field.' = '.$this->{$field};
        }

        $lines[] = 'Item::content = '.strlen($this->content).' bytes';

        // Every line, including the last one, is terminated by PHP_EOL
        return implode(PHP_EOL, $lines).PHP_EOL;
    }

    /**
     * Get title
     *
     * @access public
     * @return string
     */
    public function getTitle()
    {
        return $this->title;
    }

    /**
     * Get url
     *
     * @access public
     * @return string
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Get id
     *
     * @access public
     * @return string
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * Get date
     *
     * @access public
     * @return integer
     */
    public function getDate()
    {
        return $this->date;
    }

    /**
     * Get content
     *
     * @access public
     * @return string
     */
    public function getContent()
    {
        return $this->content;
    }

    /**
     * Get enclosure url
     *
     * @access public
     * @return string
     */
    public function getEnclosureUrl()
    {
        return $this->enclosure_url;
    }

    /**
     * Get enclosure type
     *
     * @access public
     * @return string
     */
    public function getEnclosureType()
    {
        return $this->enclosure_type;
    }

    /**
     * Get language
     *
     * @access public
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }

    /**
     * Get author
     *
     * @access public
     * @return string
     */
    public function getAuthor()
    {
        return $this->author;
    }
}
|
76
vendor/PicoFeed/Logging.php
vendored
76
vendor/PicoFeed/Logging.php
vendored
@ -2,12 +2,82 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use DateTime;
|
||||
use DateTimeZone;
|
||||
|
||||
/**
|
||||
* Logging class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Logging
{
    /**
     * List of messages
     *
     * @static
     * @access private
     * @var array
     */
    private static $messages = array();

    /**
     * Timezone identifier used for the message timestamps
     *
     * @static
     * @access private
     * @var string
     */
    private static $timezone = 'UTC';

    /**
     * Add a new message
     *
     * The message is prefixed with the current date and time, expressed in
     * the configured timezone. DateTimeZone throws an exception if that
     * timezone identifier is not valid.
     *
     * @static
     * @access public
     * @param  string   $message   Message
     */
    public static function setMessage($message)
    {
        $date = new DateTime('now', new DateTimeZone(self::$timezone));

        self::$messages[] = '['.$date->format('Y-m-d H:i:s').'] '.$message;
    }

    /**
     * Get all logged messages
     *
     * @static
     * @access public
     * @return array
     */
    public static function getMessages()
    {
        return self::$messages;
    }

    /**
     * Remove all logged messages
     *
     * @static
     * @access public
     */
    public static function deleteMessages()
    {
        self::$messages = array();
    }

    /**
     * Set a different timezone
     *
     * An empty value is ignored: the current timezone is kept.
     *
     * @static
     * @see    http://php.net/manual/en/timezones.php
     * @access public
     * @param  string   $timezone   Timezone
     */
    public static function setTimeZone($timezone)
    {
        self::$timezone = $timezone ?: self::$timezone;
    }
}
|
310
vendor/PicoFeed/Parser.php
vendored
310
vendor/PicoFeed/Parser.php
vendored
@ -2,10 +2,16 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
require_once __DIR__.'/Logging.php';
|
||||
require_once __DIR__.'/Filter.php';
|
||||
require_once __DIR__.'/Encoding.php';
|
||||
require_once __DIR__.'/Grabber.php';
|
||||
use DateTime;
|
||||
use DateTimeZone;
|
||||
use DOMXPath;
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Config;
|
||||
use PicoFeed\Encoding;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Grabber;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\XmlParser;
|
||||
|
||||
/**
|
||||
* Base parser class
|
||||
@ -15,14 +21,29 @@ require_once __DIR__.'/Grabber.php';
|
||||
*/
|
||||
abstract class Parser
|
||||
{
|
||||
/**
|
||||
* Config object
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config
|
||||
*/
|
||||
private $config = null;
|
||||
|
||||
/**
|
||||
* Hash algorithm used to generate item id, any value supported by PHP, see hash_algos()
|
||||
*
|
||||
* @access public
|
||||
* @static
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
public static $hashAlgo = 'crc32b'; // crc32b seems to be faster and shorter than other hash algorithms
|
||||
private $hash_algo = 'crc32b'; // crc32b seems to be faster and shorter than other hash algorithms
|
||||
|
||||
/**
|
||||
* Timezone used to parse feed dates
|
||||
*
|
||||
* @access private
|
||||
* @var string
|
||||
*/
|
||||
private $timezone = 'UTC';
|
||||
|
||||
/**
|
||||
* Feed content (XML data)
|
||||
@ -33,35 +54,28 @@ abstract class Parser
|
||||
protected $content = '';
|
||||
|
||||
/**
|
||||
* Feed properties (values parsed)
|
||||
* XML namespaces
|
||||
*
|
||||
* @access public
|
||||
* @access protected
|
||||
* @var array
|
||||
*/
|
||||
public $id = '';
|
||||
public $url = '';
|
||||
public $title = '';
|
||||
public $updated = '';
|
||||
public $language = '';
|
||||
public $items = array();
|
||||
protected $namespaces = array();
|
||||
|
||||
/**
|
||||
* Content grabber parameters
|
||||
* Enable the content grabber
|
||||
*
|
||||
* @access public
|
||||
* @access private
|
||||
* @var bool
|
||||
*/
|
||||
public $grabber = false;
|
||||
public $grabber_ignore_urls = array();
|
||||
public $grabber_timeout = null;
|
||||
public $grabber_user_agent = null;
|
||||
public $enable_grabber = false;
|
||||
|
||||
/**
|
||||
* Parse feed content
|
||||
* Ignore those urls for the content scraper
|
||||
*
|
||||
* @abstract
|
||||
* @access public
|
||||
* @return mixed
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
abstract public function execute();
|
||||
private $grabber_ignore_urls = array();
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
@ -73,7 +87,7 @@ abstract class Parser
|
||||
public function __construct($content, $http_encoding = '')
|
||||
{
|
||||
$xml_encoding = Filter::getEncodingFromXmlTag($content);
|
||||
Logging::log(\get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||
|
||||
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing
|
||||
$this->content = Filter::stripXmlTag($content);
|
||||
@ -90,6 +104,52 @@ abstract class Parser
|
||||
$this->content = $this->normalizeData($this->content);
|
||||
}
|
||||
|
||||
/**
 * Parse the document
 *
 * Loads the XML content, fills a Feed object with the feed-level values,
 * then builds one Item per entry returned by getItemsTree().
 *
 * @access  public
 * @return  mixed   \PicoFeed\Feed instance or false
 */
public function execute()
{
    Logging::setMessage(get_called_class().': begin parsing');

    $document = XmlParser::getSimpleXml($this->content);

    // Stop here on malformed XML, after logging the parser errors
    if ($document === false) {
        Logging::setMessage(get_called_class().': XML parsing error');
        Logging::setMessage(XmlParser::getErrors());
        return false;
    }

    $this->namespaces = $document->getNamespaces(true);

    // Feed-level values
    $feed = new Feed;
    $this->findFeedUrl($document, $feed);
    $this->findFeedTitle($document, $feed);
    $this->findFeedLanguage($document, $feed);
    $this->findFeedId($document, $feed);
    $this->findFeedDate($document, $feed);

    // Item-level values; the call order is kept as is because later
    // lookups may rely on values found earlier
    foreach ($this->getItemsTree($document) as $node) {

        $item = new Item;
        $this->findItemAuthor($document, $node, $item);
        $this->findItemUrl($node, $item);
        $this->findItemTitle($node, $item);
        $this->findItemId($node, $item, $feed);
        $this->findItemDate($node, $item);
        $this->findItemContent($node, $item);
        $this->findItemEnclosure($node, $item, $feed);
        $this->findItemLanguage($node, $item, $feed);
        $feed->items[] = $item;
    }

    Logging::setMessage(get_called_class().PHP_EOL.$feed);

    return $feed;
}
|
||||
|
||||
/**
|
||||
* Filter HTML for entry content
|
||||
*
|
||||
@ -102,43 +162,40 @@ abstract class Parser
|
||||
{
|
||||
$content = '';
|
||||
|
||||
if ($this->grabber && ! in_array($item_url, $this->grabber_ignore_urls)) {
|
||||
// Setup the content scraper
|
||||
if ($this->enable_grabber && ! in_array($item_url, $this->grabber_ignore_urls)) {
|
||||
|
||||
$grabber = new Grabber($item_url);
|
||||
$grabber->download($this->grabber_timeout, $this->grabber_user_agent);
|
||||
if ($grabber->parse()) $item_content = $grabber->content;
|
||||
$grabber->setConfig($this->config);
|
||||
$grabber->download();
|
||||
|
||||
if ($grabber->parse()) {
|
||||
$item_content = $grabber->getContent();
|
||||
}
|
||||
}
|
||||
|
||||
// Content filtering
|
||||
if ($item_content) {
|
||||
|
||||
if ($this->config !== null) {
|
||||
|
||||
$callback = $this->config->getContentFilteringCallback();
|
||||
|
||||
if (is_callable($callback)) {
|
||||
$content = $callback($item_content, $item_url);
|
||||
}
|
||||
}
|
||||
|
||||
if (! $content) {
|
||||
$filter = new Filter($item_content, $item_url);
|
||||
$filter->setConfig($this->config);
|
||||
$content = $filter->execute();
|
||||
}
|
||||
}
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
 * Get XML parser errors
 *
 * Formats every error reported by libxml_get_errors() and joins them.
 *
 * @access public
 * @return string   Comma separated list of errors, empty string when there is none
 */
public function getXmlErrors()
{
    $messages = array_map(
        function ($issue) {
            return sprintf(
                'XML error: %s (Line: %d - Column: %d - Code: %d)',
                $issue->message,
                $issue->line,
                $issue->column,
                $issue->code
            );
        },
        \libxml_get_errors()
    );

    return implode(', ', $messages);
}
|
||||
|
||||
/**
|
||||
* Dirty quickfixes before XML parsing
|
||||
*
|
||||
@ -148,6 +205,7 @@ abstract class Parser
|
||||
*/
|
||||
public function normalizeData($data)
|
||||
{
|
||||
$data = str_replace("\x10", '', $data);
|
||||
$data = str_replace("\xc3\x20", '', $data);
|
||||
$data = str_replace("", '', $data);
|
||||
$data = $this->replaceEntityAttribute($data);
|
||||
@ -194,7 +252,7 @@ abstract class Parser
|
||||
*/
|
||||
public function generateId()
{
    // All arguments are concatenated and hashed with the algorithm stored
    // on the instance, which is the value written by setHashAlgo().
    // The previous lookup of self::$hashAlgo ignored that setter and was
    // followed by a second return statement that could never be reached.
    return hash($this->hash_algo, implode(func_get_args()));
}
|
||||
|
||||
/**
|
||||
@ -249,7 +307,8 @@ abstract class Parser
|
||||
}
|
||||
}
|
||||
|
||||
return time();
|
||||
$date = new DateTime('now', new DateTimeZone($this->timezone));
|
||||
return $date->getTimestamp();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -262,11 +321,15 @@ abstract class Parser
|
||||
*/
|
||||
public function getValidDate($format, $value)
|
||||
{
|
||||
$date = \DateTime::createFromFormat($format, $value);
|
||||
$date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone));
|
||||
|
||||
if ($date !== false) {
|
||||
$errors = \DateTime::getLastErrors();
|
||||
if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) return $date->getTimestamp();
|
||||
|
||||
$errors = DateTime::getLastErrors();
|
||||
|
||||
if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) {
|
||||
return $date->getTimestamp();
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -299,10 +362,13 @@ abstract class Parser
|
||||
*/
|
||||
public function getXmlLang($xml)
{
    // NOTE: the $xml argument is not read, the document comes from $this->content.
    // The document is parsed only once (the former extra DOMDocument was discarded).
    $dom = XmlParser::getDomDocument($this->content);

    // Check the parsing result before building the XPath object,
    // DOMXPath cannot be constructed from false
    if ($dom === false) {
        return '';
    }

    $xpath = new \DOMXPath($dom);

    // First xml:lang attribute found in the document, or an empty string
    return $xpath->evaluate('string(//@xml:lang[1])') ?: '';
}
|
||||
|
||||
@ -318,30 +384,108 @@ abstract class Parser
|
||||
{
|
||||
$language = strtolower($language);
|
||||
|
||||
// Arabic (ar-**)
|
||||
if (strpos($language, 'ar') === 0) return true;
|
||||
$rtl_languages = array(
|
||||
'ar', // Arabic (ar-**)
|
||||
'fa', // Farsi (fa-**)
|
||||
'ur', // Urdu (ur-**)
|
||||
'ps', // Pashtu (ps-**)
|
||||
'syr', // Syriac (syr-**)
|
||||
'dv', // Divehi (dv-**)
|
||||
'he', // Hebrew (he-**)
|
||||
'yi', // Yiddish (yi-**)
|
||||
);
|
||||
|
||||
// Farsi (fa-**)
|
||||
if (strpos($language, 'fa') === 0) return true;
|
||||
|
||||
// Urdu (ur-**)
|
||||
if (strpos($language, 'ur') === 0) return true;
|
||||
|
||||
// Pashtu (ps-**)
|
||||
if (strpos($language, 'ps') === 0) return true;
|
||||
|
||||
// Syriac (syr-**)
|
||||
if (strpos($language, 'syr') === 0) return true;
|
||||
|
||||
// Divehi (dv-**)
|
||||
if (strpos($language, 'dv') === 0) return true;
|
||||
|
||||
// Hebrew (he-**)
|
||||
if (strpos($language, 'he') === 0) return true;
|
||||
|
||||
// Yiddish (yi-**)
|
||||
if (strpos($language, 'yi') === 0) return true;
|
||||
foreach ($rtl_languages as $prefix) {
|
||||
if (strpos($language, $prefix) === 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Set Hash algorithm used for id generation
 *
 * An empty value leaves the current algorithm untouched.
 *
 * @access public
 * @param  string   $algo   Algorithm name
 * @return \PicoFeed\Parser
 */
public function setHashAlgo($algo)
{
    if ($algo) {
        $this->hash_algo = $algo;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set a different timezone
 *
 * An empty value leaves the current timezone untouched.
 *
 * @see    http://php.net/manual/en/timezones.php
 * @access public
 * @param  string   $timezone   Timezone
 * @return \PicoFeed\Parser
 */
public function setTimezone($timezone)
{
    if ($timezone) {
        $this->timezone = $timezone;
    }

    return $this;
}
|
||||
|
||||
/**
 * Set config object
 *
 * The value is stored as-is, no validation is done here.
 *
 * @access public
 * @param  \PicoFeed\Config  $config   Config instance
 * @return \PicoFeed\Parser
 */
public function setConfig($config)
{
    $parser = $this;
    $parser->config = $config;

    return $parser;
}
|
||||
|
||||
/**
 * Enable the content grabber
 *
 * @access public
 * @return \PicoFeed\Parser
 */
public function enableContentGrabber()
{
    $this->enable_grabber = true;

    // Return the instance as documented, to allow chaining like the other setters
    return $this;
}
|
||||
|
||||
/**
 * Set ignored URLs for the content grabber
 *
 * @access public
 * @param  array   $urls   URLs
 * @return \PicoFeed\Parser
 */
public function setGrabberIgnoreUrls(array $urls)
{
    $this->grabber_ignore_urls = $urls;

    // Return the instance as documented, to allow chaining like the other setters
    return $this;
}
|
||||
|
||||
/**
 * Get a value from a XML namespace
 *
 * Namespaces are tried in the given order, the first one that contains
 * the requested tag wins.
 *
 * @access public
 * @param  SimpleXMLElement   $xml          XML element
 * @param  array              $namespaces   XML namespaces
 * @param  string             $property     XML tag name
 * @return string                           Tag value or empty string when not found
 */
public function getNamespaceValue(SimpleXMLElement $xml, array $namespaces, $property)
{
    foreach ($namespaces as $uri) {

        $children = $xml->children($uri);
        $node = $children->$property;

        if ($node->count() > 0) {
            return (string) $node;
        }
    }

    return '';
}
|
||||
}
|
||||
|
266
vendor/PicoFeed/Parsers/Atom.php
vendored
266
vendor/PicoFeed/Parsers/Atom.php
vendored
@ -2,81 +2,247 @@
|
||||
|
||||
namespace PicoFeed\Parsers;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Parser;
|
||||
use PicoFeed\XmlParser;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Feed;
|
||||
use PicoFeed\Item;
|
||||
|
||||
/**
|
||||
* Atom parser
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
*/
|
||||
class Atom extends \PicoFeed\Parser
|
||||
class Atom extends Parser
|
||||
{
|
||||
/**
|
||||
* Parse the document
|
||||
* Get the path to the items XML tree
|
||||
*
|
||||
* @access public
|
||||
* @return mixed Atom instance or false
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function execute()
|
||||
public function getItemsTree(SimpleXMLElement $xml)
|
||||
{
|
||||
\PicoFeed\Logging::log(\get_called_class().': begin parsing');
|
||||
|
||||
\libxml_use_internal_errors(true);
|
||||
$xml = \simplexml_load_string($this->content);
|
||||
|
||||
if ($xml === false) {
|
||||
\PicoFeed\Logging::log(\get_called_class().': XML parsing error');
|
||||
\PicoFeed\Logging::log($this->getXmlErrors());
|
||||
return false;
|
||||
return $xml->entry;
|
||||
}
|
||||
|
||||
$this->language = $this->getXmlLang($this->content);
|
||||
$this->url = $this->getUrl($xml);
|
||||
$this->title = $this->stripWhiteSpace((string) $xml->title) ?: $this->url;
|
||||
$this->id = (string) $xml->id;
|
||||
$this->updated = $this->parseDate((string) $xml->updated);
|
||||
$author = (string) $xml->author->name;
|
||||
/**
 * Find the feed url
 *
 * The value comes from getLink() applied to the feed root.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
{
    $link = $this->getLink($xml);
    $feed->url = $link;
}
|
||||
|
||||
\PicoFeed\Logging::log(\get_called_class().': Title => '.$this->title);
|
||||
\PicoFeed\Logging::log(\get_called_class().': Url => '.$this->url);
|
||||
/**
 * Find the feed title
 *
 * Falls back on the feed url when the cleaned title is empty.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
{
    $title = $this->stripWhiteSpace((string) $xml->title);

    if (! $title) {
        $title = $feed->url;
    }

    $feed->title = $title;
}
|
||||
|
||||
foreach ($xml->entry as $entry) {
|
||||
/**
 * Find the feed language
 *
 * The value is the result of getXmlLang() for the raw content.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
    $language = $this->getXmlLang($this->content);
    $feed->language = $language;
}
|
||||
|
||||
/**
 * Find the feed id
 *
 * The value is the text of the "id" tag of the feed root.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
{
    $identifier = (string) $xml->id;
    $feed->id = $identifier;
}
|
||||
|
||||
/**
 * Find the feed date
 *
 * The "updated" tag of the feed root is given to parseDate().
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
    $updated = (string) $xml->updated;
    $feed->date = $this->parseDate($updated);
}
|
||||
|
||||
/**
 * Find the item date
 *
 * The "updated" tag of the entry is given to parseDate().
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemDate(SimpleXMLElement $entry, Item $item)
{
    $updated = (string) $entry->updated;
    $item->date = $this->parseDate($updated);
}
|
||||
|
||||
/**
 * Find the item title
 *
 * Falls back on the item url when the cleaned title is empty.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemTitle(SimpleXMLElement $entry, Item $item)
{
    $title = $this->stripWhiteSpace((string) $entry->title);

    $item->title = empty($title) ? $item->url : $title;
}
|
||||
|
||||
/**
 * Find the item author
 *
 * The author of the entry is used when present, otherwise the author
 * declared at the feed level.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
{
    // The unused local copy of the author name has been dropped
    if (isset($entry->author->name)) {
        $item->author = (string) $entry->author->name;
    }
    else {
        $item->author = (string) $xml->author->name;
    }
}
|
||||
|
||||
/**
 * Find the item content
 *
 * The raw content returned by getContent() goes through filterHtml()
 * together with the item url.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemContent(SimpleXMLElement $entry, Item $item)
{
    $raw_content = $this->getContent($entry);
    $item->content = $this->filterHtml($raw_content, $item->url);
}
|
||||
|
||||
/**
 * Find the item URL
 *
 * The value comes from getLink() applied to the entry.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemUrl(SimpleXMLElement $entry, Item $item)
{
    $link = $this->getLink($entry);
    $item->url = $link;
}
|
||||
|
||||
/**
 * Generate the item id
 *
 * The id is a hash of the entry "id" tag (or of the item url when both
 * are the same) and of the feed url, the latter being left out when
 * isExcludedFromId() says so.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    // Only the id is computed here and it is written on the Item received
    // from the caller. The leftover statements that replaced $item by a new
    // StdClass (and re-did the work of the other findItem*() hooks) are gone.
    $id = (string) $entry->id;

    if ($id !== $item->url) {
        $item_permalink = $id;
    }
    else {
        $item_permalink = $item->url;
    }

    if ($this->isExcludedFromId($feed->url)) {
        $feed_permalink = '';
    }
    else {
        $feed_permalink = $feed->url;
    }

    $item->id = $this->generateId($item_permalink, $feed_permalink);
}
|
||||
|
||||
/**
|
||||
* Find the item enclosure
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Item $item Item object
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
foreach ($entry->link as $link) {
|
||||
if ((string) $link['rel'] === 'enclosure') {
|
||||
$item->enclosure = (string) $link['href'];
|
||||
|
||||
$item->enclosure_url = (string) $link['href'];
|
||||
$item->enclosure_type = (string) $link['type'];
|
||||
|
||||
if (\PicoFeed\Filter::isRelativePath($item->enclosure)) {
|
||||
$item->enclosure = \PicoFeed\Filter::getAbsoluteUrl($item->enclosure, $this->url);
|
||||
if (Filter::isRelativePath($item->enclosure_url)) {
|
||||
$item->enclosure_url = Filter::getAbsoluteUrl($item->enclosure_url, $feed->url);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
\PicoFeed\Logging::log(\get_called_class().': parsing finished ('.count($this->items).' items)');
|
||||
/**
 * Find the item language
 *
 * The item takes the language already stored on the feed.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $feed_language = $feed->language;
    $item->language = $feed_language;
}
|
||||
|
||||
return $this;
|
||||
/**
 * Get the URL from a link tag
 *
 * The first link typed as HTML or XHTML wins, otherwise the href
 * of the first link tag is returned.
 *
 * @access public
 * @param  SimpleXMLElement   $xml   XML tag
 * @return string
 */
public function getLink(SimpleXMLElement $xml)
{
    $html_types = array('text/html', 'application/xhtml+xml');

    foreach ($xml->link as $candidate) {
        if (in_array((string) $candidate['type'], $html_types, true)) {
            return (string) $candidate['href'];
        }
    }

    return (string) $xml->link['href'];
}
|
||||
|
||||
/**
|
||||
@ -86,7 +252,7 @@ class Atom extends \PicoFeed\Parser
|
||||
* @param SimpleXMLElement $entry XML Entry
|
||||
* @return string
|
||||
*/
|
||||
public function getContent($entry)
|
||||
public function getContent(SimpleXMLElement $entry)
|
||||
{
|
||||
if (isset($entry->content) && ! empty($entry->content)) {
|
||||
|
||||
@ -103,22 +269,4 @@ class Atom extends \PicoFeed\Parser
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
 * Get the URL from a link tag
 *
 * Links that are not typed as HTML or XHTML are skipped, when none
 * matches the href of the first link tag is returned.
 *
 * @access public
 * @param  SimpleXMLElement   $xml   XML tag
 * @return string
 */
public function getUrl($xml)
{
    foreach ($xml->link as $candidate) {

        $type = (string) $candidate['type'];

        if ($type !== 'text/html' && $type !== 'application/xhtml+xml') {
            continue;
        }

        return (string) $candidate['href'];
    }

    return (string) $xml->link['href'];
}
|
||||
}
|
138
vendor/PicoFeed/Parsers/Rss10.php
vendored
138
vendor/PicoFeed/Parsers/Rss10.php
vendored
@ -2,86 +2,86 @@
|
||||
|
||||
namespace PicoFeed\Parsers;
|
||||
|
||||
class Rss10 extends \PicoFeed\Parser
|
||||
require_once __DIR__.'/Rss20.php';
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Feed;
|
||||
use PicoFeed\Item;
|
||||
use PicoFeed\Parsers\Rss20;
|
||||
|
||||
/**
|
||||
* RSS 1.0 parser
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
*/
|
||||
class Rss10 extends Rss20
|
||||
{
|
||||
public function execute()
|
||||
/**
|
||||
* Get the path to the items XML tree
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function getItemsTree(SimpleXMLElement $xml)
|
||||
{
|
||||
\PicoFeed\Logging::log(\get_called_class().': begin parsing');
|
||||
|
||||
\libxml_use_internal_errors(true);
|
||||
$xml = \simplexml_load_string($this->content);
|
||||
|
||||
if ($xml === false) {
|
||||
\PicoFeed\Logging::log(\get_called_class().': XML parsing error');
|
||||
\PicoFeed\Logging::log($this->getXmlErrors());
|
||||
return false;
|
||||
return $xml->item;
|
||||
}
|
||||
|
||||
$namespaces = $xml->getNamespaces(true);
|
||||
/**
 * Find the feed date
 *
 * The "date" tag is looked up in the namespaces of the channel
 * and given to parseDate().
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
    $channel_date = $this->getNamespaceValue($xml->channel, $this->namespaces, 'date');
    $feed->date = $this->parseDate($channel_date);
}
|
||||
|
||||
$this->title = $this->stripWhiteSpace((string) $xml->channel->title) ?: $this->url;
|
||||
$this->url = (string) $xml->channel->link;
|
||||
$this->id = $this->url;
|
||||
$this->language = '';
|
||||
/**
 * Find the feed language
 *
 * The "language" tag is looked up in the namespaces of the channel.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
    $channel_language = $this->getNamespaceValue($xml->channel, $this->namespaces, 'language');
    $feed->language = $channel_language;
}
|
||||
|
||||
\PicoFeed\Logging::log(\get_called_class().': Title => '.$this->title);
|
||||
\PicoFeed\Logging::log(\get_called_class().': Url => '.$this->url);
|
||||
|
||||
if (isset($namespaces['dc'])) {
|
||||
$ns_dc = $xml->channel->children($namespaces['dc']);
|
||||
$this->updated = isset($ns_dc->date) ? $this->parseDate($ns_dc->date) : time();
|
||||
/**
|
||||
* Genereate the item id
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Item $item Item object
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
if ($this->isExcludedFromId($feed->url)) {
|
||||
$feed_permalink = '';
|
||||
}
|
||||
else {
|
||||
$this->updated = time();
|
||||
$feed_permalink = $feed->url;
|
||||
}
|
||||
|
||||
foreach ($xml->item as $entry) {
|
||||
|
||||
$item = new \StdClass;
|
||||
$item->title = $this->stripWhiteSpace((string) $entry->title);
|
||||
$item->url = '';
|
||||
$item->author= '';
|
||||
$item->updated = '';
|
||||
$item->content = '';
|
||||
$item->language = '';
|
||||
|
||||
foreach ($namespaces as $name => $url) {
|
||||
|
||||
$namespace = $entry->children($namespaces[$name]);
|
||||
|
||||
if (! $item->url && ! empty($namespace->origLink)) $item->url = (string) $namespace->origLink;
|
||||
if (! $item->author && ! empty($namespace->creator)) $item->author = (string) $namespace->creator;
|
||||
if (! $item->updated && ! empty($namespace->date)) $item->updated = $this->parseDate((string) $namespace->date);
|
||||
if (! $item->updated && ! empty($namespace->updated)) $item->updated = $this->parseDate((string) $namespace->updated);
|
||||
if (! $item->content && ! empty($namespace->encoded)) $item->content = (string) $namespace->encoded;
|
||||
$item->id = $this->generateId($item->url, $feed_permalink);
|
||||
}
|
||||
|
||||
if (empty($item->url)) $item->url = (string) $entry->link;
|
||||
if (empty($item->updated)) $item->updated = $this->updated;
|
||||
|
||||
if (empty($item->content)) {
|
||||
$item->content = isset($entry->description) ? (string) $entry->description : '';
|
||||
}
|
||||
|
||||
if (empty($item->author)) {
|
||||
|
||||
if (isset($entry->author)) {
|
||||
$item->author = (string) $entry->author;
|
||||
}
|
||||
else if (isset($xml->channel->webMaster)) {
|
||||
$item->author = (string) $xml->channel->webMaster;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($item->title)) $item->title = $item->url;
|
||||
|
||||
$item->id = $this->generateId($item->url, $this->isExcludedFromId($this->url) ? '' : $this->url);
|
||||
$item->content = $this->filterHtml($item->content, $item->url);
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
\PicoFeed\Logging::log(\get_called_class().': parsing finished ('.count($this->items).' items)');
|
||||
|
||||
return $this;
|
||||
/**
 * Find the item enclosure
 *
 * Nothing is done for this format, the item is left untouched.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    // Intentionally empty
}
|
||||
}
|
288
vendor/PicoFeed/Parsers/Rss20.php
vendored
288
vendor/PicoFeed/Parsers/Rss20.php
vendored
@ -2,35 +2,43 @@
|
||||
|
||||
namespace PicoFeed\Parsers;
|
||||
|
||||
use SimpleXMLElement;
|
||||
use PicoFeed\Parser;
|
||||
use PicoFeed\XmlParser;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Feed;
|
||||
use PicoFeed\Item;
|
||||
|
||||
/**
|
||||
* RSS 2.0 Parser
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
*/
|
||||
class Rss20 extends \PicoFeed\Parser
|
||||
class Rss20 extends Parser
|
||||
{
|
||||
/**
|
||||
* Parse the document
|
||||
* Get the path to the items XML tree
|
||||
*
|
||||
* @access public
|
||||
* @return mixed Rss20 instance or false
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @return SimpleXMLElement
|
||||
*/
|
||||
public function execute()
|
||||
public function getItemsTree(SimpleXMLElement $xml)
|
||||
{
|
||||
\PicoFeed\Logging::log(\get_called_class().': begin parsing');
|
||||
|
||||
\libxml_use_internal_errors(true);
|
||||
$xml = \simplexml_load_string($this->content);
|
||||
|
||||
if ($xml === false) {
|
||||
\PicoFeed\Logging::log(\get_called_class().': XML parsing error');
|
||||
\PicoFeed\Logging::log($this->getXmlErrors());
|
||||
return false;
|
||||
return $xml->channel->item;
|
||||
}
|
||||
|
||||
$namespaces = $xml->getNamespaces(true);
|
||||
|
||||
/**
|
||||
* Find the feed url
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed xml
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
|
||||
{
|
||||
if ($xml->channel->link && $xml->channel->link->count() > 1) {
|
||||
|
||||
foreach ($xml->channel->link as $xml_link) {
|
||||
@ -38,74 +46,117 @@ class Rss20 extends \PicoFeed\Parser
|
||||
$link = (string) $xml_link;
|
||||
|
||||
if ($link !== '') {
|
||||
$this->url = (string) $link;
|
||||
$feed->url = $link;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
$this->url = (string) $xml->channel->link;
|
||||
$feed->url = (string) $xml->channel->link;
|
||||
}
|
||||
}
|
||||
|
||||
$this->language = isset($xml->channel->language) ? (string) $xml->channel->language : '';
|
||||
$this->title = $this->stripWhiteSpace((string) $xml->channel->title) ?: $this->url;
|
||||
$this->id = $this->url;
|
||||
$this->updated = $this->parseDate(isset($xml->channel->pubDate) ? (string) $xml->channel->pubDate : (string) $xml->channel->lastBuildDate);
|
||||
|
||||
\PicoFeed\Logging::log(\get_called_class().': Title => '.$this->title);
|
||||
\PicoFeed\Logging::log(\get_called_class().': Url => '.$this->url);
|
||||
|
||||
// RSS feed might be empty
|
||||
if (! $xml->channel->item) {
|
||||
\PicoFeed\Logging::log(\get_called_class().': feed empty or malformed');
|
||||
return $this;
|
||||
/**
 * Find the feed title
 *
 * Falls back on the feed url when the cleaned channel title is empty.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
{
    $title = $this->stripWhiteSpace((string) $xml->channel->title);

    if (! $title) {
        $title = $feed->url;
    }

    $feed->title = $title;
}
|
||||
|
||||
foreach ($xml->channel->item as $entry) {
|
||||
/**
 * Find the feed language
 *
 * The value is the channel "language" tag, or an empty string
 * when the tag is missing.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
    if (isset($xml->channel->language)) {
        $feed->language = (string) $xml->channel->language;
    }
    else {
        $feed->language = '';
    }
}
|
||||
|
||||
$item = new \StdClass;
|
||||
/**
 * Find the feed id
 *
 * The feed url is reused as the feed id.
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
{
    $feed_url = $feed->url;
    $feed->id = $feed_url;
}
|
||||
|
||||
/**
 * Find the feed date
 *
 * The channel "pubDate" tag is used when present, otherwise "lastBuildDate".
 *
 * @access public
 * @param  SimpleXMLElement   $xml     Feed xml
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
    if (isset($xml->channel->pubDate)) {
        $channel_date = $xml->channel->pubDate;
    }
    else {
        $channel_date = $xml->channel->lastBuildDate;
    }

    $feed->date = $this->parseDate((string) $channel_date);
}
|
||||
|
||||
/**
 * Find the item date
 *
 * Lookup order: namespaced "date" tag, namespaced "updated" tag,
 * then the "pubDate" tag of the item.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemDate(SimpleXMLElement $entry, Item $item)
{
    $value = '';

    foreach (array('date', 'updated') as $tag) {

        $value = $this->getNamespaceValue($entry, $this->namespaces, $tag);

        if (! empty($value)) {
            break;
        }
    }

    if (empty($value)) {
        $value = (string) $entry->pubDate;
    }

    $item->date = $this->parseDate($value);
}
|
||||
|
||||
/**
|
||||
* Find the item title
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Item $item Item object
|
||||
*/
|
||||
public function findItemTitle(SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->title = $this->stripWhiteSpace((string) $entry->title);
|
||||
$item->url = '';
|
||||
$item->author= '';
|
||||
$item->updated = '';
|
||||
$item->content = '';
|
||||
$item->enclosure = '';
|
||||
$item->enclosure_type = '';
|
||||
$item->language = $this->language;
|
||||
|
||||
foreach ($namespaces as $name => $url) {
|
||||
|
||||
$namespace = $entry->children($namespaces[$name]);
|
||||
|
||||
if (! $item->author && ! empty($namespace->creator)) $item->author = (string) $namespace->creator;
|
||||
if (! $item->updated && ! empty($namespace->date)) $item->updated = $this->parseDate((string) $namespace->date);
|
||||
if (! $item->updated && ! empty($namespace->updated)) $item->updated = $this->parseDate((string) $namespace->updated);
|
||||
if (! $item->content && ! empty($namespace->encoded)) $item->content = (string) $namespace->encoded;
|
||||
|
||||
// Get FeedBurner original links
|
||||
if (! $item->url && ! empty($namespace->origLink)) $item->url = (string) $namespace->origLink;
|
||||
if (! $item->enclosure && ! empty($namespace->origEnclosureLink)) $item->enclosure = (string) $namespace->origEnclosureLink;
|
||||
}
|
||||
|
||||
if (empty($item->url)) {
|
||||
|
||||
if (isset($entry->link)) {
|
||||
$item->url = (string) $entry->link;
|
||||
}
|
||||
else if (isset($entry->guid)) {
|
||||
$item->url = (string) $entry->guid;
|
||||
if (empty($item->title)) {
|
||||
$item->title = $item->url;
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($item->updated)) $item->updated = $this->parseDate((string) $entry->pubDate) ?: $this->updated;
|
||||
|
||||
if (empty($item->content)) {
|
||||
$item->content = isset($entry->description) ? (string) $entry->description : '';
|
||||
}
|
||||
/**
|
||||
* Find the item author
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $xml Feed
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Item $item Item object
|
||||
*/
|
||||
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
|
||||
{
|
||||
$item->author = $this->getNamespaceValue($entry, $this->namespaces, 'creator');
|
||||
|
||||
if (empty($item->author)) {
|
||||
|
||||
if (isset($entry->author)) {
|
||||
$item->author = (string) $entry->author;
|
||||
}
|
||||
@ -113,37 +164,110 @@ class Rss20 extends \PicoFeed\Parser
|
||||
$item->author = (string) $xml->channel->webMaster;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($entry->guid) && isset($entry->guid['isPermaLink']) && (string) $entry->guid['isPermaLink'] != 'false') {
|
||||
$id = (string) $entry->guid;
|
||||
$item->id = $this->generateId($id !== '' && $id !== $item->url ? $id : $item->url, $this->isExcludedFromId($this->url) ? '' : $this->url);
|
||||
/**
 * Find the item content
 *
 * The namespaced "encoded" tag is preferred, the "description" tag is
 * the fallback. The result goes through filterHtml() with the item url.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemContent(SimpleXMLElement $entry, Item $item)
{
    $raw_content = $this->getNamespaceValue($entry, $this->namespaces, 'encoded');

    if (empty($raw_content)) {
        if ($entry->description->count() > 0) {
            $raw_content = (string) $entry->description;
        }
    }

    $item->content = $this->filterHtml($raw_content, $item->url);
}
|
||||
|
||||
/**
 * Find the item URL
 *
 * Lookup order: namespaced "origLink" tag, "link" tag, then "guid" tag.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 */
public function findItemUrl(SimpleXMLElement $entry, Item $item)
{
    $item->url = $this->getNamespaceValue($entry, $this->namespaces, 'origLink');

    if (! empty($item->url)) {
        return;
    }

    if (isset($entry->link)) {
        $item->url = (string) $entry->link;
        return;
    }

    if (isset($entry->guid)) {
        $item->url = (string) $entry->guid;
    }
}
|
||||
|
||||
/**
 * Generate the item id
 *
 * The id is a hash of the item permalink and of the feed url, the latter
 * being left out when isExcludedFromId() says so. The permalink is the
 * "guid" tag unless it is missing, empty or flagged isPermaLink="false",
 * in which case the item url is used.
 *
 * @access public
 * @param  SimpleXMLElement   $entry   Feed item
 * @param  \PicoFeed\Item     $item    Item object
 * @param  \PicoFeed\Feed     $feed    Feed object
 */
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $guid = $entry->guid->count() > 0 ? (string) $entry->guid : '';

    // An empty guid is ignored, otherwise every item with an empty
    // guid tag would share the same id inside a feed
    if ($guid !== '' && (string) $entry->guid['isPermaLink'] !== 'false') {
        $item_permalink = $guid;
    }
    else {
        $item_permalink = $item->url;
    }

    if ($this->isExcludedFromId($feed->url)) {
        $feed_permalink = '';
    }
    else {
        $feed_permalink = $feed->url;
    }

    // Single place where the id is written. The former intermediate id
    // (always overwritten here) and the title fallback, which does not
    // belong to the id generation, have been removed.
    $item->id = $this->generateId($item_permalink, $feed_permalink);
}
|
||||
|
||||
/**
|
||||
* Find the item enclosure
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Item $item Item object
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
if (isset($entry->enclosure)) {
|
||||
|
||||
if (! $item->enclosure) {
|
||||
$item->enclosure = isset($entry->enclosure['url']) ? (string) $entry->enclosure['url'] : '';
|
||||
$item->enclosure_url = $this->getNamespaceValue($entry->enclosure, $this->namespaces, 'origEnclosureLink');
|
||||
|
||||
if (empty($item->enclosure_url)) {
|
||||
$item->enclosure_url = isset($entry->enclosure['url']) ? (string) $entry->enclosure['url'] : '';
|
||||
}
|
||||
|
||||
$item->enclosure_type = isset($entry->enclosure['type']) ? (string) $entry->enclosure['type'] : '';
|
||||
|
||||
if (\PicoFeed\Filter::isRelativePath($item->enclosure)) {
|
||||
$item->enclosure = \PicoFeed\Filter::getAbsoluteUrl($item->enclosure, $this->url);
|
||||
if (Filter::isRelativePath($item->enclosure_url)) {
|
||||
$item->enclosure_url = Filter::getAbsoluteUrl($item->enclosure_url, $feed->url);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$item->content = $this->filterHtml($item->content, $item->url);
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
\PicoFeed\Logging::log(\get_called_class().': parsing finished ('.count($this->items).' items)');
|
||||
|
||||
return $this;
|
||||
/**
|
||||
* Find the item language
|
||||
*
|
||||
* @access public
|
||||
* @param SimpleXMLElement $entry Feed item
|
||||
* @param \PicoFeed\Item $item Item object
|
||||
* @param \PicoFeed\Feed $feed Feed object
|
||||
*/
|
||||
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$item->language = $feed->language;
|
||||
}
|
||||
}
|
12
vendor/PicoFeed/Parsers/Rss91.php
vendored
12
vendor/PicoFeed/Parsers/Rss91.php
vendored
@ -4,4 +4,14 @@ namespace PicoFeed\Parsers;
|
||||
|
||||
require_once __DIR__.'/Rss20.php';
|
||||
|
||||
class Rss91 extends Rss20 {}
|
||||
use PicoFeed\Parsers\Rss20;
|
||||
|
||||
/**
|
||||
* RSS 0.91 Parser
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
*/
|
||||
class Rss91 extends Rss20
|
||||
{
|
||||
}
|
||||
|
12
vendor/PicoFeed/Parsers/Rss92.php
vendored
12
vendor/PicoFeed/Parsers/Rss92.php
vendored
@ -4,4 +4,14 @@ namespace PicoFeed\Parsers;
|
||||
|
||||
require_once __DIR__.'/Rss20.php';
|
||||
|
||||
class Rss92 extends Rss20 {}
|
||||
use PicoFeed\Parsers\Rss20;
|
||||
|
||||
/**
|
||||
* RSS 0.92 Parser
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
*/
|
||||
class Rss92 extends Rss20
|
||||
{
|
||||
}
|
||||
|
20
vendor/PicoFeed/PicoFeed.php
vendored
Normal file
20
vendor/PicoFeed/PicoFeed.php
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
<?php
|
||||
|
||||
// Include this file if you don't want to use an autoloader
|
||||
|
||||
require __DIR__.'/Config.php';
|
||||
require __DIR__.'/Logging.php';
|
||||
require __DIR__.'/Item.php';
|
||||
require __DIR__.'/Feed.php';
|
||||
require __DIR__.'/Client.php';
|
||||
require __DIR__.'/Filter.php';
|
||||
require __DIR__.'/XmlParser.php';
|
||||
require __DIR__.'/Encoding.php';
|
||||
require __DIR__.'/Grabber.php';
|
||||
require __DIR__.'/Reader.php';
|
||||
require __DIR__.'/Import.php';
|
||||
require __DIR__.'/Export.php';
|
||||
require __DIR__.'/Writer.php';
|
||||
require __DIR__.'/Writers/Rss20.php';
|
||||
require __DIR__.'/Writers/Atom.php';
|
||||
require __DIR__.'/Parser.php';
|
229
vendor/PicoFeed/Reader.php
vendored
229
vendor/PicoFeed/Reader.php
vendored
@ -2,16 +2,19 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
require_once __DIR__.'/Logging.php';
|
||||
require_once __DIR__.'/Parser.php';
|
||||
require_once __DIR__.'/Client.php';
|
||||
require_once __DIR__.'/Filter.php';
|
||||
use DOMXPath;
|
||||
use PicoFeed\Config;
|
||||
use PicoFeed\XmlParser;
|
||||
use PicoFeed\Logging;
|
||||
use PicoFeed\Filter;
|
||||
use PicoFeed\Client;
|
||||
use PicoFeed\Parser;
|
||||
|
||||
/**
|
||||
* Reader class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package parser
|
||||
* @package picofeed
|
||||
*/
|
||||
class Reader
|
||||
{
|
||||
@ -39,19 +42,24 @@ class Reader
|
||||
*/
|
||||
private $encoding = '';
|
||||
|
||||
/**
|
||||
* Config class instance
|
||||
*
|
||||
* @access private
|
||||
* @var \PicoFeed\Config
|
||||
*/
|
||||
private $config = null;
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @access public
|
||||
* @param string $content Feed content
|
||||
* @param string $encoding Feed encoding
|
||||
* @return Reader
|
||||
* @param \PicoFeed\Config $config Config class instance
|
||||
*/
|
||||
public function __construct($content = '', $encoding = '')
|
||||
public function __construct(Config $config = null)
|
||||
{
|
||||
$this->content = $content;
|
||||
$this->encoding = '';
|
||||
return $this;
|
||||
$this->config = $config ?: new Config;
|
||||
Logging::setTimezone($this->config->getTimezone());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -61,52 +69,53 @@ class Reader
|
||||
* @param string $url Feed content
|
||||
* @param string $last_modified Last modified HTTP header
|
||||
* @param string $etag Etag HTTP header
|
||||
* @param string $timeout Client connection timeout
|
||||
* @param string $user_agent HTTP user-agent
|
||||
* @return Client
|
||||
* @return \PicoFeed\Client
|
||||
*/
|
||||
public function download($url, $last_modified = '', $etag = '', $timeout = 5, $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)')
|
||||
public function download($url, $last_modified = '', $etag = '')
|
||||
{
|
||||
if (strpos($url, 'http') !== 0) {
|
||||
|
||||
$url = 'http://'.$url;
|
||||
}
|
||||
|
||||
$client = Client::create();
|
||||
$client->url = $url;
|
||||
$client->timeout = $timeout;
|
||||
$client->user_agent = $user_agent;
|
||||
$client->last_modified = $last_modified;
|
||||
$client->etag = $etag;
|
||||
$client->execute();
|
||||
$client = Client::getInstance();
|
||||
$client->setTimeout($this->config->getClientTimeout())
|
||||
->setUserAgent($this->config->getClientUserAgent())
|
||||
->setMaxRedirections($this->config->getMaxRedirections())
|
||||
->setMaxBodySize($this->config->getMaxBodySize())
|
||||
->setProxyHostname($this->config->getProxyHostname())
|
||||
->setProxyPort($this->config->getProxyPort())
|
||||
->setProxyUsername($this->config->getProxyUsername())
|
||||
->setProxyPassword($this->config->getProxyPassword())
|
||||
->setLastModified($last_modified)
|
||||
->setEtag($etag);
|
||||
|
||||
if ($client->execute($url)) {
|
||||
$this->content = $client->getContent();
|
||||
$this->url = $client->getUrl();
|
||||
$this->encoding = $client->getEncoding();
|
||||
}
|
||||
|
||||
return $client;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the download content
|
||||
* Get a parser instance with a custom config
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
* @param string $name Parser name
|
||||
* @return \PicoFeed\Parser
|
||||
*/
|
||||
public function getContent()
|
||||
public function getParserInstance($name)
|
||||
{
|
||||
return $this->content;
|
||||
}
|
||||
require_once __DIR__.'/Parsers/'.ucfirst($name).'.php';
|
||||
$name = '\PicoFeed\Parsers\\'.$name;
|
||||
|
||||
/**
|
||||
* Get finale URL
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getUrl()
|
||||
{
|
||||
return $this->url;
|
||||
$parser = new $name($this->content, $this->encoding);
|
||||
$parser->setHashAlgo($this->config->getParserHashAlgo());
|
||||
$parser->setTimezone($this->config->getTimezone());
|
||||
$parser->setConfig($this->config);
|
||||
|
||||
return $parser;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -138,6 +147,31 @@ class Reader
|
||||
return substr($data, $open_tag, $close_tag);
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect the feed format
|
||||
*
|
||||
* @access public
|
||||
* @param string $parser_name Parser name
|
||||
* @param string $haystack First XML tag
|
||||
* @param array $needles List of strings that need to be there
|
||||
* @return mixed False on failure or Parser instance
|
||||
*/
|
||||
public function detectFormat($parser_name, $haystack, array $needles)
{
    // Count the needles that are absent from the haystack; every needle is checked
    $missing = 0;

    foreach ($needles as $needle) {
        if (strpos($haystack, $needle) === false) {
            $missing++;
        }
    }

    // A single missing needle is enough to reject this format
    if ($missing > 0) {
        return false;
    }

    Logging::setMessage(get_called_class().': Format detected => '.$parser_name);

    return $this->getParserInstance($parser_name);
}
|
||||
|
||||
/**
|
||||
* Discover feed format and return a parser instance
|
||||
*
|
||||
@ -147,66 +181,44 @@ class Reader
|
||||
*/
|
||||
public function getParser($discover = false)
|
||||
{
|
||||
$formats = array(
|
||||
array('parser' => 'Atom', 'needles' => array('<feed')),
|
||||
array('parser' => 'Rss20', 'needles' => array('<rss', '2.0')),
|
||||
array('parser' => 'Rss92', 'needles' => array('<rss', '0.92')),
|
||||
array('parser' => 'Rss91', 'needles' => array('<rss', '0.91')),
|
||||
array('parser' => 'Rss10', 'needles' => array('<rdf:', 'xmlns="http://purl.org/rss/1.0/"')),
|
||||
);
|
||||
|
||||
$first_tag = $this->getFirstTag($this->content);
|
||||
|
||||
if (strpos($first_tag, '<feed') !== false) {
|
||||
foreach ($formats as $format) {
|
||||
|
||||
Logging::log(\get_called_class().': discover Atom feed');
|
||||
$parser = $this->detectFormat($format['parser'], $first_tag, $format['needles']);
|
||||
|
||||
require_once __DIR__.'/Parsers/Atom.php';
|
||||
return new Parsers\Atom($this->content, $this->encoding);
|
||||
if ($parser !== false) {
|
||||
return $parser;
|
||||
}
|
||||
else if (strpos($first_tag, '<rss') !== false &&
|
||||
(strpos($first_tag, 'version="2.0"') !== false || strpos($first_tag, 'version=\'2.0\'') !== false)) {
|
||||
|
||||
Logging::log(\get_called_class().': discover RSS 2.0 feed');
|
||||
|
||||
require_once __DIR__.'/Parsers/Rss20.php';
|
||||
return new Parsers\Rss20($this->content, $this->encoding);
|
||||
}
|
||||
else if (strpos($first_tag, '<rss') !== false &&
|
||||
(strpos($first_tag, 'version="0.92"') !== false || strpos($first_tag, 'version=\'0.92\'') !== false)) {
|
||||
|
||||
Logging::log(\get_called_class().': discover RSS 0.92 feed');
|
||||
if ($discover === true) {
|
||||
|
||||
require_once __DIR__.'/Parsers/Rss92.php';
|
||||
return new Parsers\Rss92($this->content, $this->encoding);
|
||||
}
|
||||
else if (strpos($first_tag, '<rss') !== false &&
|
||||
(strpos($first_tag, 'version="0.91"') !== false || strpos($first_tag, 'version=\'0.91\'') !== false)) {
|
||||
|
||||
Logging::log(\get_called_class().': discover RSS 0.91 feed');
|
||||
|
||||
require_once __DIR__.'/Parsers/Rss91.php';
|
||||
return new Parsers\Rss91($this->content, $this->encoding);
|
||||
}
|
||||
else if (strpos($first_tag, '<rdf:') !== false && strpos($first_tag, 'xmlns="http://purl.org/rss/1.0/"') !== false) {
|
||||
|
||||
Logging::log(\get_called_class().': discover RSS 1.0 feed');
|
||||
|
||||
require_once __DIR__.'/Parsers/Rss10.php';
|
||||
return new Parsers\Rss10($this->content, $this->encoding);
|
||||
}
|
||||
else if ($discover === true) {
|
||||
|
||||
Logging::log(\get_called_class().': Format not supported or malformed');
|
||||
Logging::log(\get_called_class().':'.PHP_EOL.$this->content);
|
||||
Logging::setMessage(get_called_class().': Format not supported or feed malformed');
|
||||
Logging::setMessage(get_called_class().': Content => '.PHP_EOL.$this->content);
|
||||
|
||||
return false;
|
||||
}
|
||||
else if ($this->discover()) {
|
||||
|
||||
return $this->getParser(true);
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().': Subscription not found');
|
||||
Logging::log(\get_called_class().': Content => '.PHP_EOL.$this->content);
|
||||
Logging::setMessage(get_called_class().': Subscription not found');
|
||||
Logging::setMessage(get_called_class().': Content => '.PHP_EOL.$this->content);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Discover feed url inside a HTML document and download the feed
|
||||
* Discover the feed url inside a HTML document and download the feed
|
||||
*
|
||||
* @access public
|
||||
* @return boolean
|
||||
@ -214,18 +226,13 @@ class Reader
|
||||
public function discover()
|
||||
{
|
||||
if (! $this->content) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().': Try to discover a subscription');
|
||||
Logging::setMessage(get_called_class().': Try to discover a subscription');
|
||||
|
||||
\libxml_use_internal_errors(true);
|
||||
|
||||
$dom = new \DOMDocument;
|
||||
$dom->loadHTML($this->content);
|
||||
|
||||
$xpath = new \DOMXPath($dom);
|
||||
$dom = XmlParser::getHtmlDocument($this->content);
|
||||
$xpath = new DOMXPath($dom);
|
||||
|
||||
$queries = array(
|
||||
"//link[@type='application/atom+xml']",
|
||||
@ -251,7 +258,7 @@ class Reader
|
||||
$link = $this->url.$link;
|
||||
}
|
||||
|
||||
Logging::log(\get_called_class().': Find subscription link: '.$link);
|
||||
Logging::setMessage(get_called_class().': Find subscription link: '.$link);
|
||||
$this->download($link);
|
||||
|
||||
return true;
|
||||
@ -261,4 +268,52 @@ class Reader
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the downloaded content
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getContent()
|
||||
{
|
||||
return $this->content;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the page content
|
||||
*
|
||||
* @access public
|
||||
* @param string $content Page content
|
||||
* @return \PicoFeed\Reader
|
||||
*/
|
||||
public function setContent($content)
|
||||
{
|
||||
$this->content = $content;
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get final URL
|
||||
*
|
||||
* @access public
|
||||
* @return string
|
||||
*/
|
||||
public function getUrl()
|
||||
{
|
||||
return $this->url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the URL
|
||||
*
|
||||
* @access public
|
||||
* @param string $url URL
|
||||
* @return \PicoFeed\Reader
|
||||
*/
|
||||
public function setUrl($url)
|
||||
{
|
||||
$this->url = $url;
|
||||
return $this;
|
||||
}
|
||||
}
|
||||
|
10
vendor/PicoFeed/Rules/journaldugeek.com.php
vendored
Normal file
10
vendor/PicoFeed/Rules/journaldugeek.com.php
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
<?php
|
||||
// Rule set for journaldugeek.com: XPath queries selecting the article body
// and the nodes to strip from it.
return array(
    // Sample article URL for this rule (the host name was missing)
    'test_url' => 'http://www.journaldugeek.com/2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/',
    'body' => array(
        '//div[@class="post-content"]',
    ),
    'strip' => array(
        '//style'
    )
);
|
45
vendor/PicoFeed/Writer.php
vendored
45
vendor/PicoFeed/Writer.php
vendored
@ -2,21 +2,54 @@
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use RuntimeException;
|
||||
|
||||
/**
|
||||
* Base writer class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
abstract class Writer
|
||||
{
|
||||
/**
|
||||
* Dom object
|
||||
*
|
||||
* @access protected
|
||||
* @var DomDocument
|
||||
*/
|
||||
protected $dom;
|
||||
|
||||
/**
|
||||
* Items
|
||||
*
|
||||
* @access public
|
||||
* @var array
|
||||
*/
|
||||
public $items = array();
|
||||
|
||||
|
||||
/**
|
||||
* Generate the XML document
|
||||
*
|
||||
* @abstract
|
||||
* @access public
|
||||
* @param string $filename Optional filename
|
||||
* @return string
|
||||
*/
|
||||
abstract public function execute($filename = '');
|
||||
|
||||
|
||||
public function checkRequiredProperties($properties, $container)
|
||||
/**
|
||||
* Check required properties to generate the output
|
||||
*
|
||||
* @access public
|
||||
* @param array $properties List of properties
|
||||
* @param mixed $container Object or array container
|
||||
*/
|
||||
public function checkRequiredProperties(array $properties, $container)
|
||||
{
|
||||
foreach ($properties as $property) {
|
||||
|
||||
if ((is_object($container) && ! isset($container->$property)) || (is_array($container) && ! isset($container[$property]))) {
|
||||
|
||||
throw new \RuntimeException('Required property missing: '.$property);
|
||||
throw new RuntimeException('Required property missing: '.$property);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
69
vendor/PicoFeed/Writers/Atom.php
vendored
69
vendor/PicoFeed/Writers/Atom.php
vendored
@ -2,32 +2,59 @@
|
||||
|
||||
namespace PicoFeed\Writers;
|
||||
|
||||
require_once __DIR__.'/../Writer.php';
|
||||
use DomDocument;
|
||||
use DomElement;
|
||||
use DomAttr;
|
||||
use PicoFeed\Writer;
|
||||
|
||||
class Atom extends \PicoFeed\Writer
|
||||
/**
|
||||
* Atom writer class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Atom extends Writer
|
||||
{
|
||||
/**
|
||||
* List of required properties for each feed
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_feed_properties = array(
|
||||
'title',
|
||||
'site_url',
|
||||
'feed_url',
|
||||
);
|
||||
|
||||
/**
|
||||
* List of required properties for each item
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_item_properties = array(
|
||||
'title',
|
||||
'url',
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Get the Atom document
|
||||
*
|
||||
* @access public
|
||||
* @param string $filename Optional filename
|
||||
* @return string
|
||||
*/
|
||||
public function execute($filename = '')
|
||||
{
|
||||
$this->checkRequiredProperties($this->required_feed_properties, $this);
|
||||
|
||||
$this->dom = new \DomDocument('1.0', 'UTF-8');
|
||||
$this->dom = new DomDocument('1.0', 'UTF-8');
|
||||
$this->dom->formatOutput = true;
|
||||
|
||||
// <feed/>
|
||||
$feed = $this->dom->createElement('feed');
|
||||
$feed->setAttributeNodeNS(new \DomAttr('xmlns', 'http://www.w3.org/2005/Atom'));
|
||||
$feed->setAttributeNodeNS(new DomAttr('xmlns', 'http://www.w3.org/2005/Atom'));
|
||||
|
||||
// <generator/>
|
||||
$generator = $this->dom->createElement('generator', 'PicoFeed');
|
||||
@ -115,8 +142,16 @@ class Atom extends \PicoFeed\Writer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public function addLink($xml, $url, $rel = 'alternate', $type = 'text/html')
|
||||
/**
|
||||
* Add Link
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param string $url URL
|
||||
* @param string $rel Link rel attribute
|
||||
* @param string $type Link type attribute
|
||||
*/
|
||||
public function addLink(DomElement $xml, $url, $rel = 'alternate', $type = 'text/html')
|
||||
{
|
||||
$link = $this->dom->createElement('link');
|
||||
$link->setAttribute('rel', $rel);
|
||||
@ -125,8 +160,14 @@ class Atom extends \PicoFeed\Writer
|
||||
$xml->appendChild($link);
|
||||
}
|
||||
|
||||
|
||||
public function addUpdated($xml, $value = '')
|
||||
/**
|
||||
* Add publication date
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param string $value Timestamp
|
||||
*/
|
||||
public function addUpdated(DomElement $xml, $value = '')
|
||||
{
|
||||
$xml->appendChild($this->dom->createElement(
|
||||
'updated',
|
||||
@ -134,8 +175,14 @@ class Atom extends \PicoFeed\Writer
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
public function addAuthor($xml, array $values)
|
||||
/**
|
||||
* Add author
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param array $values Author name and email
|
||||
*/
|
||||
public function addAuthor(DomElement $xml, array $values)
|
||||
{
|
||||
$author = $this->dom->createElement('author');
|
||||
|
||||
|
60
vendor/PicoFeed/Writers/Rss20.php
vendored
60
vendor/PicoFeed/Writers/Rss20.php
vendored
@ -2,34 +2,61 @@
|
||||
|
||||
namespace PicoFeed\Writers;
|
||||
|
||||
require_once __DIR__.'/../Writer.php';
|
||||
use DomDocument;
|
||||
use DomAttr;
|
||||
use DomElement;
|
||||
use PicoFeed\Writer;
|
||||
|
||||
class Rss20 extends \PicoFeed\Writer
|
||||
/**
|
||||
* Rss 2.0 writer class
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class Rss20 extends Writer
|
||||
{
|
||||
/**
|
||||
* List of required properties for each feed
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_feed_properties = array(
|
||||
'title',
|
||||
'site_url',
|
||||
'feed_url',
|
||||
);
|
||||
|
||||
/**
|
||||
* List of required properties for each item
|
||||
*
|
||||
* @access private
|
||||
* @var array
|
||||
*/
|
||||
private $required_item_properties = array(
|
||||
'title',
|
||||
'url',
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Get the Rss 2.0 document
|
||||
*
|
||||
* @access public
|
||||
* @param string $filename Optional filename
|
||||
* @return string
|
||||
*/
|
||||
public function execute($filename = '')
|
||||
{
|
||||
$this->checkRequiredProperties($this->required_feed_properties, $this);
|
||||
|
||||
$this->dom = new \DomDocument('1.0', 'UTF-8');
|
||||
$this->dom = new DomDocument('1.0', 'UTF-8');
|
||||
$this->dom->formatOutput = true;
|
||||
|
||||
// <rss/>
|
||||
$rss = $this->dom->createElement('rss');
|
||||
$rss->setAttribute('version', '2.0');
|
||||
$rss->setAttributeNodeNS(new \DomAttr('xmlns:content', 'http://purl.org/rss/1.0/modules/content/'));
|
||||
$rss->setAttributeNodeNS(new \DomAttr('xmlns:atom', 'http://www.w3.org/2005/Atom'));
|
||||
$rss->setAttributeNodeNS(new DomAttr('xmlns:content', 'http://purl.org/rss/1.0/modules/content/'));
|
||||
$rss->setAttributeNodeNS(new DomAttr('xmlns:atom', 'http://www.w3.org/2005/Atom'));
|
||||
|
||||
$channel = $this->dom->createElement('channel');
|
||||
|
||||
@ -130,8 +157,14 @@ class Rss20 extends \PicoFeed\Writer
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public function addPubDate($xml, $value = '')
|
||||
/**
|
||||
* Add publication date
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param string $value Timestamp
|
||||
*/
|
||||
public function addPubDate(DomElement $xml, $value = '')
|
||||
{
|
||||
$xml->appendChild($this->dom->createElement(
|
||||
'pubDate',
|
||||
@ -139,8 +172,15 @@ class Rss20 extends \PicoFeed\Writer
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
public function addAuthor($xml, $tag, array $values)
|
||||
/**
|
||||
* Add author
|
||||
*
|
||||
* @access public
|
||||
* @param DomElement $xml XML node
|
||||
* @param string $tag Tag name
|
||||
* @param array $values Author name and email
|
||||
*/
|
||||
public function addAuthor(DomElement $xml, $tag, array $values)
|
||||
{
|
||||
$value = '';
|
||||
|
||||
|
136
vendor/PicoFeed/XmlParser.php
vendored
Normal file
136
vendor/PicoFeed/XmlParser.php
vendored
Normal file
@ -0,0 +1,136 @@
|
||||
<?php
|
||||
|
||||
namespace PicoFeed;
|
||||
|
||||
use DomDocument;
|
||||
use SimpleXmlElement;
|
||||
|
||||
/**
|
||||
* XML parser class
|
||||
*
|
||||
* Checks for XML eXternal Entity (XXE) and XML Entity Expansion (XEE) attacks on XML documents
|
||||
*
|
||||
* @author Frederic Guillot
|
||||
* @package picofeed
|
||||
*/
|
||||
class XmlParser
{
    /**
     * Get a SimpleXmlElement instance or return false
     *
     * @static
     * @access public
     * @param  string   $input   XML content
     * @return mixed             SimpleXmlElement on success, false when the document is rejected or cannot be imported
     */
    public static function getSimpleXml($input)
    {
        $dom = self::getDomDocument($input);

        if ($dom === false) {
            return false;
        }

        $simplexml = simplexml_import_dom($dom);

        return $simplexml instanceof SimpleXmlElement ? $simplexml : false;
    }

    /**
     * Get a DomDocument instance or return false
     *
     * Returns false when the input is empty, when the parsed document has no
     * child node, or when the document type declares entities.
     *
     * @static
     * @access public
     * @param  string   $input   XML content
     * @return mixed             DomDocument on success, false otherwise
     */
    public static function getDomDocument($input)
    {
        // Nothing to parse: loadXml() raises a warning (a ValueError on PHP 8) for an empty string
        if (empty($input)) {
            return false;
        }

        $previous_loader_state = null;

        if (substr(php_sapi_name(), 0, 3) === 'fpm') {

            // If running with PHP-FPM and an entity is detected we refuse to parse the feed
            // @see https://bugs.php.net/bug.php?id=64938
            if (strpos($input, '<!ENTITY') !== false) {
                return false;
            }
        }
        else if (PHP_VERSION_ID < 80000) {

            // Deprecated since PHP 8.0, where external entity loading is already off by default.
            // Keep the previous state to restore it: the setting is process-wide.
            $previous_loader_state = libxml_disable_entity_loader(true);
        }

        libxml_use_internal_errors(true);

        $dom = new DomDocument;
        $dom->loadXml($input, LIBXML_NONET);

        // Do not leave the entity loader disabled for unrelated code running afterwards
        if ($previous_loader_state !== null) {
            libxml_disable_entity_loader($previous_loader_state);
        }

        // The document is empty, there is probably some parsing errors
        if ($dom->childNodes->length === 0) {
            return false;
        }

        // Scan for potential XEE attacks using ENTITY
        foreach ($dom->childNodes as $child) {
            if ($child->nodeType === XML_DOCUMENT_TYPE_NODE && $child->entities->length > 0) {
                return false;
            }
        }

        return $dom;
    }

    /**
     * Load a HTML document by using a DomDocument instance
     *
     * A DomDocument is always returned; it stays empty when the input is empty.
     *
     * @static
     * @access public
     * @param  string   $input   HTML content
     * @return DomDocument
     */
    public static function getHtmlDocument($input)
    {
        libxml_use_internal_errors(true);

        $dom = new DomDocument;

        // loadHTML() raises a warning (a ValueError on PHP 8) for an empty string
        if ((string) $input === '') {
            return $dom;
        }

        // The options argument of loadHTML() is only available since PHP 5.4
        if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
            $dom->loadHTML($input, LIBXML_NONET);
        }
        else {
            $dom->loadHTML($input);
        }

        return $dom;
    }

    /**
     * Get XML parser errors
     *
     * Formats the errors currently reported by libxml_get_errors().
     *
     * @static
     * @access public
     * @return string   Comma separated list of errors, empty string when there is none
     */
    public static function getErrors()
    {
        $errors = array();

        foreach (libxml_get_errors() as $error) {

            $errors[] = sprintf('XML error: %s (Line: %d - Column: %d - Code: %d)',
                $error->message,
                $error->line,
                $error->column,
                $error->code
            );
        }

        return implode(', ', $errors);
    }
}
|
Loading…
Reference in New Issue
Block a user