diff --git a/common.php b/common.php index d815135..78289f5 100644 --- a/common.php +++ b/common.php @@ -12,7 +12,7 @@ require 'model.php'; if (file_exists('config.php')) require 'config.php'; defined('APP_VERSION') or define('APP_VERSION', 'master'); -defined('HTTP_TIMEOUT') or define('HTTP_TIMEOUT', 10); +defined('HTTP_TIMEOUT') or define('HTTP_TIMEOUT', 20); defined('DB_FILENAME') or define('DB_FILENAME', 'data/db.sqlite'); defined('DEBUG') or define('DEBUG', true); defined('DEBUG_FILENAME') or define('DEBUG_FILENAME', 'data/debug.log'); diff --git a/index.php b/index.php index 4cad2fe..a6eaab2 100644 --- a/index.php +++ b/index.php @@ -322,6 +322,38 @@ Router\get_action('refresh-all', function() { }); +// Disable content grabber for a feed +Router\get_action('disable-grabber-feed', function() { + + $id = Request\int_param('feed_id'); + + if ($id && Model\disable_grabber_feed($id)) { + Session\flash(t('The content grabber is disabled successfully.')); + } + else { + Session\flash_error(t('Unable to disable the content grabber for this subscription.')); + } + + Response\redirect('?action=feeds'); +}); + + +// Enable content grabber for a feed +Router\get_action('enable-grabber-feed', function() { + + $id = Request\int_param('feed_id'); + + if ($id && Model\enable_grabber_feed($id)) { + Session\flash(t('The content grabber is enabled successfully.')); + } + else { + Session\flash_error(t('Unable to activate the content grabber for this subscription.')); + } + + Response\redirect('?action=feeds'); +}); + + // Confirmation box to disable a feed Router\get_action('confirm-disable-feed', function() { @@ -467,7 +499,7 @@ Router\get_action('add', function() { // Add the feed Router\post_action('add', function() { - $result = Model\import_feed(trim($_POST['url'])); + $result = Model\import_feed(trim($_POST['url']), isset($_POST['download_content']) && $_POST['download_content'] == 1); if ($result) { @@ -590,11 +622,9 @@ Router\post_action('config', function() { if 
($valid) { if (Model\save_config($values)) { - Session\flash(t('Your preferences are updated.')); } else { - Session\flash_error(t('Unable to update your preferences.')); } diff --git a/locales/fr_FR/translations.php b/locales/fr_FR/translations.php index c27c684..3798320 100644 --- a/locales/fr_FR/translations.php +++ b/locales/fr_FR/translations.php @@ -1,6 +1,13 @@ 'Le téléchargement de contenu est activé avec succès.', + 'Unable to activate the content grabber for this subscription.' => 'Impossible d\'activer le téléchargement de contenu pour cet abonnement.', + 'enable full content' => 'télécharger le contenu complet', + 'disable full content' => 'désactiver le téléchargement du contenu', + 'Download full content' => 'Télécharger le contenu complet', + 'Downloading full content is slower because Miniflux grab the content from the original website. You should use that for subscriptions that display only a summary. This feature doesn\'t work with all websites.' => + 'Le téléchargement complet du contenu est plus lent car Miniflux va chercher le contenu sur le site original. Vous devriez utiliser cela uniquement pour les abonnements qui affichent seulement un résumé. 
Cette fonctionnalité ne marche pas avec tous les sites web.', 'No message' => 'Aucun message', 'flush messages' => 'supprimer les messages', 'API endpoint:' => 'URL de l\'API : ', diff --git a/model.php b/model.php index 6d3087b..a53c636 100644 --- a/model.php +++ b/model.php @@ -2,7 +2,6 @@ namespace Model; -require_once 'vendor/PicoFeed/Encoding.php'; require_once 'vendor/PicoFeed/Filter.php'; require_once 'vendor/PicoFeed/Client.php'; require_once 'vendor/PicoFeed/Export.php'; @@ -25,8 +24,9 @@ use PicoFeed\Reader; use PicoFeed\Export; -const DB_VERSION = 14; +const DB_VERSION = 15; const HTTP_USERAGENT = 'Miniflux - http://miniflux.net'; +const HTTP_FAKE_USERAGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36'; const LIMIT_ALL = -1; @@ -169,7 +169,7 @@ function import_feeds($content) } -function import_feed($url) +function import_feed($url, $grabber = false) { $reader = new Reader; $resource = $reader->download($url, '', '', HTTP_TIMEOUT, HTTP_USERAGENT); @@ -178,6 +178,7 @@ function import_feed($url) if ($parser !== false) { + $parser->grabber = $grabber; $feed = $parser->execute(); if ($feed === false || ! $feed->title || ! $feed->url) { @@ -193,13 +194,14 @@ function import_feed($url) $rs = $db->table('feeds')->save(array( 'title' => $feed->title, 'site_url' => $feed->url, - 'feed_url' => $reader->getUrl() + 'feed_url' => $reader->getUrl(), + 'download_content' => $grabber ? 
1 : 0 )); if ($rs) { $feed_id = $db->getConnection()->getLastId(); - update_items($feed_id, $feed->items); + update_items($feed_id, $feed->items, $grabber); write_debug(); return (int) $feed_id; @@ -255,12 +257,25 @@ function update_feed($feed_id) if ($parser !== false) { - $feed = $parser->execute(); + if ($feed['download_content']) { - if ($feed !== false) { + // Don't fetch previous items, only new ones + $parser->grabber_ignore_urls = \PicoTools\singleton('db') + ->table('items') + ->eq('feed_id', $feed_id) + ->findAllByColumn('url'); + + $parser->grabber = true; + $parser->grabber_timeout = HTTP_TIMEOUT; + $parser->grabber_user_agent = HTTP_FAKE_USERAGENT; + } + + $result = $parser->execute(); + + if ($result !== false) { update_feed_cache_infos($feed_id, $resource->getLastModified(), $resource->getEtag()); - update_items($feed_id, $feed->items); + update_items($feed_id, $result->items, $parser->grabber); write_debug(); return true; @@ -349,52 +364,82 @@ function update_feed_cache_infos($feed_id, $last_modified, $etag) } -function download_item($item_id) +function parse_content_with_readability($content, $url) { require_once 'vendor/Readability/Readability.php'; - - $item = get_item($item_id); - - $client = \PicoFeed\Client::create(); - $client->url = $item['url']; - $client->timeout = HTTP_TIMEOUT; - $client->user_agent = HTTP_USERAGENT; - $client->execute(); - - $content = $client->getContent(); + require_once 'vendor/PicoFeed/Encoding.php'; if (!
empty($content)) { $content = \PicoFeed\Encoding::toUTF8($content); - - $readability = new \Readability($content, $item['url']); + $readability = new \Readability($content, $url); if ($readability->init()) { - - // Get relevant content - $content = $readability->getContent()->innerHTML; - - // Filter content - $filter = new \PicoFeed\Filter($content, $item['url']); - $content = $filter->execute(); - - $nocontent = (bool) get_config_value('nocontent'); - if ($nocontent === false) { - - // Save content - \PicoTools\singleton('db') - ->table('items') - ->eq('id', $item['id']) - ->save(array('content' => $content)); - } - - return array( - 'result' => true, - 'content' => $content - ); + return $readability->getContent()->innerHTML; } } + return ''; +} + + +function download_content($url) +{ + require_once 'vendor/PicoFeed/Grabber.php'; + + $client = \PicoFeed\Client::create(); + $client->url = $url; + $client->timeout = HTTP_TIMEOUT; + $client->user_agent = HTTP_FAKE_USERAGENT; + $client->execute(); + + $html = $client->getContent(); + + if (! empty($html)) { + + // Try the PicoFeed grabber first, then fall back to Readability + $grabber = new \PicoFeed\Grabber($url); + $grabber->html = $html; + + if ($grabber->parse()) { + $content = $grabber->content; + } + + if (empty($content)) { + $content = parse_content_with_readability($html, $url); + } + + // Filter content + $filter = new \PicoFeed\Filter($content, $url); + return $filter->execute(); + } + + return ''; +} + + +function download_item($item_id) +{ + $item = get_item($item_id); + $content = download_content($item['url']); + + if (! empty($content)) { + + if (!
get_config_value('nocontent')) { + + // Save content + \PicoTools\singleton('db') + ->table('items') + ->eq('id', $item['id']) + ->save(array('content' => $content)); + } + + return array( + 'result' => true, + 'content' => $content + ); + } + return array( 'result' => false, 'content' => '' @@ -427,6 +472,18 @@ function disable_feed($feed_id) } +function enable_grabber_feed($feed_id) +{ + return \PicoTools\singleton('db')->table('feeds')->eq('id', $feed_id)->save((array('download_content' => 1))); +} + + +function disable_grabber_feed($feed_id) +{ + return \PicoTools\singleton('db')->table('feeds')->eq('id', $feed_id)->save((array('download_content' => 0))); +} + + function get_items($status, $offset = null, $limit = null) { return \PicoTools\singleton('db') @@ -727,7 +784,7 @@ function autoflush() } -function update_items($feed_id, array $items) +function update_items($feed_id, array $items, $grabber = false) { $nocontent = (bool) get_config_value('nocontent'); @@ -744,6 +801,10 @@ function update_items($feed_id, array $items) // Insert only new item if ($db->table('items')->eq('id', $item->id)->count() !== 1) { + if (! $item->content && ! $nocontent && $grabber) { + $item->content = download_content($item->url); + } + $db->table('items')->save(array( 'id' => $item->id, 'title' => $item->title, diff --git a/schema.php b/schema.php index f953050..4e0de33 100644 --- a/schema.php +++ b/schema.php @@ -3,6 +3,12 @@ namespace Schema; +function version_15($pdo) +{ + $pdo->exec('ALTER TABLE feeds ADD COLUMN download_content INTEGER DEFAULT 0'); +} + + function version_14($pdo) { $pdo->exec('ALTER TABLE config ADD COLUMN feed_token TEXT DEFAULT "'.\Model\generate_token().'"'); diff --git a/templates/add.php b/templates/add.php index 2b51fe7..291f9a8 100644 --- a/templates/add.php +++ b/templates/add.php @@ -10,6 +10,8 @@