Do not re-download items when the content scraper is enabled

This commit is contained in:
Frederic Guillot 2017-01-15 19:53:11 -05:00
parent d9295207b0
commit 13a6d0797f
2 changed files with 29 additions and 9 deletions

View File

@ -10,7 +10,7 @@ use PicoFeed\Logging\Logger;
use PicoFeed\Reader\Favicon; use PicoFeed\Reader\Favicon;
use PicoFeed\Reader\Reader; use PicoFeed\Reader\Reader;
function fetch_feed($url, $download_content = false, $etag = '', $last_modified = '') function fetch_feed($url, $download_content = false, $etag = '', $last_modified = '', array $item_urls = array())
{ {
$error_message = ''; $error_message = '';
$feed = null; $feed = null;
@ -29,6 +29,7 @@ function fetch_feed($url, $download_content = false, $etag = '', $last_modified
if ($download_content) { if ($download_content) {
$parser->enableContentGrabber(); $parser->enableContentGrabber();
$parser->setGrabberIgnoreUrls($item_urls);
} }
$feed = $parser->execute(); $feed = $parser->execute();
@ -100,16 +101,22 @@ function create_feed($user_id, $url, $download_content = false, $rtl = false, $c
function update_feed($user_id, $feed_id) function update_feed($user_id, $feed_id)
{ {
$subscription = Model\Feed\get_feed($user_id, $feed_id); $subscription = Model\Feed\get_feed($user_id, $feed_id);
$item_urls = array();
if ($subscription['enabled'] == 0) { if ($subscription['enabled'] == 0) {
return false; return false;
} }
if ($subscription['download_content']) {
$item_urls = Model\Item\get_item_urls($user_id, $feed_id);
}
list($feed, $resource, $error_message) = fetch_feed( list($feed, $resource, $error_message) = fetch_feed(
$subscription['feed_url'], $subscription['feed_url'],
(bool) $subscription['download_content'], (bool) $subscription['download_content'],
$subscription['etag'], $subscription['etag'],
$subscription['last_modified'] $subscription['last_modified'],
$item_urls
); );
if (! empty($error_message)) { if (! empty($error_message)) {
@ -142,7 +149,7 @@ function update_feed($user_id, $feed_id)
} }
if ($feed !== null) { if ($feed !== null) {
Model\Item\update_feed_items($user_id, $feed_id, $feed->getItems(), $subscription['rtl']); Model\Item\update_feed_items($user_id, $feed_id, $feed->getItems(), $subscription['rtl'], $item_urls);
fetch_favicon($feed_id, $feed->getSiteUrl(), $feed->getIcon()); fetch_favicon($feed_id, $feed->getSiteUrl(), $feed->getIcon());
} }

View File

@ -3,7 +3,6 @@
namespace Miniflux\Model\Item; namespace Miniflux\Model\Item;
use PicoDb\Database; use PicoDb\Database;
use Miniflux\Model\Feed;
use Miniflux\Model\Group; use Miniflux\Model\Group;
use Miniflux\Handler; use Miniflux\Handler;
use Miniflux\Helper; use Miniflux\Helper;
@ -62,7 +61,7 @@ function change_item_ids_status($user_id, array $item_ids, $status)
->update(array('status' => $status)); ->update(array('status' => $status));
} }
function update_feed_items($user_id, $feed_id, array $items, $rtl = false) function update_feed_items($user_id, $feed_id, array $items, $rtl = false, array $ignore_urls = array())
{ {
$items_in_feed = array(); $items_in_feed = array();
$db = Database::getInstance('db'); $db = Database::getInstance('db');
@ -71,6 +70,7 @@ function update_feed_items($user_id, $feed_id, array $items, $rtl = false)
foreach ($items as $item) { foreach ($items as $item) {
if ($item->getId() && $item->getUrl()) { if ($item->getId() && $item->getUrl()) {
$item_id = get_item_id_from_checksum($feed_id, $item->getId()); $item_id = get_item_id_from_checksum($feed_id, $item->getId());
$values = array( $values = array(
'title' => $item->getTitle(), 'title' => $item->getTitle(),
'url' => $item->getUrl(), 'url' => $item->getUrl(),
@ -84,6 +84,10 @@ function update_feed_items($user_id, $feed_id, array $items, $rtl = false)
); );
if ($item_id > 0) { if ($item_id > 0) {
if (in_array($item->getUrl(), $ignore_urls)) {
unset($values['content']);
}
$db $db
->table(TABLE) ->table(TABLE)
->eq('user_id', $user_id) ->eq('user_id', $user_id)
@ -227,7 +231,7 @@ function get_item_nav($user_id, array $item, $status = array(STATUS_UNREAD), $bo
function get_items_by_status($user_id, $status, $feed_ids = array(), $offset = null, $limit = null, $order_column = 'updated', $order_direction = 'desc') function get_items_by_status($user_id, $status, $feed_ids = array(), $offset = null, $limit = null, $order_column = 'updated', $order_direction = 'desc')
{ {
return Database::getInstance('db') return Database::getInstance('db')
->table('items') ->table(TABLE)
->columns( ->columns(
'items.id', 'items.id',
'items.checksum', 'items.checksum',
@ -259,7 +263,7 @@ function get_items_by_status($user_id, $status, $feed_ids = array(), $offset = n
function get_items($user_id, $since_id = null, array $item_ids = array(), $limit = 50) function get_items($user_id, $since_id = null, array $item_ids = array(), $limit = 50)
{ {
$query = Database::getInstance('db') $query = Database::getInstance('db')
->table('items') ->table(TABLE)
->columns( ->columns(
'items.id', 'items.id',
'items.checksum', 'items.checksum',
@ -296,13 +300,22 @@ function get_items($user_id, $since_id = null, array $item_ids = array(), $limit
function get_item_ids_by_status($user_id, $status) function get_item_ids_by_status($user_id, $status)
{ {
return Database::getInstance('db') return Database::getInstance('db')
->table('items') ->table(TABLE)
->eq('user_id', $user_id) ->eq('user_id', $user_id)
->eq('status', $status) ->eq('status', $status)
->asc('id') ->asc('id')
->findAllByColumn('id'); ->findAllByColumn('id');
} }
/**
 * Collect the stored `url` column value of every item row that matches
 * both the given user and the given feed.
 *
 * @param  mixed $user_id  Value matched against the `user_id` column
 * @param  mixed $feed_id  Value matched against the `feed_id` column
 * @return mixed           Result of findAllByColumn('url'); presumably a flat
 *                         list of URL strings (to confirm against PicoDb)
 */
function get_item_urls($user_id, $feed_id)
{
    // Build the query one step at a time; the builder is re-assigned after
    // each call so this is equivalent to a single fluent chain.
    $query = Database::getInstance('db')->table(TABLE);
    $query = $query->eq('user_id', $user_id);
    $query = $query->eq('feed_id', $feed_id);

    return $query->findAllByColumn('url');
}
function get_latest_unread_items_timestamps($user_id) function get_latest_unread_items_timestamps($user_id)
{ {
return Database::getInstance('db') return Database::getInstance('db')
@ -321,7 +334,7 @@ function get_latest_unread_items_timestamps($user_id)
function count_by_status($user_id, $status, $feed_ids = array()) function count_by_status($user_id, $status, $feed_ids = array())
{ {
$query = Database::getInstance('db') $query = Database::getInstance('db')
->table('items') ->table(TABLE)
->eq('user_id', $user_id) ->eq('user_id', $user_id)
->in('feed_id', $feed_ids); ->in('feed_id', $feed_ids);