Create Scraper handler

This commit is contained in:
Frederic Guillot 2016-08-18 22:33:58 -04:00
parent 7e20a3fdc3
commit 46bc8cfd71
No known key found for this signature in database
GPG Key ID: 92D77191BA7FBC99
6 changed files with 31 additions and 25 deletions

View File

@ -132,7 +132,7 @@ Router\post_action('download-item', function () {
$item = Model\Item\get($id); $item = Model\Item\get($id);
$feed = Model\Feed\get($item['feed_id']); $feed = Model\Feed\get($item['feed_id']);
$download = Model\Item\download_content_id($id); $download = Model\Item\download_contents($id);
$download['content'] = Model\Proxy\rewrite_html($download['content'], $item['url'], Model\Config\get('image_proxy'), $feed['cloak_referrer']); $download['content'] = Model\Proxy\rewrite_html($download['content'], $item['url'], Model\Config\get('image_proxy'), $feed['cloak_referrer']);
Response\json($download); Response\json($download);

21
app/handlers/scraper.php Normal file
View File

@ -0,0 +1,21 @@
<?php
namespace Handler\Scraper;
use PicoFeed\Scraper\Scraper;
use Model\Config;
/**
 * Run the PicoFeed scraper against a URL and return the extracted content.
 *
 * The scraper is built from the reader configuration supplied by
 * Config\get_reader_config(), pointed at $url and executed. The filtered
 * content is returned only when the scraper reports relevant content.
 *
 * @param  string $url Address handed to the scraper (presumably an absolute
 *                     page URL; confirm against callers)
 * @return string Filtered content from the scraper, or an empty string when
 *                the scraper reports no relevant content
 */
function download_contents($url)
{
    $page_scraper = new Scraper(Config\get_reader_config());
    $page_scraper->setUrl($url);
    $page_scraper->execute();

    // Nothing relevant found: hand back an empty string.
    if (! $page_scraper->hasRelevantContent()) {
        return '';
    }

    return $page_scraper->getFilteredContent();
}

View File

@ -2,12 +2,12 @@
namespace Model\Item; namespace Model\Item;
use PicoDb\Database;
use PicoFeed\Logging\Logger;
use Model\Service; use Model\Service;
use Model\Config; use Model\Config;
use Model\Group; use Model\Group;
use PicoDb\Database; use Handler;
use PicoFeed\Logging\Logger;
use PicoFeed\Scraper\Scraper;
// Get all items without filtering // Get all items without filtering
function get_all() function get_all()
@ -407,32 +407,14 @@ function cleanup($feed_id, array $items_in_feed)
} }
} }
// Download content from an URL // Download item content
function download_content_url($url) function download_contents($item_id)
{
$content = '';
$grabber = new Scraper(Config\get_reader_config());
$grabber->setUrl($url);
$grabber->execute();
if ($grabber->hasRelevantContent()) {
$content = $grabber->getFilteredContent();
}
return $content;
}
// Download content from item ID
function download_content_id($item_id)
{ {
$item = get($item_id); $item = get($item_id);
$content = download_content_url($item['url']); $content = Handler\Scraper\download_contents($item['url']);
if (! empty($content)) { if (! empty($content)) {
if (! Config\get('nocontent')) { if (! Config\get('nocontent')) {
// Save content
Database::getInstance('db') Database::getInstance('db')
->table('items') ->table('items')
->eq('id', $item['id']) ->eq('id', $item['id'])

View File

@ -38,6 +38,7 @@
"app/core/router.php", "app/core/router.php",
"app/core/session.php", "app/core/session.php",
"app/core/template.php", "app/core/template.php",
"app/handlers/scraper.php",
"app/models/config.php", "app/models/config.php",
"app/models/service.php", "app/models/service.php",
"app/models/search.php", "app/models/search.php",

View File

@ -19,6 +19,7 @@ return array(
'dbd9090b0db725af4a3cd765a9d2e39a' => $baseDir . '/app/core/router.php', 'dbd9090b0db725af4a3cd765a9d2e39a' => $baseDir . '/app/core/router.php',
'98faa6699f100c5ddb2013d85f9dfabb' => $baseDir . '/app/core/session.php', '98faa6699f100c5ddb2013d85f9dfabb' => $baseDir . '/app/core/session.php',
'93228d441890e5962b0566344884332c' => $baseDir . '/app/core/template.php', '93228d441890e5962b0566344884332c' => $baseDir . '/app/core/template.php',
'9de087554be89ca71a2ed558a4e35fde' => $baseDir . '/app/handlers/scraper.php',
'bc98222aedc910930f5b76b8c84f334e' => $baseDir . '/app/models/config.php', 'bc98222aedc910930f5b76b8c84f334e' => $baseDir . '/app/models/config.php',
'c3080c7edf4a590ce36fc4f3561968dc' => $baseDir . '/app/models/service.php', 'c3080c7edf4a590ce36fc4f3561968dc' => $baseDir . '/app/models/service.php',
'b59348c9973f21f2c58eb493d9fea5be' => $baseDir . '/app/models/search.php', 'b59348c9973f21f2c58eb493d9fea5be' => $baseDir . '/app/models/search.php',

View File

@ -20,6 +20,7 @@ class ComposerStaticInitfd7e8d436e1dc450edc3153ac8bc31b4
'dbd9090b0db725af4a3cd765a9d2e39a' => __DIR__ . '/../..' . '/app/core/router.php', 'dbd9090b0db725af4a3cd765a9d2e39a' => __DIR__ . '/../..' . '/app/core/router.php',
'98faa6699f100c5ddb2013d85f9dfabb' => __DIR__ . '/../..' . '/app/core/session.php', '98faa6699f100c5ddb2013d85f9dfabb' => __DIR__ . '/../..' . '/app/core/session.php',
'93228d441890e5962b0566344884332c' => __DIR__ . '/../..' . '/app/core/template.php', '93228d441890e5962b0566344884332c' => __DIR__ . '/../..' . '/app/core/template.php',
'9de087554be89ca71a2ed558a4e35fde' => __DIR__ . '/../..' . '/app/handlers/scraper.php',
'bc98222aedc910930f5b76b8c84f334e' => __DIR__ . '/../..' . '/app/models/config.php', 'bc98222aedc910930f5b76b8c84f334e' => __DIR__ . '/../..' . '/app/models/config.php',
'c3080c7edf4a590ce36fc4f3561968dc' => __DIR__ . '/../..' . '/app/models/service.php', 'c3080c7edf4a590ce36fc4f3561968dc' => __DIR__ . '/../..' . '/app/models/service.php',
'b59348c9973f21f2c58eb493d9fea5be' => __DIR__ . '/../..' . '/app/models/search.php', 'b59348c9973f21f2c58eb493d9fea5be' => __DIR__ . '/../..' . '/app/models/search.php',