Create Scraper handler
This commit is contained in:
parent
7e20a3fdc3
commit
46bc8cfd71
@ -132,7 +132,7 @@ Router\post_action('download-item', function () {
|
|||||||
$item = Model\Item\get($id);
|
$item = Model\Item\get($id);
|
||||||
$feed = Model\Feed\get($item['feed_id']);
|
$feed = Model\Feed\get($item['feed_id']);
|
||||||
|
|
||||||
$download = Model\Item\download_content_id($id);
|
$download = Model\Item\download_contents($id);
|
||||||
$download['content'] = Model\Proxy\rewrite_html($download['content'], $item['url'], Model\Config\get('image_proxy'), $feed['cloak_referrer']);
|
$download['content'] = Model\Proxy\rewrite_html($download['content'], $item['url'], Model\Config\get('image_proxy'), $feed['cloak_referrer']);
|
||||||
|
|
||||||
Response\json($download);
|
Response\json($download);
|
||||||
|
21
app/handlers/scraper.php
Normal file
21
app/handlers/scraper.php
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Handler\Scraper;
|
||||||
|
|
||||||
|
use PicoFeed\Scraper\Scraper;
|
||||||
|
use Model\Config;
|
||||||
|
|
||||||
|
// Scrape the page at $url and return its filtered content.
// Returns an empty string when the scraper reports no relevant content.
function download_contents($url)
{
    $page_scraper = new Scraper(Config\get_reader_config());
    $page_scraper->setUrl($url);
    $page_scraper->execute();

    return $page_scraper->hasRelevantContent()
        ? $page_scraper->getFilteredContent()
        : '';
}
|
@ -2,12 +2,12 @@
|
|||||||
|
|
||||||
namespace Model\Item;
|
namespace Model\Item;
|
||||||
|
|
||||||
|
use PicoDb\Database;
|
||||||
|
use PicoFeed\Logging\Logger;
|
||||||
use Model\Service;
|
use Model\Service;
|
||||||
use Model\Config;
|
use Model\Config;
|
||||||
use Model\Group;
|
use Model\Group;
|
||||||
use PicoDb\Database;
|
use Handler;
|
||||||
use PicoFeed\Logging\Logger;
|
|
||||||
use PicoFeed\Scraper\Scraper;
|
|
||||||
|
|
||||||
// Get all items without filtering
|
// Get all items without filtering
|
||||||
function get_all()
|
function get_all()
|
||||||
@ -407,32 +407,14 @@ function cleanup($feed_id, array $items_in_feed)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download content from an URL
|
// Download item content
|
||||||
function download_content_url($url)
|
function download_contents($item_id)
|
||||||
{
|
|
||||||
$content = '';
|
|
||||||
|
|
||||||
$grabber = new Scraper(Config\get_reader_config());
|
|
||||||
$grabber->setUrl($url);
|
|
||||||
$grabber->execute();
|
|
||||||
|
|
||||||
if ($grabber->hasRelevantContent()) {
|
|
||||||
$content = $grabber->getFilteredContent();
|
|
||||||
}
|
|
||||||
|
|
||||||
return $content;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Download content from item ID
|
|
||||||
function download_content_id($item_id)
|
|
||||||
{
|
{
|
||||||
$item = get($item_id);
|
$item = get($item_id);
|
||||||
$content = download_content_url($item['url']);
|
$content = Handler\Scraper\download_contents($item['url']);
|
||||||
|
|
||||||
if (! empty($content)) {
|
if (! empty($content)) {
|
||||||
if (! Config\get('nocontent')) {
|
if (! Config\get('nocontent')) {
|
||||||
|
|
||||||
// Save content
|
|
||||||
Database::getInstance('db')
|
Database::getInstance('db')
|
||||||
->table('items')
|
->table('items')
|
||||||
->eq('id', $item['id'])
|
->eq('id', $item['id'])
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
"app/core/router.php",
|
"app/core/router.php",
|
||||||
"app/core/session.php",
|
"app/core/session.php",
|
||||||
"app/core/template.php",
|
"app/core/template.php",
|
||||||
|
"app/handlers/scraper.php",
|
||||||
"app/models/config.php",
|
"app/models/config.php",
|
||||||
"app/models/service.php",
|
"app/models/service.php",
|
||||||
"app/models/search.php",
|
"app/models/search.php",
|
||||||
|
1
vendor/composer/autoload_files.php
vendored
1
vendor/composer/autoload_files.php
vendored
@ -19,6 +19,7 @@ return array(
|
|||||||
'dbd9090b0db725af4a3cd765a9d2e39a' => $baseDir . '/app/core/router.php',
|
'dbd9090b0db725af4a3cd765a9d2e39a' => $baseDir . '/app/core/router.php',
|
||||||
'98faa6699f100c5ddb2013d85f9dfabb' => $baseDir . '/app/core/session.php',
|
'98faa6699f100c5ddb2013d85f9dfabb' => $baseDir . '/app/core/session.php',
|
||||||
'93228d441890e5962b0566344884332c' => $baseDir . '/app/core/template.php',
|
'93228d441890e5962b0566344884332c' => $baseDir . '/app/core/template.php',
|
||||||
|
'9de087554be89ca71a2ed558a4e35fde' => $baseDir . '/app/handlers/scraper.php',
|
||||||
'bc98222aedc910930f5b76b8c84f334e' => $baseDir . '/app/models/config.php',
|
'bc98222aedc910930f5b76b8c84f334e' => $baseDir . '/app/models/config.php',
|
||||||
'c3080c7edf4a590ce36fc4f3561968dc' => $baseDir . '/app/models/service.php',
|
'c3080c7edf4a590ce36fc4f3561968dc' => $baseDir . '/app/models/service.php',
|
||||||
'b59348c9973f21f2c58eb493d9fea5be' => $baseDir . '/app/models/search.php',
|
'b59348c9973f21f2c58eb493d9fea5be' => $baseDir . '/app/models/search.php',
|
||||||
|
1
vendor/composer/autoload_static.php
vendored
1
vendor/composer/autoload_static.php
vendored
@ -20,6 +20,7 @@ class ComposerStaticInitfd7e8d436e1dc450edc3153ac8bc31b4
|
|||||||
'dbd9090b0db725af4a3cd765a9d2e39a' => __DIR__ . '/../..' . '/app/core/router.php',
|
'dbd9090b0db725af4a3cd765a9d2e39a' => __DIR__ . '/../..' . '/app/core/router.php',
|
||||||
'98faa6699f100c5ddb2013d85f9dfabb' => __DIR__ . '/../..' . '/app/core/session.php',
|
'98faa6699f100c5ddb2013d85f9dfabb' => __DIR__ . '/../..' . '/app/core/session.php',
|
||||||
'93228d441890e5962b0566344884332c' => __DIR__ . '/../..' . '/app/core/template.php',
|
'93228d441890e5962b0566344884332c' => __DIR__ . '/../..' . '/app/core/template.php',
|
||||||
|
'9de087554be89ca71a2ed558a4e35fde' => __DIR__ . '/../..' . '/app/handlers/scraper.php',
|
||||||
'bc98222aedc910930f5b76b8c84f334e' => __DIR__ . '/../..' . '/app/models/config.php',
|
'bc98222aedc910930f5b76b8c84f334e' => __DIR__ . '/../..' . '/app/models/config.php',
|
||||||
'c3080c7edf4a590ce36fc4f3561968dc' => __DIR__ . '/../..' . '/app/models/service.php',
|
'c3080c7edf4a590ce36fc4f3561968dc' => __DIR__ . '/../..' . '/app/models/service.php',
|
||||||
'b59348c9973f21f2c58eb493d9fea5be' => __DIR__ . '/../..' . '/app/models/search.php',
|
'b59348c9973f21f2c58eb493d9fea5be' => __DIR__ . '/../..' . '/app/models/search.php',
|
||||||
|
Loading…
Reference in New Issue
Block a user