Update PicoFeed
This commit is contained in:
parent
165acb0342
commit
2ef48e5f5c
@ -15,7 +15,7 @@
|
|||||||
"fguillot/simple-validator": "v1.0.0",
|
"fguillot/simple-validator": "v1.0.0",
|
||||||
"fguillot/json-rpc": "v1.0.2",
|
"fguillot/json-rpc": "v1.0.2",
|
||||||
"fguillot/picodb": "v1.0.2",
|
"fguillot/picodb": "v1.0.2",
|
||||||
"fguillot/picofeed": "v0.1.20"
|
"fguillot/picofeed": "v0.1.21"
|
||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
"phpunit/phpunit": "4.8.3",
|
"phpunit/phpunit": "4.8.3",
|
||||||
|
2
vendor/composer/autoload_classmap.php
vendored
2
vendor/composer/autoload_classmap.php
vendored
@ -29,6 +29,7 @@ return array(
|
|||||||
'PicoFeed\\Client\\Client' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Client.php',
|
'PicoFeed\\Client\\Client' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Client.php',
|
||||||
'PicoFeed\\Client\\ClientException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php',
|
'PicoFeed\\Client\\ClientException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php',
|
||||||
'PicoFeed\\Client\\Curl' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Curl.php',
|
'PicoFeed\\Client\\Curl' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Curl.php',
|
||||||
|
'PicoFeed\\Client\\ForbiddenException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php',
|
||||||
'PicoFeed\\Client\\HttpHeaders' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php',
|
'PicoFeed\\Client\\HttpHeaders' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php',
|
||||||
'PicoFeed\\Client\\InvalidCertificateException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php',
|
'PicoFeed\\Client\\InvalidCertificateException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php',
|
||||||
'PicoFeed\\Client\\InvalidUrlException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php',
|
'PicoFeed\\Client\\InvalidUrlException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php',
|
||||||
@ -36,6 +37,7 @@ return array(
|
|||||||
'PicoFeed\\Client\\MaxSizeException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/MaxSizeException.php',
|
'PicoFeed\\Client\\MaxSizeException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/MaxSizeException.php',
|
||||||
'PicoFeed\\Client\\Stream' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Stream.php',
|
'PicoFeed\\Client\\Stream' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Stream.php',
|
||||||
'PicoFeed\\Client\\TimeoutException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/TimeoutException.php',
|
'PicoFeed\\Client\\TimeoutException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/TimeoutException.php',
|
||||||
|
'PicoFeed\\Client\\UnauthorizedException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php',
|
||||||
'PicoFeed\\Client\\Url' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Url.php',
|
'PicoFeed\\Client\\Url' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Url.php',
|
||||||
'PicoFeed\\Config\\Config' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Config/Config.php',
|
'PicoFeed\\Config\\Config' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Config/Config.php',
|
||||||
'PicoFeed\\Encoding\\Encoding' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php',
|
'PicoFeed\\Encoding\\Encoding' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php',
|
||||||
|
12
vendor/composer/installed.json
vendored
12
vendor/composer/installed.json
vendored
@ -163,17 +163,17 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "fguillot/picofeed",
|
"name": "fguillot/picofeed",
|
||||||
"version": "v0.1.20",
|
"version": "v0.1.21",
|
||||||
"version_normalized": "0.1.20.0",
|
"version_normalized": "0.1.21.0",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/fguillot/picoFeed.git",
|
"url": "https://github.com/fguillot/picoFeed.git",
|
||||||
"reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897"
|
"reference": "2baff3240ef187c9f443656ab26b0b626aec5776"
|
||||||
},
|
},
|
||||||
"dist": {
|
"dist": {
|
||||||
"type": "zip",
|
"type": "zip",
|
||||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d6bbdd248fa4a3eef7831ffaae0491a2ea58f897",
|
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/2baff3240ef187c9f443656ab26b0b626aec5776",
|
||||||
"reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897",
|
"reference": "2baff3240ef187c9f443656ab26b0b626aec5776",
|
||||||
"shasum": ""
|
"shasum": ""
|
||||||
},
|
},
|
||||||
"require": {
|
"require": {
|
||||||
@ -188,7 +188,7 @@
|
|||||||
"suggest": {
|
"suggest": {
|
||||||
"ext-curl": "PicoFeed will use cURL if present"
|
"ext-curl": "PicoFeed will use cURL if present"
|
||||||
},
|
},
|
||||||
"time": "2016-03-24 12:09:56",
|
"time": "2016-03-31 00:39:41",
|
||||||
"bin": [
|
"bin": [
|
||||||
"picofeed"
|
"picofeed"
|
||||||
],
|
],
|
||||||
|
@ -211,7 +211,7 @@ abstract class Client
|
|||||||
|
|
||||||
$this->status_code = $response['status'];
|
$this->status_code = $response['status'];
|
||||||
$this->handleNotModifiedResponse($response);
|
$this->handleNotModifiedResponse($response);
|
||||||
$this->handleNotFoundResponse($response);
|
$this->handleErrorResponse($response);
|
||||||
$this->handleNormalResponse($response);
|
$this->handleNormalResponse($response);
|
||||||
|
|
||||||
return $this;
|
return $this;
|
||||||
@ -222,7 +222,7 @@ abstract class Client
|
|||||||
*
|
*
|
||||||
* @param array $response Client response
|
* @param array $response Client response
|
||||||
*/
|
*/
|
||||||
public function handleNotModifiedResponse(array $response)
|
protected function handleNotModifiedResponse(array $response)
|
||||||
{
|
{
|
||||||
if ($response['status'] == 304) {
|
if ($response['status'] == 304) {
|
||||||
$this->is_modified = false;
|
$this->is_modified = false;
|
||||||
@ -238,13 +238,18 @@ abstract class Client
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handle not found response.
|
* Handle HTTP error codes
|
||||||
*
|
*
|
||||||
* @param array $response Client response
|
* @param array $response Client response
|
||||||
*/
|
*/
|
||||||
public function handleNotFoundResponse(array $response)
|
protected function handleErrorResponse(array $response)
|
||||||
{
|
{
|
||||||
if ($response['status'] == 404) {
|
$status = $response['status'];
|
||||||
|
if ($status == 401) {
|
||||||
|
throw new UnauthorizedException('Wrong or missing credentials');
|
||||||
|
} else if ($status == 403) {
|
||||||
|
throw new ForbiddenException('Not allowed to access resource');
|
||||||
|
} else if ($status == 404) {
|
||||||
throw new InvalidUrlException('Resource not found');
|
throw new InvalidUrlException('Resource not found');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -254,7 +259,7 @@ abstract class Client
|
|||||||
*
|
*
|
||||||
* @param array $response Client response
|
* @param array $response Client response
|
||||||
*/
|
*/
|
||||||
public function handleNormalResponse(array $response)
|
protected function handleNormalResponse(array $response)
|
||||||
{
|
{
|
||||||
if ($response['status'] == 200) {
|
if ($response['status'] == 200) {
|
||||||
$this->content = $response['body'];
|
$this->content = $response['body'];
|
||||||
|
@ -108,7 +108,7 @@ class Curl extends Client
|
|||||||
return $this->handleRedirection($headers['Location']);
|
return $this->handleRedirection($headers['Location']);
|
||||||
}
|
}
|
||||||
|
|
||||||
header($status);
|
header(':', true, $status);
|
||||||
|
|
||||||
if (isset($headers['Content-Type'])) {
|
if (isset($headers['Content-Type'])) {
|
||||||
header('Content-Type:' .$headers['Content-Type']);
|
header('Content-Type:' .$headers['Content-Type']);
|
||||||
|
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php
vendored
Normal file
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace PicoFeed\Client;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Bernhard Posselt
|
||||||
|
*/
|
||||||
|
class ForbiddenException extends ClientException
|
||||||
|
{
|
||||||
|
}
|
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php
vendored
Normal file
10
vendor/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php
vendored
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace PicoFeed\Client;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author Bernhard Posselt
|
||||||
|
*/
|
||||||
|
class UnauthorizedException extends ClientException
|
||||||
|
{
|
||||||
|
}
|
@ -319,19 +319,25 @@ abstract class Parser
|
|||||||
/**
|
/**
|
||||||
* Enable the content grabber.
|
* Enable the content grabber.
|
||||||
*
|
*
|
||||||
* @param bool $needs_rule_file true if only pages with rule files should be
|
* @param bool $needsRuleFile true if only pages with rule files should be
|
||||||
* scraped
|
* scraped
|
||||||
|
* @param null|\Closure $scraperCallback Callback function that gets called for each
|
||||||
|
* scraper execution
|
||||||
*
|
*
|
||||||
* @return \PicoFeed\Parser\Parser
|
* @return \PicoFeed\Parser\Parser
|
||||||
*/
|
*/
|
||||||
public function enableContentGrabber($needs_rule_file = false)
|
public function enableContentGrabber($needsRuleFile = false, $scraperCallback = null)
|
||||||
{
|
{
|
||||||
$processor = new ScraperProcessor($this->config);
|
$processor = new ScraperProcessor($this->config);
|
||||||
|
|
||||||
if ($needs_rule_file) {
|
if ($needsRuleFile) {
|
||||||
$processor->getScraper()->disableCandidateParser();
|
$processor->getScraper()->disableCandidateParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($scraperCallback !== null) {
|
||||||
|
$processor->setExecutionCallback($scraperCallback);
|
||||||
|
}
|
||||||
|
|
||||||
$this->itemPostProcessor->register($processor);
|
$this->itemPostProcessor->register($processor);
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
@ -70,6 +70,18 @@ class ItemPostProcessor extends Base
|
|||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether a specific processor is registered or not
|
||||||
|
*
|
||||||
|
* @access public
|
||||||
|
* @param string $class
|
||||||
|
* @return bool
|
||||||
|
*/
|
||||||
|
public function hasProcessor($class)
|
||||||
|
{
|
||||||
|
return isset($this->processors[$class]);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get Processor instance
|
* Get Processor instance
|
||||||
*
|
*
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
namespace PicoFeed\Processor;
|
namespace PicoFeed\Processor;
|
||||||
|
|
||||||
|
use Closure;
|
||||||
use PicoFeed\Base;
|
use PicoFeed\Base;
|
||||||
use PicoFeed\Parser\Feed;
|
use PicoFeed\Parser\Feed;
|
||||||
use PicoFeed\Parser\Item;
|
use PicoFeed\Parser\Item;
|
||||||
@ -18,6 +19,26 @@ class ScraperProcessor extends Base implements ItemProcessorInterface
|
|||||||
private $ignoredUrls = array();
|
private $ignoredUrls = array();
|
||||||
private $scraper;
|
private $scraper;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback function for each scraper execution
|
||||||
|
*
|
||||||
|
* @var Closure
|
||||||
|
*/
|
||||||
|
private $executionCallback;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a new execution callback
|
||||||
|
*
|
||||||
|
* @access public
|
||||||
|
* @param Closure $executionCallback
|
||||||
|
* @return $this
|
||||||
|
*/
|
||||||
|
public function setExecutionCallback(Closure $executionCallback)
|
||||||
|
{
|
||||||
|
$this->executionCallback = $executionCallback;
|
||||||
|
return $this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute Item Processor
|
* Execute Item Processor
|
||||||
*
|
*
|
||||||
@ -33,6 +54,10 @@ class ScraperProcessor extends Base implements ItemProcessorInterface
|
|||||||
$scraper->setUrl($item->getUrl());
|
$scraper->setUrl($item->getUrl());
|
||||||
$scraper->execute();
|
$scraper->execute();
|
||||||
|
|
||||||
|
if ($this->executionCallback && is_callable($this->executionCallback)) {
|
||||||
|
call_user_func($this->executionCallback, $feed, $item, $scraper);
|
||||||
|
}
|
||||||
|
|
||||||
if ($scraper->hasRelevantContent()) {
|
if ($scraper->hasRelevantContent()) {
|
||||||
$item->setContent($scraper->getFilteredContent());
|
$item->setContent($scraper->getFilteredContent());
|
||||||
}
|
}
|
||||||
|
@ -5,19 +5,19 @@ return array(
|
|||||||
'%.*%' => array(
|
'%.*%' => array(
|
||||||
'test_url' => 'http://www.jsonline.com/news/usandworld/as-many-as-a-million-expected-for-popes-last-mass-in-us-b99585180z1-329688131.html',
|
'test_url' => 'http://www.jsonline.com/news/usandworld/as-many-as-a-million-expected-for-popes-last-mass-in-us-b99585180z1-329688131.html',
|
||||||
'body' => array(
|
'body' => array(
|
||||||
'//div[@id="mainContent"]',
|
'//div[@id="main"]',
|
||||||
),
|
),
|
||||||
'strip' => array(
|
'strip' => array(
|
||||||
'//script',
|
'//script',
|
||||||
'//h1',
|
'div[contains(@class, "header")]',
|
||||||
'//h4[@class="credit"]',
|
'div[@class="module--headline"]',
|
||||||
'//div[@class="columnist_container"]',
|
'div[@class="main--inlinemeta"]',
|
||||||
'//div[@class="storyTimestamp"]',
|
'div[contains(@class, "leftcol--")]',
|
||||||
'//ul[@id="sharing-tools"]',
|
'p[@class="main--author"]',
|
||||||
'//div[@class="title"]',
|
'div[@class="story--rightcol"]',
|
||||||
'//img[@class="floatLeft"]',
|
'div[contains(@class, "footer")]',
|
||||||
'//div[@class="first feature"]',
|
'div[contains(@class, "rightcol--")]',
|
||||||
'//div[@class="collateral_article_content"]',
|
'div[contains(@class, "author")]',
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
@ -18,6 +18,7 @@ return array(
|
|||||||
'//section[@class="ribboned"]',
|
'//section[@class="ribboned"]',
|
||||||
'//div[contains(@class,"sidebar")]',
|
'//div[contains(@class,"sidebar")]',
|
||||||
'//aside[@class="article_tag_list"]',
|
'//aside[@class="article_tag_list"]',
|
||||||
|
'//section[contains(@id, "more_posts")]',
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
Loading…
Reference in New Issue
Block a user