From 2ef48e5f5c4ab02e7fe8e6c73867afbbd8b1d4d2 Mon Sep 17 00:00:00 2001 From: Frederic Guillot Date: Wed, 30 Mar 2016 22:43:08 -0400 Subject: [PATCH] Update PicoFeed --- composer.json | 2 +- vendor/composer/autoload_classmap.php | 2 ++ vendor/composer/installed.json | 12 ++++----- .../picofeed/lib/PicoFeed/Client/Client.php | 17 ++++++++----- .../picofeed/lib/PicoFeed/Client/Curl.php | 2 +- .../PicoFeed/Client/ForbiddenException.php | 10 ++++++++ .../PicoFeed/Client/UnauthorizedException.php | 10 ++++++++ .../picofeed/lib/PicoFeed/Parser/Parser.php | 14 ++++++++--- .../PicoFeed/Processor/ItemPostProcessor.php | 12 +++++++++ .../PicoFeed/Processor/ScraperProcessor.php | 25 +++++++++++++++++++ .../lib/PicoFeed/Rules/jsonline.com.php | 20 +++++++-------- .../lib/PicoFeed/Rules/onmilwaukee.php | 1 + 12 files changed, 99 insertions(+), 28 deletions(-) create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php diff --git a/composer.json b/composer.json index 9d45006..36c5875 100644 --- a/composer.json +++ b/composer.json @@ -15,7 +15,7 @@ "fguillot/simple-validator": "v1.0.0", "fguillot/json-rpc": "v1.0.2", "fguillot/picodb": "v1.0.2", - "fguillot/picofeed": "v0.1.20" + "fguillot/picofeed": "v0.1.21" }, "require-dev": { "phpunit/phpunit": "4.8.3", diff --git a/vendor/composer/autoload_classmap.php b/vendor/composer/autoload_classmap.php index ecabc01..0a48966 100644 --- a/vendor/composer/autoload_classmap.php +++ b/vendor/composer/autoload_classmap.php @@ -29,6 +29,7 @@ return array( 'PicoFeed\\Client\\Client' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Client.php', 'PicoFeed\\Client\\ClientException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php', 'PicoFeed\\Client\\Curl' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Curl.php', + 'PicoFeed\\Client\\ForbiddenException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php', 'PicoFeed\\Client\\HttpHeaders' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php', 'PicoFeed\\Client\\InvalidCertificateException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php', 'PicoFeed\\Client\\InvalidUrlException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php', @@ -36,6 +37,7 @@ return array( 'PicoFeed\\Client\\MaxSizeException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/MaxSizeException.php', 'PicoFeed\\Client\\Stream' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Stream.php', 'PicoFeed\\Client\\TimeoutException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/TimeoutException.php', + 'PicoFeed\\Client\\UnauthorizedException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/UnauthorizedException.php', 'PicoFeed\\Client\\Url' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Url.php', 'PicoFeed\\Config\\Config' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Config/Config.php', 'PicoFeed\\Encoding\\Encoding' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Encoding/Encoding.php', diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index d65a909..2fbeb75 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -163,17 +163,17 @@ }, { "name": "fguillot/picofeed", - "version": "v0.1.20", - "version_normalized": "0.1.20.0", + "version": "v0.1.21", + "version_normalized": "0.1.21.0", "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - "reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897" + "reference": "2baff3240ef187c9f443656ab26b0b626aec5776" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d6bbdd248fa4a3eef7831ffaae0491a2ea58f897", - "reference": "d6bbdd248fa4a3eef7831ffaae0491a2ea58f897", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/2baff3240ef187c9f443656ab26b0b626aec5776", + "reference": "2baff3240ef187c9f443656ab26b0b626aec5776", "shasum": "" }, "require": { @@ -188,7 +188,7 @@ "suggest": { "ext-curl": "PicoFeed will use cURL if present" }, - "time": "2016-03-24 12:09:56", + "time": "2016-03-31 00:39:41", "bin": [ "picofeed" ], diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php index 24c619c..d7f981b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php @@ -211,7 +211,7 @@ abstract class Client $this->status_code = $response['status']; $this->handleNotModifiedResponse($response); - $this->handleNotFoundResponse($response); + $this->handleErrorResponse($response); $this->handleNormalResponse($response); return $this; @@ -222,7 +222,7 @@ abstract class Client * * @param array $response Client response */ - public function handleNotModifiedResponse(array $response) + protected function handleNotModifiedResponse(array $response) { if ($response['status'] == 304) { $this->is_modified = false; @@ -238,13 +238,18 @@ abstract class Client } /** - * Handle not found response. + * Handle Http Error codes * * @param array $response Client response */ - public function handleNotFoundResponse(array $response) + protected function handleErrorResponse(array $response) { - if ($response['status'] == 404) { + $status = $response['status']; + if ($status == 401) { + throw new UnauthorizedException('Wrong or missing credentials'); + } else if ($status == 403) { + throw new ForbiddenException('Not allowed to access resource'); + } else if ($status == 404) { throw new InvalidUrlException('Resource not found'); } } @@ -254,7 +259,7 @@ abstract class Client * * @param array $response Client response */ - public function handleNormalResponse(array $response) + protected function handleNormalResponse(array $response) { if ($response['status'] == 200) { $this->content = $response['body']; diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index 82ea87a..69f22bb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -108,7 +108,7 @@ class Curl extends Client return $this->handleRedirection($headers['Location']); } - header($status); + header(':', true, $status); if (isset($headers['Content-Type'])) { header('Content-Type:' .$headers['Content-Type']); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php new file mode 100644 index 0000000..c226e95 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/ForbiddenException.php @@ -0,0 +1,10 @@ +config); - if ($needs_rule_file) { + if ($needsRuleFile) { $processor->getScraper()->disableCandidateParser(); } + if ($scraperCallback !== null) { + $processor->setExecutionCallback($scraperCallback); + } + $this->itemPostProcessor->register($processor); return $this; } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php index ff9740b..97425bf 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ItemPostProcessor.php @@ -70,6 +70,18 @@ class ItemPostProcessor extends Base return $this; } + /** + * Checks wheather a specific processor is registered or not + * + * @access public + * @param string $class + * @return bool + */ + public function hasProcessor($class) + { + return isset($this->processors[$class]); + } + /** * Get Processor instance * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php index 9966965..0c467af 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Processor/ScraperProcessor.php @@ -2,6 +2,7 @@ namespace PicoFeed\Processor; +use Closure; use PicoFeed\Base; use PicoFeed\Parser\Feed; use PicoFeed\Parser\Item; @@ -18,6 +19,26 @@ class ScraperProcessor extends Base implements ItemProcessorInterface private $ignoredUrls = array(); private $scraper; + /** + * Callback function for each scraper execution + * + * @var Closure + */ + private $executionCallback; + + /** + * Add a new execution callback + * + * @access public + * @param Closure $executionCallback + * @return $this + */ + public function setExecutionCallback(Closure $executionCallback) + { + $this->executionCallback = $executionCallback; + return $this; + } + /** * Execute Item Processor * @@ -33,6 +54,10 @@ class ScraperProcessor extends Base implements ItemProcessorInterface $scraper->setUrl($item->getUrl()); $scraper->execute(); + if ($this->executionCallback && is_callable($this->executionCallback)) { + call_user_func($this->executionCallback, $feed, $item, $scraper); + } + if ($scraper->hasRelevantContent()) { $item->setContent($scraper->getFilteredContent()); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/jsonline.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/jsonline.com.php index 82322f7..9fd8d86 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/jsonline.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/jsonline.com.php @@ -5,19 +5,19 @@ return array( '%.*%' => array( 'test_url' => 'http://www.jsonline.com/news/usandworld/as-many-as-a-million-expected-for-popes-last-mass-in-us-b99585180z1-329688131.html', 'body' => array( - '//div[@id="mainContent"]', + '//div[@id="main"]', ), 'strip' => array( '//script', - '//h1', - '//h4[@class="credit"]', - '//div[@class="columnist_container"]', - '//div[@class="storyTimestamp"]', - '//ul[@id="sharing-tools"]', - '//div[@class="title"]', - '//img[@class="floatLeft"]', - '//div[@class="first feature"]', - '//div[@class="collateral_article_content"]', + 'div[contains(@class, "header")]', + 'div[@class="module--headline"]', + 'div[@class="main--inlinemeta"]', + 'div[contains(@class, "leftcol--")]', + 'p[@class="main--author"]', + 'div[@class="story--rightcol"]', + 'div[contains(@class, "footer")]', + 'div[contains(@class, "rightcol--")]', + 'div[contains(@class, "author")]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/onmilwaukee.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/onmilwaukee.php index c1ef6b0..2f74aac 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/onmilwaukee.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/onmilwaukee.php @@ -18,6 +18,7 @@ return array( '//section[@class="ribboned"]', '//div[contains(@class,"sidebar")]', '//aside[@class="article_tag_list"]', + '//section[contains(@id, "more_posts")]', ), ), ),