diff --git a/docs/full-article-download.markdown b/docs/full-article-download.markdown
index cdae156..65f05a8 100644
--- a/docs/full-article-download.markdown
+++ b/docs/full-article-download.markdown
@@ -18,37 +18,75 @@ Especially websites that use a lot of Javascript to generate the content.
How to write a grabber rules file?
----------------------------------
-Add a PHP file to the directory `rules`, the filename must be the domain name with the suffix `.php`:
+Miniflux will try first to find the file in the [default bundled rules directory](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules), then it will try to load your custom rules.
-Example with the BBC website, `www.bbc.co.uk.php`:
+You can create custom rules by adding a PHP file to the `rules` directory. The filename must be the domain name with the suffix `.php`.
+
+Each rule has the following keys:
+* **body**: An array of xpath expressions which will be extracted from the page
+* **strip**: An array of xpath expressions which will be removed from the matched content
+* **test_url**: A test url to a matching page to test the grabber
+
+Example for the BBC website, `www.bbc.co.uk.php`:
```php
<?php
return array(
- 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
- 'body' => array(
- '//div[@class="story-body"]',
- ),
- 'strip' => array(
- '//script',
- '//form',
- '//style',
- '//*[@class="story-date"]',
- '//*[@class="story-header"]',
- '//*[@class="story-related"]',
- '//*[contains(@class, "byline")]',
- '//*[contains(@class, "story-feature")]',
- '//*[@id="video-carousel-container"]',
- '//*[@id="also-related-links"]',
- '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
+ 'body' => array(
+ '//div[@class="story-body"]',
+ ),
+ 'strip' => array(
+ '//script',
+ '//form',
+ '//style',
+ '//*[@class="story-date"]',
+ '//*[@class="story-header"]',
+ '//*[@class="story-related"]',
+ '//*[contains(@class, "byline")]',
+ '//*[contains(@class, "story-feature")]',
+ '//*[@id="video-carousel-container"]',
+ '//*[@id="also-related-links"]',
+ '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+ )
+ )
)
);
```
-Actually, only the keys `body`, `strip` and `test_url` are supported.
+Each rule file can contain rules for different subdivisions of a website. Those subdivisions are distinguished by their URL. The first level array key of a rule file will be matched against the full path of the URL using **preg_match**, e.g. for **http://www.bbc.co.uk/news/world-middle-east-23911833?test=1** the URL that would be matched is **/news/world-middle-east-23911833?test=1**
-Miniflux will try first to find the file in the [default bundled rules directory](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules), then it will try to load your custom rules.
+Let's say you want to extract a div with the id **video** if the article points to a URL like **http://comix.com/videos/423**, a div with the id **audio** if the article points to a URL like **http://comix.com/podcasts/5**, and the div with the id **content** for all other links to the site. The following rule file ```comix.com.php``` would fit that requirement:
+
+```php
+return array(
+ 'grabber' => array(
+ '%^/videos.*%' => array(
+ 'test_url' => 'http://comix.com/videos/423',
+ 'body' => array(
+ '//div[@id="video"]',
+ ),
+ 'strip' => array()
+ ),
+ '%^/podcasts.*%' => array(
+ 'test_url' => 'http://comix.com/podcasts/5',
+ 'body' => array(
+ '//div[@id="audio"]',
+ ),
+ 'strip' => array()
+ ),
+ '%.*%' => array(
+ 'test_url' => 'http://comix.com/blog/1',
+ 'body' => array(
+ '//div[@id="content"]',
+ ),
+ 'strip' => array()
+ )
+ )
+);
+```
Sharing your custom rules with the community
--------------------------------------------
@@ -59,4 +97,4 @@ That will be merged in the Miniflux code base.
List of content grabber rules
-----------------------------
-[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules).
+[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules).
\ No newline at end of file
diff --git a/models/item.php b/models/item.php
index 0c1b956..f66a35b 100644
--- a/models/item.php
+++ b/models/item.php
@@ -6,7 +6,7 @@ use Model\Service;
use Model\Config;
use PicoDb\Database;
use PicoFeed\Logging\Logger;
-use PicoFeed\Client\Grabber;
+use PicoFeed\Scraper\Scraper;
// Get all items without filtering
function get_all()
@@ -520,12 +520,12 @@ function download_content_url($url)
{
$content = '';
- $grabber = new Grabber($url);
- $grabber->setConfig(Config\get_reader_config());
- $grabber->download();
+ $grabber = new Scraper(Config\get_reader_config());
+ $grabber->setUrl($url);
+ $grabber->execute();
- if ($grabber->parse()) {
- $content = $grabber->getFilteredcontent();
+ if ($grabber->hasRelevantContent()) {
+ $content = $grabber->getFilteredContent();
}
return $content;
diff --git a/vendor/composer/ClassLoader.php b/vendor/composer/ClassLoader.php
index 70d78bc..5e1469e 100644
--- a/vendor/composer/ClassLoader.php
+++ b/vendor/composer/ClassLoader.php
@@ -54,6 +54,8 @@ class ClassLoader
private $useIncludePath = false;
private $classMap = array();
+ private $classMapAuthoritative = false;
+
public function getPrefixes()
{
if (!empty($this->prefixesPsr0)) {
@@ -248,6 +250,27 @@ class ClassLoader
return $this->useIncludePath;
}
+ /**
+ * Turns off searching the prefix and fallback directories for classes
+ * that have not been registered with the class map.
+ *
+ * @param bool $classMapAuthoritative
+ */
+ public function setClassMapAuthoritative($classMapAuthoritative)
+ {
+ $this->classMapAuthoritative = $classMapAuthoritative;
+ }
+
+ /**
+ * Should class lookup fail if not found in the current class map?
+ *
+ * @return bool
+ */
+ public function isClassMapAuthoritative()
+ {
+ return $this->classMapAuthoritative;
+ }
+
/**
* Registers this instance as an autoloader.
*
@@ -299,6 +322,9 @@ class ClassLoader
if (isset($this->classMap[$class])) {
return $this->classMap[$class];
}
+ if ($this->classMapAuthoritative) {
+ return false;
+ }
$file = $this->findFileWithExtension($class, '.php');
diff --git a/vendor/composer/autoload_classmap.php b/vendor/composer/autoload_classmap.php
index ffb214e..22338e2 100644
--- a/vendor/composer/autoload_classmap.php
+++ b/vendor/composer/autoload_classmap.php
@@ -20,7 +20,6 @@ return array(
'PicoFeed\\Client\\Client' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Client.php',
'PicoFeed\\Client\\ClientException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php',
'PicoFeed\\Client\\Curl' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Curl.php',
- 'PicoFeed\\Client\\Grabber' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php',
'PicoFeed\\Client\\HttpHeaders' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php',
'PicoFeed\\Client\\InvalidCertificateException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php',
'PicoFeed\\Client\\InvalidUrlException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php',
@@ -54,6 +53,11 @@ return array(
'PicoFeed\\Reader\\ReaderException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/ReaderException.php',
'PicoFeed\\Reader\\SubscriptionNotFoundException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/SubscriptionNotFoundException.php',
'PicoFeed\\Reader\\UnsupportedFeedFormatException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/UnsupportedFeedFormatException.php',
+ 'PicoFeed\\Scraper\\CandidateParser' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php',
+ 'PicoFeed\\Scraper\\ParserInterface' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php',
+ 'PicoFeed\\Scraper\\RuleLoader' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php',
+ 'PicoFeed\\Scraper\\RuleParser' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php',
+ 'PicoFeed\\Scraper\\Scraper' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php',
'PicoFeed\\Serialization\\Export' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Serialization/Export.php',
'PicoFeed\\Serialization\\Import' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Serialization/Import.php',
'PicoFeed\\Syndication\\Atom' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Syndication/Atom.php',
diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json
index f626e28..89498fc 100644
--- a/vendor/composer/installed.json
+++ b/vendor/composer/installed.json
@@ -45,18 +45,18 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFarad.git",
- "reference": "1bc48a4367adf359f3439c2e0ae20a7d299d8ccd"
+ "reference": "a5817c49ca3037829ec1509d14724be5f29c35a0"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fguillot/picoFarad/zipball/1bc48a4367adf359f3439c2e0ae20a7d299d8ccd",
- "reference": "1bc48a4367adf359f3439c2e0ae20a7d299d8ccd",
+ "url": "https://api.github.com/repos/fguillot/picoFarad/zipball/a5817c49ca3037829ec1509d14724be5f29c35a0",
+ "reference": "a5817c49ca3037829ec1509d14724be5f29c35a0",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
- "time": "2015-02-01 19:40:13",
+ "time": "2015-04-14 01:53:02",
"type": "library",
"installation-source": "dist",
"autoload": {
@@ -66,7 +66,7 @@
},
"notification-url": "https://packagist.org/downloads/",
"license": [
- "Unlicense"
+ "MIT"
],
"authors": [
{
@@ -84,18 +84,18 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/simpleValidator.git",
- "reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8"
+ "reference": "2f30078bb6e688cf123c150d58fda322792a1532"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/41655dc7b9224395f5bb3b5623f6e428fe6d64e8",
- "reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8",
+ "url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/2f30078bb6e688cf123c150d58fda322792a1532",
+ "reference": "2f30078bb6e688cf123c150d58fda322792a1532",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
- "time": "2015-04-05 21:44:06",
+ "time": "2015-04-14 02:03:43",
"type": "library",
"installation-source": "dist",
"autoload": {
@@ -109,8 +109,7 @@
],
"authors": [
{
- "name": "Frédéric Guillot",
- "homepage": "http://fredericguillot.com"
+ "name": "Frédéric Guillot"
}
],
"description": "The most easy to use validator library for PHP :)",
@@ -123,18 +122,18 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/JsonRPC.git",
- "reference": "29d63a09ecd450d5e29fef74f687aab221055910"
+ "reference": "1a397be7739ddabba87b07f0354655bd91087518"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/29d63a09ecd450d5e29fef74f687aab221055910",
- "reference": "29d63a09ecd450d5e29fef74f687aab221055910",
+ "url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/1a397be7739ddabba87b07f0354655bd91087518",
+ "reference": "1a397be7739ddabba87b07f0354655bd91087518",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
- "time": "2015-04-05 21:49:38",
+ "time": "2015-04-14 01:50:16",
"type": "library",
"installation-source": "dist",
"autoload": {
@@ -144,7 +143,7 @@
},
"notification-url": "https://packagist.org/downloads/",
"license": [
- "Unlicense"
+ "MIT"
],
"authors": [
{
@@ -152,7 +151,7 @@
"homepage": "http://fredericguillot.com"
}
],
- "description": "A simple Json-RPC client/server library that just works",
+ "description": "Simple Json-RPC client/server library that just works",
"homepage": "https://github.com/fguillot/JsonRPC"
},
{
@@ -162,12 +161,12 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
- "reference": "273c344b35b468b6c8053f635332c3a404f8c7b9"
+ "reference": "a6087e8264550891c1b8a6da77eca0cab9328709"
},
"dist": {
"type": "zip",
- "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/273c344b35b468b6c8053f635332c3a404f8c7b9",
- "reference": "273c344b35b468b6c8053f635332c3a404f8c7b9",
+ "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/a6087e8264550891c1b8a6da77eca0cab9328709",
+ "reference": "a6087e8264550891c1b8a6da77eca0cab9328709",
"shasum": ""
},
"require": {
@@ -181,7 +180,7 @@
"suggest": {
"ext-curl": "PicoFeed will use cURL if present"
},
- "time": "2015-04-11 12:46:50",
+ "time": "2015-04-27 22:22:06",
"bin": [
"picofeed"
],
@@ -194,7 +193,7 @@
},
"notification-url": "https://packagist.org/downloads/",
"license": [
- "Unlicense"
+ "MIT"
],
"authors": [
{
diff --git a/vendor/fguillot/json-rpc/LICENSE b/vendor/fguillot/json-rpc/LICENSE
new file mode 100644
index 0000000..6a362bc
--- /dev/null
+++ b/vendor/fguillot/json-rpc/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Frederic Guillot
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/fguillot/json-rpc/README.markdown b/vendor/fguillot/json-rpc/README.markdown
index 7a5f5f1..38c8430 100644
--- a/vendor/fguillot/json-rpc/README.markdown
+++ b/vendor/fguillot/json-rpc/README.markdown
@@ -11,7 +11,7 @@ Features
- Authentication and IP based client restrictions
- Minimalist: there is only 2 files
- Fully unit tested
-- License: Unlicense http://unlicense.org/
+- License: MIT
Requirements
------------
diff --git a/vendor/fguillot/json-rpc/composer.json b/vendor/fguillot/json-rpc/composer.json
index da33c6c..3dc805c 100644
--- a/vendor/fguillot/json-rpc/composer.json
+++ b/vendor/fguillot/json-rpc/composer.json
@@ -1,9 +1,9 @@
{
"name": "fguillot/json-rpc",
- "description": "A simple Json-RPC client/server library that just works",
+ "description": "Simple Json-RPC client/server library that just works",
"homepage": "https://github.com/fguillot/JsonRPC",
"type": "library",
- "license": "Unlicense",
+ "license": "MIT",
"authors": [
{
"name": "Frédéric Guillot",
diff --git a/vendor/fguillot/picofarad/LICENCE b/vendor/fguillot/picofarad/LICENCE
new file mode 100644
index 0000000..6a362bc
--- /dev/null
+++ b/vendor/fguillot/picofarad/LICENCE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Frederic Guillot
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/fguillot/picofarad/README.md b/vendor/fguillot/picofarad/README.md
index 9ea9bd9..b9c97ea 100644
--- a/vendor/fguillot/picofarad/README.md
+++ b/vendor/fguillot/picofarad/README.md
@@ -10,7 +10,7 @@ Features
- No dependency
- Easy to use, fast and very lightweight
- Only 4 files: Request, Response, Router and Session
-- License: Do what the fuck you want with that
+- License: MIT
Requirements
------------
diff --git a/vendor/fguillot/picofarad/composer.json b/vendor/fguillot/picofarad/composer.json
index bbbaf2b..96073b6 100644
--- a/vendor/fguillot/picofarad/composer.json
+++ b/vendor/fguillot/picofarad/composer.json
@@ -3,7 +3,7 @@
"description": "Minimalist micro-framework",
"homepage": "https://github.com/fguillot/picoFarad",
"type": "library",
- "license": "Unlicense",
+ "license": "MIT",
"authors": [
{
"name": "Frédéric Guillot",
diff --git a/vendor/fguillot/picofeed/.gitignore b/vendor/fguillot/picofeed/.gitignore
index b0ef068..acbddf5 100644
--- a/vendor/fguillot/picofeed/.gitignore
+++ b/vendor/fguillot/picofeed/.gitignore
@@ -1,2 +1,3 @@
.DS_Store
-vendor/
\ No newline at end of file
+vendor/
+*.py
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/.travis.yml b/vendor/fguillot/picofeed/.travis.yml
index 0c3d0fe..00b2b5b 100644
--- a/vendor/fguillot/picofeed/.travis.yml
+++ b/vendor/fguillot/picofeed/.travis.yml
@@ -1,12 +1,19 @@
language: php
php:
- - "5.6"
- - "5.5"
- - "5.4"
- - "5.3"
+ - 7.0
+ - 5.6
+ - 5.5
+ - 5.4
+ - 5.3
+
+matrix:
+ fast_finish: true
+ allow_failures:
+ - php: 7.0
+
+before_script:
+ - composer dump-autoload
-before_script: wget https://phar.phpunit.de/phpunit.phar
script:
- - composer dump-autoload
- - php phpunit.phar
+ - phpunit
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/LICENSE b/vendor/fguillot/picofeed/LICENSE
new file mode 100644
index 0000000..6a362bc
--- /dev/null
+++ b/vendor/fguillot/picofeed/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Frederic Guillot
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/vendor/fguillot/picofeed/README.markdown b/vendor/fguillot/picofeed/README.markdown
index 4a958c2..7e94eba 100644
--- a/vendor/fguillot/picofeed/README.markdown
+++ b/vendor/fguillot/picofeed/README.markdown
@@ -24,7 +24,7 @@ Features
- Content grabber: download from the original website the full content
- Enclosure detection
- RTL languages support
-- License: Unlicense
+- License: MIT
Requirements
------------
@@ -47,7 +47,6 @@ Authors
Real world usage
----------------
-- [AnythingNew](http://anythingnew.co)
- [Miniflux](http://miniflux.net)
- [Owncloud News](https://github.com/owncloud/news)
diff --git a/vendor/fguillot/picofeed/UNLICENSE b/vendor/fguillot/picofeed/UNLICENSE
deleted file mode 100644
index 68a49da..0000000
--- a/vendor/fguillot/picofeed/UNLICENSE
+++ /dev/null
@@ -1,24 +0,0 @@
-This is free and unencumbered software released into the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or
-distribute this software, either in source code form or as a compiled
-binary, for any purpose, commercial or non-commercial, and by any
-means.
-
-In jurisdictions that recognize copyright laws, the author or authors
-of this software dedicate any and all copyright interest in the
-software to the public domain. We make this dedication for the benefit
-of the public at large and to the detriment of our heirs and
-successors. We intend this dedication to be an overt act of
-relinquishment in perpetuity of all present and future rights to this
-software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to
diff --git a/vendor/fguillot/picofeed/composer.json b/vendor/fguillot/picofeed/composer.json
index 4c13bd6..56a5ee2 100644
--- a/vendor/fguillot/picofeed/composer.json
+++ b/vendor/fguillot/picofeed/composer.json
@@ -3,7 +3,7 @@
"description": "Modern library to write or read feeds (RSS/Atom)",
"homepage": "http://fguillot.github.io/picoFeed",
"type": "library",
- "license": "Unlicense",
+ "license": "MIT",
"authors": [
{
"name": "Frédéric Guillot",
diff --git a/vendor/fguillot/picofeed/docs/feed-parsing.markdown b/vendor/fguillot/picofeed/docs/feed-parsing.markdown
index 1ee2145..8ab2dac 100644
--- a/vendor/fguillot/picofeed/docs/feed-parsing.markdown
+++ b/vendor/fguillot/picofeed/docs/feed-parsing.markdown
@@ -215,6 +215,27 @@ catch (PicoFeedException $e) {
}
```
+Custom regex filters
+--------------------
+In case you want to modify the content with a simple regex, you can create a rule file named after the domain of the feed's link attribute. For the feed pointing to **http://www.twogag.com/** the file is stored under **Rules/twogag.com.php**
+
+For filtering, only the array with the key **filter** will be considered. The first level key is a preg_match regex that will match the sub url, e.g. to only match a feed whose link attribute points to **twogag.com/test**, the regex could look like **%/test.*%**. The second level array contains a list of search and replace strings, which will be passed to the preg\_replace function. The first string is the argument that should be matched, the second is the replacement.
+
+To replace all occurrences of links to smaller images for twogag, the following rule can be used:
+
+
+```php
+<?php
+return array(
+ 'filter' => array(
+ '%.*%' => array(
+ "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%" =>
+ "http://www.twogag.com/comics/$1.jpg"
+ )
+ )
+);
+```
+
Feed and item properties
------------------------
diff --git a/vendor/fguillot/picofeed/docs/grabber.markdown b/vendor/fguillot/picofeed/docs/grabber.markdown
index b99b756..4ac8306 100644
--- a/vendor/fguillot/picofeed/docs/grabber.markdown
+++ b/vendor/fguillot/picofeed/docs/grabber.markdown
@@ -15,23 +15,41 @@ How the content grabber works?
Standalone usage
----------------
+Fetch remote content:
+
```php
download();
-$grabber->parse();
+$config = new Config;
+
+$grabber = new Scraper($config);
+$grabber->setUrl($url);
+$grabber->execute();
// Get raw HTML content
echo $grabber->getRawContent();
// Get relevant content
-echo $grabber->getContent();
+echo $grabber->getRelevantContent();
// Get filtered relevant content
echo $grabber->getFilteredContent();
+
+// Return true if there is relevant content
+var_dump($grabber->hasRelevantContent());
+```
+
+Parse HTML content:
+
+```php
+<?php
+
+$grabber = new Scraper($config);
+$grabber->setRawContent($html);
+$grabber->execute();
```
Fetch full item contents during feed parsing
@@ -79,11 +97,11 @@ Configuration
### Enable content grabber for items
- Method name: `enableContentGrabber()`
-- Default value: false (content grabber is disabled by default)
-- Argument value: none
+- Default value: false (also fetch content if no rule file exists)
+- Argument value: bool (`true`: scrape only web pages that have a rule file)
```php
-$parser->enableContentGrabber();
+$parser->enableContentGrabber(false);
```
### Ignore item urls for the content grabber
@@ -106,30 +124,71 @@ Example with the BBC website, `www.bbc.co.uk.php`:
```php
<?php
return array(
- 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
- 'body' => array(
- '//div[@class="story-body"]',
- ),
- 'strip' => array(
- '//script',
- '//form',
- '//style',
- '//*[@class="story-date"]',
- '//*[@class="story-header"]',
- '//*[@class="story-related"]',
- '//*[contains(@class, "byline")]',
- '//*[contains(@class, "story-feature")]',
- '//*[@id="video-carousel-container"]',
- '//*[@id="also-related-links"]',
- '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
+ 'body' => array(
+ '//div[@class="story-body"]',
+ ),
+ 'strip' => array(
+ '//script',
+ '//form',
+ '//style',
+ '//*[@class="story-date"]',
+ '//*[@class="story-header"]',
+ '//*[@class="story-related"]',
+ '//*[contains(@class, "byline")]',
+ '//*[contains(@class, "story-feature")]',
+ '//*[@id="video-carousel-container"]',
+ '//*[@id="also-related-links"]',
+ '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+ )
+ )
)
);
```
+Each rule file can contain multiple rules, so that links to different URLs of a website can be handled differently. The first level key is a regex, which will be matched against the full path of the URL using **preg_match**, e.g. for **http://www.bbc.co.uk/news/world-middle-east-23911833?test=1** the URL that would be matched is **/news/world-middle-east-23911833?test=1**
-Actually, only `body`, `strip` and `test_url` are supported.
+Each rule has the following keys:
+* **body**: An array of xpath expressions which will be extracted from the page
+* **strip**: An array of xpath expressions which will be removed from the matched content
+* **test_url**: A test url to a matching page to test the grabber
Don't forget to send a pull request or a ticket to share your contribution with everybody,
+**A more complex example**:
+
+Let's say you wanted to extract a div with the id **video** if the article points to a URL like **http://comix.com/videos/423**, a div with the id **audio** if the article points to a URL like **http://comix.com/podcasts/5**, and the div with the id **content** for all other links to the site. The following rule file would fit that requirement and would be stored in a file called **lib/PicoFeed/Rules/comix.com.php**:
+
+
+```php
+return array(
+ 'grabber' => array(
+ '%^/videos.*%' => array(
+ 'test_url' => 'http://comix.com/videos/423',
+ 'body' => array(
+ '//div[@id="video"]',
+ ),
+ 'strip' => array()
+ ),
+ '%^/podcasts.*%' => array(
+ 'test_url' => 'http://comix.com/podcasts/5',
+ 'body' => array(
+ '//div[@id="audio"]',
+ ),
+ 'strip' => array()
+ ),
+ '%.*%' => array(
+ 'test_url' => 'http://comix.com/blog/1',
+ 'body' => array(
+ '//div[@id="content"]',
+ ),
+ 'strip' => array()
+ )
+ )
+);
+```
+
List of content grabber rules
-----------------------------
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
index 4ad3f14..0c609db 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
@@ -80,7 +80,7 @@ class Curl extends Client
{
$length = strlen($buffer);
- if ($buffer === "\r\n") {
+ if ($buffer === "\r\n" || $buffer === "\n") {
$this->response_headers_count++;
}
else {
@@ -162,6 +162,7 @@ class Curl extends Client
* Prepare curl proxy context
*
* @access private
+ * @param resource $ch
* @return resource $ch
*/
private function prepareProxyContext($ch)
@@ -190,6 +191,7 @@ class Curl extends Client
* Prepare curl auth context
*
* @access private
+ * @param resource $ch
* @return resource $ch
*/
private function prepareAuthContext($ch)
@@ -205,6 +207,7 @@ class Curl extends Client
* Set write/header functions
*
* @access private
+ * @param resource $ch
* @return resource $ch
*/
private function prepareDownloadMode($ch)
@@ -305,7 +308,7 @@ class Curl extends Client
{
$this->executeContext();
- list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->response_headers[$this->response_headers_count - 1]));
+ list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));
// When restricted with open_basedir
if ($this->needToHandleRedirection($follow_location, $status)) {
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
deleted file mode 100644
index bec8ab0..0000000
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
+++ /dev/null
@@ -1,592 +0,0 @@
-url = $url;
- $this->html = $html;
- $this->encoding = $encoding;
-
- $this->handleFiles();
- $this->handleStreamingVideos();
- }
-
- /**
- * Set config object
- *
- * @access public
- * @param \PicoFeed\Config\Config $config Config instance
- * @return Grabber
- */
- public function setConfig($config)
- {
- $this->config = $config;
- return $this;
- }
-
- /**
- * Get URL to download.
- *
- * @access public
- * @return string
- */
- public function getUrl()
- {
- return $this->url;
- }
-
- /**
- * Set URL to download and reset object to use for another grab.
- *
- * @access public
- * @param string $url URL
- * @return string
- */
- public function setUrl($url)
- {
- $this->url = $url;
- $this->html = "";
- $this->content = "";
- $this->encoding = "";
-
- $this->handleFiles();
- $this->handleStreamingVideos();
- }
-
- /**
- * Get relevant content
- *
- * @access public
- * @return string
- */
- public function getContent()
- {
- return $this->content;
- }
-
- /**
- * Get raw content (unfiltered)
- *
- * @access public
- * @return string
- */
- public function getRawContent()
- {
- return $this->html;
- }
-
- /**
- * Get filtered relevant content
- *
- * @access public
- * @return string
- */
- public function getFilteredContent()
- {
- $filter = Filter::html($this->content, $this->url);
- $filter->setConfig($this->config);
- return $filter->execute();
- }
-
- /**
- * Return the Youtube embed player and skip processing
- *
- * @access public
- * @return string
- */
- public function handleStreamingVideos()
- {
- if (preg_match("#(?<=v=|v\/|vi=|vi\/|youtu.be\/)[a-zA-Z0-9_-]{11}#", $this->url, $matches)) {
- $this->content = '';
- $this->skip_processing = true;
- }
- }
-
- /**
- * Skip processing for PDF documents
- *
- * @access public
- * @return string
- */
- public function handleFiles()
- {
- if (substr($this->url, -3) === 'pdf') {
- $this->skip_processing = true;
- Logger::setMessage(get_called_class().': PDF document => processing skipped');
- }
- }
-
- /**
- * Parse the HTML content
- *
- * @access public
- * @return bool
- */
- public function parse()
- {
- if ($this->skip_processing) {
- return true;
- }
-
- if ($this->html) {
- $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);
-
- // Encode everything in UTF-8
- Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"');
- $this->html = Encoding::convert($this->html, $html_encoding ?: $this->encoding);
- $this->html = Filter::stripHeadTags($this->html);
-
- Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes');
- $rules = $this->getRules();
-
- if (! empty($rules)) {
- Logger::setMessage(get_called_class().': Parse content with rules');
- $this->parseContentWithRules($rules);
- }
- else {
- Logger::setMessage(get_called_class().': Parse content with candidates');
- $this->parseContentWithCandidates();
- }
- }
- else {
- Logger::setMessage(get_called_class().': No content fetched');
- }
-
- Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes');
- Logger::setMessage(get_called_class().': Grabber done');
-
- return $this->content !== '';
- }
-
- /**
- * Download the HTML content
- *
- * @access public
- * @return HTML content
- */
- public function download()
- {
- if (! $this->skip_processing && $this->url != '') {
-
- try {
-
- $client = Client::getInstance();
-
- if ($this->config !== null) {
- $client->setConfig($this->config);
- $client->setTimeout($this->config->getGrabberTimeout());
- $client->setUserAgent($this->config->getGrabberUserAgent());
- }
-
- $client->execute($this->url);
-
- $this->url = $client->getUrl();
- $this->html = $client->getContent();
- $this->encoding = $client->getEncoding();
- }
- catch (ClientException $e) {
- Logger::setMessage(get_called_class().': '.$e->getMessage());
- }
- }
-
- return $this->html;
- }
-
- /**
- * Try to find a predefined rule
- *
- * @access public
- * @return array
- */
- public function getRules()
- {
- $hostname = parse_url($this->url, PHP_URL_HOST);
-
- if ($hostname !== false) {
-
- $files = $this->getRulesFileList($hostname);
-
- foreach ($this->getRulesFolders() as $folder) {
- $rule = $this->loadRuleFile($folder, $files);
-
- if (! empty($rule)) {
- return $rule;
- }
- }
- }
-
- return array();
- }
-
- /**
- * Get the list of possible rules file names for a given hostname
- *
- * @access public
- * @param string $hostname Hostname
- * @return array
- */
- public function getRulesFileList($hostname)
- {
- $files = array($hostname); // subdomain.domain.tld
- $parts = explode('.', $hostname);
- $len = count($parts);
-
- if ($len > 2) {
- $subdomain = array_shift($parts);
- $files[] = implode('.', $parts); // domain.tld
- $files[] = '.'.implode('.', $parts); // .domain.tld
- $files[] = $subdomain; // subdomain
- }
- else if ($len === 2) {
- $files[] = '.'.implode('.', $parts); // .domain.tld
- $files[] = $parts[0]; // domain
- }
-
- return $files;
- }
-
- /**
- * Load a rule file from the defined folder
- *
- * @access public
- * @param string $folder Rule directory
- * @param array $files List of possible file names
- * @return array
- */
- public function loadRuleFile($folder, array $files)
- {
- foreach ($files as $file) {
- $filename = $folder.'/'.$file.'.php';
-
- if (file_exists($filename)) {
- Logger::setMessage(get_called_class().' Load rule: '.$file);
- return include $filename;
- }
- }
-
- return array();
- }
-
- /**
- * Get the list of folders that contains rules
- *
- * @access public
- * @return array
- */
- public function getRulesFolders()
- {
- $folders = array(__DIR__.'/../Rules');
-
- if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) {
- $folders[] = $this->config->getGrabberRulesFolder();
- }
-
- return $folders;
- }
-
- /**
- * Get the relevant content with predefined rules
- *
- * @access public
- * @param array $rules Rules
- */
- public function parseContentWithRules(array $rules)
- {
- // Logger::setMessage($this->html);
- $dom = XmlParser::getHtmlDocument(''.$this->html);
- $xpath = new DOMXPath($dom);
-
- if (isset($rules['strip']) && is_array($rules['strip'])) {
-
- foreach ($rules['strip'] as $pattern) {
-
- $nodes = $xpath->query($pattern);
-
- if ($nodes !== false && $nodes->length > 0) {
- foreach ($nodes as $node) {
- $node->parentNode->removeChild($node);
- }
- }
- }
- }
-
- if (isset($rules['body']) && is_array($rules['body'])) {
-
- foreach ($rules['body'] as $pattern) {
-
- $nodes = $xpath->query($pattern);
-
- if ($nodes !== false && $nodes->length > 0) {
- foreach ($nodes as $node) {
- $this->content .= $dom->saveXML($node);
- }
- }
- }
- }
- }
-
- /**
- * Get the relevant content with the list of potential attributes
- *
- * @access public
- */
- public function parseContentWithCandidates()
- {
- $dom = XmlParser::getHtmlDocument(''.$this->html);
- $xpath = new DOMXPath($dom);
-
- // Try to lookup in each tag
- foreach ($this->candidatesAttributes as $candidate) {
-
- Logger::setMessage(get_called_class().': Try this candidate: "'.$candidate.'"');
-
- $nodes = $xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]');
-
- if ($nodes !== false && $nodes->length > 0) {
- $this->content = $dom->saveXML($nodes->item(0));
- Logger::setMessage(get_called_class().': Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)');
- break;
- }
- }
-
- // Try to fetch
- if (strlen($this->content) < 200) {
-
- $nodes = $xpath->query('//article');
-
- if ($nodes !== false && $nodes->length > 0) {
- $this->content = $dom->saveXML($nodes->item(0));
- Logger::setMessage(get_called_class().': Find tag ('.strlen($this->content).' bytes)');
- }
- }
-
- // Get everything
- if (strlen($this->content) < 50) {
-
- $nodes = $xpath->query('//body');
-
- if ($nodes !== false && $nodes->length > 0) {
- Logger::setMessage(get_called_class().' No enought content fetched, get //body');
- $this->content = $dom->saveXML($nodes->item(0));
- }
- }
-
- Logger::setMessage(get_called_class().': Strip garbage');
- $this->stripGarbage();
- }
-
- /**
- * Strip useless tags
- *
- * @access public
- */
- public function stripGarbage()
- {
- $dom = XmlParser::getDomDocument($this->content);
-
- if ($dom !== false) {
-
- $xpath = new DOMXPath($dom);
-
- foreach ($this->stripTags as $tag) {
-
- $nodes = $xpath->query('//'.$tag);
-
- if ($nodes !== false && $nodes->length > 0) {
- Logger::setMessage(get_called_class().': Strip tag: "'.$tag.'"');
- foreach ($nodes as $node) {
- $node->parentNode->removeChild($node);
- }
- }
- }
-
- foreach ($this->stripAttributes as $attribute) {
-
- $nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]');
-
- if ($nodes !== false && $nodes->length > 0) {
- Logger::setMessage(get_called_class().': Strip attribute: "'.$attribute.'"');
- foreach ($nodes as $node) {
- if ($this->shouldRemove($dom, $node)) {
- $node->parentNode->removeChild($node);
- }
- }
- }
- }
-
- $this->content = $dom->saveXML($dom->documentElement);
- }
- }
-
- /**
- * Return false if the node should not be removed
- *
- * @access public
- * @param DomDocument $dom
- * @param DomNode $node
- * @return boolean
- */
- public function shouldRemove($dom, $node)
- {
- $document_length = strlen($dom->textContent);
- $node_length = strlen($node->textContent);
-
- if ($document_length === 0) {
- return true;
- }
-
- $ratio = $node_length * 100 / $document_length;
-
- if ($ratio >= 90) {
- Logger::setMessage(get_called_class().': Should not remove this node ('.$node->nodeName.') ratio: '.$ratio.'%');
- return false;
- }
-
- return true;
- }
-}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
index ae77ff7..ec1dac4 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php
@@ -235,6 +235,7 @@ class Attribute
'filterProtocolUrlAttribute',
'rewriteImageProxyUrl',
'secureIframeSrc',
+ 'removeYouTubeAutoplay'
);
/**
@@ -404,6 +405,25 @@ class Attribute
return true;
}
+ /**
+ * Removes YouTube autoplay from iframes
+ *
+ * @access public
+ * @param string $tag Tag name
+ * @param string $attribute Attribute name
+ * @param string $value Attribute value
+ * @return boolean
+ */
+ public function removeYouTubeAutoplay($tag, $attribute, &$value)
+ {
+ $regex = '%^(https://(?:www\.)?youtube.com/.*\?.*autoplay=)(1)(.*)%i';
+ if ($tag === 'iframe' && $attribute === 'src' && preg_match($regex, $value)) {
+ $value = preg_replace($regex, '${1}0$3', $value);
+ }
+
+ return true;
+ }
+
/**
* Rewrite image url to use with a proxy
*
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
index 4e04660..36ab3f1 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php
@@ -2,7 +2,9 @@
namespace PicoFeed\Filter;
+use PicoFeed\Config\Config;
use PicoFeed\Client\Url;
+use PicoFeed\Scraper\RuleLoader;
use PicoFeed\Parser\XmlParser;
/**
@@ -69,6 +71,14 @@ class Html
*/
public $attribute = '';
+ /**
+ * The website to filter
+ *
+ * @access private
+ * @var string
+ */
+ private $website;
+
/**
* Initialize the filter, all inputs data must be encoded in UTF-8 before
*
@@ -81,6 +91,7 @@ class Html
$this->input = XmlParser::HtmlToXml($html);
$this->output = '';
$this->tag = new Tag;
+ $this->website = $website;
$this->attribute = new Attribute(new Url($website));
}
@@ -155,9 +166,45 @@ class Html
public function postFilter()
{
$this->output = $this->tag->removeEmptyTags($this->output);
+ $this->output = $this->filterRules($this->output);
+ $this->output = $this->tag->removeMultipleBreakTags($this->output);
$this->output = trim($this->output);
}
+ /**
+ * Apply the "filter" regex replacements from the website's rule file (called after XML parsing)
+ * @param string $content the content that should be filtered
+ *
+ * @access public
+ */
+ public function filterRules($content)
+ {
+ // the constructor should require a config, then this if can be removed
+ if ($this->config === null) {
+ $config = new Config;
+ } else {
+ $config = $this->config;
+ }
+
+ $loader = new RuleLoader($config);
+ $rules = $loader->getRules($this->website);
+
+ $url = new Url($this->website);
+ $sub_url = $url->getFullPath();
+
+ if (isset($rules['filter'])) {
+ foreach ($rules['filter'] as $pattern => $rule) {
+ if (preg_match($pattern, $sub_url)) {
+ foreach($rule as $search => $replace) {
+ $content = preg_replace($search, $replace, $content);
+ }
+ }
+ }
+ }
+
+ return $content;
+ }
+
/**
* Parse opening tag
*
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
index aa7efe4..b14ed94 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php
@@ -194,7 +194,7 @@ class Tag
* @param string $data Input data
* @return string
*/
- public function removeMultipleTags($data)
+ public function removeMultipleBreakTags($data)
{
return preg_replace("/(
\s*)+/", "
", $data);
}
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
index 7ef904f..810494b 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php
@@ -3,11 +3,11 @@
namespace PicoFeed\Parser;
use SimpleXMLElement;
+use PicoFeed\Client\Url;
use PicoFeed\Encoding\Encoding;
use PicoFeed\Filter\Filter;
use PicoFeed\Logging\Logger;
-use PicoFeed\Client\Url;
-use PicoFeed\Client\Grabber;
+use PicoFeed\Scraper\Scraper;
/**
* Base parser class
@@ -81,6 +81,14 @@ abstract class Parser
*/
private $enable_grabber = false;
+ /**
+ * Restrict the content grabber to pages that have a rule file
+ *
+ * @access private
+ * @var bool
+ */
+ private $grabber_needs_rule_file = false;
+
/**
* Ignore those urls for the content scraper
*
@@ -237,11 +245,16 @@ abstract class Parser
{
if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) {
- $grabber = new Grabber($item->getUrl());
- $grabber->setConfig($this->config);
- $grabber->download();
+ $grabber = new Scraper($this->config);
+ $grabber->setUrl($item->getUrl());
- if ($grabber->parse()) {
+ if ($this->grabber_needs_rule_file) {
+ $grabber->disableCandidateParser();
+ }
+
+ $grabber->execute();
+
+ if ($grabber->hasRelevantContent()) {
$item->content = $grabber->getFilteredContent();
}
}
@@ -270,7 +283,6 @@ abstract class Parser
* Generate a unique id for an entry (hash all arguments)
*
* @access public
- * @param string $args Pieces of data to hash
* @return string
*/
public function generateId()
@@ -383,11 +395,14 @@ abstract class Parser
* Enable the content grabber
*
* @access public
+ * @param bool $needs_rule_file true if only pages with rule files should be
+ * scraped
* @return \PicoFeed\Parser\Parser
*/
- public function enableContentGrabber()
+ public function enableContentGrabber($needs_rule_file = false)
{
$this->enable_grabber = true;
+ $this->grabber_needs_rule_file = $needs_rule_file;
}
/**
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php
index 226169b..eec5e12 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php
@@ -1,10 +1,14 @@
'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208',
- 'body' => array(
- '//div[@class="entry-content"]',
- ),
- 'strip' => array(
- '//*[contains(@class, "fb-like") or contains(@class, "social")]'
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208',
+ 'body' => array(
+ '//div[@class="entry-content"]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "fb-like") or contains(@class, "social")]'
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php
index aa17033..ee641b0 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php
@@ -1,11 +1,15 @@
'//header/h1',
- 'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/',
- 'body' => array(
- '//div[@class="postContent"]',
- ),
- 'strip' => array(
- '//*[@class="shareToolsBox"]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'title' => '//header/h1',
+ 'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/',
+ 'body' => array(
+ '//div[@class="postContent"]',
+ ),
+ 'strip' => array(
+ '//*[@class="shareToolsBox"]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php
index 0e0436e..e2d1cc6 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php
@@ -1,9 +1,13 @@
'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031',
- 'body' => array(
- '//div[contains(@class, "field-name-body")]'
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031',
+ 'body' => array(
+ '//div[contains(@class, "field-name-body")]'
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php
index 31f4d78..ed27bb5 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php
@@ -1,8 +1,11 @@
'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html',
- 'title' => '//h1[@class="articleHeadline"]',
- 'body' => array(
- '//div[@class="articleBody"]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html',
+ 'body' => array(
+ '//div[@class="articleBody"]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php
index 0fd99f7..a2be240 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php
@@ -1,9 +1,12 @@
'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
- 'body' => array(
- '//div[@class="KonaBody"]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
+ 'body' => array(
+ '//div[@class="KonaBody"]',
+ ),
+ 'strip' => array()
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php
index ad6f9c9..164ded6 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php
@@ -1,16 +1,20 @@
'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html',
- 'body' => array(
- '//div[@class="sl-art-body"]',
- ),
- 'strip' => array(
- '//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]',
- '//*[@id="mys_slate_logged_in"]',
- '//*[@id="sl_article_tools_myslate_bottom"]',
- '//*[@id="mys_myslate"]',
- '//*[@class="sl-viral-container"]',
- '//*[@class="sl-art-creds-cntr"]',
- '//*[@class="sl-art-ad-midflex"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html',
+ 'body' => array(
+ '//div[@class="sl-art-body"]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]',
+ '//*[@id="mys_slate_logged_in"]',
+ '//*[@id="sl_article_tools_myslate_bottom"]',
+ '//*[@id="mys_myslate"]',
+ '//*[@class="sl-viral-container"]',
+ '//*[@class="sl-art-creds-cntr"]',
+ '//*[@class="sl-art-ad-midflex"]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php
index 6118488..4a1e8d2 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php
@@ -1,10 +1,14 @@
'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business',
- 'body' => array(
- '//div[contains(@class, "content__main-column--article")]',
- ),
- 'strip' => array(
- '//div[contains(@class, "meta-container")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business',
+ 'body' => array(
+ '//div[contains(@class, "content__main-column--article")]',
+ ),
+ 'strip' => array(
+ '//div[contains(@class, "meta-container")]',
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php
index ea99ab6..7b8f76e 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php
@@ -1,25 +1,29 @@
'https://en.wikipedia.org/wiki/Grace_Hopper',
- 'body' => array(
- '//div[@id="bodyContent"]',
- ),
- 'strip' => array(
- "//div[@id='toc']",
- "//div[@id='catlinks']",
- "//div[@id='jump-to-nav']",
- "//div[@class='thumbcaption']//div[@class='magnify']",
- "//table[@class='navbox']",
- "//table[contains(@class, 'infobox')]",
- "//div[@class='dablink']",
- "//div[@id='contentSub']",
- "//div[@id='siteSub']",
- "//table[@id='persondata']",
- "//table[contains(@class, 'metadata')]",
- "//*[contains(@class, 'noprint')]",
- "//*[contains(@class, 'printfooter')]",
- "//*[contains(@class, 'editsection')]",
- "//*[contains(@class, 'error')]",
- "//span[@title='pronunciation:']",
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'https://en.wikipedia.org/wiki/Grace_Hopper',
+ 'body' => array(
+ '//div[@id="bodyContent"]',
+ ),
+ 'strip' => array(
+ "//div[@id='toc']",
+ "//div[@id='catlinks']",
+ "//div[@id='jump-to-nav']",
+ "//div[@class='thumbcaption']//div[@class='magnify']",
+ "//table[@class='navbox']",
+ "//table[contains(@class, 'infobox')]",
+ "//div[@class='dablink']",
+ "//div[@id='contentSub']",
+ "//div[@id='siteSub']",
+ "//table[@id='persondata']",
+ "//table[contains(@class, 'metadata')]",
+ "//*[contains(@class, 'noprint')]",
+ "//*[contains(@class, 'printfooter')]",
+ "//*[contains(@class, 'editsection')]",
+ "//*[contains(@class, 'error')]",
+ "//span[@title='pronunciation:']",
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php
index 32c0475..dcc4735 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php
@@ -1,17 +1,21 @@
'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/',
- 'body' => array(
- '//div[@class="entry"]',
- ),
- 'strip' => array(
- '//*[@id="linker_widget"]',
- '//*[contains(@class, "bio")]',
- '//*[contains(@class, "entry-footer")]',
- '//*[contains(@class, "mobify_backtotop_link")]',
- '//*[contains(@class, "gallery-navigation")]',
- '//*[contains(@class, "gallery-thumbnail")]',
- '//img[contains(@src, "1x1")]',
- '//a[contains(@href, "creativecommons")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/',
+ 'body' => array(
+ '//div[@class="entry"]',
+ ),
+ 'strip' => array(
+ '//*[@id="linker_widget"]',
+ '//*[contains(@class, "bio")]',
+ '//*[contains(@class, "entry-footer")]',
+ '//*[contains(@class, "mobify_backtotop_link")]',
+ '//*[contains(@class, "gallery-navigation")]',
+ '//*[contains(@class, "gallery-thumbnail")]',
+ '//img[contains(@src, "1x1")]',
+ '//a[contains(@href, "creativecommons")]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php
index 113feb5..752e8c2 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php
@@ -1,11 +1,15 @@
'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html',
- 'body' => array(
- '//div[@class="articlePage"]',
- ),
- 'strip' => array(
- '//*[@id="articleThumbnail_2"]',
- '//*[@class="socialByline"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html',
+ 'body' => array(
+ '//div[@class="articlePage"]',
+ ),
+ 'strip' => array(
+ '//*[@id="articleThumbnail_2"]',
+ '//*[@class="socialByline"]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php
index 615ad77..9c64491 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php
@@ -1,14 +1,18 @@
'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/',
- 'body' => array(
- '//div[@class="article_ventre_box"]',
- ),
- 'strip' => array(
- '//link',
- '//*[contains(@class, "article_navigation")]',
- '//h1',
- '//*[contains(@class, "article_toolbarMain")]',
- '//*[contains(@class, "article_imagehaute_box")]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/',
+ 'body' => array(
+ '//div[@class="article_ventre_box"]',
+ ),
+ 'strip' => array(
+ '//link',
+ '//*[contains(@class, "article_navigation")]',
+ '//h1',
+ '//*[contains(@class, "article_toolbarMain")]',
+ '//*[contains(@class, "article_imagehaute_box")]'
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php
index bbe26af..2faf0c4 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php
@@ -1,10 +1,14 @@
'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907',
- 'body' => array(
- '//div[@class="news_details"]'
- ),
- 'strip' => array(
- '//div[@class="news_details"]/div/div[last()]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907',
+ 'body' => array(
+ '//div[@class="news_details"]'
+ ),
+ 'strip' => array(
+ '//div[@class="news_details"]/div/div[last()]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php
index 603bcdc..984e827 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php
@@ -1,20 +1,23 @@
'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm',
- 'body' => array(
- '//div[contains(@class, "article")][1]',
- ),
- 'strip' => array(
- '//read/h1',
- '//*[@id="t-map"]',
- '//*[contains(@class, "modules")]',
- '//*[contains(@class, "adsense")]',
- '//*[contains(@class, "linkbox")]',
- '//*[contains(@class, "info")]',
- '//*[@class="skip"]',
- '//*[@class="funcs"]',
- '//span[@class="nd address"]',
- '//a[contains(@href, "abo-und-services")]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm',
+ 'body' => array(
+ '//div[contains(@class, "article")][1]',
+ ),
+ 'strip' => array(
+ '//read/h1',
+ '//*[@id="t-map"]',
+ '//*[contains(@class, "modules")]',
+ '//*[contains(@class, "adsense")]',
+ '//*[contains(@class, "linkbox")]',
+ '//*[contains(@class, "info")]',
+ '//*[@class="skip"]',
+ '//*[@class="funcs"]',
+ '//span[@class="nd address"]',
+ '//a[contains(@href, "abo-und-services")]'
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php
new file mode 100644
index 0000000..ce93b3e
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/',
+ 'body' => array('//div[contains(@class,"entry")]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php
new file mode 100644
index 0000000..ab3fde0
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%/index.php.*comic=.*%' => array(
+ 'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315',
+ 'body' => array('//*[@id="comic"]/img'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php
index f540759..829cf3f 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php
@@ -1,9 +1,13 @@
'http://blog.fefe.de/?ts=ad706a73',
- 'body' => array(
- '/html/body/ul'
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://blog.fefe.de/?ts=ad706a73',
+ 'body' => array(
+ '/html/body/ul'
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php
index 2ddd17c..2073faf 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php
@@ -1,9 +1,13 @@
'http://www.bunicomic.com/comic/buni-623/',
- 'body' => array(
- '//div[@class="comic-table"]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.bunicomic.com/comic/buni-623/',
+ 'body' => array(
+ '//div[@class="comic-table"]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php
new file mode 100644
index 0000000..e12f69f
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php
@@ -0,0 +1,12 @@
+ array(
+ '%/cad/.+%' => array(
+ 'test_url' => 'http://www.cad-comic.com/cad/20150417',
+ 'body' => array(
+ '//*[@id="content"]/img'
+ ),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php
new file mode 100644
index 0000000..b013a1d
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild',
+ 'body' => array('//div[@id="comic"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php
new file mode 100644
index 0000000..40e3eb8
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%/comic.*%' => array(
+ 'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/',
+ 'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php
index 99a358f..7953b9a 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php
@@ -1,9 +1,13 @@
'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html',
- 'body' => array(
- '//div[contains(@id, "newscontent")]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html',
+ 'body' => array(
+ '//div[contains(@id, "newscontent")]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php
old mode 100755
new mode 100644
index bde5895..44ba13e
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php
@@ -1,15 +1,19 @@
'http://dailyjs.com/2014/08/07/p5js/',
- 'body' => array(
- '//div[@id="post"]',
- ),
- 'strip' => array(
- '//h2[@class="post"]',
- '//div[@class="meta"]',
- '//*[contains(@class, "addthis_toolbox")]',
- '//*[contains(@class, "addthis_default_style")]',
- '//*[@class="navigation small"]',
- '//*[@id="related"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://dailyjs.com/2014/08/07/p5js/',
+ 'body' => array(
+ '//div[@id="post"]',
+ ),
+ 'strip' => array(
+ '//h2[@class="post"]',
+ '//div[@class="meta"]',
+ '//*[contains(@class, "addthis_toolbox")]',
+ '//*[contains(@class, "addthis_default_style")]',
+ '//*[@class="navigation small"]',
+ '//*[@id="related"]',
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php
index e5f17de..c1499d4 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php
@@ -1,10 +1,14 @@
'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick',
- 'body' => array(
- '//div[@class="contenu"]',
- ),
- 'strip' => array(
- '//div[contains(@class, "a2a")]'
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick',
+ 'body' => array(
+ '//div[@class="contenu"]',
+ ),
+ 'strip' => array(
+ '//div[contains(@class, "a2a")]'
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php
index 687d72d..599a686 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php
@@ -1,10 +1,14 @@
'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss',
- 'body' => array(
- '//div[@class="copytext"]',
- '//ul[@id="media-list"]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss',
+ 'body' => array(
+ '//div[@class="copytext"]',
+ '//ul[@id="media-list"]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php
index 39837cc..5c143f4 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php
@@ -1,9 +1,13 @@
'http://distrowatch.com/?newsid=08355',
- 'body' => array(
- '//td[@class="NewsText"][1]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://distrowatch.com/?newsid=08355',
+ 'body' => array(
+ '//td[@class="NewsText"][1]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php
index 7ef5737..a57b7f4 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php
@@ -1,11 +1,15 @@
'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/',
- 'body' => array(
- '//div[@class="joke"]',
- '//div[@class="story-cover"]',
- '//div[@class="story-content"]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/',
+ 'body' => array(
+ '//div[@class="joke"]',
+ '//div[@class="story-cover"]',
+ '//div[@class="story-content"]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php
new file mode 100644
index 0000000..aada167
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated',
+ 'body' => array('//div[@class="article-content"]/p[not(@class="read-more")] | //div[@class="article-content"]/div[@style="text-align: center;"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php
new file mode 100644
index 0000000..6b6b20b
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php
@@ -0,0 +1,42 @@
+ array(
+ '%/articles/view/comicsandcosplay/comics/critical-miss.*%' => array(
+ 'body' => array('//*[@class="body"]/span/img | //div[@class="folder_nav_links"]/following::p'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss/13776-Critical-Miss-on-Framerates?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ ),
+ '%/articles/view/comicsandcosplay/comics/namegame.*%' => array(
+ 'body' => array('//*[@class="body"]/span/p/img[@height != "120"]'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame/9759-Leaving-the-Nest?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ ),
+ '%/articles/view/comicsandcosplay/comics/stolen-pixels.*%' => array(
+ 'body' => array('//*[@class="body"]/span/p[2]/img'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels/8866-Stolen-Pixels-258-Where-the-Boys-Are?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ ),
+ '%/articles/view/comicsandcosplay/comics/bumhugparade.*%' => array(
+ 'body' => array('//*[@class="body"]/span/p[2]/img'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade/8262-Bumhug-Parade-13?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ ),
+ '%/articles/view/comicsandcosplay.*/comics/escapistradiotheater%' => array(
+ 'body' => array('//*[@class="body"]/span/p[2]/img'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater/8265-The-Escapist-Radio-Theater-13?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ ),
+ '%/articles/view/comicsandcosplay/comics/paused.*%' => array(
+ 'body' => array('//*[@class="body"]/span/p[2]/img | //*[@class="body"]/span/div/img'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/paused/8263-Paused-16?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ ),
+ '%/articles/view/comicsandcosplay/comics/fraughtwithperil.*%' => array(
+ 'body' => array('//*[@class="body"]'),
+ 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil/12166-The-Escapist-Presents-Escapist-Comics-Critical-Miss-B-lyeh-Fhlop?utm_source=rss&utm_medium=rss&utm_campaign=articles',
+ 'strip' => array()
+ )
+ )
+);
+
+
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php
index b9ca9b0..83a0890 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php
@@ -1,9 +1,13 @@
'http://explosm.net/comics/3803/',
- 'body' => array(
- '//div[@id="comic-container"]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://explosm.net/comics/3803/',
+ 'body' => array(
+ '//div[@id="comic-container"]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php
index 58cb9df..50995b8 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php
@@ -1,9 +1,13 @@
'http://www.fastcodesign.com/3026548/exposure/peek-inside-the-worlds-forbidden-subway-tunnels',
- 'body' => array(
- '//article[contains(@class, "body prose")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.fastcodesign.com/3026548/exposure/peek-inside-the-worlds-forbidden-subway-tunnels',
+ 'body' => array(
+ '//article[contains(@class, "body prose")]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php
index 9d31b0a..6aacbdb 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php
@@ -1,9 +1,13 @@
'http://www.fastcoexist.com/3026114/take-a-seat-on-this-gates-funded-future-toilet-that-will-change-how-we-think-about-poop',
- 'body' => array(
- '//article[contains(@class, "body prose")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.fastcoexist.com/3026114/take-a-seat-on-this-gates-funded-future-toilet-that-will-change-how-we-think-about-poop',
+ 'body' => array(
+ '//article[contains(@class, "body prose")]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php
index 3bce2aa..778adbf 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php
@@ -1,9 +1,13 @@
'http://www.fastcompany.com/3026712/fast-feed/elon-musk-an-apple-tesla-merger-is-very-unlikely',
- 'body' => array(
- '//article[contains(@class, "body prose")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.fastcompany.com/3026712/fast-feed/elon-musk-an-apple-tesla-merger-is-very-unlikely',
+ 'body' => array(
+ '//article[contains(@class, "body prose")]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php
index ba5db57..64dd263 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php
@@ -1,9 +1,13 @@
'http://www.ffworld.com/?rub=news&page=voir&id=2709',
- 'body' => array(
- '//div[@class="news_body"]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.ffworld.com/?rub=news&page=voir&id=2709',
+ 'body' => array(
+ '//div[@class="news_body"]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php
new file mode 100644
index 0000000..027c57a
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'body' => array('//*[@id="comic"] | //*[@class="post-image"]'),
+ 'strip' => array(),
+ 'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/'
+ )
+ )
+);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php
index 9ddd030..6e28b99 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php
@@ -1,10 +1,14 @@
'https://github.com/audreyr/favicon-cheat-sheet',
- 'body' => array(
- '//article[contains(@class, "entry-content")]',
- ),
- 'strip' => array(
- '//h1'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'https://github.com/audreyr/favicon-cheat-sheet',
+ 'body' => array(
+ '//article[contains(@class, "entry-content")]',
+ ),
+ 'strip' => array(
+ '//h1'
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php
index 1a45fa6..269170c 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php
@@ -1,8 +1,12 @@
'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
- 'body' => array(
- '//header[@class="cluster-header"]',
- '//div[@class="formatted"]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
+ 'body' => array(
+ '//header[@class="cluster-header"]',
+ '//div[@class="formatted"]'
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php
index fa1d548..93343bb 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php
@@ -1,7 +1,11 @@
'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html',
- 'body' => array(
- '//div[@class="meldung_wrapper"]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html',
+ 'body' => array(
+ '//div[@class="meldung_wrapper"]'
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php
index 18ad465..07f4816 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php
@@ -1,9 +1,13 @@
'http://www.huffingtonpost.com/2014/02/20/centscere-social-media-syracuse_n_4823848.html',
- 'body' => array(
- '//article[@class="content")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.huffingtonpost.com/2014/02/20/centscere-social-media-syracuse_n_4823848.html',
+ 'body' => array(
+ '//article[@class="content"]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php
index c4a80be..e61e09a 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php
@@ -1,8 +1,12 @@
'http://ing.dk/artikel/smart-husisolering-og-styring-skal-mindske-japans-energikrise-164517',
- 'body' => array(
- '//section[contains(@class, "teaser")]',
- '//section[contains(@class, "body")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://ing.dk/artikel/smart-husisolering-og-styring-skal-mindske-japans-energikrise-164517',
+ 'body' => array(
+ '//section[contains(@class, "teaser")]',
+ '//section[contains(@class, "body")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php
index ad0d67a..99d4ab1 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php
@@ -1,7 +1,11 @@
'http://www./2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/',
- 'body' => array(
- '//div[@class="post-content"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.journaldugeek.com/2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/',
+ 'body' => array(
+ '//div[@class="post-content"]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php
index 5a13053..3471bf5 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php
@@ -1,9 +1,13 @@
'http://www.kanpai.fr/japon/comment-donner-lheure-en-japonais.html',
- 'body' => array(
- '//div[@class="single-left"]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.kanpai.fr/japon/comment-donner-lheure-en-japonais.html',
+ 'body' => array(
+ '//div[@class="single-left"]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php
index 2ffafd6..cdd6389 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php
@@ -1,8 +1,12 @@
'http://karriere.jobfinder.dk/artikel/dansk-professor-skal-lede-smart-grid-forskning-20-millioner-dollars-763',
- 'body' => array(
- '//section[contains(@class, "teaser")]',
- '//section[contains(@class, "body")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://karriere.jobfinder.dk/artikel/dansk-professor-skal-lede-smart-grid-forskning-20-millioner-dollars-763',
+ 'body' => array(
+ '//section[contains(@class, "teaser")]',
+ '//section[contains(@class, "body")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php
index fbc2e53..4de41e4 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php
@@ -1,13 +1,17 @@
'http://lejapon.fr/guide-voyage-japon/5223/tokyo-sous-la-neige.htm',
- 'body' => array(
- '//div[@class="entry"]'
- ),
- 'strip' => array(
- '//*[contains(@class, "addthis_toolbox")]',
- '//*[contains(@class, "addthis_default_style")]',
- '//*[@class="navigation small"]',
- '//*[@id="related"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://lejapon.fr/guide-voyage-japon/5223/tokyo-sous-la-neige.htm',
+ 'body' => array(
+ '//div[@class="entry"]'
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "addthis_toolbox")]',
+ '//*[contains(@class, "addthis_default_style")]',
+ '//*[@class="navigation small"]',
+ '//*[@id="related"]',
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php
index 68e097a..861e725 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php
@@ -1,9 +1,13 @@
'http://lesjoiesducode.fr/post/75576211207/quand-lappli-ne-fonctionne-plus-sans-aucune-raison',
- 'body' => array(
- '//div[@class="blog-post-content"]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://lesjoiesducode.fr/post/75576211207/quand-lappli-ne-fonctionne-plus-sans-aucune-raison',
+ 'body' => array(
+ '//div[@class="blog-post-content"]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php
new file mode 100644
index 0000000..50e84fd
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php
@@ -0,0 +1,13 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.lfg.co/page/871/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+LookingForGroup+%28Looking+For+Group%29&utm_content=FeedBurner',
+ 'body' => array(
+ '//*[@id="comic"]/img | //*[@class="content"]'
+ ),
+ 'strip' => array(),
+ )
+ )
+);
+
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php
index 9b22995..77c6cf3 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php
@@ -1,14 +1,18 @@
'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973',
- 'body' => array(
- '//div[contains(@class, "row")/img',
- '//div[contains(@class, "content-column")]',
- ),
- 'strip' => array(
- '//*[contains(@class, "meta")]',
- '//span[contains(@class, "icon")]',
- '//h1',
- '//aside',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973',
+ 'body' => array(
+ '//div[contains(@class, "row")]/img',
+ '//div[contains(@class, "content-column")]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "meta")]',
+ '//span[contains(@class, "icon")]',
+ '//h1',
+ '//aside',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php
index fb9c8d0..fdd92d3 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php
@@ -1,9 +1,13 @@
'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html',
- 'body' => array(
- '//pre',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html',
+ 'body' => array(
+ '//pre',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php
new file mode 100644
index 0000000..3e07a22
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%/comic.*%' => array(
+ 'test_url' => 'http://www.loadingartist.com/comic/lifted-spirits/',
+ 'body' => array('//div[@class="comic"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php
new file mode 100644
index 0000000..282013c
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://loldwell.com/?comic=food-math-101',
+ 'body' => array('//*[@id="comic"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php
index ae54540..695190a 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php
@@ -1,9 +1,13 @@
'http://www.macg.co//logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205',
- 'body' => array(
- '//div[contains(@class, "field-name-body")]'
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.macg.co//logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205',
+ 'body' => array(
+ '//div[contains(@class, "field-name-body")]'
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php
index f2016e6..4cda04b 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php
@@ -1,9 +1,13 @@
'http://marc.info/?l=openbsd-misc&m=141987113202061&w=2',
- 'body' => array(
- '//pre',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://marc.info/?l=openbsd-misc&m=141987113202061&w=2',
+ 'body' => array(
+ '//pre',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php
new file mode 100644
index 0000000..88c5fdc
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://maximumble.thebookofbiff.com/2015/04/20/1084-change/',
+ 'body' => array('//div[@id="comic"]/div/a/img'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php
index 79ed5bc..c0dfc49 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php
@@ -1,9 +1,13 @@
'https://medium.com/lessons-learned/917b8b63ae3e',
- 'body' => array(
- '//div[contains(@class, "post-field body")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e',
+ 'body' => array(
+ '//div[contains(@class, "post-field body")]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php
new file mode 100644
index 0000000..787553f
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.metronieuws.nl/sport/2015/04/broer-fellaini-zorgde-bijna-voor-paniek-bij-mourinho',
+ 'body' => array('//div[contains(@class,"article-top")]/div[contains(@class,"image-component")] | //div[@class="article-full-width"]/div[1]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php
new file mode 100644
index 0000000..632c864
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://mokepon.smackjeeves.com/comics/2120096/chapter-9-page-68/',
+ 'body' => array('//*[@id="comic_area_inner"]/img | //*[@id="comic_area_inner"]/a/img'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php
index cfc4b2d..d3838af 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php
@@ -1,9 +1,13 @@
'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
- 'body' => array(
- '//div[@class="blog-post-body"]'
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
+ 'body' => array(
+ '//div[@class="blog-post-body"]'
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php
new file mode 100644
index 0000000..ac41ee6
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php
@@ -0,0 +1,12 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet',
+ 'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'),
+ 'strip' => array(
+ '//*[@id="wp_rp_first"]'
+ ),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php
new file mode 100644
index 0000000..d3048c4
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%/archives.*%' => array(
+ 'test_url' => 'http://niceteethcomic.com/archives/page119/',
+ 'body' => array('//*[@class="comicpane"]/a/img'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php
new file mode 100644
index 0000000..b4fb73f
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php
@@ -0,0 +1,8 @@
+ array(
+ '%.*%' => array(
+ '%.*static.nichtlustig.de/comics/full/(\\d+).*%s' => ''
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php
index 74d3fa1..1bdc199 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php
@@ -1,16 +1,20 @@
'https://www.openrightsgroup.org/blog/2014/3-days-to-go-till-orgcon2014',
- 'body' => array(
- '//div[contains(@class, "content")]/div',
- ),
- 'strip' => array(
- '//h2[1]',
- '//div[@class="info"]',
- '//div[@class="tags"]',
- '//div[@class="comments"]',
- '//div[@class="breadcrumbs"]',
- '//h1[@class="pageTitle"]',
- '//p[@class="bookmarkThis"]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'https://www.openrightsgroup.org/blog/2014/3-days-to-go-till-orgcon2014',
+ 'body' => array(
+ '//div[contains(@class, "content")]/div',
+ ),
+ 'strip' => array(
+ '//h2[1]',
+ '//div[@class="info"]',
+ '//div[@class="tags"]',
+ '//div[@class="comments"]',
+ '//div[@class="breadcrumbs"]',
+ '//h1[@class="pageTitle"]',
+ '//p[@class="bookmarkThis"]',
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php
index 9a576f7..232cbca 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php
@@ -1,9 +1,13 @@
'http://pastebin.com/ed1pP9Ak',
- 'body' => array(
- '//div[@class="text"]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://pastebin.com/ed1pP9Ak',
+ 'body' => array(
+ '//div[@class="text"]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php
new file mode 100644
index 0000000..dcd35a5
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php
@@ -0,0 +1,21 @@
+ array(
+ '%/news/.*%' => array(
+ 'test_url' => 'http://penny-arcade.com/news/post/2015/04/15/101-part-two',
+ 'body' => array(
+ '//*[@class="postBody"]/*',
+ ),
+ 'strip' => array(
+ )
+ ),
+ '%/comic/.*%' => array(
+ 'test_url' => 'http://penny-arcade.com/comic/2015/04/15',
+ 'body' => array(
+ '//*[@id="comicFrame"]/a/img',
+ ),
+ 'strip' => array(
+ )
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php
index d18e1db..3b09b40 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php
@@ -1,7 +1,11 @@
'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1',
- 'body' => array(
- '//div[@role="article"]/div[contains(@class, "eE")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1',
+ 'body' => array(
+ '//div[@role="article"]/div[contains(@class, "eE")]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php
old mode 100755
new mode 100644
index 6857c20..e44a130
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php
@@ -1,12 +1,16 @@
'http://putaindecode.fr/posts/js/etat-lieux-js-modulaire-front/',
- 'body' => array(
- '//*[@class="putainde-Post-md"]',
- ),
- 'strip' => array(
- '//*[contains(@class, "inlineimg")]',
- '//*[contains(@class, "comment-respond")]',
- '//header'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://putaindecode.fr/posts/js/etat-lieux-js-modulaire-front/',
+ 'body' => array(
+ '//*[@class="putainde-Post-md"]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "inlineimg")]',
+ '//*[contains(@class, "comment-respond")]',
+ '//header'
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php
index bf3c8d8..08a2b2f 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php
@@ -1,9 +1,13 @@
'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm',
- 'body' => array(
- '//*[@id="article"]/div[contains(@class, "content")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm',
+ 'body' => array(
+ '//*[@id="article"]/div[contains(@class, "content")]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php
new file mode 100644
index 0000000..173f563
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php
@@ -0,0 +1,12 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://satwcomic.com/day-at-the-beach',
+ 'body' => array(
+ '//div[@class="container"]/center/a/img'
+ ),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php
new file mode 100644
index 0000000..34f385d
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php
@@ -0,0 +1,12 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'https://www.scrumalliance.org/community/articles/2015/march/an-introduction-to-agile-project-intake?feed=articles',
+ 'body' => array(
+ '//div[@class="article_content"]',
+ ),
+ 'strip' => array()
+ )
+ )
+);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php
index 8f3f588..5b3cbc7 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php
@@ -1,9 +1,13 @@
'http://www.sitepoint.com/creating-hello-world-app-swift/',
- 'body' => array(
- '//section[@class="article_body"]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.sitepoint.com/creating-hello-world-app-swift/',
+ 'body' => array(
+ '//section[@class="article_body"]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php
new file mode 100644
index 0000000..b212fce
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php
@@ -0,0 +1,11 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://science.slashdot.org/story/15/04/20/0528253/pull-top-can-tabs-at-50-reach-historic-archaeological-status',
+ 'body' => array(
+ '//article/div[@class="body"] | //article[@class="layout-article"]/div[@class="elips"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php
index 8bea4fb..c82e31d 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php
@@ -1,15 +1,19 @@
'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/',
- 'body' => array(
- '//div[@class="post-content"]',
- ),
- 'strip' => array(
- '//*[contains(@class, "gallery")]',
- '//*[contains(@class, "share")]',
- '//*[contains(@class, "wpcnt")]',
- '//*[contains(@class, "meta")]',
- '//*[contains(@class, "postitle")]',
- '//*[@id="nav-below"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/',
+ 'body' => array(
+ '//div[@class="post-content"]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "gallery")]',
+ '//*[contains(@class, "share")]',
+ '//*[contains(@class, "wpcnt")]',
+ '//*[contains(@class, "meta")]',
+ '//*[contains(@class, "postitle")]',
+ '//*[@id="nav-below"]',
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php
new file mode 100644
index 0000000..83f93f1
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://smarthomewelt.de/apple-tv-amazon-echo-smart-home/',
+ 'body' => array('//div[@class="entry-inner"]/p | //div[@class="entry-inner"]/div[contains(@class,"wp-caption")]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php
new file mode 100644
index 0000000..5000072
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.smashingmagazine.com/2015/04/17/using-sketch-for-responsive-web-design-case-study/',
+ 'body' => array('//article[contains(@class,"post")]/p'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php
index 375b17c..d71893a 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php
@@ -1,7 +1,11 @@
'http://www.spiegel.de/politik/ausland/afrika-angola-geht-gegen-islam-vor-und-schliesst-moscheen-a-935788.html',
- 'body' => array(
- '//div[contains(@class, "article-section")]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.spiegel.de/politik/ausland/afrika-angola-geht-gegen-islam-vor-und-schliesst-moscheen-a-935788.html',
+ 'body' => array(
+ '//div[contains(@class, "article-section")]'
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php
new file mode 100644
index 0000000..8629a58
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://sz.de/1.2443161',
+ 'body' => array('//article[@id="sitecontent"]/section[@class="topenrichment"]//img | //article[@id="sitecontent"]/section[@class="body"]/section[@class="authors"]/preceding-sibling::*[not(contains(@class, "ad"))]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php
index bc4d5b8..5646a17 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php
@@ -1,11 +1,15 @@
'http://techcrunch.com/2013/08/31/indias-visa-maze/',
- 'body' => array(
- '//div[contains(@class, "media-container")]',
- '//div[@class="body-copy"]',
- ),
- 'strip' => array(
- '//*[contains(@class, "module-crunchbase")]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://techcrunch.com/2013/08/31/indias-visa-maze/',
+ 'body' => array(
+ '//div[contains(@class, "media-container")]',
+ '//div[@class="body-copy"]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "module-crunchbase")]'
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php
new file mode 100644
index 0000000..d33e127
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://thecodinglove.com/post/116897934767',
+ 'body' => array('//div[@class="bodytype"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php
new file mode 100644
index 0000000..e733730
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.thegamercat.com/comic/just-no/',
+ 'body' => array('//div[@id="comic"] | //div[@class="post-content"]/div[@class="entry"]/p'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php
new file mode 100644
index 0000000..b625ac2
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.themerepublic.net/2015/04/david-lopez-pitoko.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+blogspot%2FDngUJ+%28Theme+Republic%29&utm_content=FeedBurner',
+ 'body' => array('//*[@class="post-body"]'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php
new file mode 100644
index 0000000..b563a71
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php
@@ -0,0 +1,15 @@
+ array(
+ '%^/blog.*%' => array(
+ 'test_url' => 'http://travel-dealz.de/blog/venere-gutschein/',
+ 'body' => array('//div[@class="post-entry"]'),
+ 'strip' => array(
+ '//*[@id="jp-relatedposts"]',
+ '//*[@class="post-meta"]',
+ '//*[@class="post-data"]',
+ '//*[@id="author-meta"]',
+ ),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php
index 7fbbb0c..f324f4c 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php
@@ -1,10 +1,14 @@
'http://www.treehugger.com/uncategorized/top-ten-posts-week-bunnies-2.html',
- 'body' => array(
- '//div[contains(@class, "promo-image")]',
- '//div[contains(@id, "entry-body")]',
- ),
- 'strip' => array(
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.treehugger.com/uncategorized/top-ten-posts-week-bunnies-2.html',
+ 'body' => array(
+ '//div[contains(@class, "promo-image")]',
+ '//div[contains(@id, "entry-body")]',
+ ),
+ 'strip' => array(
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php
new file mode 100644
index 0000000..77caec8
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php
@@ -0,0 +1,9 @@
+ array(
+ '%.*%' => array(
+ "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%" =>
+ "http://www.twogag.com/comics/$1.jpg"
+ )
+ )
+);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php
new file mode 100644
index 0000000..aafb71c
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php
@@ -0,0 +1,10 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://twokinds.keenspot.com/archive.php?p=0',
+ 'body' => array('//*[@class="comic"]/div/a/img | //*[@class="comic"]/div/img | //*[@id="cg_img"]/img | //*[@id="cg_img"]/a/img'),
+ 'strip' => array(),
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php
index f36ccfe..8b14d96 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php
@@ -1,10 +1,14 @@
'http://undeadly.org/cgi?action=article&sid=20141101181155',
- 'body' => array(
- '/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]'
- ),
- 'strip' => array(
- '//font',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://undeadly.org/cgi?action=article&sid=20141101181155',
+ 'body' => array(
+ '/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]'
+ ),
+ 'strip' => array(
+ '//font',
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php
index ce57beb..520496a 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php
@@ -1,8 +1,12 @@
'http://www.version2.dk/artikel/surface-pro-2-fungerer-bedre-til-arbejde-end-fornoejelse-55195',
- 'body' => array(
- '//section[contains(@class, "teaser")]',
- '//section[contains(@class, "body")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.version2.dk/artikel/surface-pro-2-fungerer-bedre-til-arbejde-end-fornoejelse-55195',
+ 'body' => array(
+ '//section[contains(@class, "teaser")]',
+ '//section[contains(@class, "body")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php
new file mode 100644
index 0000000..05de7c2
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php
@@ -0,0 +1,16 @@
+ array(
+ '%/comics.*%' => array(
+ 'test_url' => 'http://www.vgcats.com/comics/?strip_id=358',
+ 'body' => array('//*[@align="center"]/img'),
+ 'strip' => array(),
+ ),
+ '%/super.*%' => array(
+ 'test_url' => 'http://www.vgcats.com/super/?strip_id=84',
+ 'body' => array('//*[@align="center"]/p/img'),
+ 'strip' => array(),
+ )
+ )
+);
+
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php
new file mode 100644
index 0000000..e9880f2
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php
@@ -0,0 +1,17 @@
+ array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.vuxml.org/freebsd/a5f160fa-deee-11e4-99f8-080027ef73ec.html',
+ 'body' => array(
+ '//body'
+ ),
+ 'strip' => array(
+ '//h1',
+ '//div[@class="blurb"]',
+ '//hr',
+ '//p[@class="copyright"]'
+ )
+ )
+ )
+);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php
index 5440781..76895c2 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php
@@ -1,20 +1,24 @@
'http://www.bbc.co.uk/news/world-middle-east-23911833',
- 'body' => array(
- '//div[@class="story-body"]',
- '//div[@class="indPost"]'
- ),
- 'strip' => array(
- '//form',
- '//*[@class="warning"]',
- '//*[@class="story-date"]',
- '//*[@class="story-header"]',
- '//*[@class="story-related"]',
- '//*[contains(@class, "byline")]',
- '//*[contains(@class, "story-feature")]',
- '//*[@id="video-carousel-container"]',
- '//*[@id="also-related-links"]',
- '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
+ 'body' => array(
+ '//div[@class="story-body"]',
+ '//div[@class="indPost"]'
+ ),
+ 'strip' => array(
+ '//form',
+ '//*[@class="warning"]',
+ '//*[@class="story-date"]',
+ '//*[@class="story-header"]',
+ '//*[@class="story-related"]',
+ '//*[contains(@class, "byline")]',
+ '//*[contains(@class, "story-feature")]',
+ '//*[@id="video-carousel-container"]',
+ '//*[@id="also-related-links"]',
+ '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php
index 528ad41..be1cbcd 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php
@@ -1,11 +1,15 @@
'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html',
- 'body' => array(
- '//*[contains(@class, "chronique")]',
- ),
- 'strip' => array(
- '//*[contains(@class, "post-review")]',
- '//*[contains(@class, "footer-review")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html',
+ 'body' => array(
+ '//*[contains(@class, "chronique")]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "post-review")]',
+ '//*[contains(@class, "footer-review")]',
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php
index 1ac90a3..1f2ad25 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php
@@ -1,11 +1,15 @@
'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software',
- 'body' => array(
- '//div[@id="lead_graphic"]',
- '//div[@id="article_body"]',
- ),
- 'strip' => array(
- '//*[contains(@class, "related_item")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software',
+ 'body' => array(
+ '//div[@id="lead_graphic"]',
+ '//div[@id="article_body"]',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "related_item")]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php
index 5ceb3bd..c041aec 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php
@@ -1,20 +1,24 @@
'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1',
- 'body' => array(
- '//div[@class="cnn_strycntntlft"]',
- ),
- 'strip' => array(
- '//div[@class="cnn_stryshrwdgtbtm"]',
- '//div[@class="cnn_strybtmcntnt"]',
- '//div[@class="cnn_strylftcntnt"]',
- '//div[contains(@class, "cnnGalleryContainer")]',
- '//div[contains(@class, "cnn_strylftcexpbx")]',
- '//div[contains(@class, "articleGalleryNavContainer")]',
- '//div[contains(@class, "cnnArticleGalleryCaptionControl")]',
- '//div[contains(@class, "cnnArticleGalleryNavPrevNextDisabled")]',
- '//div[contains(@class, "cnnArticleGalleryNavPrevNext")]',
- '//div[contains(@class, "cnn_html_media_title_new")]',
- '//div[contains(@id, "disqus")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1',
+ 'body' => array(
+ '//div[@class="cnn_strycntntlft"]',
+ ),
+ 'strip' => array(
+ '//div[@class="cnn_stryshrwdgtbtm"]',
+ '//div[@class="cnn_strybtmcntnt"]',
+ '//div[@class="cnn_strylftcntnt"]',
+ '//div[contains(@class, "cnnGalleryContainer")]',
+ '//div[contains(@class, "cnn_strylftcexpbx")]',
+ '//div[contains(@class, "articleGalleryNavContainer")]',
+ '//div[contains(@class, "cnnArticleGalleryCaptionControl")]',
+ '//div[contains(@class, "cnnArticleGalleryNavPrevNextDisabled")]',
+ '//div[contains(@class, "cnnArticleGalleryNavPrevNext")]',
+ '//div[contains(@class, "cnn_html_media_title_new")]',
+ '//div[contains(@id, "disqus")]',
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php
index d56bcca..3f1dd59 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php
@@ -1,17 +1,21 @@
'http://www.developpez.com/actu/81757/Mozilla-annonce-la-disponibilite-de-Firefox-36-qui-passe-au-HTTP-2-et-permet-la-synchronisation-de-son-ecran-d-accueil/',
- 'body' => array(
- '//*[@itemprop="articleBody"]',
- ),
- 'strip' => array(
- '//form',
- '//div[@class="content"]/img',
- '//a[last()]/following-sibling::*',
- '//*[contains(@class,"actuTitle")]',
- '//*[contains(@class,"date")]',
- '//*[contains(@class,"inlineimg")]',
- '//*[@id="signaler"]',
- '//*[@id="signalerFrame"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.developpez.com/actu/81757/Mozilla-annonce-la-disponibilite-de-Firefox-36-qui-passe-au-HTTP-2-et-permet-la-synchronisation-de-son-ecran-d-accueil/',
+ 'body' => array(
+ '//*[@itemprop="articleBody"]',
+ ),
+ 'strip' => array(
+ '//form',
+ '//div[@class="content"]/img',
+ '//a[last()]/following-sibling::*',
+ '//*[contains(@class,"actuTitle")]',
+ '//*[contains(@class,"date")]',
+ '//*[contains(@class,"inlineimg")]',
+ '//*[@id="signaler"]',
+ '//*[@id="signalerFrame"]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php
index 9c9b73f..ada54ab 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php
@@ -1,8 +1,12 @@
'http://www.egscomics.com/index.php?id=1690',
- 'title' => '/html/head/title',
- 'body' => array(
- '//img[@id="comic"]'
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.egscomics.com/index.php?id=1690',
+ 'title' => '/html/head/title',
+ 'body' => array(
+ '//img[@id="comic"]'
+ )
+ )
)
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php
index 0eff7a7..4b92aaf 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php
@@ -1,16 +1,20 @@
'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/',
- 'body' => array(
- '//div[@id="leftRail"]/div[contains(@class, body)]',
- ),
- 'strip' => array(
- '//aside',
- '//div[contains(@class, "entity_block")]',
- '//div[contains(@class, "vestpocket") and not contains(@class, "body")]',
- '//div[contains(@style, "display")]',
- '//div[contains(@id, "comment")]',
- '//div[contains(@class, "widget")]',
- '//div[contains(@class, "pagination")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/',
+ 'body' => array(
+ '//div[@id="leftRail"]/div[contains(@class, "body")]', // class name must be a quoted string; a bare body is a child-element test
+ ),
+ 'strip' => array(
+ '//aside',
+ '//div[contains(@class, "entity_block")]',
+ '//div[contains(@class, "vestpocket") and not(contains(@class, "body"))]', // not() is an XPath function, so its argument needs parentheses
+ '//div[contains(@style, "display")]',
+ '//div[contains(@id, "comment")]',
+ '//div[contains(@class, "widget")]',
+ '//div[contains(@class, "pagination")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php
index 73d5744..238b056 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php
@@ -1,15 +1,19 @@
'http://www.futura-sciences.com/magazines/espace/infos/actu/d/astronautique-curiosity-franchi-succes-dune-dingo-gap-52289/#xtor=RSS-8',
- 'body' => array(
- '//div[contains(@class, "content fiche-")]',
- ),
- 'strip' => array(
- '//h1',
- '//*[contains(@class, "content-date")]',
- '//*[contains(@class, "diaporama")]',
- '//*[contains(@class, "slider")]',
- '//*[contains(@class, "cartouche")]',
- '//*[contains(@class, "noprint")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.futura-sciences.com/magazines/espace/infos/actu/d/astronautique-curiosity-franchi-succes-dune-dingo-gap-52289/#xtor=RSS-8',
+ 'body' => array(
+ '//div[contains(@class, "content fiche-")]',
+ ),
+ 'strip' => array(
+ '//h1',
+ '//*[contains(@class, "content-date")]',
+ '//*[contains(@class, "diaporama")]',
+ '//*[contains(@class, "slider")]',
+ '//*[contains(@class, "cartouche")]',
+ '//*[contains(@class, "noprint")]',
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php
index 125bb6a..e72ddcf 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php
@@ -1,13 +1,17 @@
array(
- 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html',
- 'http://www.lemonde.fr/afrique/article/2015/04/06/plonge-dans-la-crise-l-angola-revele-son-vrai-visage_4610364_3212.html',
- ),
- 'body' => array(
- '//div[@id="articleBody"]',
- '//div[@itemprop="articleBody"]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => array(
+ 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html',
+ 'http://www.lemonde.fr/afrique/article/2015/04/06/plonge-dans-la-crise-l-angola-revele-son-vrai-visage_4610364_3212.html',
+ ),
+ 'body' => array(
+ '//div[@id="articleBody"]',
+ '//div[@itemprop="articleBody"]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php
index adb5749..9a3107f 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php
@@ -1,14 +1,18 @@
'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php',
- 'body' => array(
- '//article',
- ),
- 'strip' => array(
- '//*[contains(@class, "info_article")]',
- '//*[contains(@class, "fildariane_titre")]',
- '//*[contains(@class, "entete2_article")]',
- '//*[contains(@class, "signature_article")]',
- '//*[contains(@id, "share")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php',
+ 'body' => array(
+ '//article',
+ ),
+ 'strip' => array(
+ '//*[contains(@class, "info_article")]',
+ '//*[contains(@class, "fildariane_titre")]',
+ '//*[contains(@class, "entete2_article")]',
+ '//*[contains(@class, "signature_article")]',
+ '//*[contains(@id, "share")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php
index ee91ae9..3951329 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php
@@ -1,9 +1,13 @@
'http://www.mac4ever.com/actu/87392_video-quand-steve-jobs-et-bill-gates-jouaient-au-bachelor-avec-le-mac',
- 'body' => array(
- '//div[contains(@class, "news-news-content")]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.mac4ever.com/actu/87392_video-quand-steve-jobs-et-bill-gates-jouaient-au-bachelor-avec-le-mac',
+ 'body' => array(
+ '//div[contains(@class, "news-news-content")]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php
index fc45ef2..2010e09 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php
@@ -1,9 +1,13 @@
'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
- 'body' => array(
- '//div[contains(@id, "actu_content")]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
+ 'body' => array(
+ '//div[contains(@id, "actu_content")]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php
index 630c060..e924982 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php
@@ -1,13 +1,17 @@
 <?php
 return array(
-    'test_url' => 'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut',
- 'body' => array(
- '//div[@id="storytext"]',
- ),
- 'strip' => array(
- '//*[@class="bucket img"]',
- '//*[@class="creditwrap"]',
- '//*[@class="captionwrap"]',
- '//*[contains(@class, "enlargebtn")]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut',
+ 'body' => array(
+ '//div[@id="storytext"]',
+ ),
+ 'strip' => array(
+ '//*[@class="bucket img"]',
+ '//*[@class="creditwrap"]',
+ '//*[@class="captionwrap"]',
+ '//*[contains(@class, "enlargebtn")]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php
index b6387da..1f75e4b 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php
@@ -1,11 +1,15 @@
 <?php
 return array(
-    'test_url' => 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html',
- 'body' => array(
- '//div[@class="col_left"]//div[@class="content"]',
- ),
- 'strip' => array(
- '//div[@class="news_social"]',
- '//div[@id="newssuiv"]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html',
+ 'body' => array(
+ '//div[@class="col_left"]//div[@class="content"]',
+ ),
+ 'strip' => array(
+ '//div[@class="news_social"]',
+ '//div[@id="newssuiv"]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php
index fc45ef2..2010e09 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php
@@ -1,9 +1,13 @@
 <?php
 return array(
-    'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
- 'body' => array(
- '//div[contains(@id, "actu_content")]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
+ 'body' => array(
+ '//div[contains(@id, "actu_content")]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php
index bfb9303..63a918e 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php
@@ -1,12 +1,16 @@
 <?php
 return array(
-    'test_url' => 'http://www.pseudo-sciences.org/spip.php?article2275',
- 'body' => array(
- '//div[@id="art_main"]',
- ),
- 'strip' => array(
- '//div[@id="art_print"]',
- '//div[@id="art_chapo"]',
- '//img[@class="puce"]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.pseudo-sciences.org/spip.php?article2275',
+ 'body' => array(
+ '//div[@id="art_main"]',
+ ),
+ 'strip' => array(
+ '//div[@id="art_print"]',
+ '//div[@id="art_chapo"]',
+ '//img[@class="puce"]',
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php
index de211f4..c619199 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php
@@ -1,15 +1,19 @@
 <?php
 return array(
-    'test_url' => 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne',
- 'body' => array(
- '//div[@class="article_content"]',
- ),
- 'strip' => array(
- '//*[@id="slate_associated_bn"]',
- '//*[@id="ligatus-article"]',
- '//*[@id="article_sidebar"]',
- '//div[contains(@id, "reseaux")]',
- '//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]',
- '//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]',
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne',
+ 'body' => array(
+ '//div[@class="article_content"]',
+ ),
+ 'strip' => array(
+ '//*[@id="slate_associated_bn"]',
+ '//*[@id="ligatus-article"]',
+ '//*[@id="article_sidebar"]',
+ '//div[contains(@id, "reseaux")]',
+ '//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]',
+ '//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]',
+ )
+ )
)
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php
index 8679a7b..8203b97 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php
@@ -1,11 +1,15 @@
 <?php
 return array(
-    'test_url' => 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go',
- 'body' => array(
- '//div[@id="corps_corps"]'
- ),
- 'strip' => array(
- '//*[@id="formulaire"]',
- '//*[@id="commentaire"]',
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go',
+ 'body' => array(
+ '//div[@id="corps_corps"]'
+ ),
+ 'strip' => array(
+ '//*[@id="formulaire"]',
+ '//*[@id="commentaire"]',
+ ),
+ )
+ )
);
\ No newline at end of file
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php
index 37fec18..6f83cb8 100644
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php
@@ -1,9 +1,13 @@
 <?php
 return array(
-    'test_url' => 'http://xkcd.com/1472/',
- 'body' => array(
- '//div[@id="comic"]',
- ),
- 'strip' => array(
- ),
+ 'grabber' => array(
+ '%.*%' => array(
+ 'test_url' => 'http://xkcd.com/1472/',
+ 'body' => array(
+ '//div[@id="comic"]',
+ ),
+ 'strip' => array(
+ ),
+ )
+ )
);
diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php
new file mode 100644
index 0000000..907c4d8
--- /dev/null
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php
@@ -0,0 +1,286 @@
+dom = XmlParser::getHtmlDocument(''.$html);
+ $this->xpath = new DOMXPath($this->dom);
+ }
+
+    /**
+     * Extract the most relevant content of the page
+     *
+     * Starts with findContentWithCandidates(); a result shorter than 200 bytes
+     * is replaced by findContentWithArticle(), and a result still shorter than
+     * 50 bytes by findContentWithBody(). The outcome goes through stripGarbage().
+     *
+     * @access public
+     * @return string
+     */
+    public function execute()
+    {
+        $found = $this->findContentWithCandidates();
+
+        // strlen() counts bytes, so both thresholds are byte lengths
+        $found = strlen($found) >= 200 ? $found : $this->findContentWithArticle();
+        $found = strlen($found) >= 50 ? $found : $this->findContentWithBody();
+
+        return $this->stripGarbage($found);
+    }
+
+    /**
+     * Search the document for the first element matching a candidate attribute
+     *
+     * Each entry of $this->candidatesAttributes is tried in order. An element
+     * matches when its class contains the candidate or its id equals it, unless
+     * its class also contains "nav" or "page". The first match of the first
+     * successful candidate is serialized and returned.
+     *
+     * @access public
+     * @return string  Serialized node, or an empty string when nothing matches
+     */
+    public function findContentWithCandidates()
+    {
+        foreach ($this->candidatesAttributes as $attribute) {
+            Logger::setMessage(get_called_class().': Try this candidate: "'.$attribute.'"');
+
+            // NOTE(review): the candidate is interpolated unescaped; presumably the list holds plain identifiers only
+            $query = '//*[(contains(@class, "'.$attribute.'") or @id="'.$attribute.'") and not (contains(@class, "nav") or contains(@class, "page"))]';
+            $matches = $this->xpath->query($query);
+
+            // query() yields false on an invalid expression; skip those and empty results
+            if ($matches === false || $matches->length < 1) {
+                continue;
+            }
+
+            Logger::setMessage(get_called_class().': Find candidate "'.$attribute.'"');
+            return $this->dom->saveXML($matches->item(0));
+        }
+
+        return '';
+    }
+
+    /**
+     * Find <article/> tag
+     *
+     * Runs the XPath query "//article" and serializes the first matching node.
+     *
+     * @access public
+     * @return string  Markup of the first <article/> element, or an empty string
+     *                 when the query fails or matches nothing
+     */
+    public function findContentWithArticle()
+    {
+        $nodes = $this->xpath->query('//article');
+
+        if ($nodes !== false && $nodes->length > 0) {
+            // Name the matched tag so the log entry is unambiguous
+            Logger::setMessage(get_called_class().': Find <article/> tag');
+            return $this->dom->saveXML($nodes->item(0));
+        }
+
+        return '';
+    }
+
+ /**
+ * Find