From f89ed85a830dca8d9900e850cc77c428b9491f32 Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Tue, 28 Apr 2015 18:08:42 +0200 Subject: [PATCH] update libraries fixes #365, #367 --- docs/full-article-download.markdown | 82 ++- models/item.php | 12 +- vendor/composer/ClassLoader.php | 26 + vendor/composer/autoload_classmap.php | 6 +- vendor/composer/installed.json | 43 +- vendor/fguillot/json-rpc/LICENSE | 21 + vendor/fguillot/json-rpc/README.markdown | 2 +- vendor/fguillot/json-rpc/composer.json | 4 +- vendor/fguillot/picofarad/LICENCE | 21 + vendor/fguillot/picofarad/README.md | 2 +- vendor/fguillot/picofarad/composer.json | 2 +- vendor/fguillot/picofeed/.gitignore | 3 +- vendor/fguillot/picofeed/.travis.yml | 21 +- vendor/fguillot/picofeed/LICENSE | 21 + vendor/fguillot/picofeed/README.markdown | 3 +- vendor/fguillot/picofeed/UNLICENSE | 24 - vendor/fguillot/picofeed/composer.json | 2 +- .../picofeed/docs/feed-parsing.markdown | 21 + .../fguillot/picofeed/docs/grabber.markdown | 109 +++- .../picofeed/lib/PicoFeed/Client/Curl.php | 7 +- .../picofeed/lib/PicoFeed/Client/Grabber.php | 592 ------------------ .../lib/PicoFeed/Filter/Attribute.php | 20 + .../picofeed/lib/PicoFeed/Filter/Html.php | 47 ++ .../picofeed/lib/PicoFeed/Filter/Tag.php | 2 +- .../picofeed/lib/PicoFeed/Parser/Parser.php | 31 +- .../lib/PicoFeed/Rules/.blog.lemonde.fr.php | 18 +- .../lib/PicoFeed/Rules/.blogs.nytimes.com.php | 20 +- .../picofeed/lib/PicoFeed/Rules/.igen.fr.php | 16 +- .../lib/PicoFeed/Rules/.nytimes.com.php | 13 +- .../lib/PicoFeed/Rules/.phoronix.com.php | 13 +- .../lib/PicoFeed/Rules/.slate.com.php | 28 +- .../lib/PicoFeed/Rules/.theguardian.com.php | 18 +- .../lib/PicoFeed/Rules/.wikipedia.org.php | 48 +- .../lib/PicoFeed/Rules/.wired.com.php | 32 +- .../picofeed/lib/PicoFeed/Rules/.wsj.com.php | 18 +- .../picofeed/lib/PicoFeed/Rules/01net.com.php | 24 +- .../lib/PicoFeed/Rules/alainonline.net.php | 18 +- .../PicoFeed/Rules/allgemeine-zeitung.de.php | 35 +- 
.../lib/PicoFeed/Rules/areadvd.de.php | 10 + .../lib/PicoFeed/Rules/awkwardzombie.com.php | 10 + .../lib/PicoFeed/Rules/blog.fefe.de.php | 16 +- .../lib/PicoFeed/Rules/bunicomic.com.php | 16 +- .../lib/PicoFeed/Rules/cad-comic.com.php | 12 + .../Rules/chaoslife.findchaos.com.php | 10 + .../lib/PicoFeed/Rules/cliquerefresh.com.php | 10 + .../lib/PicoFeed/Rules/consomac.fr.php | 16 +- .../lib/PicoFeed/Rules/dailyjs.com.php | 26 +- .../lib/PicoFeed/Rules/degroupnews.com.php | 18 +- .../lib/PicoFeed/Rules/derstandard.at.php | 18 +- .../lib/PicoFeed/Rules/distrowatch.com.php | 14 +- .../lib/PicoFeed/Rules/dozodomo.com.php | 18 +- .../lib/PicoFeed/Rules/engadget.com.php | 10 + .../PicoFeed/Rules/escapistmagazine.com.php | 42 ++ .../lib/PicoFeed/Rules/explosm.net.php | 16 +- .../lib/PicoFeed/Rules/fastcodesign.com.php | 14 +- .../lib/PicoFeed/Rules/fastcoexist.com.php | 14 +- .../lib/PicoFeed/Rules/fastcompany.com.php | 14 +- .../lib/PicoFeed/Rules/ffworld.com.php | 14 +- .../PicoFeed/Rules/fowllanguagecomics.com.php | 10 + .../lib/PicoFeed/Rules/github.com.php | 16 +- .../picofeed/lib/PicoFeed/Rules/golem.de.php | 12 +- .../picofeed/lib/PicoFeed/Rules/heise.de.php | 10 +- .../lib/PicoFeed/Rules/huffingtonpost.com.php | 14 +- .../picofeed/lib/PicoFeed/Rules/ing.dk.php | 12 +- .../lib/PicoFeed/Rules/journaldugeek.com.php | 10 +- .../picofeed/lib/PicoFeed/Rules/kanpai.fr.php | 14 +- .../PicoFeed/Rules/karriere.jobfinder.dk.php | 12 +- .../lib/PicoFeed/Rules/lejapon.fr.php | 22 +- .../lib/PicoFeed/Rules/lesjoiesducode.fr.php | 14 +- .../picofeed/lib/PicoFeed/Rules/lfg.co.php | 13 + .../lib/PicoFeed/Rules/lifehacker.com.php | 24 +- .../picofeed/lib/PicoFeed/Rules/lists.php | 14 +- .../lib/PicoFeed/Rules/loadingartist.com.php | 10 + .../lib/PicoFeed/Rules/loldwell.com.php | 10 + .../picofeed/lib/PicoFeed/Rules/macg.co.php | 16 +- .../picofeed/lib/PicoFeed/Rules/marc.info.php | 16 +- .../Rules/maximumble.thebookofbiff.com.php | 10 + .../lib/PicoFeed/Rules/medium.com.php | 14 +- 
.../lib/PicoFeed/Rules/metronieuws.nl.php | 10 + .../Rules/mokepon.smackjeeves.com.php | 10 + .../PicoFeed/Rules/monwindowsphone.com.php | 16 +- .../lib/PicoFeed/Rules/neustadt-ticker.de.php | 12 + .../lib/PicoFeed/Rules/niceteethcomic.com.php | 10 + .../lib/PicoFeed/Rules/nichtlustig.de.php | 8 + .../PicoFeed/Rules/openrightsgroup.org.php | 30 +- .../lib/PicoFeed/Rules/pastebin.com.php | 14 +- .../lib/PicoFeed/Rules/penny-arcade.com.php | 21 + .../lib/PicoFeed/Rules/plus.google.com.php | 12 +- .../lib/PicoFeed/Rules/putaindecode.fr.php | 20 +- .../PicoFeed/Rules/rue89.nouvelobs.com.php | 14 +- .../lib/PicoFeed/Rules/satwcomic.com.php | 12 + .../lib/PicoFeed/Rules/scrumalliance.org.php | 12 + .../lib/PicoFeed/Rules/sitepoint.com.php | 16 +- .../lib/PicoFeed/Rules/slashdot.org.php | 11 + .../PicoFeed/Rules/smallhousebliss.com.php | 26 +- .../lib/PicoFeed/Rules/smarthomewelt.de.php | 10 + .../PicoFeed/Rules/smashingmagazine.com.php | 10 + .../lib/PicoFeed/Rules/spiegel.de.php | 10 +- .../picofeed/lib/PicoFeed/Rules/sz.de.php | 10 + .../lib/PicoFeed/Rules/techcrunch.com.php | 18 +- .../lib/PicoFeed/Rules/thecodinglove.com.php | 10 + .../lib/PicoFeed/Rules/thegamercat.com.php | 10 + .../lib/PicoFeed/Rules/themerepublic.net.php | 10 + .../lib/PicoFeed/Rules/travel-dealz.de.php | 15 + .../lib/PicoFeed/Rules/treehugger.com.php | 16 +- .../lib/PicoFeed/Rules/twogag.com.php | 9 + .../PicoFeed/Rules/twokinds.keenspot.com.php | 10 + .../lib/PicoFeed/Rules/undeadly.org.php | 18 +- .../lib/PicoFeed/Rules/version2.dk.php | 12 +- .../lib/PicoFeed/Rules/vgcats.com.php | 16 + .../picofeed/lib/PicoFeed/Rules/vuxml.org.php | 17 + .../lib/PicoFeed/Rules/www.bbc.co.uk.php | 36 +- .../lib/PicoFeed/Rules/www.bdgest.com.php | 20 +- .../PicoFeed/Rules/www.businessweek.com.php | 20 +- .../lib/PicoFeed/Rules/www.cnn.com.php | 36 +- .../lib/PicoFeed/Rules/www.developpez.com.php | 30 +- .../lib/PicoFeed/Rules/www.egscomics.com.php | 12 +- .../lib/PicoFeed/Rules/www.forbes.com.php | 28 +- 
.../Rules/www.futura-sciences.com.php | 28 +- .../lib/PicoFeed/Rules/www.lemonde.fr.php | 24 +- .../lib/PicoFeed/Rules/www.lepoint.fr.php | 24 +- .../lib/PicoFeed/Rules/www.mac4ever.com.php | 16 +- .../lib/PicoFeed/Rules/www.nextinpact.com.php | 16 +- .../lib/PicoFeed/Rules/www.npr.org.php | 24 +- .../lib/PicoFeed/Rules/www.numerama.com.php | 18 +- .../lib/PicoFeed/Rules/www.pcinpact.com.php | 16 +- .../Rules/www.pseudo-sciences.org.php | 22 +- .../lib/PicoFeed/Rules/www.slate.fr.php | 26 +- .../PicoFeed/Rules/www.universfreebox.com.php | 20 +- .../picofeed/lib/PicoFeed/Rules/xkcd.com.php | 16 +- .../lib/PicoFeed/Scraper/CandidateParser.php | 286 +++++++++ .../lib/PicoFeed/Scraper/ParserInterface.php | 13 + .../lib/PicoFeed/Scraper/RuleLoader.php | 127 ++++ .../lib/PicoFeed/Scraper/RuleParser.php | 93 +++ .../picofeed/lib/PicoFeed/Scraper/Scraper.php | 361 +++++++++++ vendor/fguillot/picofeed/picofeed | 11 +- .../picofeed/tests/Client/GrabberTest.php | 152 ----- .../tests/Filter/AttributeFilterTest.php | 18 + .../picofeed/tests/Filter/HtmlFilterTest.php | 22 + .../picofeed/tests/Reader/ReaderTest.php | 2 +- .../picofeed/tests/Scraper/RuleLoaderTest.php | 86 +++ .../picofeed/tests/Scraper/ScraperTest.php | 89 +++ vendor/fguillot/simple-validator/LICENSE | 12 +- vendor/fguillot/simple-validator/README.md | 14 +- .../fguillot/simple-validator/composer.json | 3 +- 145 files changed, 2748 insertions(+), 1444 deletions(-) create mode 100644 vendor/fguillot/json-rpc/LICENSE create mode 100644 vendor/fguillot/picofarad/LICENCE create mode 100644 vendor/fguillot/picofeed/LICENSE delete mode 100644 vendor/fguillot/picofeed/UNLICENSE delete mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php create mode 100644 
vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php mode change 100755 => 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php mode change 100755 => 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php create mode 100644 
vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php create mode 100644 vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php delete mode 100644 vendor/fguillot/picofeed/tests/Client/GrabberTest.php create mode 100644 vendor/fguillot/picofeed/tests/Scraper/RuleLoaderTest.php create mode 100644 vendor/fguillot/picofeed/tests/Scraper/ScraperTest.php diff --git a/docs/full-article-download.markdown b/docs/full-article-download.markdown index cdae156..65f05a8 100644 --- a/docs/full-article-download.markdown +++ b/docs/full-article-download.markdown @@ -18,37 +18,75 @@ Especially websites that use a lot of Javascript to generate the content. How to write a grabber rules file? ---------------------------------- -Add a PHP file to the directory `rules`, the filename must be the domain name with the suffix `.php`: +Miniflux will try first to find the file in the [default bundled rules directory](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules), then it will try to load your custom rules. 
-Example with the BBC website, `www.bbc.co.uk.php`: +You can create custom rules, by adding a PHP file to the directory `rules`. The filename must be the domain name with the suffix `.php`. + +Each rule has the following keys: +* **body**: An array of xpath expressions which will be extracted from the page +* **strip**: An array of xpath expressions which will be removed from the matched content +* **test_url**: A test url to a matching page to test the grabber + +Example for the BBC website, `www.bbc.co.uk.php`: ```php 'http://www.bbc.co.uk/news/world-middle-east-23911833', - 'body' => array( - '//div[@class="story-body"]', - ), - 'strip' => array( - '//script', - '//form', - '//style', - '//*[@class="story-date"]', - '//*[@class="story-header"]', - '//*[@class="story-related"]', - '//*[contains(@class, "byline")]', - '//*[contains(@class, "story-feature")]', - '//*[@id="video-carousel-container"]', - '//*[@id="also-related-links"]', - '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833', + 'body' => array( + '//div[@class="story-body"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="story-date"]', + '//*[@class="story-header"]', + '//*[@class="story-related"]', + '//*[contains(@class, "byline")]', + '//*[contains(@class, "story-feature")]', + '//*[@id="video-carousel-container"]', + '//*[@id="also-related-links"]', + '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + ) + ) ) ); ``` -Actually, only the keys `body`, `strip` and `test_url` are supported. +Each rule file can contain rules for different subdivisions of a website. Those subdivisions are distinguished by their URL. The first level array key of a rule file will be matched against the full path of the URL using **preg_match**, e.g. 
for **http://www.bbc.co.uk/news/world-middle-east-23911833?test=1** the URL that would be matched is **/news/world-middle-east-23911833?test=1** -Miniflux will try first to find the file in the [default bundled rules directory](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules), then it will try to load your custom rules. +Let's say you want to extract the div with the id **video** if the article points to a URL like **http://comix.com/videos/423**, the div with the id **audio** if the article points to a URL like **http://comix.com/podcasts/5**, and the div with the id **content** for all other links to the page. The following rule file ```comix.com.php``` would fit that requirement: + +```php +return array( + 'grabber' => array( + '%^/videos.*%' => array( + 'test_url' => 'http://comix.com/videos/423', + 'body' => array( + '//div[@id="video"]', + ), + 'strip' => array() + ), + '%^/podcasts.*%' => array( + 'test_url' => 'http://comix.com/podcasts/5', + 'body' => array( + '//div[@id="audio"]', + ), + 'strip' => array() + ), + '%.*%' => array( + 'test_url' => 'http://comix.com/blog/1', + 'body' => array( + '//div[@id="content"]', + ), + 'strip' => array() + ) + ) +); +``` Sharing your custom rules with the community -------------------------------------------- @@ -59,4 +97,4 @@ That will be merged in the Miniflux code base. List of content grabber rules ----------------------------- -[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules). +[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules). 
\ No newline at end of file diff --git a/models/item.php b/models/item.php index 0c1b956..f66a35b 100644 --- a/models/item.php +++ b/models/item.php @@ -6,7 +6,7 @@ use Model\Service; use Model\Config; use PicoDb\Database; use PicoFeed\Logging\Logger; -use PicoFeed\Client\Grabber; +use PicoFeed\Scraper\Scraper; // Get all items without filtering function get_all() @@ -520,12 +520,12 @@ function download_content_url($url) { $content = ''; - $grabber = new Grabber($url); - $grabber->setConfig(Config\get_reader_config()); - $grabber->download(); + $grabber = new Scraper(Config\get_reader_config()); + $grabber->setUrl($url); + $grabber->execute(); - if ($grabber->parse()) { - $content = $grabber->getFilteredcontent(); + if ($grabber->hasRelevantContent()) { + $content = $grabber->getFilteredContent(); } return $content; diff --git a/vendor/composer/ClassLoader.php b/vendor/composer/ClassLoader.php index 70d78bc..5e1469e 100644 --- a/vendor/composer/ClassLoader.php +++ b/vendor/composer/ClassLoader.php @@ -54,6 +54,8 @@ class ClassLoader private $useIncludePath = false; private $classMap = array(); + private $classMapAuthoritative = false; + public function getPrefixes() { if (!empty($this->prefixesPsr0)) { @@ -248,6 +250,27 @@ class ClassLoader return $this->useIncludePath; } + /** + * Turns off searching the prefix and fallback directories for classes + * that have not been registered with the class map. + * + * @param bool $classMapAuthoritative + */ + public function setClassMapAuthoritative($classMapAuthoritative) + { + $this->classMapAuthoritative = $classMapAuthoritative; + } + + /** + * Should class lookup fail if not found in the current class map? + * + * @return bool + */ + public function isClassMapAuthoritative() + { + return $this->classMapAuthoritative; + } + /** * Registers this instance as an autoloader. 
* @@ -299,6 +322,9 @@ class ClassLoader if (isset($this->classMap[$class])) { return $this->classMap[$class]; } + if ($this->classMapAuthoritative) { + return false; + } $file = $this->findFileWithExtension($class, '.php'); diff --git a/vendor/composer/autoload_classmap.php b/vendor/composer/autoload_classmap.php index ffb214e..22338e2 100644 --- a/vendor/composer/autoload_classmap.php +++ b/vendor/composer/autoload_classmap.php @@ -20,7 +20,6 @@ return array( 'PicoFeed\\Client\\Client' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Client.php', 'PicoFeed\\Client\\ClientException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/ClientException.php', 'PicoFeed\\Client\\Curl' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Curl.php', - 'PicoFeed\\Client\\Grabber' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php', 'PicoFeed\\Client\\HttpHeaders' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php', 'PicoFeed\\Client\\InvalidCertificateException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidCertificateException.php', 'PicoFeed\\Client\\InvalidUrlException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Client/InvalidUrlException.php', @@ -54,6 +53,11 @@ return array( 'PicoFeed\\Reader\\ReaderException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/ReaderException.php', 'PicoFeed\\Reader\\SubscriptionNotFoundException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/SubscriptionNotFoundException.php', 'PicoFeed\\Reader\\UnsupportedFeedFormatException' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Reader/UnsupportedFeedFormatException.php', + 'PicoFeed\\Scraper\\CandidateParser' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php', + 'PicoFeed\\Scraper\\ParserInterface' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php', + 'PicoFeed\\Scraper\\RuleLoader' => $vendorDir . 
'/fguillot/picofeed/lib/PicoFeed/Scraper/RuleLoader.php', + 'PicoFeed\\Scraper\\RuleParser' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php', + 'PicoFeed\\Scraper\\Scraper' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php', 'PicoFeed\\Serialization\\Export' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Serialization/Export.php', 'PicoFeed\\Serialization\\Import' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Serialization/Import.php', 'PicoFeed\\Syndication\\Atom' => $vendorDir . '/fguillot/picofeed/lib/PicoFeed/Syndication/Atom.php', diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index f626e28..89498fc 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -45,18 +45,18 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFarad.git", - "reference": "1bc48a4367adf359f3439c2e0ae20a7d299d8ccd" + "reference": "a5817c49ca3037829ec1509d14724be5f29c35a0" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFarad/zipball/1bc48a4367adf359f3439c2e0ae20a7d299d8ccd", - "reference": "1bc48a4367adf359f3439c2e0ae20a7d299d8ccd", + "url": "https://api.github.com/repos/fguillot/picoFarad/zipball/a5817c49ca3037829ec1509d14724be5f29c35a0", + "reference": "a5817c49ca3037829ec1509d14724be5f29c35a0", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2015-02-01 19:40:13", + "time": "2015-04-14 01:53:02", "type": "library", "installation-source": "dist", "autoload": { @@ -66,7 +66,7 @@ }, "notification-url": "https://packagist.org/downloads/", "license": [ - "Unlicense" + "MIT" ], "authors": [ { @@ -84,18 +84,18 @@ "source": { "type": "git", "url": "https://github.com/fguillot/simpleValidator.git", - "reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8" + "reference": "2f30078bb6e688cf123c150d58fda322792a1532" }, "dist": { "type": "zip", - "url": 
"https://api.github.com/repos/fguillot/simpleValidator/zipball/41655dc7b9224395f5bb3b5623f6e428fe6d64e8", - "reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8", + "url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/2f30078bb6e688cf123c150d58fda322792a1532", + "reference": "2f30078bb6e688cf123c150d58fda322792a1532", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2015-04-05 21:44:06", + "time": "2015-04-14 02:03:43", "type": "library", "installation-source": "dist", "autoload": { @@ -109,8 +109,7 @@ ], "authors": [ { - "name": "Frédéric Guillot", - "homepage": "http://fredericguillot.com" + "name": "Frédéric Guillot" } ], "description": "The most easy to use validator library for PHP :)", @@ -123,18 +122,18 @@ "source": { "type": "git", "url": "https://github.com/fguillot/JsonRPC.git", - "reference": "29d63a09ecd450d5e29fef74f687aab221055910" + "reference": "1a397be7739ddabba87b07f0354655bd91087518" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/29d63a09ecd450d5e29fef74f687aab221055910", - "reference": "29d63a09ecd450d5e29fef74f687aab221055910", + "url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/1a397be7739ddabba87b07f0354655bd91087518", + "reference": "1a397be7739ddabba87b07f0354655bd91087518", "shasum": "" }, "require": { "php": ">=5.3.0" }, - "time": "2015-04-05 21:49:38", + "time": "2015-04-14 01:50:16", "type": "library", "installation-source": "dist", "autoload": { @@ -144,7 +143,7 @@ }, "notification-url": "https://packagist.org/downloads/", "license": [ - "Unlicense" + "MIT" ], "authors": [ { @@ -152,7 +151,7 @@ "homepage": "http://fredericguillot.com" } ], - "description": "A simple Json-RPC client/server library that just works", + "description": "Simple Json-RPC client/server library that just works", "homepage": "https://github.com/fguillot/JsonRPC" }, { @@ -162,12 +161,12 @@ "source": { "type": "git", "url": "https://github.com/fguillot/picoFeed.git", - 
"reference": "273c344b35b468b6c8053f635332c3a404f8c7b9" + "reference": "a6087e8264550891c1b8a6da77eca0cab9328709" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/273c344b35b468b6c8053f635332c3a404f8c7b9", - "reference": "273c344b35b468b6c8053f635332c3a404f8c7b9", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/a6087e8264550891c1b8a6da77eca0cab9328709", + "reference": "a6087e8264550891c1b8a6da77eca0cab9328709", "shasum": "" }, "require": { @@ -181,7 +180,7 @@ "suggest": { "ext-curl": "PicoFeed will use cURL if present" }, - "time": "2015-04-11 12:46:50", + "time": "2015-04-27 22:22:06", "bin": [ "picofeed" ], @@ -194,7 +193,7 @@ }, "notification-url": "https://packagist.org/downloads/", "license": [ - "Unlicense" + "MIT" ], "authors": [ { diff --git a/vendor/fguillot/json-rpc/LICENSE b/vendor/fguillot/json-rpc/LICENSE new file mode 100644 index 0000000..6a362bc --- /dev/null +++ b/vendor/fguillot/json-rpc/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Frederic Guillot + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/fguillot/json-rpc/README.markdown b/vendor/fguillot/json-rpc/README.markdown index 7a5f5f1..38c8430 100644 --- a/vendor/fguillot/json-rpc/README.markdown +++ b/vendor/fguillot/json-rpc/README.markdown @@ -11,7 +11,7 @@ Features - Authentication and IP based client restrictions - Minimalist: there is only 2 files - Fully unit tested -- License: Unlicense http://unlicense.org/ +- License: MIT Requirements ------------ diff --git a/vendor/fguillot/json-rpc/composer.json b/vendor/fguillot/json-rpc/composer.json index da33c6c..3dc805c 100644 --- a/vendor/fguillot/json-rpc/composer.json +++ b/vendor/fguillot/json-rpc/composer.json @@ -1,9 +1,9 @@ { "name": "fguillot/json-rpc", - "description": "A simple Json-RPC client/server library that just works", + "description": "Simple Json-RPC client/server library that just works", "homepage": "https://github.com/fguillot/JsonRPC", "type": "library", - "license": "Unlicense", + "license": "MIT", "authors": [ { "name": "Frédéric Guillot", diff --git a/vendor/fguillot/picofarad/LICENCE b/vendor/fguillot/picofarad/LICENCE new file mode 100644 index 0000000..6a362bc --- /dev/null +++ b/vendor/fguillot/picofarad/LICENCE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Frederic Guillot + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright 
notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/fguillot/picofarad/README.md b/vendor/fguillot/picofarad/README.md index 9ea9bd9..b9c97ea 100644 --- a/vendor/fguillot/picofarad/README.md +++ b/vendor/fguillot/picofarad/README.md @@ -10,7 +10,7 @@ Features - No dependency - Easy to use, fast and very lightweight - Only 4 files: Request, Response, Router and Session -- License: Do what the fuck you want with that +- License: MIT Requirements ------------ diff --git a/vendor/fguillot/picofarad/composer.json b/vendor/fguillot/picofarad/composer.json index bbbaf2b..96073b6 100644 --- a/vendor/fguillot/picofarad/composer.json +++ b/vendor/fguillot/picofarad/composer.json @@ -3,7 +3,7 @@ "description": "Minimalist micro-framework", "homepage": "https://github.com/fguillot/picoFarad", "type": "library", - "license": "Unlicense", + "license": "MIT", "authors": [ { "name": "Frédéric Guillot", diff --git a/vendor/fguillot/picofeed/.gitignore b/vendor/fguillot/picofeed/.gitignore index b0ef068..acbddf5 100644 --- a/vendor/fguillot/picofeed/.gitignore +++ b/vendor/fguillot/picofeed/.gitignore @@ -1,2 +1,3 @@ .DS_Store -vendor/ \ No newline at end of file +vendor/ +*.py \ No newline at end of file diff --git a/vendor/fguillot/picofeed/.travis.yml b/vendor/fguillot/picofeed/.travis.yml index 0c3d0fe..00b2b5b 100644 --- a/vendor/fguillot/picofeed/.travis.yml +++ b/vendor/fguillot/picofeed/.travis.yml @@ -1,12 +1,19 @@ language: php php: - - 
"5.6" - - "5.5" - - "5.4" - - "5.3" + - 7.0 + - 5.6 + - 5.5 + - 5.4 + - 5.3 + +matrix: + fast_finish: true + allow_failures: + - php: 7.0 + +before_script: + - composer dump-autoload -before_script: wget https://phar.phpunit.de/phpunit.phar script: - - composer dump-autoload - - php phpunit.phar + - phpunit \ No newline at end of file diff --git a/vendor/fguillot/picofeed/LICENSE b/vendor/fguillot/picofeed/LICENSE new file mode 100644 index 0000000..6a362bc --- /dev/null +++ b/vendor/fguillot/picofeed/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Frederic Guillot + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/vendor/fguillot/picofeed/README.markdown b/vendor/fguillot/picofeed/README.markdown index 4a958c2..7e94eba 100644 --- a/vendor/fguillot/picofeed/README.markdown +++ b/vendor/fguillot/picofeed/README.markdown @@ -24,7 +24,7 @@ Features - Content grabber: download from the original website the full content - Enclosure detection - RTL languages support -- License: Unlicense +- License: MIT Requirements ------------ @@ -47,7 +47,6 @@ Authors Real world usage ---------------- -- [AnythingNew](http://anythingnew.co) - [Miniflux](http://miniflux.net) - [Owncloud News](https://github.com/owncloud/news) diff --git a/vendor/fguillot/picofeed/UNLICENSE b/vendor/fguillot/picofeed/UNLICENSE deleted file mode 100644 index 68a49da..0000000 --- a/vendor/fguillot/picofeed/UNLICENSE +++ /dev/null @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. 
- -For more information, please refer to diff --git a/vendor/fguillot/picofeed/composer.json b/vendor/fguillot/picofeed/composer.json index 4c13bd6..56a5ee2 100644 --- a/vendor/fguillot/picofeed/composer.json +++ b/vendor/fguillot/picofeed/composer.json @@ -3,7 +3,7 @@ "description": "Modern library to write or read feeds (RSS/Atom)", "homepage": "http://fguillot.github.io/picoFeed", "type": "library", - "license": "Unlicense", + "license": "MIT", "authors": [ { "name": "Frédéric Guillot", diff --git a/vendor/fguillot/picofeed/docs/feed-parsing.markdown b/vendor/fguillot/picofeed/docs/feed-parsing.markdown index 1ee2145..8ab2dac 100644 --- a/vendor/fguillot/picofeed/docs/feed-parsing.markdown +++ b/vendor/fguillot/picofeed/docs/feed-parsing.markdown @@ -215,6 +215,27 @@ catch (PicoFeedException $e) { } ``` +Custom regex filters +-------------------- +In case you want to modify the content with a simple regex, you can create a rule file named after the domain of the feed's link attribute. For the feed pointing to **http://www.twogag.com/** the file is stored under **Rules/twogag.com.php** + +For filtering, only the array with the key **filter** will be considered. The first level key is a preg_match regex that will match the sub url, e.g. to only match a feed whose link attribute points to **twogag.com/test**, the regex could look like **%/test.*%**. The second level array contains a list of search and replace strings, which will be passed to the preg\_replace function. The first string is the argument that should be matched, the second is the replacement.
+ +To replace all occurrences of links to smaller images for twogag, the following rule can be used: + + +```php +<?php +return array( + 'filter' => array( + '%.*%' => array( + "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%" => + "http://www.twogag.com/comics/$1.jpg" + ) + ) +); +``` + Feed and item properties ------------------------ diff --git a/vendor/fguillot/picofeed/docs/grabber.markdown b/vendor/fguillot/picofeed/docs/grabber.markdown index b99b756..4ac8306 100644 --- a/vendor/fguillot/picofeed/docs/grabber.markdown +++ b/vendor/fguillot/picofeed/docs/grabber.markdown @@ -15,23 +15,41 @@ How the content grabber works? Standalone usage ---------------- +Fetch remote content: + ```php download(); -$grabber->parse(); +$config = new Config; + +$grabber = new Scraper($config); +$grabber->setUrl($url); +$grabber->execute(); // Get raw HTML content echo $grabber->getRawContent(); // Get relevant content -echo $grabber->getContent(); +echo $grabber->getRelevantContent(); // Get filtered relevant content echo $grabber->getFilteredContent(); + +// Return true if there is relevant content +var_dump($grabber->hasRelevantContent()); +``` + +Parse HTML content: + +```php +setRawContent($html); +$grabber->execute(); ``` Fetch full item contents during feed parsing @@ -79,11 +97,11 @@ Configuration ### Enable content grabber for items - Method name: `enableContentGrabber()` -- Default value: false (content grabber is disabled by default) -- Argument value: none +- Default value: false (also fetch content if no rule file exists) +- Argument value: bool (true scrape only webpages which have a rule file) ```php -$parser->enableContentGrabber(); +$parser->enableContentGrabber(false); ``` ### Ignore item urls for the content grabber @@ -106,30 +124,71 @@ Example with the BBC website, `www.bbc.co.uk.php`: ```php 'http://www.bbc.co.uk/news/world-middle-east-23911833', - 'body' => array( - '//div[@class="story-body"]', - ), - 'strip' => array( - '//script', - '//form', - '//style', - '//*[@class="story-date"]', -
'//*[@class="story-header"]', - '//*[@class="story-related"]', - '//*[contains(@class, "byline")]', - '//*[contains(@class, "story-feature")]', - '//*[@id="video-carousel-container"]', - '//*[@id="also-related-links"]', - '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833', + 'body' => array( + '//div[@class="story-body"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="story-date"]', + '//*[@class="story-header"]', + '//*[@class="story-related"]', + '//*[contains(@class, "byline")]', + '//*[contains(@class, "story-feature")]', + '//*[@id="video-carousel-container"]', + '//*[@id="also-related-links"]', + '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + ) + ) ) ); ``` +Each rule file can contain multiple rules, so links to different URLs on the same website can be handled differently. The first level key is a regex, which will be matched against the full path of the URL using **preg_match**, e.g. for **http://www.bbc.co.uk/news/world-middle-east-23911833?test=1** the URL that would be matched is **/news/world-middle-east-23911833?test=1** -Actually, only `body`, `strip` and `test_url` are supported.
+Each rule has the following keys: +* **body**: An array of xpath expressions which will be extracted from the page +* **strip**: An array of xpath expressions which will be removed from the matched content +* **test_url**: A test url to a matching page to test the grabber Don't forget to send a pull request or a ticket to share your contribution with everybody, +**A more complex example**: + +Let's say you wanted to extract a div with the id **video** if the article points to a URL like **http://comix.com/videos/423**, **audio** if the article points to a URL like **http://comix.com/podcasts/5** and all other links to the page should instead take the div with the id **content**. The following rule file would fit that requirement and would be stored in a file called **lib/PicoFeed/Rules/comix.com.php**: + + +```php +return array( + 'grabber' => array( + '%^/videos.*%' => array( + 'test_url' => 'http://comix.com/videos/423', + 'body' => array( + '//div[@id="video"]', + ), + 'strip' => array() + ), + '%^/podcasts.*%' => array( + 'test_url' => 'http://comix.com/podcasts/5', + 'body' => array( + '//div[@id="audio"]', + ), + 'strip' => array() + ), + '%.*%' => array( + 'test_url' => 'http://comix.com/blog/1', + 'body' => array( + '//div[@id="content"]', + ), + 'strip' => array() + ) + ) +); +``` + List of content grabber rules ----------------------------- diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index 4ad3f14..0c609db 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -80,7 +80,7 @@ class Curl extends Client { $length = strlen($buffer); - if ($buffer === "\r\n") { + if ($buffer === "\r\n" || $buffer === "\n") { $this->response_headers_count++; } else { @@ -162,6 +162,7 @@ class Curl extends Client * Prepare curl proxy context * * @access private + * @param resource $ch * @return resource $ch */ private
function prepareProxyContext($ch) @@ -190,6 +191,7 @@ class Curl extends Client * Prepare curl auth context * * @access private + * @param resource $ch * @return resource $ch */ private function prepareAuthContext($ch) @@ -205,6 +207,7 @@ class Curl extends Client * Set write/header functions * * @access private + * @param resource $ch * @return resource $ch */ private function prepareDownloadMode($ch) @@ -305,7 +308,7 @@ class Curl extends Client { $this->executeContext(); - list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->response_headers[$this->response_headers_count - 1])); + list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1])); // When restricted with open_basedir if ($this->needToHandleRedirection($follow_location, $status)) { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php deleted file mode 100644 index bec8ab0..0000000 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php +++ /dev/null @@ -1,592 +0,0 @@ -url = $url; - $this->html = $html; - $this->encoding = $encoding; - - $this->handleFiles(); - $this->handleStreamingVideos(); - } - - /** - * Set config object - * - * @access public - * @param \PicoFeed\Config\Config $config Config instance - * @return Grabber - */ - public function setConfig($config) - { - $this->config = $config; - return $this; - } - - /** - * Get URL to download. - * - * @access public - * @return string - */ - public function getUrl() - { - return $this->url; - } - - /** - * Set URL to download and reset object to use for another grab. 
- * - * @access public - * @param string $url URL - * @return string - */ - public function setUrl($url) - { - $this->url = $url; - $this->html = ""; - $this->content = ""; - $this->encoding = ""; - - $this->handleFiles(); - $this->handleStreamingVideos(); - } - - /** - * Get relevant content - * - * @access public - * @return string - */ - public function getContent() - { - return $this->content; - } - - /** - * Get raw content (unfiltered) - * - * @access public - * @return string - */ - public function getRawContent() - { - return $this->html; - } - - /** - * Get filtered relevant content - * - * @access public - * @return string - */ - public function getFilteredContent() - { - $filter = Filter::html($this->content, $this->url); - $filter->setConfig($this->config); - return $filter->execute(); - } - - /** - * Return the Youtube embed player and skip processing - * - * @access public - * @return string - */ - public function handleStreamingVideos() - { - if (preg_match("#(?<=v=|v\/|vi=|vi\/|youtu.be\/)[a-zA-Z0-9_-]{11}#", $this->url, $matches)) { - $this->content = ''; - $this->skip_processing = true; - } - } - - /** - * Skip processing for PDF documents - * - * @access public - * @return string - */ - public function handleFiles() - { - if (substr($this->url, -3) === 'pdf') { - $this->skip_processing = true; - Logger::setMessage(get_called_class().': PDF document => processing skipped'); - } - } - - /** - * Parse the HTML content - * - * @access public - * @return bool - */ - public function parse() - { - if ($this->skip_processing) { - return true; - } - - if ($this->html) { - $html_encoding = XmlParser::getEncodingFromMetaTag($this->html); - - // Encode everything in UTF-8 - Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"'); - $this->html = Encoding::convert($this->html, $html_encoding ?: $this->encoding); - $this->html = Filter::stripHeadTags($this->html); - - 
Logger::setMessage(get_called_class().': Content length: '.strlen($this->html).' bytes'); - $rules = $this->getRules(); - - if (! empty($rules)) { - Logger::setMessage(get_called_class().': Parse content with rules'); - $this->parseContentWithRules($rules); - } - else { - Logger::setMessage(get_called_class().': Parse content with candidates'); - $this->parseContentWithCandidates(); - } - } - else { - Logger::setMessage(get_called_class().': No content fetched'); - } - - Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes'); - Logger::setMessage(get_called_class().': Grabber done'); - - return $this->content !== ''; - } - - /** - * Download the HTML content - * - * @access public - * @return HTML content - */ - public function download() - { - if (! $this->skip_processing && $this->url != '') { - - try { - - $client = Client::getInstance(); - - if ($this->config !== null) { - $client->setConfig($this->config); - $client->setTimeout($this->config->getGrabberTimeout()); - $client->setUserAgent($this->config->getGrabberUserAgent()); - } - - $client->execute($this->url); - - $this->url = $client->getUrl(); - $this->html = $client->getContent(); - $this->encoding = $client->getEncoding(); - } - catch (ClientException $e) { - Logger::setMessage(get_called_class().': '.$e->getMessage()); - } - } - - return $this->html; - } - - /** - * Try to find a predefined rule - * - * @access public - * @return array - */ - public function getRules() - { - $hostname = parse_url($this->url, PHP_URL_HOST); - - if ($hostname !== false) { - - $files = $this->getRulesFileList($hostname); - - foreach ($this->getRulesFolders() as $folder) { - $rule = $this->loadRuleFile($folder, $files); - - if (! 
empty($rule)) { - return $rule; - } - } - } - - return array(); - } - - /** - * Get the list of possible rules file names for a given hostname - * - * @access public - * @param string $hostname Hostname - * @return array - */ - public function getRulesFileList($hostname) - { - $files = array($hostname); // subdomain.domain.tld - $parts = explode('.', $hostname); - $len = count($parts); - - if ($len > 2) { - $subdomain = array_shift($parts); - $files[] = implode('.', $parts); // domain.tld - $files[] = '.'.implode('.', $parts); // .domain.tld - $files[] = $subdomain; // subdomain - } - else if ($len === 2) { - $files[] = '.'.implode('.', $parts); // .domain.tld - $files[] = $parts[0]; // domain - } - - return $files; - } - - /** - * Load a rule file from the defined folder - * - * @access public - * @param string $folder Rule directory - * @param array $files List of possible file names - * @return array - */ - public function loadRuleFile($folder, array $files) - { - foreach ($files as $file) { - $filename = $folder.'/'.$file.'.php'; - - if (file_exists($filename)) { - Logger::setMessage(get_called_class().' 
Load rule: '.$file); - return include $filename; - } - } - - return array(); - } - - /** - * Get the list of folders that contains rules - * - * @access public - * @return array - */ - public function getRulesFolders() - { - $folders = array(__DIR__.'/../Rules'); - - if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) { - $folders[] = $this->config->getGrabberRulesFolder(); - } - - return $folders; - } - - /** - * Get the relevant content with predefined rules - * - * @access public - * @param array $rules Rules - */ - public function parseContentWithRules(array $rules) - { - // Logger::setMessage($this->html); - $dom = XmlParser::getHtmlDocument(''.$this->html); - $xpath = new DOMXPath($dom); - - if (isset($rules['strip']) && is_array($rules['strip'])) { - - foreach ($rules['strip'] as $pattern) { - - $nodes = $xpath->query($pattern); - - if ($nodes !== false && $nodes->length > 0) { - foreach ($nodes as $node) { - $node->parentNode->removeChild($node); - } - } - } - } - - if (isset($rules['body']) && is_array($rules['body'])) { - - foreach ($rules['body'] as $pattern) { - - $nodes = $xpath->query($pattern); - - if ($nodes !== false && $nodes->length > 0) { - foreach ($nodes as $node) { - $this->content .= $dom->saveXML($node); - } - } - } - } - } - - /** - * Get the relevant content with the list of potential attributes - * - * @access public - */ - public function parseContentWithCandidates() - { - $dom = XmlParser::getHtmlDocument(''.$this->html); - $xpath = new DOMXPath($dom); - - // Try to lookup in each tag - foreach ($this->candidatesAttributes as $candidate) { - - Logger::setMessage(get_called_class().': Try this candidate: "'.$candidate.'"'); - - $nodes = $xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]'); - - if ($nodes !== false && $nodes->length > 0) { - $this->content = $dom->saveXML($nodes->item(0)); - 
Logger::setMessage(get_called_class().': Find candidate "'.$candidate.'" ('.strlen($this->content).' bytes)'); - break; - } - } - - // Try to fetch
- if (strlen($this->content) < 200) { - - $nodes = $xpath->query('//article'); - - if ($nodes !== false && $nodes->length > 0) { - $this->content = $dom->saveXML($nodes->item(0)); - Logger::setMessage(get_called_class().': Find
tag ('.strlen($this->content).' bytes)'); - } - } - - // Get everything - if (strlen($this->content) < 50) { - - $nodes = $xpath->query('//body'); - - if ($nodes !== false && $nodes->length > 0) { - Logger::setMessage(get_called_class().' No enought content fetched, get //body'); - $this->content = $dom->saveXML($nodes->item(0)); - } - } - - Logger::setMessage(get_called_class().': Strip garbage'); - $this->stripGarbage(); - } - - /** - * Strip useless tags - * - * @access public - */ - public function stripGarbage() - { - $dom = XmlParser::getDomDocument($this->content); - - if ($dom !== false) { - - $xpath = new DOMXPath($dom); - - foreach ($this->stripTags as $tag) { - - $nodes = $xpath->query('//'.$tag); - - if ($nodes !== false && $nodes->length > 0) { - Logger::setMessage(get_called_class().': Strip tag: "'.$tag.'"'); - foreach ($nodes as $node) { - $node->parentNode->removeChild($node); - } - } - } - - foreach ($this->stripAttributes as $attribute) { - - $nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]'); - - if ($nodes !== false && $nodes->length > 0) { - Logger::setMessage(get_called_class().': Strip attribute: "'.$attribute.'"'); - foreach ($nodes as $node) { - if ($this->shouldRemove($dom, $node)) { - $node->parentNode->removeChild($node); - } - } - } - } - - $this->content = $dom->saveXML($dom->documentElement); - } - } - - /** - * Return false if the node should not be removed - * - * @access public - * @param DomDocument $dom - * @param DomNode $node - * @return boolean - */ - public function shouldRemove($dom, $node) - { - $document_length = strlen($dom->textContent); - $node_length = strlen($node->textContent); - - if ($document_length === 0) { - return true; - } - - $ratio = $node_length * 100 / $document_length; - - if ($ratio >= 90) { - Logger::setMessage(get_called_class().': Should not remove this node ('.$node->nodeName.') ratio: '.$ratio.'%'); - return false; - } - - return true; - } -} diff 
--git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index ae77ff7..ec1dac4 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -235,6 +235,7 @@ class Attribute 'filterProtocolUrlAttribute', 'rewriteImageProxyUrl', 'secureIframeSrc', + 'removeYouTubeAutoplay' ); /** @@ -404,6 +405,25 @@ class Attribute return true; } + /** + * Removes YouTube autoplay from iframes + * + * @access public + * @param string $tag Tag name + * @param array $attribute Atttributes name + * @param string $value Attribute value + * @return boolean + */ + public function removeYouTubeAutoplay($tag, $attribute, &$value) + { + $regex = '%^(https://(?:www\.)?youtube.com/.*\?.*autoplay=)(1)(.*)%i'; + if ($tag === 'iframe' && $attribute === 'src' && preg_match($regex, $value)) { + $value = preg_replace($regex, '${1}0$3', $value); + } + + return true; + } + /** * Rewrite image url to use with a proxy * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php index 4e04660..36ab3f1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Html.php @@ -2,7 +2,9 @@ namespace PicoFeed\Filter; +use PicoFeed\Config\Config; use PicoFeed\Client\Url; +use PicoFeed\Scraper\RuleLoader; use PicoFeed\Parser\XmlParser; /** @@ -69,6 +71,14 @@ class Html */ public $attribute = ''; + /** + * The website to filter + * + * @access private + * @var string + */ + private $website; + /** * Initialize the filter, all inputs data must be encoded in UTF-8 before * @@ -81,6 +91,7 @@ class Html $this->input = XmlParser::HtmlToXml($html); $this->output = ''; $this->tag = new Tag; + $this->website = $website; $this->attribute = new Attribute(new Url($website)); } @@ -155,9 +166,45 @@ class Html public function postFilter() { $this->output = 
$this->tag->removeEmptyTags($this->output); + $this->output = $this->filterRules($this->output); + $this->output = $this->tag->removeMultipleBreakTags($this->output); $this->output = trim($this->output); } + /** + * Called after XML parsing + * @param string $content the content that should be filtered + * + * @access public + */ + public function filterRules($content) + { + // the constructor should require a config, then this if can be removed + if ($this->config === null) { + $config = new Config; + } else { + $config = $this->config; + } + + $loader = new RuleLoader($config); + $rules = $loader->getRules($this->website); + + $url = new Url($this->website); + $sub_url = $url->getFullPath(); + + if (isset($rules['filter'])) { + foreach ($rules['filter'] as $pattern => $rule) { + if (preg_match($pattern, $sub_url)) { + foreach($rule as $search => $replace) { + $content = preg_replace($search, $replace, $content); + } + } + } + } + + return $content; + } + /** * Parse opening tag * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index aa7efe4..b14ed94 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -194,7 +194,7 @@ class Tag * @param string $data Input data * @return string */ - public function removeMultipleTags($data) + public function removeMultipleBreakTags($data) { return preg_replace("/(\s*)+/", "
", $data); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index 7ef904f..810494b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -3,11 +3,11 @@ namespace PicoFeed\Parser; use SimpleXMLElement; +use PicoFeed\Client\Url; use PicoFeed\Encoding\Encoding; use PicoFeed\Filter\Filter; use PicoFeed\Logging\Logger; -use PicoFeed\Client\Url; -use PicoFeed\Client\Grabber; +use PicoFeed\Scraper\Scraper; /** * Base parser class @@ -81,6 +81,14 @@ abstract class Parser */ private $enable_grabber = false; + /** + * Enable the content grabber on all pages + * + * @access private + * @var bool + */ + private $grabber_needs_rule_file = false; + /** * Ignore those urls for the content scraper * @@ -237,11 +245,16 @@ abstract class Parser { if ($this->enable_grabber && ! in_array($item->getUrl(), $this->grabber_ignore_urls)) { - $grabber = new Grabber($item->getUrl()); - $grabber->setConfig($this->config); - $grabber->download(); + $grabber = new Scraper($this->config); + $grabber->setUrl($item->getUrl()); - if ($grabber->parse()) { + if ($this->grabber_needs_rule_file) { + $grabber->disableCandidateParser(); + } + + $grabber->execute(); + + if ($grabber->hasRelevantContent()) { $item->content = $grabber->getFilteredContent(); } } @@ -270,7 +283,6 @@ abstract class Parser * Generate a unique id for an entry (hash all arguments) * * @access public - * @param string $args Pieces of data to hash * @return string */ public function generateId() @@ -383,11 +395,14 @@ abstract class Parser * Enable the content grabber * * @access public + * @param bool $needs_rule_file true if only pages with rule files should be + * scraped * @return \PicoFeed\Parser\Parser */ - public function enableContentGrabber() + public function enableContentGrabber($needs_rule_file = false) { $this->enable_grabber = true; + 
$this->grabber_needs_rule_file = $needs_rule_file; } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php index 226169b..eec5e12 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blog.lemonde.fr.php @@ -1,10 +1,14 @@ 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208', - 'body' => array( - '//div[@class="entry-content"]', - ), - 'strip' => array( - '//*[contains(@class, "fb-like") or contains(@class, "social")]' - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208', + 'body' => array( + '//div[@class="entry-content"]', + ), + 'strip' => array( + '//*[contains(@class, "fb-like") or contains(@class, "social")]' + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php index aa17033..ee641b0 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.blogs.nytimes.com.php @@ -1,11 +1,15 @@ '//header/h1', - 'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/', - 'body' => array( - '//div[@class="postContent"]', - ), - 'strip' => array( - '//*[@class="shareToolsBox"]', - ), + 'grabber' => array( + '%.*%' => array( + 'title' => '//header/h1', + 'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/', + 'body' => array( + '//div[@class="postContent"]', + ), + 'strip' => array( + '//*[@class="shareToolsBox"]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php index 0e0436e..e2d1cc6 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.igen.fr.php @@ -1,9 +1,13 @@ 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031', - 'body' => array( - '//div[contains(@class, "field-name-body")]' - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031', + 'body' => array( + '//div[contains(@class, "field-name-body")]' + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php index 31f4d78..ed27bb5 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.nytimes.com.php @@ -1,8 +1,11 @@ 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html', - 'title' => '//h1[@class="articleHeadline"]', - 'body' => array( - '//div[@class="articleBody"]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html', + 'body' => array( + '//div[@class="articleBody"]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php index 0fd99f7..a2be240 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.phoronix.com.php @@ -1,9 +1,12 @@ 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1', - 'body' => array( - '//div[@class="KonaBody"]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1', + 
'body' => array( + '//div[@class="KonaBody"]', + ), + 'strip' => array() + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php index ad6f9c9..164ded6 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.slate.com.php @@ -1,16 +1,20 @@ 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html', - 'body' => array( - '//div[@class="sl-art-body"]', - ), - 'strip' => array( - '//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]', - '//*[@id="mys_slate_logged_in"]', - '//*[@id="sl_article_tools_myslate_bottom"]', - '//*[@id="mys_myslate"]', - '//*[@class="sl-viral-container"]', - '//*[@class="sl-art-creds-cntr"]', - '//*[@class="sl-art-ad-midflex"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html', + 'body' => array( + '//div[@class="sl-art-body"]', + ), + 'strip' => array( + '//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]', + '//*[@id="mys_slate_logged_in"]', + '//*[@id="sl_article_tools_myslate_bottom"]', + '//*[@id="mys_myslate"]', + '//*[@class="sl-viral-container"]', + '//*[@class="sl-art-creds-cntr"]', + '//*[@class="sl-art-ad-midflex"]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php index 6118488..4a1e8d2 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.theguardian.com.php @@ -1,10 
+1,14 @@ 'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business', - 'body' => array( - '//div[contains(@class, "content__main-column--article")]', - ), - 'strip' => array( - '//div[contains(@class, "meta-container")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business', + 'body' => array( + '//div[contains(@class, "content__main-column--article")]', + ), + 'strip' => array( + '//div[contains(@class, "meta-container")]', + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php index ea99ab6..7b8f76e 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wikipedia.org.php @@ -1,25 +1,29 @@ 'https://en.wikipedia.org/wiki/Grace_Hopper', - 'body' => array( - '//div[@id="bodyContent"]', - ), - 'strip' => array( - "//div[@id='toc']", - "//div[@id='catlinks']", - "//div[@id='jump-to-nav']", - "//div[@class='thumbcaption']//div[@class='magnify']", - "//table[@class='navbox']", - "//table[contains(@class, 'infobox')]", - "//div[@class='dablink']", - "//div[@id='contentSub']", - "//div[@id='siteSub']", - "//table[@id='persondata']", - "//table[contains(@class, 'metadata')]", - "//*[contains(@class, 'noprint')]", - "//*[contains(@class, 'printfooter')]", - "//*[contains(@class, 'editsection')]", - "//*[contains(@class, 'error')]", - "//span[@title='pronunciation:']", - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'https://en.wikipedia.org/wiki/Grace_Hopper', + 'body' => array( + '//div[@id="bodyContent"]', + ), + 'strip' => array( + "//div[@id='toc']", + "//div[@id='catlinks']", + "//div[@id='jump-to-nav']", + "//div[@class='thumbcaption']//div[@class='magnify']", + "//table[@class='navbox']", + 
"//table[contains(@class, 'infobox')]", + "//div[@class='dablink']", + "//div[@id='contentSub']", + "//div[@id='siteSub']", + "//table[@id='persondata']", + "//table[contains(@class, 'metadata')]", + "//*[contains(@class, 'noprint')]", + "//*[contains(@class, 'printfooter')]", + "//*[contains(@class, 'editsection')]", + "//*[contains(@class, 'error')]", + "//span[@title='pronunciation:']", + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php index 32c0475..dcc4735 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wired.com.php @@ -1,17 +1,21 @@ 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/', - 'body' => array( - '//div[@class="entry"]', - ), - 'strip' => array( - '//*[@id="linker_widget"]', - '//*[contains(@class, "bio")]', - '//*[contains(@class, "entry-footer")]', - '//*[contains(@class, "mobify_backtotop_link")]', - '//*[contains(@class, "gallery-navigation")]', - '//*[contains(@class, "gallery-thumbnail")]', - '//img[contains(@src, "1x1")]', - '//a[contains(@href, "creativecommons")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/', + 'body' => array( + '//div[@class="entry"]', + ), + 'strip' => array( + '//*[@id="linker_widget"]', + '//*[contains(@class, "bio")]', + '//*[contains(@class, "entry-footer")]', + '//*[contains(@class, "mobify_backtotop_link")]', + '//*[contains(@class, "gallery-navigation")]', + '//*[contains(@class, "gallery-thumbnail")]', + '//img[contains(@src, "1x1")]', + '//a[contains(@href, "creativecommons")]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php index 113feb5..752e8c2 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php +++ 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/.wsj.com.php @@ -1,11 +1,15 @@ 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html', - 'body' => array( - '//div[@class="articlePage"]', - ), - 'strip' => array( - '//*[@id="articleThumbnail_2"]', - '//*[@class="socialByline"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html', + 'body' => array( + '//div[@class="articlePage"]', + ), + 'strip' => array( + '//*[@id="articleThumbnail_2"]', + '//*[@class="socialByline"]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php index 615ad77..9c64491 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/01net.com.php @@ -1,14 +1,18 @@ 'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/', - 'body' => array( - '//div[@class="article_ventre_box"]', - ), - 'strip' => array( - '//link', - '//*[contains(@class, "article_navigation")]', - '//h1', - '//*[contains(@class, "article_toolbarMain")]', - '//*[contains(@class, "article_imagehaute_box")]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/', + 'body' => array( + '//div[@class="article_ventre_box"]', + ), + 'strip' => array( + '//link', + '//*[contains(@class, "article_navigation")]', + '//h1', + '//*[contains(@class, "article_toolbarMain")]', + '//*[contains(@class, "article_imagehaute_box")]' + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php index bbe26af..2faf0c4 100644 --- 
a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/alainonline.net.php @@ -1,10 +1,14 @@ 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907', - 'body' => array( - '//div[@class="news_details"]' - ), - 'strip' => array( - '//div[@class="news_details"]/div/div[last()]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907', + 'body' => array( + '//div[@class="news_details"]' + ), + 'strip' => array( + '//div[@class="news_details"]/div/div[last()]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php index 603bcdc..984e827 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/allgemeine-zeitung.de.php @@ -1,20 +1,23 @@ 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm', - 'body' => array( - '//div[contains(@class, "article")][1]', - ), - 'strip' => array( - '//read/h1', - '//*[@id="t-map"]', - '//*[contains(@class, "modules")]', - '//*[contains(@class, "adsense")]', - '//*[contains(@class, "linkbox")]', - '//*[contains(@class, "info")]', - '//*[@class="skip"]', - '//*[@class="funcs"]', - '//span[@class="nd address"]', - '//a[contains(@href, "abo-und-services")]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm', + 'body' => array( + '//div[contains(@class, "article")][1]', + ), + 'strip' => array( + '//read/h1', + '//*[@id="t-map"]', + '//*[contains(@class, "modules")]', + '//*[contains(@class, "adsense")]', + '//*[contains(@class, "linkbox")]', + '//*[contains(@class, 
"info")]', + '//*[@class="skip"]', + '//*[@class="funcs"]', + '//span[@class="nd address"]', + '//a[contains(@href, "abo-und-services")]' + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php new file mode 100644 index 0000000..ce93b3e --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/areadvd.de.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/', + 'body' => array('//div[contains(@class,"entry")]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php new file mode 100644 index 0000000..ab3fde0 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/awkwardzombie.com.php @@ -0,0 +1,10 @@ + array( + '%/index.php.*comic=.*%' => array( + 'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315', + 'body' => array('//*[@id="comic"]/img'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php index f540759..829cf3f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/blog.fefe.de.php @@ -1,9 +1,13 @@ 'http://blog.fefe.de/?ts=ad706a73', - 'body' => array( - '/html/body/ul' - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://blog.fefe.de/?ts=ad706a73', + 'body' => array( + '/html/body/ul' + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php index 2ddd17c..2073faf 100644 --- 
a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bunicomic.com.php @@ -1,9 +1,13 @@ 'http://www.bunicomic.com/comic/buni-623/', - 'body' => array( - '//div[@class="comic-table"]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.bunicomic.com/comic/buni-623/', + 'body' => array( + '//div[@class="comic-table"]', + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php new file mode 100644 index 0000000..e12f69f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cad-comic.com.php @@ -0,0 +1,12 @@ + array( + '%/cad/.+%' => array( + 'test_url' => 'http://www.cad-comic.com/cad/20150417', + 'body' => array( + '//*[@id="content"]/img' + ), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php new file mode 100644 index 0000000..b013a1d --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/chaoslife.findchaos.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild', + 'body' => array('//div[@id="comic"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php new file mode 100644 index 0000000..40e3eb8 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/cliquerefresh.com.php @@ -0,0 +1,10 @@ + array( + '%/comic.*%' => array( + 'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/', + 'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'), + 'strip' => array(), + ) + ) +); \ No newline 
at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php index 99a358f..7953b9a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/consomac.fr.php @@ -1,9 +1,13 @@ 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html', - 'body' => array( - '//div[contains(@id, "newscontent")]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html', + 'body' => array( + '//div[contains(@id, "newscontent")]', + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php old mode 100755 new mode 100644 index bde5895..44ba13e --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dailyjs.com.php @@ -1,15 +1,19 @@ 'http://dailyjs.com/2014/08/07/p5js/', - 'body' => array( - '//div[@id="post"]', - ), - 'strip' => array( - '//h2[@class="post"]', - '//div[@class="meta"]', - '//*[contains(@class, "addthis_toolbox")]', - '//*[contains(@class, "addthis_default_style")]', - '//*[@class="navigation small"]', - '//*[@id="related"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://dailyjs.com/2014/08/07/p5js/', + 'body' => array( + '//div[@id="post"]', + ), + 'strip' => array( + '//h2[@class="post"]', + '//div[@class="meta"]', + '//*[contains(@class, "addthis_toolbox")]', + '//*[contains(@class, "addthis_default_style")]', + '//*[@class="navigation small"]', + '//*[@id="related"]', + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php index e5f17de..c1499d4 100644 --- 
a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/degroupnews.com.php @@ -1,10 +1,14 @@ 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick', - 'body' => array( - '//div[@class="contenu"]', - ), - 'strip' => array( - '//div[contains(@class, "a2a")]' - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick', + 'body' => array( + '//div[@class="contenu"]', + ), + 'strip' => array( + '//div[contains(@class, "a2a")]' + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php index 687d72d..599a686 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/derstandard.at.php @@ -1,10 +1,14 @@ 'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss', - 'body' => array( - '//div[@class="copytext"]', - '//ul[@id="media-list"]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss', + 'body' => array( + '//div[@class="copytext"]', + '//ul[@id="media-list"]', + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php index 39837cc..5c143f4 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/distrowatch.com.php @@ -1,9 +1,13 @@ 'http://distrowatch.com/?newsid=08355', - 'body' => array( - '//td[@class="NewsText"][1]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 
'test_url' => 'http://distrowatch.com/?newsid=08355', + 'body' => array( + '//td[@class="NewsText"][1]', + ), + 'strip' => array( + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php index 7ef5737..a57b7f4 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/dozodomo.com.php @@ -1,11 +1,15 @@ 'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/', - 'body' => array( - '//div[@class="joke"]', - '//div[@class="story-cover"]', - '//div[@class="story-content"]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/', + 'body' => array( + '//div[@class="joke"]', + '//div[@class="story-cover"]', + '//div[@class="story-content"]', + ), + 'strip' => array( + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php new file mode 100644 index 0000000..aada167 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/engadget.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated', + 'body' => array('//div[@class="article-content"]/p[not(@class="read-more")] | //div[@class="article-content"]/div[@style="text-align: center;"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php new file mode 100644 index 0000000..6b6b20b --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/escapistmagazine.com.php @@ -0,0 +1,42 @@ + array( + '%/articles/view/comicsandcosplay/comics/critical-miss.*%' => 
array( + 'body' => array('//*[@class="body"]/span/img | //div[@class="folder_nav_links"]/following::p'), + 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss/13776-Critical-Miss-on-Framerates?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ), + '%/articles/view/comicsandcosplay/comics/namegame.*%' => array( + 'body' => array('//*[@class="body"]/span/p/img[@height != "120"]'), + 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame/9759-Leaving-the-Nest?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ), + '%/articles/view/comicsandcosplay/comics/stolen-pixels.*%' => array( + 'body' => array('//*[@class="body"]/span/p[2]/img'), + 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels/8866-Stolen-Pixels-258-Where-the-Boys-Are?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ), + '%/articles/view/comicsandcosplay/comics/bumhugparade.*%' => array( + 'body' => array('//*[@class="body"]/span/p[2]/img'), + 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade/8262-Bumhug-Parade-13?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ), + '%/articles/view/comicsandcosplay.*/comics/escapistradiotheater%' => array( + 'body' => array('//*[@class="body"]/span/p[2]/img'), + 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater/8265-The-Escapist-Radio-Theater-13?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ), + '%/articles/view/comicsandcosplay/comics/paused.*%' => array( + 'body' => array('//*[@class="body"]/span/p[2]/img | //*[@class="body"]/span/div/img'), + 'test_url' => 
'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/paused/8263-Paused-16?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ), + '%/articles/view/comicsandcosplay/comics/fraughtwithperil.*%' => array( + 'body' => array('//*[@class="body"]'), + 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil/12166-The-Escapist-Presents-Escapist-Comics-Critical-Miss-B-lyeh-Fhlop?utm_source=rss&utm_medium=rss&utm_campaign=articles', + 'strip' => array() + ) + ) +); + + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php index b9ca9b0..83a0890 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/explosm.net.php @@ -1,9 +1,13 @@ 'http://explosm.net/comics/3803/', - 'body' => array( - '//div[@id="comic-container"]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://explosm.net/comics/3803/', + 'body' => array( + '//div[@id="comic-container"]', + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php index 58cb9df..50995b8 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcodesign.com.php @@ -1,9 +1,13 @@ 'http://www.fastcodesign.com/3026548/exposure/peek-inside-the-worlds-forbidden-subway-tunnels', - 'body' => array( - '//article[contains(@class, "body prose")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.fastcodesign.com/3026548/exposure/peek-inside-the-worlds-forbidden-subway-tunnels', + 'body' => array( + '//article[contains(@class, "body prose")]', + ), + 'strip' => array( + ) + ) ) ); diff --git 
a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php index 9d31b0a..6aacbdb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcoexist.com.php @@ -1,9 +1,13 @@ 'http://www.fastcoexist.com/3026114/take-a-seat-on-this-gates-funded-future-toilet-that-will-change-how-we-think-about-poop', - 'body' => array( - '//article[contains(@class, "body prose")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.fastcoexist.com/3026114/take-a-seat-on-this-gates-funded-future-toilet-that-will-change-how-we-think-about-poop', + 'body' => array( + '//article[contains(@class, "body prose")]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php index 3bce2aa..778adbf 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fastcompany.com.php @@ -1,9 +1,13 @@ 'http://www.fastcompany.com/3026712/fast-feed/elon-musk-an-apple-tesla-merger-is-very-unlikely', - 'body' => array( - '//article[contains(@class, "body prose")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.fastcompany.com/3026712/fast-feed/elon-musk-an-apple-tesla-merger-is-very-unlikely', + 'body' => array( + '//article[contains(@class, "body prose")]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php index ba5db57..64dd263 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ffworld.com.php @@ -1,9 +1,13 @@ 'http://www.ffworld.com/?rub=news&page=voir&id=2709', - 'body' => array( - 
'//div[@class="news_body"]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.ffworld.com/?rub=news&page=voir&id=2709', + 'body' => array( + '//div[@class="news_body"]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php new file mode 100644 index 0000000..027c57a --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fowllanguagecomics.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'body' => array('//*[@id="comic"] | //*[@class="post-image"]'), + 'strip' => array(), + 'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/' + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php index 9ddd030..6e28b99 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/github.com.php @@ -1,10 +1,14 @@ 'https://github.com/audreyr/favicon-cheat-sheet', - 'body' => array( - '//article[contains(@class, "entry-content")]', - ), - 'strip' => array( - '//h1' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'https://github.com/audreyr/favicon-cheat-sheet', + 'body' => array( + '//article[contains(@class, "entry-content")]', + ), + 'strip' => array( + '//h1' + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php index 1a45fa6..269170c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php @@ -1,8 +1,12 @@ 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html', - 'body' => array( - '//header[@class="cluster-header"]', - '//div[@class="formatted"]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 
'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html', + 'body' => array( + '//header[@class="cluster-header"]', + '//div[@class="formatted"]' + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php index fa1d548..93343bb 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/heise.de.php @@ -1,7 +1,11 @@ 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html', - 'body' => array( - '//div[@class="meldung_wrapper"]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html', + 'body' => array( + '//div[@class="meldung_wrapper"]' + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php index 18ad465..07f4816 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/huffingtonpost.com.php @@ -1,9 +1,13 @@ 'http://www.huffingtonpost.com/2014/02/20/centscere-social-media-syracuse_n_4823848.html', - 'body' => array( - '//article[@class="content")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.huffingtonpost.com/2014/02/20/centscere-social-media-syracuse_n_4823848.html', + 'body' => array( + '//article[@class="content")]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php index c4a80be..e61e09a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/ing.dk.php @@ -1,8 +1,12 @@ 
'http://ing.dk/artikel/smart-husisolering-og-styring-skal-mindske-japans-energikrise-164517', - 'body' => array( - '//section[contains(@class, "teaser")]', - '//section[contains(@class, "body")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://ing.dk/artikel/smart-husisolering-og-styring-skal-mindske-japans-energikrise-164517', + 'body' => array( + '//section[contains(@class, "teaser")]', + '//section[contains(@class, "body")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php index ad0d67a..99d4ab1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/journaldugeek.com.php @@ -1,7 +1,11 @@ 'http://www./2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/', - 'body' => array( - '//div[@class="post-content"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www./2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/', + 'body' => array( + '//div[@class="post-content"]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php index 5a13053..3471bf5 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/kanpai.fr.php @@ -1,9 +1,13 @@ 'http://www.kanpai.fr/japon/comment-donner-lheure-en-japonais.html', - 'body' => array( - '//div[@class="single-left"]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.kanpai.fr/japon/comment-donner-lheure-en-japonais.html', + 'body' => array( + '//div[@class="single-left"]', + ), + 'strip' => array( + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php index 2ffafd6..cdd6389 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/karriere.jobfinder.dk.php @@ -1,8 +1,12 @@ 'http://karriere.jobfinder.dk/artikel/dansk-professor-skal-lede-smart-grid-forskning-20-millioner-dollars-763', - 'body' => array( - '//section[contains(@class, "teaser")]', - '//section[contains(@class, "body")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://karriere.jobfinder.dk/artikel/dansk-professor-skal-lede-smart-grid-forskning-20-millioner-dollars-763', + 'body' => array( + '//section[contains(@class, "teaser")]', + '//section[contains(@class, "body")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php index fbc2e53..4de41e4 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lejapon.fr.php @@ -1,13 +1,17 @@ 'http://lejapon.fr/guide-voyage-japon/5223/tokyo-sous-la-neige.htm', - 'body' => array( - '//div[@class="entry"]' - ), - 'strip' => array( - '//*[contains(@class, "addthis_toolbox")]', - '//*[contains(@class, "addthis_default_style")]', - '//*[@class="navigation small"]', - '//*[@id="related"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://lejapon.fr/guide-voyage-japon/5223/tokyo-sous-la-neige.htm', + 'body' => array( + '//div[@class="entry"]' + ), + 'strip' => array( + '//*[contains(@class, "addthis_toolbox")]', + '//*[contains(@class, "addthis_default_style")]', + '//*[@class="navigation small"]', + '//*[@id="related"]', + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php index 68e097a..861e725 100644 --- 
a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lesjoiesducode.fr.php @@ -1,9 +1,13 @@ 'http://lesjoiesducode.fr/post/75576211207/quand-lappli-ne-fonctionne-plus-sans-aucune-raison', - 'body' => array( - '//div[@class="blog-post-content"]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://lesjoiesducode.fr/post/75576211207/quand-lappli-ne-fonctionne-plus-sans-aucune-raison', + 'body' => array( + '//div[@class="blog-post-content"]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php new file mode 100644 index 0000000..50e84fd --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lfg.co.php @@ -0,0 +1,13 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.lfg.co/page/871/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+LookingForGroup+%28Looking+For+Group%29&utm_content=FeedBurner', + 'body' => array( + '//*[@id="comic"]/img | //*[@class="content"]' + ), + 'strip' => array(), + ) + ) +); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php index 9b22995..77c6cf3 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.com.php @@ -1,14 +1,18 @@ 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973', - 'body' => array( - '//div[contains(@class, "row")/img', - '//div[contains(@class, "content-column")]', - ), - 'strip' => array( - '//*[contains(@class, "meta")]', - '//span[contains(@class, "icon")]', - '//h1', - '//aside', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973', + 'body' => array( + '//div[contains(@class, 
"row")/img', + '//div[contains(@class, "content-column")]', + ), + 'strip' => array( + '//*[contains(@class, "meta")]', + '//span[contains(@class, "icon")]', + '//h1', + '//aside', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php index fb9c8d0..fdd92d3 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lists.php @@ -1,9 +1,13 @@ 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html', - 'body' => array( - '//pre', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html', + 'body' => array( + '//pre', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php new file mode 100644 index 0000000..3e07a22 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loadingartist.com.php @@ -0,0 +1,10 @@ + array( + '%/comic.*%' => array( + 'test_url' => 'http://www.loadingartist.com/comic/lifted-spirits/', + 'body' => array('//div[@class="comic"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php new file mode 100644 index 0000000..282013c --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/loldwell.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://loldwell.com/?comic=food-math-101', + 'body' => array('//*[@id="comic"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php index ae54540..695190a 100644 --- 
a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/macg.co.php @@ -1,9 +1,13 @@ 'http://www.macg.co//logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205', - 'body' => array( - '//div[contains(@class, "field-name-body")]' - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.macg.co//logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205', + 'body' => array( + '//div[contains(@class, "field-name-body")]' + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php index f2016e6..4cda04b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/marc.info.php @@ -1,9 +1,13 @@ 'http://marc.info/?l=openbsd-misc&m=141987113202061&w=2', - 'body' => array( - '//pre', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://marc.info/?l=openbsd-misc&m=141987113202061&w=2', + 'body' => array( + '//pre', + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php new file mode 100644 index 0000000..88c5fdc --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://maximumble.thebookofbiff.com/2015/04/20/1084-change/', + 'body' => array('//div[@id="comic"]/div/a/img'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php index 79ed5bc..c0dfc49 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php +++ 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/medium.com.php @@ -1,9 +1,13 @@ 'https://medium.com/lessons-learned/917b8b63ae3e', - 'body' => array( - '//div[contains(@class, "post-field body")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e', + 'body' => array( + '//div[contains(@class, "post-field body")]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php new file mode 100644 index 0000000..787553f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/metronieuws.nl.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.metronieuws.nl/sport/2015/04/broer-fellaini-zorgde-bijna-voor-paniek-bij-mourinho', + 'body' => array('//div[contains(@class,"article-top")]/div[contains(@class,"image-component")] | //div[@class="article-full-width"]/div[1]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php new file mode 100644 index 0000000..632c864 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://mokepon.smackjeeves.com/comics/2120096/chapter-9-page-68/', + 'body' => array('//*[@id="comic_area_inner"]/img | //*[@id="comic_area_inner"]/a/img'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php index cfc4b2d..d3838af 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php @@ -1,9 +1,13 @@ 
'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html', - 'body' => array( - '//div[@class="blog-post-body"]' - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html', + 'body' => array( + '//div[@class="blog-post-body"]' + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php new file mode 100644 index 0000000..ac41ee6 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/neustadt-ticker.de.php @@ -0,0 +1,12 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.neustadt-ticker.de/36480/aktuell/nachrichten/buergerbuero-neustadt-ab-heute-wieder-geoeffnet', + 'body' => array('//div[contains(@class,"article")]/div[@class="PostContent" and *[not(contains(@class, "navigation"))]]'), + 'strip' => array( + '//*[@id="wp_rp_first"]' + ), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php new file mode 100644 index 0000000..d3048c4 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/niceteethcomic.com.php @@ -0,0 +1,10 @@ + array( + '%/archives.*%' => array( + 'test_url' => 'http://niceteethcomic.com/archives/page119/', + 'body' => array('//*[@class="comicpane"]/a/img'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php new file mode 100644 index 0000000..b4fb73f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nichtlustig.de.php @@ -0,0 +1,8 @@ + array( + '%.*%' => array( + '%.*static.nichtlustig.de/comics/full/(\\d+).*%s' => '' + ) + ) +); \ 
No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php index 74d3fa1..1bdc199 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/openrightsgroup.org.php @@ -1,16 +1,20 @@ 'https://www.openrightsgroup.org/blog/2014/3-days-to-go-till-orgcon2014', - 'body' => array( - '//div[contains(@class, "content")]/div', - ), - 'strip' => array( - '//h2[1]', - '//div[@class="info"]', - '//div[@class="tags"]', - '//div[@class="comments"]', - '//div[@class="breadcrumbs"]', - '//h1[@class="pageTitle"]', - '//p[@class="bookmarkThis"]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'https://www.openrightsgroup.org/blog/2014/3-days-to-go-till-orgcon2014', + 'body' => array( + '//div[contains(@class, "content")]/div', + ), + 'strip' => array( + '//h2[1]', + '//div[@class="info"]', + '//div[@class="tags"]', + '//div[@class="comments"]', + '//div[@class="breadcrumbs"]', + '//h1[@class="pageTitle"]', + '//p[@class="bookmarkThis"]', + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php index 9a576f7..232cbca 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/pastebin.com.php @@ -1,9 +1,13 @@ 'http://pastebin.com/ed1pP9Ak', - 'body' => array( - '//div[@class="text"]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://pastebin.com/ed1pP9Ak', + 'body' => array( + '//div[@class="text"]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php new file mode 100644 index 0000000..dcd35a5 --- /dev/null +++ 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/penny-arcade.com.php @@ -0,0 +1,21 @@ + array( + '%/news/.*%' => array( + 'test_url' => 'http://penny-arcade.com/news/post/2015/04/15/101-part-two', + 'body' => array( + '//*[@class="postBody"]/*', + ), + 'strip' => array( + ) + ), + '%/comic/.*%' => array( + 'test_url' => 'http://penny-arcade.com/comic/2015/04/15', + 'body' => array( + '//*[@id="comicFrame"]/a/img', + ), + 'strip' => array( + ) + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php index d18e1db..3b09b40 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/plus.google.com.php @@ -1,7 +1,11 @@ 'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1', - 'body' => array( - '//div[@role="article"]/div[contains(@class, "eE")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1', + 'body' => array( + '//div[@role="article"]/div[contains(@class, "eE")]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php old mode 100755 new mode 100644 index 6857c20..e44a130 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/putaindecode.fr.php @@ -1,12 +1,16 @@ 'http://putaindecode.fr/posts/js/etat-lieux-js-modulaire-front/', - 'body' => array( - '//*[@class="putainde-Post-md"]', - ), - 'strip' => array( - '//*[contains(@class, "inlineimg")]', - '//*[contains(@class, "comment-respond")]', - '//header' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://putaindecode.fr/posts/js/etat-lieux-js-modulaire-front/', + 'body' => array( + '//*[@class="putainde-Post-md"]', + ), + 'strip' => array( + '//*[contains(@class, "inlineimg")]', 
+ '//*[contains(@class, "comment-respond")]', + '//header' + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php index bf3c8d8..08a2b2f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/rue89.nouvelobs.com.php @@ -1,9 +1,13 @@ 'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm', - 'body' => array( - '//*[@id="article"]/div[contains(@class, "content")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm', + 'body' => array( + '//*[@id="article"]/div[contains(@class, "content")]', + ), + 'strip' => array( + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php new file mode 100644 index 0000000..173f563 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/satwcomic.com.php @@ -0,0 +1,12 @@ + array( + '%.*%' => array( + 'test_url' => 'http://satwcomic.com/day-at-the-beach', + 'body' => array( + '//div[@class="container"]/center/a/img' + ), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php new file mode 100644 index 0000000..34f385d --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/scrumalliance.org.php @@ -0,0 +1,12 @@ + array( + '%.*%' => array( + 'test_url' => 
'https://www.scrumalliance.org/community/articles/2015/march/an-introduction-to-agile-project-intake?feed=articles', + 'body' => array( + '//div[@class="article_content"]', + ), + 'strip' => array() + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php index 8f3f588..5b3cbc7 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sitepoint.com.php @@ -1,9 +1,13 @@ 'http://www.sitepoint.com/creating-hello-world-app-swift/', - 'body' => array( - '//section[@class="article_body"]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.sitepoint.com/creating-hello-world-app-swift/', + 'body' => array( + '//section[@class="article_body"]', + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php new file mode 100644 index 0000000..b212fce --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/slashdot.org.php @@ -0,0 +1,11 @@ + array( + '%.*%' => array( + 'test_url' => 'http://science.slashdot.org/story/15/04/20/0528253/pull-top-can-tabs-at-50-reach-historic-archaeological-status', + 'body' => array( + '//article/div[@class="body"] | //article[@class="layout-article"]/div[@class="elips"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php index 8bea4fb..c82e31d 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smallhousebliss.com.php @@ -1,15 +1,19 @@ 'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/', - 'body' => array( - 
'//div[@class="post-content"]', - ), - 'strip' => array( - '//*[contains(@class, "gallery")]', - '//*[contains(@class, "share")]', - '//*[contains(@class, "wpcnt")]', - '//*[contains(@class, "meta")]', - '//*[contains(@class, "postitle")]', - '//*[@id="nav-below"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/', + 'body' => array( + '//div[@class="post-content"]', + ), + 'strip' => array( + '//*[contains(@class, "gallery")]', + '//*[contains(@class, "share")]', + '//*[contains(@class, "wpcnt")]', + '//*[contains(@class, "meta")]', + '//*[contains(@class, "postitle")]', + '//*[@id="nav-below"]', + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php new file mode 100644 index 0000000..83f93f1 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smarthomewelt.de.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://smarthomewelt.de/apple-tv-amazon-echo-smart-home/', + 'body' => array('//div[@class="entry-inner"]/p | //div[@class="entry-inner"]/div[contains(@class,"wp-caption")]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php new file mode 100644 index 0000000..5000072 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smashingmagazine.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.smashingmagazine.com/2015/04/17/using-sketch-for-responsive-web-design-case-study/', + 'body' => array('//article[contains(@class,"post")]/p'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php index 375b17c..d71893a 100644 
--- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/spiegel.de.php @@ -1,7 +1,11 @@ 'http://www.spiegel.de/politik/ausland/afrika-angola-geht-gegen-islam-vor-und-schliesst-moscheen-a-935788.html', - 'body' => array( - '//div[contains(@class, "article-section")]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.spiegel.de/politik/ausland/afrika-angola-geht-gegen-islam-vor-und-schliesst-moscheen-a-935788.html', + 'body' => array( + '//div[contains(@class, "article-section")]' + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php new file mode 100644 index 0000000..8629a58 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/sz.de.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://sz.de/1.2443161', + 'body' => array('//article[@id="sitecontent"]/section[@class="topenrichment"]//img | //article[@id="sitecontent"]/section[@class="body"]/section[@class="authors"]/preceding-sibling::*[not(contains(@class, "ad"))]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php index bc4d5b8..5646a17 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/techcrunch.com.php @@ -1,11 +1,15 @@ 'http://techcrunch.com/2013/08/31/indias-visa-maze/', - 'body' => array( - '//div[contains(@class, "media-container")]', - '//div[@class="body-copy"]', - ), - 'strip' => array( - '//*[contains(@class, "module-crunchbase")]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://techcrunch.com/2013/08/31/indias-visa-maze/', + 'body' => array( + '//div[contains(@class, "media-container")]', + '//div[@class="body-copy"]', + ), + 'strip' => array( + 
'//*[contains(@class, "module-crunchbase")]' + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php new file mode 100644 index 0000000..d33e127 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thecodinglove.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://thecodinglove.com/post/116897934767', + 'body' => array('//div[@class="bodytype"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php new file mode 100644 index 0000000..e733730 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thegamercat.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.thegamercat.com/comic/just-no/', + 'body' => array('//div[@id="comic"] | //div[@class="post-content"]/div[@class="entry"]/p'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php new file mode 100644 index 0000000..b625ac2 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/themerepublic.net.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.themerepublic.net/2015/04/david-lopez-pitoko.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+blogspot%2FDngUJ+%28Theme+Republic%29&utm_content=FeedBurner', + 'body' => array('//*[@class="post-body"]'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php new file mode 100644 index 0000000..b563a71 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/travel-dealz.de.php @@ -0,0 +1,15 
@@ + array( + '%^/blog.*%' => array( + 'test_url' => 'http://travel-dealz.de/blog/venere-gutschein/', + 'body' => array('//div[@class="post-entry"]'), + 'strip' => array( + '//*[@id="jp-relatedposts"]', + '//*[@class="post-meta"]', + '//*[@class="post-data"]', + '//*[@id="author-meta"]', + ), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php index 7fbbb0c..f324f4c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/treehugger.com.php @@ -1,10 +1,14 @@ 'http://www.treehugger.com/uncategorized/top-ten-posts-week-bunnies-2.html', - 'body' => array( - '//div[contains(@class, "promo-image")]', - '//div[contains(@id, "entry-body")]', - ), - 'strip' => array( + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.treehugger.com/uncategorized/top-ten-posts-week-bunnies-2.html', + 'body' => array( + '//div[contains(@class, "promo-image")]', + '//div[contains(@id, "entry-body")]', + ), + 'strip' => array( + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php new file mode 100644 index 0000000..77caec8 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twogag.com.php @@ -0,0 +1,9 @@ + array( + '%.*%' => array( + "%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%" => + "http://www.twogag.com/comics/$1.jpg" + ) + ) +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php new file mode 100644 index 0000000..aafb71c --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/twokinds.keenspot.com.php @@ -0,0 +1,10 @@ + array( + '%.*%' => array( + 'test_url' => 'http://twokinds.keenspot.com/archive.php?p=0', + 'body' => array('//*[@class="comic"]/div/a/img | 
//*[@class="comic"]/div/img | //*[@id="cg_img"]/img | //*[@id="cg_img"]/a/img'), + 'strip' => array(), + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php index f36ccfe..8b14d96 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/undeadly.org.php @@ -1,10 +1,14 @@ 'http://undeadly.org/cgi?action=article&sid=20141101181155', - 'body' => array( - '/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]' - ), - 'strip' => array( - '//font', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://undeadly.org/cgi?action=article&sid=20141101181155', + 'body' => array( + '/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]' + ), + 'strip' => array( + '//font', + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php index ce57beb..520496a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/version2.dk.php @@ -1,8 +1,12 @@ 'http://www.version2.dk/artikel/surface-pro-2-fungerer-bedre-til-arbejde-end-fornoejelse-55195', - 'body' => array( - '//section[contains(@class, "teaser")]', - '//section[contains(@class, "body")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.version2.dk/artikel/surface-pro-2-fungerer-bedre-til-arbejde-end-fornoejelse-55195', + 'body' => array( + '//section[contains(@class, "teaser")]', + '//section[contains(@class, "body")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php new file mode 100644 index 0000000..05de7c2 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vgcats.com.php @@ -0,0 +1,16 
@@ + array( + '%/comics.*%' => array( + 'test_url' => 'http://www.vgcats.com/comics/?strip_id=358', + 'body' => array('//*[@align="center"]/img'), + 'strip' => array(), + ), + '%/super.*%' => array( + 'test_url' => 'http://www.vgcats.com/super/?strip_id=84', + 'body' => array('//*[@align="center"]/p/img'), + 'strip' => array(), + ) + ) +); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php new file mode 100644 index 0000000..e9880f2 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/vuxml.org.php @@ -0,0 +1,17 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.vuxml.org/freebsd/a5f160fa-deee-11e4-99f8-080027ef73ec.html', + 'body' => array( + '//body' + ), + 'strip' => array( + '//h1', + '//div[@class="blurb"]', + '//hr', + '//p[@class="copyright"]' + ) + ) + ) +); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php index 5440781..76895c2 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bbc.co.uk.php @@ -1,20 +1,24 @@ 'http://www.bbc.co.uk/news/world-middle-east-23911833', - 'body' => array( - '//div[@class="story-body"]', - '//div[@class="indPost"]' - ), - 'strip' => array( - '//form', - '//*[@class="warning"]', - '//*[@class="story-date"]', - '//*[@class="story-header"]', - '//*[@class="story-related"]', - '//*[contains(@class, "byline")]', - '//*[contains(@class, "story-feature")]', - '//*[@id="video-carousel-container"]', - '//*[@id="also-related-links"]', - '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833', + 'body' => array( + '//div[@class="story-body"]', + '//div[@class="indPost"]' + ), + 'strip' => array( + 
'//form', + '//*[@class="warning"]', + '//*[@class="story-date"]', + '//*[@class="story-header"]', + '//*[@class="story-related"]', + '//*[contains(@class, "byline")]', + '//*[contains(@class, "story-feature")]', + '//*[@id="video-carousel-container"]', + '//*[@id="also-related-links"]', + '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php index 528ad41..be1cbcd 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.bdgest.com.php @@ -1,11 +1,15 @@ 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html', - 'body' => array( - '//*[contains(@class, "chronique")]', - ), - 'strip' => array( - '//*[contains(@class, "post-review")]', - '//*[contains(@class, "footer-review")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html', + 'body' => array( + '//*[contains(@class, "chronique")]', + ), + 'strip' => array( + '//*[contains(@class, "post-review")]', + '//*[contains(@class, "footer-review")]', + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php index 1ac90a3..1f2ad25 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.businessweek.com.php @@ -1,11 +1,15 @@ 'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software', - 'body' => array( - '//div[@id="lead_graphic"]', - '//div[@id="article_body"]', - ), - 'strip' => array( - '//*[contains(@class, "related_item")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 
'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software', + 'body' => array( + '//div[@id="lead_graphic"]', + '//div[@id="article_body"]', + ), + 'strip' => array( + '//*[contains(@class, "related_item")]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php index 5ceb3bd..c041aec 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.cnn.com.php @@ -1,20 +1,24 @@ 'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1', - 'body' => array( - '//div[@class="cnn_strycntntlft"]', - ), - 'strip' => array( - '//div[@class="cnn_stryshrwdgtbtm"]', - '//div[@class="cnn_strybtmcntnt"]', - '//div[@class="cnn_strylftcntnt"]', - '//div[contains(@class, "cnnGalleryContainer")]', - '//div[contains(@class, "cnn_strylftcexpbx")]', - '//div[contains(@class, "articleGalleryNavContainer")]', - '//div[contains(@class, "cnnArticleGalleryCaptionControl")]', - '//div[contains(@class, "cnnArticleGalleryNavPrevNextDisabled")]', - '//div[contains(@class, "cnnArticleGalleryNavPrevNext")]', - '//div[contains(@class, "cnn_html_media_title_new")]', - '//div[contains(@id, "disqus")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1', + 'body' => array( + '//div[@class="cnn_strycntntlft"]', + ), + 'strip' => array( + '//div[@class="cnn_stryshrwdgtbtm"]', + '//div[@class="cnn_strybtmcntnt"]', + '//div[@class="cnn_strylftcntnt"]', + '//div[contains(@class, "cnnGalleryContainer")]', + '//div[contains(@class, "cnn_strylftcexpbx")]', + '//div[contains(@class, "articleGalleryNavContainer")]', + '//div[contains(@class, "cnnArticleGalleryCaptionControl")]', + '//div[contains(@class, "cnnArticleGalleryNavPrevNextDisabled")]', + '//div[contains(@class, 
"cnnArticleGalleryNavPrevNext")]', + '//div[contains(@class, "cnn_html_media_title_new")]', + '//div[contains(@id, "disqus")]', + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php index d56bcca..3f1dd59 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.developpez.com.php @@ -1,17 +1,21 @@ 'http://www.developpez.com/actu/81757/Mozilla-annonce-la-disponibilite-de-Firefox-36-qui-passe-au-HTTP-2-et-permet-la-synchronisation-de-son-ecran-d-accueil/', - 'body' => array( - '//*[@itemprop="articleBody"]', - ), - 'strip' => array( - '//form', - '//div[@class="content"]/img', - '//a[last()]/following-sibling::*', - '//*[contains(@class,"actuTitle")]', - '//*[contains(@class,"date")]', - '//*[contains(@class,"inlineimg")]', - '//*[@id="signaler"]', - '//*[@id="signalerFrame"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.developpez.com/actu/81757/Mozilla-annonce-la-disponibilite-de-Firefox-36-qui-passe-au-HTTP-2-et-permet-la-synchronisation-de-son-ecran-d-accueil/', + 'body' => array( + '//*[@itemprop="articleBody"]', + ), + 'strip' => array( + '//form', + '//div[@class="content"]/img', + '//a[last()]/following-sibling::*', + '//*[contains(@class,"actuTitle")]', + '//*[contains(@class,"date")]', + '//*[contains(@class,"inlineimg")]', + '//*[@id="signaler"]', + '//*[@id="signalerFrame"]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php index 9c9b73f..ada54ab 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.egscomics.com.php @@ -1,8 +1,12 @@ 'http://www.egscomics.com/index.php?id=1690', - 'title' => '/html/head/title', - 'body' => array( - 
'//img[@id="comic"]' + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.egscomics.com/index.php?id=1690', + 'title' => '/html/head/title', + 'body' => array( + '//img[@id="comic"]' + ) + ) ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php index 0eff7a7..4b92aaf 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.forbes.com.php @@ -1,16 +1,20 @@ 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/', - 'body' => array( - '//div[@id="leftRail"]/div[contains(@class, body)]', - ), - 'strip' => array( - '//aside', - '//div[contains(@class, "entity_block")]', - '//div[contains(@class, "vestpocket") and not contains(@class, "body")]', - '//div[contains(@style, "display")]', - '//div[contains(@id, "comment")]', - '//div[contains(@class, "widget")]', - '//div[contains(@class, "pagination")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/', + 'body' => array( + '//div[@id="leftRail"]/div[contains(@class, body)]', + ), + 'strip' => array( + '//aside', + '//div[contains(@class, "entity_block")]', + '//div[contains(@class, "vestpocket") and not contains(@class, "body")]', + '//div[contains(@style, "display")]', + '//div[contains(@id, "comment")]', + '//div[contains(@class, "widget")]', + '//div[contains(@class, "pagination")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php index 73d5744..238b056 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php +++ 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.futura-sciences.com.php @@ -1,15 +1,19 @@ 'http://www.futura-sciences.com/magazines/espace/infos/actu/d/astronautique-curiosity-franchi-succes-dune-dingo-gap-52289/#xtor=RSS-8', - 'body' => array( - '//div[contains(@class, "content fiche-")]', - ), - 'strip' => array( - '//h1', - '//*[contains(@class, "content-date")]', - '//*[contains(@class, "diaporama")]', - '//*[contains(@class, "slider")]', - '//*[contains(@class, "cartouche")]', - '//*[contains(@class, "noprint")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.futura-sciences.com/magazines/espace/infos/actu/d/astronautique-curiosity-franchi-succes-dune-dingo-gap-52289/#xtor=RSS-8', + 'body' => array( + '//div[contains(@class, "content fiche-")]', + ), + 'strip' => array( + '//h1', + '//*[contains(@class, "content-date")]', + '//*[contains(@class, "diaporama")]', + '//*[contains(@class, "slider")]', + '//*[contains(@class, "cartouche")]', + '//*[contains(@class, "noprint")]', + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php index 125bb6a..e72ddcf 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lemonde.fr.php @@ -1,13 +1,17 @@ array( - 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html', - 'http://www.lemonde.fr/afrique/article/2015/04/06/plonge-dans-la-crise-l-angola-revele-son-vrai-visage_4610364_3212.html', - ), - 'body' => array( - '//div[@id="articleBody"]', - '//div[@itemprop="articleBody"]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => array( + 
'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html', + 'http://www.lemonde.fr/afrique/article/2015/04/06/plonge-dans-la-crise-l-angola-revele-son-vrai-visage_4610364_3212.html', + ), + 'body' => array( + '//div[@id="articleBody"]', + '//div[@itemprop="articleBody"]', + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php index adb5749..9a3107f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.lepoint.fr.php @@ -1,14 +1,18 @@ 'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php', - 'body' => array( - '//article', - ), - 'strip' => array( - '//*[contains(@class, "info_article")]', - '//*[contains(@class, "fildariane_titre")]', - '//*[contains(@class, "entete2_article")]', - '//*[contains(@class, "signature_article")]', - '//*[contains(@id, "share")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php', + 'body' => array( + '//article', + ), + 'strip' => array( + '//*[contains(@class, "info_article")]', + '//*[contains(@class, "fildariane_titre")]', + '//*[contains(@class, "entete2_article")]', + '//*[contains(@class, "signature_article")]', + '//*[contains(@id, "share")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php index ee91ae9..3951329 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php 
+++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.mac4ever.com.php @@ -1,9 +1,13 @@ 'http://www.mac4ever.com/actu/87392_video-quand-steve-jobs-et-bill-gates-jouaient-au-bachelor-avec-le-mac', - 'body' => array( - '//div[contains(@class, "news-news-content")]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.mac4ever.com/actu/87392_video-quand-steve-jobs-et-bill-gates-jouaient-au-bachelor-avec-le-mac', + 'body' => array( + '//div[contains(@class, "news-news-content")]', + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php index fc45ef2..2010e09 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.nextinpact.com.php @@ -1,9 +1,13 @@ 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact', - 'body' => array( - '//div[contains(@id, "actu_content")]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact', + 'body' => array( + '//div[contains(@id, "actu_content")]', + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php index 630c060..e924982 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.npr.org.php @@ -1,13 +1,17 @@ 
'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut', - 'body' => array( - '//div[@id="storytext"]', - ), - 'strip' => array( - '//*[@class="bucket img"]', - '//*[@class="creditwrap"]', - '//*[@class="captionwrap"]', - '//*[contains(@class, "enlargebtn")]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut', + 'body' => array( + '//div[@id="storytext"]', + ), + 'strip' => array( + '//*[@class="bucket img"]', + '//*[@class="creditwrap"]', + '//*[@class="captionwrap"]', + '//*[contains(@class, "enlargebtn")]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php index b6387da..1f75e4b 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.numerama.com.php @@ -1,11 +1,15 @@ 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html', - 'body' => array( - '//div[@class="col_left"]//div[@class="content"]', - ), - 'strip' => array( - '//div[@class="news_social"]', - '//div[@id="newssuiv"]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html', + 'body' => array( + '//div[@class="col_left"]//div[@class="content"]', + ), + 'strip' => array( + '//div[@class="news_social"]', + '//div[@id="newssuiv"]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php index fc45ef2..2010e09 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pcinpact.com.php @@ -1,9 +1,13 @@ 
'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact', - 'body' => array( - '//div[contains(@id, "actu_content")]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact', + 'body' => array( + '//div[contains(@id, "actu_content")]', + ), + 'strip' => array( + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php index bfb9303..63a918e 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.pseudo-sciences.org.php @@ -1,12 +1,16 @@ 'http://www.pseudo-sciences.org/spip.php?article2275', - 'body' => array( - '//div[@id="art_main"]', - ), - 'strip' => array( - '//div[@id="art_print"]', - '//div[@id="art_chapo"]', - '//img[@class="puce"]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.pseudo-sciences.org/spip.php?article2275', + 'body' => array( + '//div[@id="art_main"]', + ), + 'strip' => array( + '//div[@id="art_print"]', + '//div[@id="art_chapo"]', + '//img[@class="puce"]', + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php index de211f4..c619199 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.slate.fr.php @@ -1,15 +1,19 @@ 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne', - 'body' => array( - '//div[@class="article_content"]', - ), - 'strip' => array( - '//*[@id="slate_associated_bn"]', - 
'//*[@id="ligatus-article"]', - '//*[@id="article_sidebar"]', - '//div[contains(@id, "reseaux")]', - '//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]', - '//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]', + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne', + 'body' => array( + '//div[@class="article_content"]', + ), + 'strip' => array( + '//*[@id="slate_associated_bn"]', + '//*[@id="ligatus-article"]', + '//*[@id="article_sidebar"]', + '//div[contains(@id, "reseaux")]', + '//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]', + '//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]', + ) + ) ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php index 8679a7b..8203b97 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/www.universfreebox.com.php @@ -1,11 +1,15 @@ 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go', - 'body' => array( - '//div[@id="corps_corps"]' - ), - 'strip' => array( - '//*[@id="formulaire"]', - '//*[@id="commentaire"]', - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go', + 'body' => array( + '//div[@id="corps_corps"]' + ), + 'strip' => array( + '//*[@id="formulaire"]', + '//*[@id="commentaire"]', + ), + ) + ) ); \ No newline at end of file diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php 
b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php index 37fec18..6f83cb8 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/xkcd.com.php @@ -1,9 +1,13 @@ 'http://xkcd.com/1472/', - 'body' => array( - '//div[@id="comic"]', - ), - 'strip' => array( - ), + 'grabber' => array( + '%.*%' => array( + 'test_url' => 'http://xkcd.com/1472/', + 'body' => array( + '//div[@id="comic"]', + ), + 'strip' => array( + ), + ) + ) ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php new file mode 100644 index 0000000..907c4d8 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php @@ -0,0 +1,286 @@ +dom = XmlParser::getHtmlDocument(''.$html); + $this->xpath = new DOMXPath($this->dom); + } + + /** + * Get the relevant content with the list of potential attributes + * + * @access public + * @return string + */ + public function execute() + { + $content = $this->findContentWithCandidates(); + + if (strlen($content) < 200) { + $content = $this->findContentWithArticle(); + } + + if (strlen($content) < 50) { + $content = $this->findContentWithBody(); + } + + return $this->stripGarbage($content); + } + + /** + * Find content based on the list of tag candidates + * + * @access public + * @return string + */ + public function findContentWithCandidates() + { + foreach ($this->candidatesAttributes as $candidate) { + + Logger::setMessage(get_called_class().': Try this candidate: "'.$candidate.'"'); + + $nodes = $this->xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]'); + + if ($nodes !== false && $nodes->length > 0) { + Logger::setMessage(get_called_class().': Find candidate "'.$candidate.'"'); + return $this->dom->saveXML($nodes->item(0)); + } + } + + return ''; + } + + /** + * Find
tag + * + * @access public + * @return string + */ + public function findContentWithArticle() + { + $nodes = $this->xpath->query('//article'); + + if ($nodes !== false && $nodes->length > 0) { + Logger::setMessage(get_called_class().': Find
tag'); + return $this->dom->saveXML($nodes->item(0)); + } + + return ''; + } + + /** + * Find tag + * + * @access public + * @return string + */ + public function findContentWithBody() + { + $nodes = $this->xpath->query('//body'); + + if ($nodes !== false && $nodes->length > 0) { + Logger::setMessage(get_called_class().' Find '); + return $this->dom->saveXML($nodes->item(0)); + } + + return ''; + } + + /** + * Strip useless tags + * + * @access public + * @param string $content + * @return string + */ + public function stripGarbage($content) + { + $dom = XmlParser::getDomDocument($content); + + if ($dom !== false) { + + $xpath = new DOMXPath($dom); + + $this->stripTags($xpath); + $this->stripAttributes($dom, $xpath); + + $content = $dom->saveXML($dom->documentElement); + } + + return $content; + } + + /** + * Remove blacklisted tags + * + * @access public + * @param DOMXPath $xpath + */ + public function stripTags(DOMXPath $xpath) + { + foreach ($this->stripTags as $tag) { + + $nodes = $xpath->query('//'.$tag); + + if ($nodes !== false && $nodes->length > 0) { + + Logger::setMessage(get_called_class().': Strip tag: "'.$tag.'"'); + + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + } + } + } + + /** + * Remove blacklisted attributes + * + * @access public + * @param DomDocument $dom + * @param DOMXPath $xpath + */ + public function stripAttributes(DomDocument $dom, DOMXPath $xpath) + { + foreach ($this->stripAttributes as $attribute) { + + $nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]'); + + if ($nodes !== false && $nodes->length > 0) { + + Logger::setMessage(get_called_class().': Strip attribute: "'.$attribute.'"'); + + foreach ($nodes as $node) { + if ($this->shouldRemove($dom, $node)) { + $node->parentNode->removeChild($node); + } + } + } + } + } + + /** + * Return false if the node should not be removed + * + * @access public + * @param DomDocument $dom + * @param DomNode $node + * 
@return boolean + */ + public function shouldRemove(DomDocument $dom, $node) + { + $document_length = strlen($dom->textContent); + $node_length = strlen($node->textContent); + + if ($document_length === 0) { + return true; + } + + $ratio = $node_length * 100 / $document_length; + + if ($ratio >= 90) { + Logger::setMessage(get_called_class().': Should not remove this node ('.$node->nodeName.') ratio: '.$ratio.'%'); + return false; + } + + return true; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php new file mode 100644 index 0000000..4cce68f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php @@ -0,0 +1,13 @@ +config = $config; + } + + /** + * Get the rules for an URL + * + * @access public + * @param string $url the URL that should be looked up + * @return array the array containing the rules + */ + public function getRules($url) + { + $hostname = parse_url($url, PHP_URL_HOST); + + if ($hostname !== false) { + + $files = $this->getRulesFileList($hostname); + + foreach ($this->getRulesFolders() as $folder) { + $rule = $this->loadRuleFile($folder, $files); + + if (! 
empty($rule)) { + return $rule; + } + } + } + + return array(); + } + + /** + * Get the list of possible rules file names for a given hostname + * + * @access public + * @param string $hostname Hostname + * @return array + */ + public function getRulesFileList($hostname) + { + $files = array($hostname); // subdomain.domain.tld + $parts = explode('.', $hostname); + $len = count($parts); + + if ($len > 2) { + $subdomain = array_shift($parts); + $files[] = implode('.', $parts); // domain.tld + $files[] = '.'.implode('.', $parts); // .domain.tld + $files[] = $subdomain; // subdomain + } + else if ($len === 2) { + $files[] = '.'.implode('.', $parts); // .domain.tld + $files[] = $parts[0]; // domain + } + + return $files; + } + + /** + * Load a rule file from the defined folder + * + * @access public + * @param string $folder Rule directory + * @param array $files List of possible file names + * @return array + */ + public function loadRuleFile($folder, array $files) + { + foreach ($files as $file) { + $filename = $folder.'/'.$file.'.php'; + if (file_exists($filename)) { + Logger::setMessage(get_called_class().' 
Load rule: '.$file); + return include $filename; + } + } + + return array(); + } + + /** + * Get the list of folders that contains rules + * + * @access public + * @return array + */ + public function getRulesFolders() + { + $folders = array(__DIR__.'/../Rules'); + + if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) { + $folders[] = $this->config->getGrabberRulesFolder(); + } + + return $folders; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php new file mode 100644 index 0000000..f89ed0b --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php @@ -0,0 +1,93 @@ +rules = $rules; + $this->dom = XmlParser::getHtmlDocument(''.$html); + $this->xpath = new DOMXPath($this->dom); + } + + /** + * Get the relevant content with predefined rules + * + * @access public + * @return string + */ + public function execute() + { + $this->stripTags(); + return $this->findContent(); + } + + /** + * Remove HTML tags + * + * @access public + */ + public function stripTags() + { + if (isset($this->rules['strip']) && is_array($this->rules['strip'])) { + + foreach ($this->rules['strip'] as $pattern) { + + $nodes = $this->xpath->query($pattern); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + } + } + } + } + + /** + * Fetch content based on Xpath rules + * + * @access public + */ + public function findContent() + { + $content = ''; + + if (isset($this->rules['body']) && is_array($this->rules['body'])) { + + foreach ($this->rules['body'] as $pattern) { + + $nodes = $this->xpath->query($pattern); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $content .= $this->dom->saveXML($node); + } + } + } + } + + return $content; + } +} diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php 
b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php new file mode 100644 index 0000000..a9d8e65 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php @@ -0,0 +1,361 @@ +config = $config; + Logger::setTimezone($this->config->getTimezone()); + } + + /** + * Disable candidates parsing + * + * @access public + * @return Scraper + */ + public function disableCandidateParser() + { + $this->enableCandidateParser = false; + return $this; + } + + /** + * Get encoding + * + * @access public + * @return string + */ + public function getEncoding() + { + return $this->encoding; + } + + /** + * Set encoding + * + * @access public + * @param string $encoding + * @return Scraper + */ + public function setEncoding($encoding) + { + $this->encoding = $encoding; + return $this; + } + + /** + * Get URL to download + * + * @access public + * @return string + */ + public function getUrl() + { + return $this->url; + } + + /** + * Set URL to download + * + * @access public + * @param string $url URL + * @return Scraper + */ + public function setUrl($url) + { + $this->url = $url; + return $this; + } + + /** + * Return true if the scraper found relevant content + * + * @access public + * @return boolean + */ + public function hasRelevantContent() + { + return ! 
empty($this->content); + } + + /** + * Get relevant content + * + * @access public + * @return string + */ + public function getRelevantContent() + { + return $this->content; + } + + /** + * Get raw content (unfiltered) + * + * @access public + * @return string + */ + public function getRawContent() + { + return $this->html; + } + + /** + * Set raw content (unfiltered) + * + * @access public + * @param string $html + * @return Scraper + */ + public function setRawContent($html) + { + $this->html = $html; + return $this; + } + + /** + * Get filtered relevant content + * + * @access public + * @return string + */ + public function getFilteredContent() + { + $filter = Filter::html($this->content, $this->url); + $filter->setConfig($this->config); + return $filter->execute(); + } + + /** + * Download the HTML content + * + * @access public + * @return boolean + */ + public function download() + { + if (! empty($this->url)) { + + // Clear everything + $this->html = ''; + $this->content = ''; + $this->encoding = ''; + + try { + + $client = Client::getInstance(); + $client->setConfig($this->config); + $client->setTimeout($this->config->getGrabberTimeout()); + $client->setUserAgent($this->config->getGrabberUserAgent()); + $client->execute($this->url); + + $this->url = $client->getUrl(); + $this->html = $client->getContent(); + $this->encoding = $client->getEncoding(); + + return true; + } + catch (ClientException $e) { + Logger::setMessage(get_called_class().': '.$e->getMessage()); + } + } + + return false; + } + + /** + * Execute the scraper + * + * @access public + */ + public function execute() + { + $this->download(); + + if (! $this->skipProcessing()) { + $this->prepareHtml(); + + $parser = $this->getParser(); + + if ($parser !== null) { + $this->content = $parser->execute(); + Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' 
bytes'); + } + } + } + + /** + * Returns true if the parsing must be skipped + * + * @access public + * @return boolean + */ + public function skipProcessing() + { + $handlers = array( + 'detectStreamingVideos', + 'detectPdfFiles', + ); + + foreach ($handlers as $handler) { + if ($this->$handler()) { + return true; + } + } + + if (empty($this->html)) { + Logger::setMessage(get_called_class().': Raw HTML is empty'); + return true; + } + + return false; + } + + /** + * Get the parser + * + * @access public + * @return ParserInterface + */ + public function getParser() + { + $ruleLoader = new RuleLoader($this->config); + $rules = $ruleLoader->getRules($this->url); + + if (! empty($rules['grabber'])) { + + Logger::setMessage(get_called_class().': Parse content with rules'); + + foreach ($rules['grabber'] as $pattern => $rule) { + + $url = new Url($this->url); + $sub_url = $url->getFullPath(); + + if (preg_match($pattern, $sub_url)) { + Logger::setMessage(get_called_class().': Matched url '.$sub_url); + return new RuleParser($this->html, $rule); + } + } + } + else if ($this->enableCandidateParser) { + Logger::setMessage(get_called_class().': Parse content with candidates'); + return new CandidateParser($this->html); + } + + return null; + } + + /** + * Normalize encoding and strip head tag + * + * @access public + */ + public function prepareHtml() + { + $html_encoding = XmlParser::getEncodingFromMetaTag($this->html); + + $this->html = Encoding::convert($this->html, $html_encoding ?: $this->encoding); + $this->html = Filter::stripHeadTags($this->html); + + Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"'); + } + + /** + * Return the Youtube embed player and skip processing + * + * @access public + * @return boolean + */ + public function detectStreamingVideos() + { + if (preg_match("#(?<=v=|v\/|vi=|vi\/|youtu.be\/)[a-zA-Z0-9_-]{11}#", $this->url, $matches)) { + $this->content = ''; + return true; + } + + 
return false; + } + + /** + * Skip processing for PDF documents + * + * @access public + * @return boolean + */ + public function detectPdfFiles() + { + return substr($this->url, -3) === 'pdf'; + } +} diff --git a/vendor/fguillot/picofeed/picofeed b/vendor/fguillot/picofeed/picofeed index 3055ae1..b1a8d80 100755 --- a/vendor/fguillot/picofeed/picofeed +++ b/vendor/fguillot/picofeed/picofeed @@ -3,7 +3,8 @@ require_once 'vendor/autoload.php'; -use PicoFeed\Client\Grabber; +use PicoFeed\Config\Config; +use PicoFeed\Scraper\Scraper; use PicoFeed\Reader\Reader; use PicoFeed\Logging\Logger; use PicoFeed\PicoFeedException; @@ -80,13 +81,13 @@ function nofilter_item($url, $item_id) function grabber($url) { - $grabber = new Grabber($url); - $grabber->download(); - $grabber->parse(); + $grabber = new Scraper(new Config); + $grabber->setUrl($url); + $grabber->execute(); print_r(Logger::getMessages()); echo "============= CONTENT ================\n"; - echo $grabber->getContent().PHP_EOL; + echo $grabber->getRelevantContent().PHP_EOL; echo "============= FILTERED ================\n"; echo $grabber->getFilteredContent().PHP_EOL; } diff --git a/vendor/fguillot/picofeed/tests/Client/GrabberTest.php b/vendor/fguillot/picofeed/tests/Client/GrabberTest.php deleted file mode 100644 index 9b057fc..0000000 --- a/vendor/fguillot/picofeed/tests/Client/GrabberTest.php +++ /dev/null @@ -1,152 +0,0 @@ -getRulesFolders(); - $this->assertNotEmpty($dirs); - $this->assertCount(1, $dirs); - $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); - - // Custom path - $config = new Config; - $config->setGrabberRulesFolder('/foobar/rules'); - - $grabber = new Grabber(''); - $grabber->setConfig($config); - - $dirs = $grabber->getRulesFolders(); - - $this->assertNotEmpty($dirs); - $this->assertCount(2, $dirs); - $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); - $this->assertEquals('/foobar/rules', $dirs[1]); - - // No custom path with empty config object - $grabber = new Grabber(''); 
- $grabber->setConfig(new Config); - - $dirs = $grabber->getRulesFolders(); - - $this->assertNotEmpty($dirs); - $this->assertCount(1, $dirs); - $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); - } - - public function testLoadRuleFile() - { - $grabber = new Grabber(''); - $dirs = $grabber->getRulesFolders(); - - $this->assertEmpty($grabber->loadRuleFile($dirs[0], array('test'))); - $this->assertNotEmpty($grabber->loadRuleFile($dirs[0], array('test', 'xkcd.com'))); - } - - public function testGetRulesFileList() - { - $grabber = new Grabber(''); - $this->assertEquals( - array('www.google.ca', 'google.ca', '.google.ca', 'www'), - $grabber->getRulesFileList('www.google.ca') - ); - - $grabber = new Grabber(''); - $this->assertEquals( - array('google.ca', '.google.ca', 'google'), - $grabber->getRulesFileList('google.ca') - ); - - $grabber = new Grabber(''); - $this->assertEquals( - array('a.b.c.d', 'b.c.d', '.b.c.d', 'a'), - $grabber->getRulesFileList('a.b.c.d') - ); - - $grabber = new Grabber(''); - $this->assertEquals( - array('localhost'), - $grabber->getRulesFileList('localhost') - ); - } - - public function testGetRules() - { - $grabber = new Grabber('http://www.egscomics.com/index.php?id=1690'); - $this->assertNotEmpty($grabber->getRules()); - - $grabber = new Grabber('http://localhost/foobar'); - $this->assertEmpty($grabber->getRules()); - } - - /** - * @group online - */ - public function testGrabContentWithCandidates() - { - $grabber = new Grabber('http://theonion.com.feedsportal.com/c/34529/f/632231/s/309a7fe4/sc/20/l/0L0Stheonion0N0Carticles0Cobama0Ethrows0Eup0Eright0Ethere0Eduring0Esyria0Emeeting0H336850C/story01.htm'); - $grabber->download(); - $this->assertTrue($grabber->parse()); - - $grabber = new Grabber('http://www.lemonde.fr/proche-orient/article/2013/08/30/la-france-nouvelle-plus-ancienne-alliee-des-etats-unis_3469218_3218.html'); - $grabber->download(); - $this->assertTrue($grabber->parse()); - - $grabber = new 
Grabber('http://www.rue89.com/2013/08/30/faisait-boris-boillon-ex-sarko-boy-350-000-euros-gare-nord-245315'); - $grabber->download(); - $this->assertTrue($grabber->parse()); - - $grabber = new Grabber('http://www.inc.com/suzanne-lucas/why-employee-turnover-is-so-costly.html'); - $grabber->download(); - $this->assertTrue($grabber->parse()); - - $grabber = new Grabber('http://arstechnica.com/information-technology/2013/08/sysadmin-security-fail-nsa-finds-snowden-hijacked-officials-logins/'); - $grabber->download(); - $this->assertTrue($grabber->parse()); - } - - /** - * @group online - */ - public function testGetRules_afterRedirection() - { - $grabber = new Grabber('http://rss.feedsportal.com/c/629/f/502199/s/422f8c8a/sc/44/l/0L0S0A1net0N0Ceditorial0C640A3130Cces0E20A150Eimprimer0Eune0Epizza0Eet0Edes0Ebiscuits0Evideo0C0T0Dxtor0FRSS0E16/story01.htm'); - $grabber->download(); - $this->assertTrue(is_array($grabber->getRules())); - } - - /** - * @group online - */ - public function testGrabContent() - { - $grabber = new Grabber('http://www.egscomics.com/index.php?id=1690'); - $grabber->download(); - $this->assertTrue($grabber->parse()); - - $this->assertEquals('', $grabber->getContent()); - } - - /** - * @group online - */ - public function testRssGrabContent() - { - $reader = new Reader; - $client = $reader->download('http://www.egscomics.com/rss.php'); - $parser = $reader->getParser($client->getUrl(), $client->getContent(), $client->getEncoding()); - $parser->enableContentGrabber(); - $feed = $parser->execute(); - - $this->assertTrue(is_array($feed->items)); - $this->assertTrue(strpos($feed->items[0]->content, '= 0); - } -} diff --git a/vendor/fguillot/picofeed/tests/Filter/AttributeFilterTest.php b/vendor/fguillot/picofeed/tests/Filter/AttributeFilterTest.php index bf7dd22..5dcfda1 100644 --- a/vendor/fguillot/picofeed/tests/Filter/AttributeFilterTest.php +++ b/vendor/fguillot/picofeed/tests/Filter/AttributeFilterTest.php @@ -128,6 +128,24 @@ class 
AttributeFilterTest extends PHPUnit_Framework_TestCase $this->assertEquals(array('src' => 'https://www.youtube.com/test'), $filter->filter('iframe', array('src' => '//www.youtube.com/test'))); } + public function testRemoveYouTubeAutoplay() + { + $filter = new Attribute(new Url('http://google.com')); + $urls = array( + 'https://www.youtube.com/something/?autoplay=1' => 'https://www.youtube.com/something/?autoplay=0', + 'https://www.youtube.com/something/?test=s&autoplay=1&a=2' => 'https://www.youtube.com/something/?test=s&autoplay=0&a=2', + 'https://www.youtube.com/something/?test=s' => 'https://www.youtube.com/something/?test=s', + 'https://youtube.com/something/?autoplay=1' => 'https://youtube.com/something/?autoplay=0', + 'https://youtube.com/something/?test=s&autoplay=1&a=2' => 'https://youtube.com/something/?test=s&autoplay=0&a=2', + 'https://youtube.com/something/?test=s' => 'https://youtube.com/something/?test=s', + ); + + foreach ($urls as $before => $after) { + $filter->removeYouTubeAutoplay('iframe', 'src', $before); + $this->assertEquals($after, $before); + } + } + public function testFilterBlacklistAttribute() { $filter = new Attribute(new Url('http://google.com')); diff --git a/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php b/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php index 03aeb33..9149e59 100644 --- a/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php +++ b/vendor/fguillot/picofeed/tests/Filter/HtmlFilterTest.php @@ -23,6 +23,22 @@ class HtmlFilterTest extends PHPUnit_Framework_TestCase $this->assertEquals('

boo
foo.

', $filter->execute()); } + public function testFilterRules() + { + $html = '

' . + '559 – The Cookie

'. + 'I always throw up in hindsight if I find out something I ate was vegan. Twogag’s super free but if you want to support the comic look no further than the Twogag patreon!'; + + $filter = new Html($html, 'http://www.twogag.com/'); + + $expected = '

' . + '559 – The Cookie

'. + 'I always throw up in hindsight if I find out something I ate was vegan. Twogag’s super free but if you want to support the comic look no further than the Twogag patreon!'; + + $this->assertEquals($expected, $filter->execute()); + } + + public function testIframe() { $data = ''; @@ -148,4 +164,10 @@ x-amz-id-2: DDjqfqz2ZJufzqRAcj1mh+9XvSogrPohKHwXlo8IlkzH67G6w4wnjn9HYgbs4uI0 $f = new Html('
', 'http://blabla'); $this->assertEquals('', $f->execute()); } + + public function testRemoveMultipleTags() + { + $f = new Html('

toto




momo





', 'http://blabla'); + $this->assertEquals('

toto


momo


', $f->execute()); + } } diff --git a/vendor/fguillot/picofeed/tests/Reader/ReaderTest.php b/vendor/fguillot/picofeed/tests/Reader/ReaderTest.php index 821c6b9..55abe53 100644 --- a/vendor/fguillot/picofeed/tests/Reader/ReaderTest.php +++ b/vendor/fguillot/picofeed/tests/Reader/ReaderTest.php @@ -1,9 +1,9 @@ getRulesFolders(); + $this->assertNotEmpty($dirs); + $this->assertCount(1, $dirs); + $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); + + // Custom path + $config = new Config; + $config->setGrabberRulesFolder('/foobar/rules'); + + $loader = new RuleLoader($config); + + $dirs = $loader->getRulesFolders(); + + $this->assertNotEmpty($dirs); + $this->assertCount(2, $dirs); + $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); + $this->assertEquals('/foobar/rules', $dirs[1]); + + // No custom path with empty config object + $loader = new RuleLoader(new Config); + + $dirs = $loader->getRulesFolders(); + + $this->assertNotEmpty($dirs); + $this->assertCount(1, $dirs); + $this->assertTrue(strpos($dirs[0], '/../Rules') !== false); + } + + public function testLoadRuleFile() + { + $loader = new RuleLoader(new Config); + $dirs = $loader->getRulesFolders(); + + $this->assertEmpty($loader->loadRuleFile($dirs[0], array('test'))); + $this->assertNotEmpty($loader->loadRuleFile($dirs[0], array('test', 'xkcd.com'))); + } + + public function testGetRulesFileList() + { + $loader = new RuleLoader(new Config); + $this->assertEquals( + array('www.google.ca', 'google.ca', '.google.ca', 'www'), + $loader->getRulesFileList('www.google.ca') + ); + + $loader = new RuleLoader(new Config); + $this->assertEquals( + array('google.ca', '.google.ca', 'google'), + $loader->getRulesFileList('google.ca') + ); + + $loader = new RuleLoader(new Config); + $this->assertEquals( + array('a.b.c.d', 'b.c.d', '.b.c.d', 'a'), + $loader->getRulesFileList('a.b.c.d') + ); + + $loader = new RuleLoader(new Config); + $this->assertEquals( + array('localhost'), + 
$loader->getRulesFileList('localhost') + ); + } + + public function testGetRules() + { + $loader = new RuleLoader(new Config); + $this->assertNotEmpty($loader->getRules('http://www.egscomics.com/index.php?id=1690')); + + $loader = new RuleLoader(new Config); + $this->assertEmpty($loader->getRules('http://localhost/foobar')); + } +} diff --git a/vendor/fguillot/picofeed/tests/Scraper/ScraperTest.php b/vendor/fguillot/picofeed/tests/Scraper/ScraperTest.php new file mode 100644 index 0000000..9e4df5e --- /dev/null +++ b/vendor/fguillot/picofeed/tests/Scraper/ScraperTest.php @@ -0,0 +1,89 @@ +setUrl('http://theonion.com.feedsportal.com/c/34529/f/632231/s/309a7fe4/sc/20/l/0L0Stheonion0N0Carticles0Cobama0Ethrows0Eup0Eright0Ethere0Eduring0Esyria0Emeeting0H336850C/story01.htm'); + $grabber->execute(); + $this->assertTrue($grabber->hasRelevantContent()); + + $grabber = new Scraper(new Config); + $grabber->setUrl('http://www.lemonde.fr/proche-orient/article/2013/08/30/la-france-nouvelle-plus-ancienne-alliee-des-etats-unis_3469218_3218.html'); + $grabber->execute(); + $this->assertTrue($grabber->hasRelevantContent()); + + $grabber = new Scraper(new Config); + $grabber->setUrl('http://www.inc.com/suzanne-lucas/why-employee-turnover-is-so-costly.html'); + $grabber->execute(); + $this->assertTrue($grabber->hasRelevantContent()); + + $grabber = new Scraper(new Config); + $grabber->setUrl('http://arstechnica.com/information-technology/2013/08/sysadmin-security-fail-nsa-finds-snowden-hijacked-officials-logins/'); + $grabber->execute(); + $this->assertTrue($grabber->hasRelevantContent()); + + $grabber = new Scraper(new Config); + $grabber->disableCandidateParser(); + $grabber->setUrl('http://linuxfr.org/news/grammalecte-correcteur-grammatical'); + $grabber->execute(); + $this->assertFalse($grabber->hasRelevantContent()); + } + + /** + * @group online + */ + public function testRuleParser() + { + $grabber = new Scraper(new Config); + 
$grabber->setUrl('http://www.egscomics.com/index.php?id=1690'); + $grabber->execute(); + $this->assertTrue($grabber->hasRelevantContent()); + + $this->assertEquals('', $grabber->getRelevantContent()); + } + + /** + * @group online + */ + public function testGrabContentRegex() + { + $grabber = new Scraper(new Config); + $grabber->setUrl('http://penny-arcade.com/comic/2015/04/13/101-part-one'); + $grabber->execute(); + + $this->assertTrue($grabber->hasRelevantContent()); + $this->assertEquals('101, Part One', $grabber->getRelevantContent()); + + $grabber->setUrl('http://penny-arcade.com/news/post/2015/04/15/101-part-two'); + $grabber->execute(); + + $this->assertTrue($grabber->hasRelevantContent()); + $this->assertContains('101, Part Two', $grabber->getRelevantContent()); + } + + /** + * @group online + */ + public function testRssGrabContent() + { + $reader = new Reader; + $client = $reader->download('http://www.egscomics.com/rss.php'); + $parser = $reader->getParser($client->getUrl(), $client->getContent(), $client->getEncoding()); + $parser->enableContentGrabber(); + $feed = $parser->execute(); + + $this->assertTrue(is_array($feed->items)); + $this->assertTrue(strpos($feed->items[0]->content, '= 0); + } +} diff --git a/vendor/fguillot/simple-validator/LICENSE b/vendor/fguillot/simple-validator/LICENSE index 750d8d7..6a362bc 100644 --- a/vendor/fguillot/simple-validator/LICENSE +++ b/vendor/fguillot/simple-validator/LICENSE @@ -1,14 +1,16 @@ -Copyright (c) 2012 Frederic Guillot +The MIT License (MIT) + +Copyright (c) 2015 Frederic Guillot Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is furnished -to do so, subject to the following conditions: 
+copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/vendor/fguillot/simple-validator/README.md b/vendor/fguillot/simple-validator/README.md index 8ead7a7..f57a7ed 100644 --- a/vendor/fguillot/simple-validator/README.md +++ b/vendor/fguillot/simple-validator/README.md @@ -22,13 +22,7 @@ Requirements Author ------ -Frédéric Guillot: [http://fredericguillot.com](http://fredericguillot.com) - - -Source code ------------ - -On Github: [https://github.com/fguillot/simpleValidator](https://github.com/fguillot/simpleValidator) +Frédéric Guillot License @@ -164,10 +158,10 @@ Example: phpunit --bootstrap vendor/autoload.php tests PHPUnit 4.4.2 by Sebastian Bergmann. - + ............ - + Time: 69 ms, Memory: 3.75Mb - + OK (12 tests, 149 assertions) diff --git a/vendor/fguillot/simple-validator/composer.json b/vendor/fguillot/simple-validator/composer.json index 532f729..b2b53be 100644 --- a/vendor/fguillot/simple-validator/composer.json +++ b/vendor/fguillot/simple-validator/composer.json @@ -6,8 +6,7 @@ "license": "MIT", "authors": [ { - "name": "Frédéric Guillot", - "homepage": "http://fredericguillot.com" + "name": "Frédéric Guillot" } ], "require": {