diff --git a/composer.json b/composer.json index 605f731..ddcc2a9 100644 --- a/composer.json +++ b/composer.json @@ -15,7 +15,7 @@ "fguillot/simple-validator": "v1.0.0", "fguillot/json-rpc": "v1.2.3", "fguillot/picodb": "v1.0.14 ", - "fguillot/picofeed": "v0.1.25", + "fguillot/picofeed": "v0.1.27", "pda/pheanstalk": "v3.1.0", "ircmaxell/password-compat": "^1.0.4" }, diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json index 3ef15b5..afc6135 100644 --- a/vendor/composer/installed.json +++ b/vendor/composer/installed.json @@ -222,56 +222,6 @@ "password" ] }, - { - "name": "fguillot/picofeed", - "version": "v0.1.25", - "version_normalized": "0.1.25.0", - "source": { - "type": "git", - "url": "https://github.com/fguillot/picoFeed.git", - "reference": "2bf5bc40361e788eda6b1bd5d444630986721e69" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/2bf5bc40361e788eda6b1bd5d444630986721e69", - "reference": "2bf5bc40361e788eda6b1bd5d444630986721e69", - "shasum": "" - }, - "require": { - "ext-dom": "*", - "ext-iconv": "*", - "ext-libxml": "*", - "ext-simplexml": "*", - "ext-xml": "*", - "php": ">=5.3.0", - "zendframework/zendxml": "^1.0" - }, - "suggest": { - "ext-curl": "PicoFeed will use cURL if present" - }, - "time": "2016-08-30 01:33:18", - "bin": [ - "picofeed" - ], - "type": "library", - "installation-source": "dist", - "autoload": { - "psr-0": { - "PicoFeed": "lib/" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Frédéric Guillot" - } - ], - "description": "Modern library to handle RSS/Atom feeds", - "homepage": "https://github.com/fguillot/picoFeed" - }, { "name": "fguillot/json-rpc", "version": "v1.2.3", @@ -312,5 +262,60 @@ ], "description": "Simple Json-RPC client/server library that just works", "homepage": "https://github.com/fguillot/JsonRPC" + }, + { + "name": "fguillot/picofeed", + "version": "v0.1.27", + "version_normalized": "0.1.27.0", + "source": { + "type": "git", + "url": "https://github.com/fguillot/picoFeed.git", + "reference": "41924841d3cd0480364ca9bcb90abe095d744457" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/41924841d3cd0480364ca9bcb90abe095d744457", + "reference": "41924841d3cd0480364ca9bcb90abe095d744457", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-iconv": "*", + "ext-libxml": "*", + "ext-simplexml": "*", + "ext-xml": "*", + "php": ">=5.3.0", + "zendframework/zendxml": "^1.0" + }, + "require-dev": { + "phpdocumentor/reflection-docblock": "2.0.4", + "phpunit/phpunit": "4.8.26", + "symfony/yaml": "2.8.7" + }, + "suggest": { + "ext-curl": "PicoFeed will use cURL if present" + }, + "time": "2016-12-26 22:25:33", + "bin": [ + "picofeed" + ], + "type": "library", + "installation-source": "dist", + "autoload": { + "psr-0": { + "PicoFeed": "lib/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Frédéric Guillot" + } + ], + "description": "Modern library to handle RSS/Atom feeds", + "homepage": "https://github.com/fguillot/picoFeed" } ] diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php index dfbb024..09957ff 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php @@ -11,6 +11,8 @@ use PicoFeed\Logging\Logger; */ class Curl extends Client { + protected $nbRedirects = 0; + /** * HTTP response body. * @@ -136,6 +138,7 @@ class Curl extends Client if ($this->etag) { $headers[] = 'If-None-Match: '.$this->etag; + $headers[] = 'A-IM: feed'; } if ($this->last_modified) { @@ -199,6 +202,9 @@ class Curl extends Client */ private function prepareDownloadMode($ch) { + $this->body = ''; + $this->response_headers = array(); + $this->response_headers_count = 0; $write_function = 'readBody'; $header_function = 'readHeaders'; @@ -304,12 +310,11 @@ class Curl extends Client * Handle HTTP redirects * * @param string $location Redirected URL - * * @return array + * @throws MaxRedirectException */ private function handleRedirection($location) { - $nb_redirects = 0; $result = array(); $this->url = Url::resolve($location, $this->url); $this->body = ''; @@ -318,9 +323,9 @@ class Curl extends Client $this->response_headers_count = 0; while (true) { - ++$nb_redirects; + $this->nbRedirects++; - if ($nb_redirects >= $this->max_redirects) { + if ($this->nbRedirects >= $this->max_redirects) { throw new MaxRedirectException('Maximum number of redirections reached'); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php index 75a0122..2e91d47 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php @@ -31,6 +31,7 @@ class Stream extends Client if ($this->etag) { $headers[] = 'If-None-Match: '.$this->etag; + $headers[] = 'A-IM: feed'; } if ($this->last_modified) { @@ -104,6 +105,9 @@ class Stream extends Client * Do the HTTP request. * * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...] + * @throws InvalidUrlException + * @throws MaxSizeException + * @throws TimeoutException */ public function doRequest() { diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php index eea9574..f002153 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Attribute.php @@ -51,6 +51,7 @@ class Attribute 'td' => array(), 'tbody' => array(), 'thead' => array(), + 'h1' => array(), 'h2' => array(), 'h3' => array(), 'h4' => array(), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php index 5fd8d6d..84a298a 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Tag.php @@ -42,6 +42,7 @@ class Tag extends Base 'td', 'tbody', 'thead', + 'h1', 'h2', 'h3', 'h4', @@ -67,6 +68,8 @@ class Tag extends Base 'abbr', 'iframe', 'q', + 'sup', + 'sub', ); /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php index d415ad6..a56e71c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Feed.php @@ -13,7 +13,7 @@ class Feed /** * Feed items. * - * @var array + * @var Item[] */ public $items = array(); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php index a1cda83..588d93f 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Parser/Parser.php @@ -222,18 +222,20 @@ abstract class Parser implements ParserInterface public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed) { $this->findItemPublishedDate($entry, $item, $feed); - $published = $item->getPublishedDate(); - $this->findItemUpdatedDate($entry, $item, $feed); - $updated = $item->getUpdatedDate(); - if ($published === null && $updated === null) { - $item->setDate($feed->getDate()); // We use the feed date if there is no date for the item - } elseif ($published !== null && $updated !== null) { - $item->setDate(max($published, $updated)); // We use the most recent date between published and updated - } else { - $item->setDate($updated ?: $published); + if ($item->getPublishedDate() === null) { + // Use the updated date if available, otherwise use the feed date + $item->setPublishedDate($item->getUpdatedDate() ?: $feed->getDate()); } + + if ($item->getUpdatedDate() === null) { + // Use the published date as fallback + $item->setUpdatedDate($item->getPublishedDate()); + } + + // Use the most recent of published and updated dates + $item->setDate(max($item->getPublishedDate(), $item->getUpdatedDate())); } /** diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php new file mode 100755 index 0000000..55c4089 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php @@ -0,0 +1,31 @@ + array( + '%.*%' => array( + 'test_url' => 'http://bigpicture.ru/?p=556658', + 'body' => array( + '//div[@class="article container"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//h1', + '//*[@class="wp-smiley"]', + '//div[@class="ipmd"]', + '//div[@class="tags"]', + '//div[@class="social-button"]', + '//div[@class="bottom-share"]', + '//div[@class="raccoonbox"]', + '//div[@class="yndadvert"]', + '//div[@class="we-recommend"]', + '//div[@class="relap-bigpicture_ru-wrapper"]', + '//div[@id="mmail"]', + '//div[@id="mobile-ads-cut"]', + '//div[@id="liquidstorm-alt-html"]', + '//div[contains(@class, "post-tags")]', + '//*[contains(text(),"Смотрите также")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php new file mode 100755 index 0000000..8139cc9 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php @@ -0,0 +1,22 @@ + array( + '%.*%' => array( + 'test_url' => 'http://e-w-e.ru/16-prekrasnyx-izobretenij-zhenshhin/', + 'body' => array( + '//div[contains(@class, "post_text")]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="views_post"]', + '//*[@class="adman_mobile"]', + '//*[@class="adman_desctop"]', + '//*[contains(@rel, "nofollow")]', + '//*[contains(@class, "wp-smiley")]', + '//*[contains(text(),"Источник:")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php new file mode 100755 index 0000000..a572061 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php @@ -0,0 +1,27 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.factroom.ru/life/20-facts-about-oil', + 'body' => array( + '//div[@class="post"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//h1', + '//div[@id="yandex_ad2"]', + '//*[@class="jp-relatedposts"]', + '//div[contains(@class, "likely-desktop")]', + '//div[contains(@class, "likely-mobile")]', + '//p[last()]', + '//div[contains(@class, "facebook")]', + '//div[contains(@class, "desktop-underpost-direct")]', + '//div[contains(@class, "source-box")]', + '//div[contains(@class, "under-likely-desktop")]', + '//div[contains(@class, "mobile-down-post")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php new file mode 100755 index 0000000..ca2f85a --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php @@ -0,0 +1,19 @@ + array( + '%.*%' => array( + 'test_url' => 'http://fototelegraf.ru/?p=348232', + 'body' => array( + '//div[@class="post-content"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//div[@class="imageButtonsBlock"]', + '//div[@class="adOnPostBtwImg"]', + '//div[contains(@class, "post-tags")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php index ea1af87..7386827 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/golem.de.php @@ -6,7 +6,15 @@ return array( 'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html', 'body' => array( '//header[@class="cluster-header"]', + '//header[@class="paged-cluster-header"]', '//div[@class="formatted"]', + ), + 'next_page' => array( + '//a[@id="atoc_next"]' + ), + 'strip' => array( + '//header[@class="cluster-header"]/a', + '//div[@id="iqadtile4"]', ), ), ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php new file mode 100755 index 0000000..4e43248 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php @@ -0,0 +1,19 @@ + array( + '%.*%' => array( + 'test_url' => 'http://gorabbit.ru/article/10-oshchushcheniy-za-rulem-kogda-tolko-poluchil-voditelskie-prava', + 'body' => array( + '//div[@class="detail_text"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//div[@class="socials"]', + '//div[@id="cr_1"]', + '//div[@class="related_items"]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php new file mode 100755 index 0000000..faf01f3 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php @@ -0,0 +1,23 @@ + array( + '%.*%' => array( + 'test_url' => 'https://hotshowlife.com/top-10-chempionov-produktov-po-szhiganiyu-kalorij/', + 'body' => array( + '//div[@class="entry-content"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//div[@class="ads2"]', + '//div[@class="mistape_caption"]', + '//div[contains(@class, "et_social_media_hidden")]', + '//div[contains(@class, "et_social_inline_bottom")]', + '//div[contains(@class, "avatar")]', + '//ul[contains(@class, "entry-tags")]', + '//div[contains(@class, "entry-meta")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php new file mode 100755 index 0000000..089ff29 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php @@ -0,0 +1,19 @@ + array( + '%.*%' => array( + 'test_url' => 'http://justcoolidea.ru/idealnyj-sad-samodelnye-proekty-dlya-berezhlivogo-domovladeltsa/', + 'body' => array( + '//section[@class="entry-content"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[contains(@class, "essb_links")]', + '//*[contains(@rel, "nofollow")]', + '//*[contains(@class, "ads")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php new file mode 100644 index 0000000..715bde6 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php @@ -0,0 +1,23 @@ + array( + '%.*%' => array( + 'test_url' => array( + 'http://www.legorafi.fr/2016/12/16/gorafi-magazine-bravo-vous-avez-bientot-presque-survecu-a-2016/', + 'http://www.legorafi.fr/2016/12/15/manuel-valls-promet-quune-fois-elu-il-debarrassera-la-france-de-manuel-valls/', + ), + 'body' => array( + '//section[@id="banner_magazine"]', + '//figure[@class="main_picture"]', + '//div[@class="content"]', + ), + 'strip' => array( + '//figcaption', + '//div[@class="sharebox"]', + '//div[@class="tags"]', + '//section[@class="taboola_article"]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php new file mode 100755 index 0000000..bc140f6 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php @@ -0,0 +1,22 @@ + array( + '%.*%' => array( + 'test_url' => 'http://lifehacker.ru/2016/03/03/polymail/', + 'body' => array( + '//div[@class="post-content"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="wp-thumbnail-caption"]', + '//*[contains(@class, "social-likes")]', + '//*[@class="jp-relatedposts"]', + '//*[contains(@class, "wpappbox")]', + '//*[contains(@class, "icon__image")]', + '//div[@id="hypercomments_widget"]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php new file mode 100644 index 0000000..149bf9f --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php @@ -0,0 +1,14 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.monandroid.com/blog/tutoriel-avance-activer-le-stockage-fusionne-sur-android-6-marshamallow-t12.html', + 'body' => array( + '//div[@class="blog-post-body"]', + ), + 'strip' => array( + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindows.com.php similarity index 64% rename from vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php rename to vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindows.com.php index 47c93d7..060f369 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindowsphone.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/monwindows.com.php @@ -3,7 +3,7 @@ return array( 'grabber' => array( '%.*%' => array( - 'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html', + 'test_url' => 'http://www.monwindows.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html', 'body' => array( '//div[@class="blog-post-body"]', ), diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php new file mode 100755 index 0000000..dd84284 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php @@ -0,0 +1,21 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.moya-planeta.ru/travel/view/chto_yaponcu_horosho_russkomu_ne_ponyat_20432/', + 'body' => array( + '//div[@class="full_object"]', + ), + 'strip' => array( + '//div[@class="full_object_panel object_panel"]', + '//div[@class="full_object_panel_geo object_panel"]', + '//div[@class="full_object_title"]', + '//div[@class="full_object_social_likes"]', + '//div[@class="full_object_planeta_likes"]', + '//div[@class="full_object_go2comments"]', + '//div[@id="yandex_ad_R-163191-3"]', + '//div[@class="full_object_shop_article_recommend"]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php new file mode 100755 index 0000000..1a42d99 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php @@ -0,0 +1,11 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.nat-geo.ru/fact/868093-knidos-antichnyy-naukograd/', + 'body' => array( + '//div[@class="article-inner-text"]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php new file mode 100755 index 0000000..bcfeeb9 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php @@ -0,0 +1,24 @@ + array( + '%.*%' => array( + 'test_url' => 'http://www.publy.ru/post/19988', + 'body' => array( + '//div[@class="singlepost"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="featured"]', + '//*[@class="toc_white no_bullets"]', + '//*[@class="toc_title"]', + '//*[@class="pba"]', + '//*[@class="comments"]', + '//*[contains(@class, "g-single")]', + '//*[@class="ts-fab-wrapper"]', + '//*[contains(@class, "wp_rp_wrap")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smbc-comics.com.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smbc-comics.com.php index cbf6c99..e97493c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smbc-comics.com.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/smbc-comics.com.php @@ -1,9 +1,15 @@ array( + 'grabber' => array( '%.*%' => array( - '%()%' => '$1$2$1after$2', + 'test_url' => 'http://www.smbc-comics.com/comic/the-troll-toll', + 'body' => array( + '//div[@id="cc-comicbody"]', + '//div[@id="aftercomic"]', + ), + 'strip' => array( + ), ), ), ); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php new file mode 100755 index 0000000..624ef90 --- /dev/null +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php @@ -0,0 +1,21 @@ + array( + '%.*%' => array( + 'test_url' => 'http://takprosto.cc/kokteyl-dlya-pohudeniya-v-domashnih-usloviyah/', + 'body' => array( + '//div[contains(@class, "entry-contentt")]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="views_post"]', + '//*[contains(@class, "mailchimp-box")]', + '//*[contains(@class, "essb_links")]', + '//*[contains(@rel, "nofollow")]', + '//*[contains(@class, "ads")]', + ), + ), + ), +); diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php index 4e6051e..c3ec250 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Rules/thelocal.se.php @@ -2,20 +2,16 @@ return array( 'grabber' => array( '%.*%' => array( - 'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban', + 'test_url' => 'www.thelocal.se/20161219/this-swede-can-memorize-hundreds-of-numbers-in-only-five-minutes', 'body' => array( - '//article', + '//div[@id="article-photo"]', + '//div[@id="article-description"]', + '//div[@id="article-body"]', ), 'strip' => array( - '//p[@id="mobile-signature"]', - '//article/div[4]', - '//article/ul[1]', - '//div[@class="clr"]', - '//p[@class="small"]', - '//p[@style="font-weight: bold; font-size: 14px;"]', - '//div[@class="author"]', - '//div[@class="ad_container"]', + '//div[@id="article-info-middle"]', ) ) ) ); + diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php index 802b01b..6c53a28 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/CandidateParser.php @@ -243,6 +243,16 @@ class CandidateParser implements ParserInterface } } + /** + * Find link for next page of the article. + * + * @return string + */ + public function findNextLink() + { + return null; + } + /** * Return false if the node should not be removed. * diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php index a33fefc..3ded4b1 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/ParserInterface.php @@ -10,4 +10,11 @@ interface ParserInterface * @return string */ public function execute(); + + /** + * Find link for next page of the article. + * + * @return string + */ + public function findNextLink(); } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php index 95665bf..9beb59c 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/RuleParser.php @@ -65,7 +65,6 @@ class RuleParser implements ParserInterface public function findContent() { $content = ''; - if (isset($this->rules['body']) && is_array($this->rules['body'])) { foreach ($this->rules['body'] as $pattern) { $nodes = $this->xpath->query($pattern); @@ -80,4 +79,24 @@ class RuleParser implements ParserInterface return $content; } + + /** + * Fetch next link based on Xpath rules. + * + * @return string + */ + public function findNextLink() + { + if (isset($this->rules['next_page']) && is_array($this->rules['next_page'])) { + foreach ($this->rules['next_page'] as $pattern) { + $nodes = $this->xpath->query($pattern); + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + return $node->getAttribute('href'); + } + } + } + } + return null; + } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php index 980a88d..e5b9817 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Scraper/Scraper.php @@ -206,19 +206,31 @@ class Scraper extends Base /** * Execute the scraper. */ - public function execute() + public function execute($pageContent = '', $recursionDepth = 0) { - $this->content = ''; $this->html = ''; $this->encoding = ''; - + $this->content = ''; $this->download(); $this->prepareHtml(); $parser = $this->getParser(); if ($parser !== null) { - $this->content = $parser->execute(); + $maxRecursions = $this->config->getMaxRecursions(); + if(!isset($maxRecursions)){ + $maxRecursions = 25; + } + $pageContent .= $parser->execute(); + // check if there is a link to next page and recursively get content (max 25 pages) + if((($nextLink = $parser->findNextLink()) !== null) && $recursionDepth < $maxRecursions){ + $nextLink = Url::resolve($nextLink,$this->url); + $this->setUrl($nextLink); + $this->execute($pageContent,$recursionDepth+1); + } + else{ + $this->content = $pageContent; + } Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes'); } } diff --git a/vendor/fguillot/picofeed/lib/PicoFeed/Syndication/Rss20Helper.php b/vendor/fguillot/picofeed/lib/PicoFeed/Syndication/Rss20Helper.php index c99688a..72a19e5 100644 --- a/vendor/fguillot/picofeed/lib/PicoFeed/Syndication/Rss20Helper.php +++ b/vendor/fguillot/picofeed/lib/PicoFeed/Syndication/Rss20Helper.php @@ -36,7 +36,7 @@ class Rss20Helper * @param DOMElement $element * @param string $tag * @param string $value - * @return AtomHelper + * @return $this */ public function buildNode(DOMElement $element, $tag, $value) { @@ -52,7 +52,7 @@ class Rss20Helper * @access public * @param DOMElement $element * @param string $title - * @return AtomHelper + * @return $this */ public function buildTitle(DOMElement $element, $title) { @@ -66,7 +66,7 @@ class Rss20Helper * @param DOMElement $element * @param DateTime $date * @param string $type - * @return AtomHelper + * @return $this */ public function buildDate(DOMElement $element, DateTime $date, $type = 'pubDate') { @@ -79,7 +79,7 @@ class Rss20Helper * @access public * @param DOMElement $element * @param string $url - * @return AtomHelper + * @return $this */ public function buildLink(DOMElement $element, $url) { @@ -94,7 +94,7 @@ class Rss20Helper * @param string $tag * @param string $authorName * @param string $authorEmail - * @return AtomHelper + * @return $this */ public function buildAuthor(DOMElement $element, $tag, $authorName, $authorEmail) {