Update PicoFeed to version 0.1.27
This commit is contained in:
parent
533bba270b
commit
1a85a76c5c
@ -15,7 +15,7 @@
|
||||
"fguillot/simple-validator": "v1.0.0",
|
||||
"fguillot/json-rpc": "v1.2.3",
|
||||
"fguillot/picodb": "v1.0.14 ",
|
||||
"fguillot/picofeed": "v0.1.25",
|
||||
"fguillot/picofeed": "v0.1.27",
|
||||
"pda/pheanstalk": "v3.1.0",
|
||||
"ircmaxell/password-compat": "^1.0.4"
|
||||
},
|
||||
|
105
vendor/composer/installed.json
vendored
105
vendor/composer/installed.json
vendored
@ -222,56 +222,6 @@
|
||||
"password"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "fguillot/picofeed",
|
||||
"version": "v0.1.25",
|
||||
"version_normalized": "0.1.25.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "2bf5bc40361e788eda6b1bd5d444630986721e69"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/2bf5bc40361e788eda6b1bd5d444630986721e69",
|
||||
"reference": "2bf5bc40361e788eda6b1bd5d444630986721e69",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ext-dom": "*",
|
||||
"ext-iconv": "*",
|
||||
"ext-libxml": "*",
|
||||
"ext-simplexml": "*",
|
||||
"ext-xml": "*",
|
||||
"php": ">=5.3.0",
|
||||
"zendframework/zendxml": "^1.0"
|
||||
},
|
||||
"suggest": {
|
||||
"ext-curl": "PicoFeed will use cURL if present"
|
||||
},
|
||||
"time": "2016-08-30 01:33:18",
|
||||
"bin": [
|
||||
"picofeed"
|
||||
],
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
"psr-0": {
|
||||
"PicoFeed": "lib/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Frédéric Guillot"
|
||||
}
|
||||
],
|
||||
"description": "Modern library to handle RSS/Atom feeds",
|
||||
"homepage": "https://github.com/fguillot/picoFeed"
|
||||
},
|
||||
{
|
||||
"name": "fguillot/json-rpc",
|
||||
"version": "v1.2.3",
|
||||
@ -312,5 +262,60 @@
|
||||
],
|
||||
"description": "Simple Json-RPC client/server library that just works",
|
||||
"homepage": "https://github.com/fguillot/JsonRPC"
|
||||
},
|
||||
{
|
||||
"name": "fguillot/picofeed",
|
||||
"version": "v0.1.27",
|
||||
"version_normalized": "0.1.27.0",
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/fguillot/picoFeed.git",
|
||||
"reference": "41924841d3cd0480364ca9bcb90abe095d744457"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/41924841d3cd0480364ca9bcb90abe095d744457",
|
||||
"reference": "41924841d3cd0480364ca9bcb90abe095d744457",
|
||||
"shasum": ""
|
||||
},
|
||||
"require": {
|
||||
"ext-dom": "*",
|
||||
"ext-iconv": "*",
|
||||
"ext-libxml": "*",
|
||||
"ext-simplexml": "*",
|
||||
"ext-xml": "*",
|
||||
"php": ">=5.3.0",
|
||||
"zendframework/zendxml": "^1.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpdocumentor/reflection-docblock": "2.0.4",
|
||||
"phpunit/phpunit": "4.8.26",
|
||||
"symfony/yaml": "2.8.7"
|
||||
},
|
||||
"suggest": {
|
||||
"ext-curl": "PicoFeed will use cURL if present"
|
||||
},
|
||||
"time": "2016-12-26 22:25:33",
|
||||
"bin": [
|
||||
"picofeed"
|
||||
],
|
||||
"type": "library",
|
||||
"installation-source": "dist",
|
||||
"autoload": {
|
||||
"psr-0": {
|
||||
"PicoFeed": "lib/"
|
||||
}
|
||||
},
|
||||
"notification-url": "https://packagist.org/downloads/",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [
|
||||
{
|
||||
"name": "Frédéric Guillot"
|
||||
}
|
||||
],
|
||||
"description": "Modern library to handle RSS/Atom feeds",
|
||||
"homepage": "https://github.com/fguillot/picoFeed"
|
||||
}
|
||||
]
|
||||
|
@ -11,6 +11,8 @@ use PicoFeed\Logging\Logger;
|
||||
*/
|
||||
class Curl extends Client
|
||||
{
|
||||
protected $nbRedirects = 0;
|
||||
|
||||
/**
|
||||
* HTTP response body.
|
||||
*
|
||||
@ -136,6 +138,7 @@ class Curl extends Client
|
||||
|
||||
if ($this->etag) {
|
||||
$headers[] = 'If-None-Match: '.$this->etag;
|
||||
$headers[] = 'A-IM: feed';
|
||||
}
|
||||
|
||||
if ($this->last_modified) {
|
||||
@ -199,6 +202,9 @@ class Curl extends Client
|
||||
*/
|
||||
private function prepareDownloadMode($ch)
|
||||
{
|
||||
$this->body = '';
|
||||
$this->response_headers = array();
|
||||
$this->response_headers_count = 0;
|
||||
$write_function = 'readBody';
|
||||
$header_function = 'readHeaders';
|
||||
|
||||
@ -304,12 +310,11 @@ class Curl extends Client
|
||||
* Handle HTTP redirects
|
||||
*
|
||||
* @param string $location Redirected URL
|
||||
*
|
||||
* @return array
|
||||
* @throws MaxRedirectException
|
||||
*/
|
||||
private function handleRedirection($location)
|
||||
{
|
||||
$nb_redirects = 0;
|
||||
$result = array();
|
||||
$this->url = Url::resolve($location, $this->url);
|
||||
$this->body = '';
|
||||
@ -318,9 +323,9 @@ class Curl extends Client
|
||||
$this->response_headers_count = 0;
|
||||
|
||||
while (true) {
|
||||
++$nb_redirects;
|
||||
$this->nbRedirects++;
|
||||
|
||||
if ($nb_redirects >= $this->max_redirects) {
|
||||
if ($this->nbRedirects >= $this->max_redirects) {
|
||||
throw new MaxRedirectException('Maximum number of redirections reached');
|
||||
}
|
||||
|
||||
|
@ -31,6 +31,7 @@ class Stream extends Client
|
||||
|
||||
if ($this->etag) {
|
||||
$headers[] = 'If-None-Match: '.$this->etag;
|
||||
$headers[] = 'A-IM: feed';
|
||||
}
|
||||
|
||||
if ($this->last_modified) {
|
||||
@ -104,6 +105,9 @@ class Stream extends Client
|
||||
* Do the HTTP request.
|
||||
*
|
||||
* @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
|
||||
* @throws InvalidUrlException
|
||||
* @throws MaxSizeException
|
||||
* @throws TimeoutException
|
||||
*/
|
||||
public function doRequest()
|
||||
{
|
||||
|
@ -51,6 +51,7 @@ class Attribute
|
||||
'td' => array(),
|
||||
'tbody' => array(),
|
||||
'thead' => array(),
|
||||
'h1' => array(),
|
||||
'h2' => array(),
|
||||
'h3' => array(),
|
||||
'h4' => array(),
|
||||
|
@ -42,6 +42,7 @@ class Tag extends Base
|
||||
'td',
|
||||
'tbody',
|
||||
'thead',
|
||||
'h1',
|
||||
'h2',
|
||||
'h3',
|
||||
'h4',
|
||||
@ -67,6 +68,8 @@ class Tag extends Base
|
||||
'abbr',
|
||||
'iframe',
|
||||
'q',
|
||||
'sup',
|
||||
'sub',
|
||||
);
|
||||
|
||||
/**
|
||||
|
@ -13,7 +13,7 @@ class Feed
|
||||
/**
|
||||
* Feed items.
|
||||
*
|
||||
* @var array
|
||||
* @var Item[]
|
||||
*/
|
||||
public $items = array();
|
||||
|
||||
|
@ -222,18 +222,20 @@ abstract class Parser implements ParserInterface
|
||||
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
|
||||
{
|
||||
$this->findItemPublishedDate($entry, $item, $feed);
|
||||
$published = $item->getPublishedDate();
|
||||
|
||||
$this->findItemUpdatedDate($entry, $item, $feed);
|
||||
$updated = $item->getUpdatedDate();
|
||||
|
||||
if ($published === null && $updated === null) {
|
||||
$item->setDate($feed->getDate()); // We use the feed date if there is no date for the item
|
||||
} elseif ($published !== null && $updated !== null) {
|
||||
$item->setDate(max($published, $updated)); // We use the most recent date between published and updated
|
||||
} else {
|
||||
$item->setDate($updated ?: $published);
|
||||
if ($item->getPublishedDate() === null) {
|
||||
// Use the updated date if available, otherwise use the feed date
|
||||
$item->setPublishedDate($item->getUpdatedDate() ?: $feed->getDate());
|
||||
}
|
||||
|
||||
if ($item->getUpdatedDate() === null) {
|
||||
// Use the published date as fallback
|
||||
$item->setUpdatedDate($item->getPublishedDate());
|
||||
}
|
||||
|
||||
// Use the most recent of published and updated dates
|
||||
$item->setDate(max($item->getPublishedDate(), $item->getUpdatedDate()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
31
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php
vendored
Executable file
31
vendor/fguillot/picofeed/lib/PicoFeed/Rules/bigpicture.ru.php
vendored
Executable file
@ -0,0 +1,31 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://bigpicture.ru/?p=556658',
|
||||
'body' => array(
|
||||
'//div[@class="article container"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//h1',
|
||||
'//*[@class="wp-smiley"]',
|
||||
'//div[@class="ipmd"]',
|
||||
'//div[@class="tags"]',
|
||||
'//div[@class="social-button"]',
|
||||
'//div[@class="bottom-share"]',
|
||||
'//div[@class="raccoonbox"]',
|
||||
'//div[@class="yndadvert"]',
|
||||
'//div[@class="we-recommend"]',
|
||||
'//div[@class="relap-bigpicture_ru-wrapper"]',
|
||||
'//div[@id="mmail"]',
|
||||
'//div[@id="mobile-ads-cut"]',
|
||||
'//div[@id="liquidstorm-alt-html"]',
|
||||
'//div[contains(@class, "post-tags")]',
|
||||
'//*[contains(text(),"Смотрите также")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php
vendored
Executable file
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/e-w-e.ru.php
vendored
Executable file
@ -0,0 +1,22 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://e-w-e.ru/16-prekrasnyx-izobretenij-zhenshhin/',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "post_text")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="views_post"]',
|
||||
'//*[@class="adman_mobile"]',
|
||||
'//*[@class="adman_desctop"]',
|
||||
'//*[contains(@rel, "nofollow")]',
|
||||
'//*[contains(@class, "wp-smiley")]',
|
||||
'//*[contains(text(),"Источник:")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
27
vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php
vendored
Executable file
27
vendor/fguillot/picofeed/lib/PicoFeed/Rules/factroom.ru.php
vendored
Executable file
@ -0,0 +1,27 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.factroom.ru/life/20-facts-about-oil',
|
||||
'body' => array(
|
||||
'//div[@class="post"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//h1',
|
||||
'//div[@id="yandex_ad2"]',
|
||||
'//*[@class="jp-relatedposts"]',
|
||||
'//div[contains(@class, "likely-desktop")]',
|
||||
'//div[contains(@class, "likely-mobile")]',
|
||||
'//p[last()]',
|
||||
'//div[contains(@class, "facebook")]',
|
||||
'//div[contains(@class, "desktop-underpost-direct")]',
|
||||
'//div[contains(@class, "source-box")]',
|
||||
'//div[contains(@class, "under-likely-desktop")]',
|
||||
'//div[contains(@class, "mobile-down-post")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php
vendored
Executable file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/fototelegraf.ru.php
vendored
Executable file
@ -0,0 +1,19 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://fototelegraf.ru/?p=348232',
|
||||
'body' => array(
|
||||
'//div[@class="post-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//div[@class="imageButtonsBlock"]',
|
||||
'//div[@class="adOnPostBtwImg"]',
|
||||
'//div[contains(@class, "post-tags")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
@ -6,8 +6,16 @@ return array(
|
||||
'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
|
||||
'body' => array(
|
||||
'//header[@class="cluster-header"]',
|
||||
'//header[@class="paged-cluster-header"]',
|
||||
'//div[@class="formatted"]',
|
||||
),
|
||||
'next_page' => array(
|
||||
'//a[@id="atoc_next"]'
|
||||
),
|
||||
'strip' => array(
|
||||
'//header[@class="cluster-header"]/a',
|
||||
'//div[@id="iqadtile4"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php
vendored
Executable file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/gorabbit.ru.php
vendored
Executable file
@ -0,0 +1,19 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://gorabbit.ru/article/10-oshchushcheniy-za-rulem-kogda-tolko-poluchil-voditelskie-prava',
|
||||
'body' => array(
|
||||
'//div[@class="detail_text"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//div[@class="socials"]',
|
||||
'//div[@id="cr_1"]',
|
||||
'//div[@class="related_items"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php
vendored
Executable file
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/hotshowlife.com.php
vendored
Executable file
@ -0,0 +1,23 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'https://hotshowlife.com/top-10-chempionov-produktov-po-szhiganiyu-kalorij/',
|
||||
'body' => array(
|
||||
'//div[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//div[@class="ads2"]',
|
||||
'//div[@class="mistape_caption"]',
|
||||
'//div[contains(@class, "et_social_media_hidden")]',
|
||||
'//div[contains(@class, "et_social_inline_bottom")]',
|
||||
'//div[contains(@class, "avatar")]',
|
||||
'//ul[contains(@class, "entry-tags")]',
|
||||
'//div[contains(@class, "entry-meta")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php
vendored
Executable file
19
vendor/fguillot/picofeed/lib/PicoFeed/Rules/justcoolidea.ru.php
vendored
Executable file
@ -0,0 +1,19 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://justcoolidea.ru/idealnyj-sad-samodelnye-proekty-dlya-berezhlivogo-domovladeltsa/',
|
||||
'body' => array(
|
||||
'//section[@class="entry-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[contains(@class, "essb_links")]',
|
||||
'//*[contains(@rel, "nofollow")]',
|
||||
'//*[contains(@class, "ads")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php
vendored
Normal file
23
vendor/fguillot/picofeed/lib/PicoFeed/Rules/legorafi.fr.php
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => array(
|
||||
'http://www.legorafi.fr/2016/12/16/gorafi-magazine-bravo-vous-avez-bientot-presque-survecu-a-2016/',
|
||||
'http://www.legorafi.fr/2016/12/15/manuel-valls-promet-quune-fois-elu-il-debarrassera-la-france-de-manuel-valls/',
|
||||
),
|
||||
'body' => array(
|
||||
'//section[@id="banner_magazine"]',
|
||||
'//figure[@class="main_picture"]',
|
||||
'//div[@class="content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//figcaption',
|
||||
'//div[@class="sharebox"]',
|
||||
'//div[@class="tags"]',
|
||||
'//section[@class="taboola_article"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php
vendored
Executable file
22
vendor/fguillot/picofeed/lib/PicoFeed/Rules/lifehacker.ru.php
vendored
Executable file
@ -0,0 +1,22 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://lifehacker.ru/2016/03/03/polymail/',
|
||||
'body' => array(
|
||||
'//div[@class="post-content"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="wp-thumbnail-caption"]',
|
||||
'//*[contains(@class, "social-likes")]',
|
||||
'//*[@class="jp-relatedposts"]',
|
||||
'//*[contains(@class, "wpappbox")]',
|
||||
'//*[contains(@class, "icon__image")]',
|
||||
'//div[@id="hypercomments_widget"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php
vendored
Normal file
14
vendor/fguillot/picofeed/lib/PicoFeed/Rules/monandroid.com.php
vendored
Normal file
@ -0,0 +1,14 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.monandroid.com/blog/tutoriel-avance-activer-le-stockage-fusionne-sur-android-6-marshamallow-t12.html',
|
||||
'body' => array(
|
||||
'//div[@class="blog-post-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
@ -3,7 +3,7 @@
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
|
||||
'test_url' => 'http://www.monwindows.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
|
||||
'body' => array(
|
||||
'//div[@class="blog-post-body"]',
|
||||
),
|
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php
vendored
Executable file
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/moya-planeta.ru.php
vendored
Executable file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.moya-planeta.ru/travel/view/chto_yaponcu_horosho_russkomu_ne_ponyat_20432/',
|
||||
'body' => array(
|
||||
'//div[@class="full_object"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//div[@class="full_object_panel object_panel"]',
|
||||
'//div[@class="full_object_panel_geo object_panel"]',
|
||||
'//div[@class="full_object_title"]',
|
||||
'//div[@class="full_object_social_likes"]',
|
||||
'//div[@class="full_object_planeta_likes"]',
|
||||
'//div[@class="full_object_go2comments"]',
|
||||
'//div[@id="yandex_ad_R-163191-3"]',
|
||||
'//div[@class="full_object_shop_article_recommend"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php
vendored
Executable file
11
vendor/fguillot/picofeed/lib/PicoFeed/Rules/nat-geo.ru.php
vendored
Executable file
@ -0,0 +1,11 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.nat-geo.ru/fact/868093-knidos-antichnyy-naukograd/',
|
||||
'body' => array(
|
||||
'//div[@class="article-inner-text"]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php
vendored
Executable file
24
vendor/fguillot/picofeed/lib/PicoFeed/Rules/publy.ru.php
vendored
Executable file
@ -0,0 +1,24 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.publy.ru/post/19988',
|
||||
'body' => array(
|
||||
'//div[@class="singlepost"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="featured"]',
|
||||
'//*[@class="toc_white no_bullets"]',
|
||||
'//*[@class="toc_title"]',
|
||||
'//*[@class="pba"]',
|
||||
'//*[@class="comments"]',
|
||||
'//*[contains(@class, "g-single")]',
|
||||
'//*[@class="ts-fab-wrapper"]',
|
||||
'//*[contains(@class, "wp_rp_wrap")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
@ -1,9 +1,15 @@
|
||||
<?php
|
||||
|
||||
return array(
|
||||
'filter' => array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'%(<img.+)(\.png"/>)%' => '$1$2$1after$2',
|
||||
'test_url' => 'http://www.smbc-comics.com/comic/the-troll-toll',
|
||||
'body' => array(
|
||||
'//div[@id="cc-comicbody"]',
|
||||
'//div[@id="aftercomic"]',
|
||||
),
|
||||
'strip' => array(
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
||||
|
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php
vendored
Executable file
21
vendor/fguillot/picofeed/lib/PicoFeed/Rules/takprosto.cc.php
vendored
Executable file
@ -0,0 +1,21 @@
|
||||
<?php
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://takprosto.cc/kokteyl-dlya-pohudeniya-v-domashnih-usloviyah/',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "entry-contentt")]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//script',
|
||||
'//form',
|
||||
'//style',
|
||||
'//*[@class="views_post"]',
|
||||
'//*[contains(@class, "mailchimp-box")]',
|
||||
'//*[contains(@class, "essb_links")]',
|
||||
'//*[contains(@rel, "nofollow")]',
|
||||
'//*[contains(@class, "ads")]',
|
||||
),
|
||||
),
|
||||
),
|
||||
);
|
@ -2,20 +2,16 @@
|
||||
return array(
|
||||
'grabber' => array(
|
||||
'%.*%' => array(
|
||||
'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban',
|
||||
'test_url' => 'www.thelocal.se/20161219/this-swede-can-memorize-hundreds-of-numbers-in-only-five-minutes',
|
||||
'body' => array(
|
||||
'//article',
|
||||
'//div[@id="article-photo"]',
|
||||
'//div[@id="article-description"]',
|
||||
'//div[@id="article-body"]',
|
||||
),
|
||||
'strip' => array(
|
||||
'//p[@id="mobile-signature"]',
|
||||
'//article/div[4]',
|
||||
'//article/ul[1]',
|
||||
'//div[@class="clr"]',
|
||||
'//p[@class="small"]',
|
||||
'//p[@style="font-weight: bold; font-size: 14px;"]',
|
||||
'//div[@class="author"]',
|
||||
'//div[@class="ad_container"]',
|
||||
'//div[@id="article-info-middle"]',
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
|
@ -243,6 +243,16 @@ class CandidateParser implements ParserInterface
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Find link for next page of the article.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function findNextLink()
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return false if the node should not be removed.
|
||||
*
|
||||
|
@ -10,4 +10,11 @@ interface ParserInterface
|
||||
* @return string
|
||||
*/
|
||||
public function execute();
|
||||
|
||||
/**
|
||||
* Find link for next page of the article.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function findNextLink();
|
||||
}
|
||||
|
@ -65,7 +65,6 @@ class RuleParser implements ParserInterface
|
||||
public function findContent()
|
||||
{
|
||||
$content = '';
|
||||
|
||||
if (isset($this->rules['body']) && is_array($this->rules['body'])) {
|
||||
foreach ($this->rules['body'] as $pattern) {
|
||||
$nodes = $this->xpath->query($pattern);
|
||||
@ -80,4 +79,24 @@ class RuleParser implements ParserInterface
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch next link based on Xpath rules.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function findNextLink()
|
||||
{
|
||||
if (isset($this->rules['next_page']) && is_array($this->rules['next_page'])) {
|
||||
foreach ($this->rules['next_page'] as $pattern) {
|
||||
$nodes = $this->xpath->query($pattern);
|
||||
if ($nodes !== false && $nodes->length > 0) {
|
||||
foreach ($nodes as $node) {
|
||||
return $node->getAttribute('href');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -206,19 +206,31 @@ class Scraper extends Base
|
||||
/**
|
||||
* Execute the scraper.
|
||||
*/
|
||||
public function execute()
|
||||
public function execute($pageContent = '', $recursionDepth = 0)
|
||||
{
|
||||
$this->content = '';
|
||||
$this->html = '';
|
||||
$this->encoding = '';
|
||||
|
||||
$this->content = '';
|
||||
$this->download();
|
||||
$this->prepareHtml();
|
||||
|
||||
$parser = $this->getParser();
|
||||
|
||||
if ($parser !== null) {
|
||||
$this->content = $parser->execute();
|
||||
$maxRecursions = $this->config->getMaxRecursions();
|
||||
if(!isset($maxRecursions)){
|
||||
$maxRecursions = 25;
|
||||
}
|
||||
$pageContent .= $parser->execute();
|
||||
// check if there is a link to next page and recursively get content (max 25 pages)
|
||||
if((($nextLink = $parser->findNextLink()) !== null) && $recursionDepth < $maxRecursions){
|
||||
$nextLink = Url::resolve($nextLink,$this->url);
|
||||
$this->setUrl($nextLink);
|
||||
$this->execute($pageContent,$recursionDepth+1);
|
||||
}
|
||||
else{
|
||||
$this->content = $pageContent;
|
||||
}
|
||||
Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes');
|
||||
}
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ class Rss20Helper
|
||||
* @param DOMElement $element
|
||||
* @param string $tag
|
||||
* @param string $value
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildNode(DOMElement $element, $tag, $value)
|
||||
{
|
||||
@ -52,7 +52,7 @@ class Rss20Helper
|
||||
* @access public
|
||||
* @param DOMElement $element
|
||||
* @param string $title
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildTitle(DOMElement $element, $title)
|
||||
{
|
||||
@ -66,7 +66,7 @@ class Rss20Helper
|
||||
* @param DOMElement $element
|
||||
* @param DateTime $date
|
||||
* @param string $type
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildDate(DOMElement $element, DateTime $date, $type = 'pubDate')
|
||||
{
|
||||
@ -79,7 +79,7 @@ class Rss20Helper
|
||||
* @access public
|
||||
* @param DOMElement $element
|
||||
* @param string $url
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildLink(DOMElement $element, $url)
|
||||
{
|
||||
@ -94,7 +94,7 @@ class Rss20Helper
|
||||
* @param string $tag
|
||||
* @param string $authorName
|
||||
* @param string $authorEmail
|
||||
* @return AtomHelper
|
||||
* @return $this
|
||||
*/
|
||||
public function buildAuthor(DOMElement $element, $tag, $authorName, $authorEmail)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user