From 2486498d677b75e6612bb03cacd7d9228c9c5dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sat, 15 Feb 2014 19:31:22 -0500 Subject: [PATCH] Add new content grabber rules --- vendor/PicoFeed/Filter.php | 9 +++++---- vendor/PicoFeed/Grabber.php | 3 ++- vendor/PicoFeed/Rules/.igen.fr.php | 11 +++++++++++ vendor/PicoFeed/Rules/consomac.fr.php | 9 +++++++++ vendor/PicoFeed/Rules/www.bdgest.com.php | 11 +++++++++++ vendor/PicoFeed/Rules/www.universfreebox.com.php | 11 +++++++++++ 6 files changed, 49 insertions(+), 5 deletions(-) create mode 100644 vendor/PicoFeed/Rules/.igen.fr.php create mode 100644 vendor/PicoFeed/Rules/consomac.fr.php create mode 100644 vendor/PicoFeed/Rules/www.bdgest.com.php create mode 100644 vendor/PicoFeed/Rules/www.universfreebox.com.php diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php index ebda1ca..ae80f34 100644 --- a/vendor/PicoFeed/Filter.php +++ b/vendor/PicoFeed/Filter.php @@ -152,10 +152,11 @@ class Filter // Iframe source whitelist, everything else is ignored public static $iframe_whitelist = array( - 'http://www.youtube.com/', - 'https://www.youtube.com/', - 'http://player.vimeo.com/', - 'https://player.vimeo.com/', + '//www.youtube.com', + 'http://www.youtube.com', + 'https://www.youtube.com', + 'http://player.vimeo.com', + 'https://player.vimeo.com', 'http://www.dailymotion.com', 'https://www.dailymotion.com', ); diff --git a/vendor/PicoFeed/Grabber.php b/vendor/PicoFeed/Grabber.php index cfb6c98..dde60d8 100644 --- a/vendor/PicoFeed/Grabber.php +++ b/vendor/PicoFeed/Grabber.php @@ -92,7 +92,7 @@ class Grabber $this->html = Encoding::toUTF8($this->html); } - Logging::log(\get_called_class().' Try to find rules'); + Logging::log(\get_called_class().' Content length: '.strlen($this->html).' bytes'); $rules = $this->getRules(); if (is_array($rules)) { @@ -147,6 +147,7 @@ class Grabber $filename = __DIR__.'/Rules/'.$file.'.php'; if (file_exists($filename)) { + Logging::log(\get_called_class().' Load rule: '.$file); return include $filename; } } diff --git a/vendor/PicoFeed/Rules/.igen.fr.php b/vendor/PicoFeed/Rules/.igen.fr.php new file mode 100644 index 0000000..c86e0c2 --- /dev/null +++ b/vendor/PicoFeed/Rules/.igen.fr.php @@ -0,0 +1,11 @@ + 'http://www.igen.fr/iphone/ios-7-cree-des-milliers-de-requetes-fantomes-sur-le-web-110130', + 'body' => array( + '//div[contains(@id, "news")]', + ), + 'strip' => array( + '//*[contains(@class, "submitted")]', + '//*[contains(@class, "clear-block")]', + ), +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/consomac.fr.php b/vendor/PicoFeed/Rules/consomac.fr.php new file mode 100644 index 0000000..99a358f --- /dev/null +++ b/vendor/PicoFeed/Rules/consomac.fr.php @@ -0,0 +1,9 @@ + 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html', + 'body' => array( + '//div[contains(@id, "newscontent")]', + ), + 'strip' => array( + ), +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.bdgest.com.php b/vendor/PicoFeed/Rules/www.bdgest.com.php new file mode 100644 index 0000000..528ad41 --- /dev/null +++ b/vendor/PicoFeed/Rules/www.bdgest.com.php @@ -0,0 +1,11 @@ + 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html', + 'body' => array( + '//*[contains(@class, "chronique")]', + ), + 'strip' => array( + '//*[contains(@class, "post-review")]', + '//*[contains(@class, "footer-review")]', + ), +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.universfreebox.com.php b/vendor/PicoFeed/Rules/www.universfreebox.com.php new file mode 100644 index 0000000..8679a7b --- /dev/null +++ b/vendor/PicoFeed/Rules/www.universfreebox.com.php @@ -0,0 +1,11 @@ + 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go', + 'body' => array( + '//div[@id="corps_corps"]' + ), + 'strip' => array( + '//*[@id="formulaire"]', + '//*[@id="commentaire"]', + ), +); \ No newline at end of file