Add new content grabber rules

This commit is contained in:
Frédéric Guillot 2014-02-15 19:31:22 -05:00
parent 33ab9d7fb6
commit 2486498d67
6 changed files with 49 additions and 5 deletions

View File

@ -152,10 +152,11 @@ class Filter
// Iframe source whitelist, everything else is ignored.
// NOTE(review): the YouTube and Vimeo origins are listed both with and
// without a trailing slash; presumably only one form is needed - confirm.
// NOTE(review): if entries are matched as plain string prefixes of the
// iframe src (the matching code is not visible here - confirm), a slash-less
// entry such as 'http://www.youtube.com' would also accept a host like
// 'http://www.youtube.com.example.org/'; a trailing slash would prevent that.
public static $iframe_whitelist = array(
'http://www.youtube.com/',
'https://www.youtube.com/',
'http://player.vimeo.com/',
'https://player.vimeo.com/',
'//www.youtube.com',
'http://www.youtube.com',
'https://www.youtube.com',
'http://player.vimeo.com',
'https://player.vimeo.com',
'http://www.dailymotion.com',
'https://www.dailymotion.com',
);

View File

@ -92,7 +92,7 @@ class Grabber
$this->html = Encoding::toUTF8($this->html);
}
Logging::log(\get_called_class().' Try to find rules');
Logging::log(\get_called_class().' Content length: '.strlen($this->html).' bytes');
$rules = $this->getRules();
if (is_array($rules)) {
@ -147,6 +147,7 @@ class Grabber
$filename = __DIR__.'/Rules/'.$file.'.php';
if (file_exists($filename)) {
Logging::log(\get_called_class().' Load rule: '.$file);
return include $filename;
}
}

11
vendor/PicoFeed/Rules/.igen.fr.php vendored Normal file
View File

@ -0,0 +1,11 @@
<?php
// Content grabber rule set for igen.fr. The file only returns an array, so
// the including code receives the rules as the value of its include.
// NOTE(review): the meaning of each key is inferred from its name and value;
// confirm against the code that consumes these rules.
return array(
// Sample article URL on this site (presumably used to try the rule out).
'test_url' => 'http://www.igen.fr/iphone/ios-7-cree-des-milliers-de-requetes-fantomes-sur-le-web-110130',
// XPath, presumably selecting the article content: any <div> whose id
// attribute contains "news".
'body' => array(
'//div[contains(@id, "news")]',
),
// XPath, presumably selecting nodes to drop from the content: any element
// whose class attribute contains "submitted" or "clear-block".
'strip' => array(
'//*[contains(@class, "submitted")]',
'//*[contains(@class, "clear-block")]',
),
);

9
vendor/PicoFeed/Rules/consomac.fr.php vendored Normal file
View File

@ -0,0 +1,9 @@
<?php
// Content grabber rule set for consomac.fr. The file only returns an array,
// so the including code receives the rules as the value of its include.
// NOTE(review): the meaning of each key is inferred from its name and value;
// confirm against the code that consumes these rules.
return array(
// Sample article URL on this site (presumably used to try the rule out).
'test_url' => 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html',
// XPath, presumably selecting the article content: any <div> whose id
// attribute contains "newscontent".
'body' => array(
'//div[contains(@id, "newscontent")]',
),
// Deliberately empty here: no expressions are listed for this site.
'strip' => array(
),
);

View File

@ -0,0 +1,11 @@
<?php
// Content grabber rule set for the site named in test_url (www.bdgest.com).
// The file only returns an array, so the including code receives the rules
// as the value of its include.
// NOTE(review): the meaning of each key is inferred from its name and value;
// confirm against the code that consumes these rules.
return array(
// Sample article URL on this site (presumably used to try the rule out).
'test_url' => 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html',
// XPath, presumably selecting the article content: any element whose class
// attribute contains "chronique".
'body' => array(
'//*[contains(@class, "chronique")]',
),
// XPath, presumably selecting nodes to drop from the content: any element
// whose class attribute contains "post-review" or "footer-review".
'strip' => array(
'//*[contains(@class, "post-review")]',
'//*[contains(@class, "footer-review")]',
),
);

View File

@ -0,0 +1,11 @@
<?php
// Content grabber rule set for the site named in test_url
// (www.universfreebox.com). The file only returns an array, so the including
// code receives the rules as the value of its include.
// NOTE(review): the meaning of each key is inferred from its name and value;
// confirm against the code that consumes these rules.
return array(
// Sample article URL on this site (presumably used to try the rule out).
'test_url' => 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go',
// XPath, presumably selecting the article content: the <div> whose id is
// exactly "corps_corps".
'body' => array(
'//div[@id="corps_corps"]'
),
// XPath, presumably selecting nodes to drop from the content: the elements
// whose id is exactly "formulaire" or "commentaire".
'strip' => array(
'//*[@id="formulaire"]',
'//*[@id="commentaire"]',
),
);