Update grabber rule for igen.fr

This commit is contained in:
Frédéric Guillot 2014-05-26 18:49:52 -04:00
parent c6f5606070
commit 0146e96fcf
4 changed files with 17 additions and 4 deletions

View File

@@ -261,9 +261,12 @@ class Grabber
if (($pos = strpos($hostname, '.')) !== false) { if (($pos = strpos($hostname, '.')) !== false) {
$files[] = substr($hostname, $pos); $files[] = substr($hostname, $pos);
$files[] = substr($hostname, $pos + 1);
$files[] = substr($hostname, 0, $pos); $files[] = substr($hostname, 0, $pos);
} }
// Logging::setMessage(var_export($files, true));
foreach ($files as $file) { foreach ($files as $file) {
$filename = __DIR__.'/Rules/'.$file.'.php'; $filename = __DIR__.'/Rules/'.$file.'.php';
@@ -285,6 +288,7 @@ class Grabber
*/ */
public function parseContentWithRules(array $rules) public function parseContentWithRules(array $rules)
{ {
// Logging::setMessage($this->html);
$dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$this->html); $dom = XmlParser::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$this->html);
$xpath = new DOMXPath($dom); $xpath = new DOMXPath($dom);

View File

@@ -235,8 +235,8 @@ class Reader
$xpath = new DOMXPath($dom); $xpath = new DOMXPath($dom);
$queries = array( $queries = array(
"//link[@type='application/atom+xml']", '//link[@type="application/rss+xml"]',
"//link[@type='application/rss+xml']" '//link[@type="application/atom+xml"]',
); );
foreach ($queries as $query) { foreach ($queries as $query) {

View File

@@ -1,8 +1,8 @@
<?php <?php
return array( return array(
'test_url' => 'http://www.igen.fr/iphone/ios-7-cree-des-milliers-de-requetes-fantomes-sur-le-web-110130', 'test_url' => 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031',
'body' => array( 'body' => array(
'//*[starts-with(@id, "news")]/*[contains(@class, "content")]' '//div[contains(@class, "field-name-body")]'
), ),
'strip' => array( 'strip' => array(
), ),

9
vendor/PicoFeed/Rules/macg.co.php vendored Normal file
View File

@@ -0,0 +1,9 @@
<?php
// Grabber rule set for macg.co, stored under vendor/PicoFeed/Rules/.
// The file returns a plain array keyed by rule type.
return array(
    // Sample article URL for this site (single slash between host and path).
    'test_url' => 'http://www.macg.co/logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205',
    // XPath expressions selecting the node(s) that contain the article body.
    'body' => array(
        '//div[contains(@class, "field-name-body")]'
    ),
    // NOTE(review): presumably XPath expressions for nodes to remove from the
    // extracted body; none are defined for this site -- confirm against Grabber.
    'strip' => array(
    ),
);