Add new content grabber rules

This commit is contained in:
Frédéric Guillot 2013-09-30 22:15:18 -04:00
parent accf789395
commit bc2b5e7c3d
6 changed files with 56 additions and 2 deletions

View File

@ -344,6 +344,7 @@ Don't forget to send a pull request or a ticket to share your contribution with
- *.blog.lemonde.fr
- *.blog.nytimes.com
- *.nytimes.com
- *.phoronix.com
- *.slate.com
- *.theguardian.com
- *.wikipedia.org
@ -351,6 +352,9 @@ Don't forget to send a pull request or a ticket to share your contribution with
- *.wsj.com
- github.com
- lifehacker.com
- lists.*
- medium.com
- pastebin.com
- plus.google.com
- rue89.com
- smallhousebliss.com

View File

@ -25,6 +25,8 @@ class Grabber
'post_content',
'entry-content',
'main-content',
'story_content',
'storycontent',
'entryBox',
'entrytext',
'comic',
@ -46,7 +48,12 @@ class Grabber
'nav',
'header',
'social',
'tag',
'metadata',
'entry-utility',
'related-posts',
'tweet',
'categories',
);
public $stripTags = array(
@ -56,6 +63,7 @@ class Grabber
'header',
'footer',
'aside',
'form',
);
@ -114,8 +122,14 @@ class Grabber
$hostname = parse_url($this->url, PHP_URL_HOST);
$files = array($hostname);
if (substr($hostname, 0, 4) == 'www.') $files[] = substr($hostname, 4);
if (($pos = strpos($hostname, '.')) !== false) $files[] = substr($hostname, $pos);
if (substr($hostname, 0, 4) == 'www.') {
$files[] = substr($hostname, 4);
}
if (($pos = strpos($hostname, '.')) !== false) {
$files[] = substr($hostname, $pos);
$files[] = substr($hostname, 0, $pos);
}
foreach ($files as $file) {

View File

@ -0,0 +1,9 @@
<?php

// Content grabber rule for Phoronix articles.
//
// test_url: sample article URL for this rule.
// body:     XPath expression(s) matching the <article class="KonaBody"> element
//           (presumably the node the grabber keeps as content; confirm in Grabber).
// strip:    XPath expression(s) presumably removed from the matched content;
//           none are defined for this site.
return array(
    'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
    'body' => array('//article[@class="KonaBody"]'),
    'strip' => array(),
);

9
vendor/PicoFeed/Rules/lists.php vendored Normal file
View File

@ -0,0 +1,9 @@
<?php

// Content grabber rule for mailing-list archive pages (rule file "lists").
//
// test_url: sample pipermail archive page on lists.freebsd.org.
// body:     XPath expression(s) matching every <pre> element
//           (presumably the node the grabber keeps as content; confirm in Grabber).
// strip:    XPath expression(s) presumably removed from the matched content;
//           none are defined for this rule.
return array(
    'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html',
    'body' => array('//pre'),
    'strip' => array(),
);

9
vendor/PicoFeed/Rules/medium.com.php vendored Normal file
View File

@ -0,0 +1,9 @@
<?php

// Content grabber rule for medium.com posts.
//
// test_url: sample post URL for this rule.
// body:     XPath expression(s) matching <div> elements whose class attribute
//           contains the substring "post-field body"
//           (presumably the node the grabber keeps as content; confirm in Grabber).
// strip:    XPath expression(s) presumably removed from the matched content;
//           none are defined for this site.
return array(
    'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e',
    'body' => array('//div[contains(@class, "post-field body")]'),
    'strip' => array(),
);

View File

@ -0,0 +1,9 @@
<?php

// Content grabber rule for pastebin.com pastes.
//
// test_url: sample paste URL for this rule.
// body:     XPath expression(s) matching <div> elements whose class attribute
//           is exactly "text"
//           (presumably the node the grabber keeps as content; confirm in Grabber).
// strip:    XPath expression(s) presumably removed from the matched content;
//           none are defined for this site.
return array(
    'test_url' => 'http://pastebin.com/ed1pP9Ak',
    'body' => array('//div[@class="text"]'),
    'strip' => array(),
);