Add new content grabber rules
This commit is contained in:
parent
accf789395
commit
bc2b5e7c3d
@@ -344,6 +344,7 @@ Don't forget to send a pull request or a ticket to share your contribution with
|
||||
- *.blog.lemonde.fr
|
||||
- *.blog.nytimes.com
|
||||
- *.nytimes.com
|
||||
- *.phoronix.com
|
||||
- *.slate.com
|
||||
- *.theguardian.com
|
||||
- *.wikipedia.org
|
||||
@@ -351,6 +352,9 @@ Don't forget to send a pull request or a ticket to share your contribution with
|
||||
- *.wsj.com
|
||||
- github.com
|
||||
- lifehacker.com
|
||||
- lists.*
|
||||
- medium.com
|
||||
- pastebin.com
|
||||
- plus.google.com
|
||||
- rue89.com
|
||||
- smallhousebliss.com
|
||||
|
18
vendor/PicoFeed/Grabber.php
vendored
18
vendor/PicoFeed/Grabber.php
vendored
@@ -25,6 +25,8 @@ class Grabber
|
||||
'post_content',
|
||||
'entry-content',
|
||||
'main-content',
|
||||
'story_content',
|
||||
'storycontent',
|
||||
'entryBox',
|
||||
'entrytext',
|
||||
'comic',
|
||||
@@ -46,7 +48,12 @@ class Grabber
|
||||
'nav',
|
||||
'header',
|
||||
'social',
|
||||
'tag',
|
||||
'metadata',
|
||||
'entry-utility',
|
||||
'related-posts',
|
||||
'tweet',
|
||||
'categories',
|
||||
);
|
||||
|
||||
public $stripTags = array(
|
||||
@@ -56,6 +63,7 @@ class Grabber
|
||||
'header',
|
||||
'footer',
|
||||
'aside',
|
||||
'form',
|
||||
);
|
||||
|
||||
|
||||
@@ -114,8 +122,14 @@ class Grabber
|
||||
$hostname = parse_url($this->url, PHP_URL_HOST);
|
||||
$files = array($hostname);
|
||||
|
||||
if (substr($hostname, 0, 4) == 'www.') $files[] = substr($hostname, 4);
|
||||
if (($pos = strpos($hostname, '.')) !== false) $files[] = substr($hostname, $pos);
|
||||
if (substr($hostname, 0, 4) == 'www.') {
|
||||
$files[] = substr($hostname, 4);
|
||||
}
|
||||
|
||||
if (($pos = strpos($hostname, '.')) !== false) {
|
||||
$files[] = substr($hostname, $pos);
|
||||
$files[] = substr($hostname, 0, $pos);
|
||||
}
|
||||
|
||||
foreach ($files as $file) {
|
||||
|
||||
|
9
vendor/PicoFeed/Rules/.phoronix.com.php
vendored
Normal file
9
vendor/PicoFeed/Rules/.phoronix.com.php
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
|
||||
'body' => array(
|
||||
'//article[@class="KonaBody"]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
);
|
9
vendor/PicoFeed/Rules/lists.php
vendored
Normal file
9
vendor/PicoFeed/Rules/lists.php
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html',
|
||||
'body' => array(
|
||||
'//pre',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
);
|
9
vendor/PicoFeed/Rules/medium.com.php
vendored
Normal file
9
vendor/PicoFeed/Rules/medium.com.php
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
return array(
|
||||
'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e',
|
||||
'body' => array(
|
||||
'//div[contains(@class, "post-field body")]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
);
|
9
vendor/PicoFeed/Rules/pastebin.com.php
vendored
Normal file
9
vendor/PicoFeed/Rules/pastebin.com.php
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
return array(
|
||||
'test_url' => 'http://pastebin.com/ed1pP9Ak',
|
||||
'body' => array(
|
||||
'//div[@class="text"]',
|
||||
),
|
||||
'strip' => array(
|
||||
)
|
||||
);
|
Loading…
Reference in New Issue
Block a user