Add new content grabber rules
This commit is contained in:
parent
accf789395
commit
bc2b5e7c3d
@ -344,6 +344,7 @@ Don't forget to send a pull request or a ticket to share your contribution with
|
|||||||
- *.blog.lemonde.fr
|
- *.blog.lemonde.fr
|
||||||
- *.blog.nytimes.com
|
- *.blog.nytimes.com
|
||||||
- *.nytimes.com
|
- *.nytimes.com
|
||||||
|
- *.phoronix.com
|
||||||
- *.slate.com
|
- *.slate.com
|
||||||
- *.theguardian.com
|
- *.theguardian.com
|
||||||
- *.wikipedia.org
|
- *.wikipedia.org
|
||||||
@ -351,6 +352,9 @@ Don't forget to send a pull request or a ticket to share your contribution with
|
|||||||
- *.wsj.com
|
- *.wsj.com
|
||||||
- github.com
|
- github.com
|
||||||
- lifehacker.com
|
- lifehacker.com
|
||||||
|
- lists.*
|
||||||
|
- medium.com
|
||||||
|
- pastebin.com
|
||||||
- plus.google.com
|
- plus.google.com
|
||||||
- rue89.com
|
- rue89.com
|
||||||
- smallhousebliss.com
|
- smallhousebliss.com
|
||||||
|
18
vendor/PicoFeed/Grabber.php
vendored
18
vendor/PicoFeed/Grabber.php
vendored
@ -25,6 +25,8 @@ class Grabber
|
|||||||
'post_content',
|
'post_content',
|
||||||
'entry-content',
|
'entry-content',
|
||||||
'main-content',
|
'main-content',
|
||||||
|
'story_content',
|
||||||
|
'storycontent',
|
||||||
'entryBox',
|
'entryBox',
|
||||||
'entrytext',
|
'entrytext',
|
||||||
'comic',
|
'comic',
|
||||||
@ -46,7 +48,12 @@ class Grabber
|
|||||||
'nav',
|
'nav',
|
||||||
'header',
|
'header',
|
||||||
'social',
|
'social',
|
||||||
|
'tag',
|
||||||
|
'metadata',
|
||||||
'entry-utility',
|
'entry-utility',
|
||||||
|
'related-posts',
|
||||||
|
'tweet',
|
||||||
|
'categories',
|
||||||
);
|
);
|
||||||
|
|
||||||
public $stripTags = array(
|
public $stripTags = array(
|
||||||
@ -56,6 +63,7 @@ class Grabber
|
|||||||
'header',
|
'header',
|
||||||
'footer',
|
'footer',
|
||||||
'aside',
|
'aside',
|
||||||
|
'form',
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@ -114,8 +122,14 @@ class Grabber
|
|||||||
$hostname = parse_url($this->url, PHP_URL_HOST);
|
$hostname = parse_url($this->url, PHP_URL_HOST);
|
||||||
$files = array($hostname);
|
$files = array($hostname);
|
||||||
|
|
||||||
if (substr($hostname, 0, 4) == 'www.') $files[] = substr($hostname, 4);
|
if (substr($hostname, 0, 4) == 'www.') {
|
||||||
if (($pos = strpos($hostname, '.')) !== false) $files[] = substr($hostname, $pos);
|
$files[] = substr($hostname, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (($pos = strpos($hostname, '.')) !== false) {
|
||||||
|
$files[] = substr($hostname, $pos);
|
||||||
|
$files[] = substr($hostname, 0, $pos);
|
||||||
|
}
|
||||||
|
|
||||||
foreach ($files as $file) {
|
foreach ($files as $file) {
|
||||||
|
|
||||||
|
9
vendor/PicoFeed/Rules/.phoronix.com.php
vendored
Normal file
9
vendor/PicoFeed/Rules/.phoronix.com.php
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
<?php
|
||||||
|
return array(
|
||||||
|
'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
|
||||||
|
'body' => array(
|
||||||
|
'//article[@class="KonaBody"]',
|
||||||
|
),
|
||||||
|
'strip' => array(
|
||||||
|
)
|
||||||
|
);
|
9
vendor/PicoFeed/Rules/lists.php
vendored
Normal file
9
vendor/PicoFeed/Rules/lists.php
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
<?php
|
||||||
|
return array(
|
||||||
|
'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html',
|
||||||
|
'body' => array(
|
||||||
|
'//pre',
|
||||||
|
),
|
||||||
|
'strip' => array(
|
||||||
|
)
|
||||||
|
);
|
9
vendor/PicoFeed/Rules/medium.com.php
vendored
Normal file
9
vendor/PicoFeed/Rules/medium.com.php
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
<?php
|
||||||
|
return array(
|
||||||
|
'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e',
|
||||||
|
'body' => array(
|
||||||
|
'//div[contains(@class, "post-field body")]',
|
||||||
|
),
|
||||||
|
'strip' => array(
|
||||||
|
)
|
||||||
|
);
|
9
vendor/PicoFeed/Rules/pastebin.com.php
vendored
Normal file
9
vendor/PicoFeed/Rules/pastebin.com.php
vendored
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
<?php
|
||||||
|
return array(
|
||||||
|
'test_url' => 'http://pastebin.com/ed1pP9Ak',
|
||||||
|
'body' => array(
|
||||||
|
'//div[@class="text"]',
|
||||||
|
),
|
||||||
|
'strip' => array(
|
||||||
|
)
|
||||||
|
);
|
Loading…
Reference in New Issue
Block a user