Add new content downloader rules

This commit is contained in:
Frédéric Guillot 2013-08-31 19:05:19 -04:00
parent 242234c0a0
commit 3b8d62a237
3 changed files with 20 additions and 0 deletions

View File

@ -333,11 +333,15 @@ Don't forget to send a pull request or a ticket to share your contribution with
- *.blog.nytimes.com - *.blog.nytimes.com
- *.nytimes.com - *.nytimes.com
- *.slate.com - *.slate.com
- *.wikipedia.org
- *.wsj.com - *.wsj.com
- rue89.com - rue89.com
- smallhousebliss.com
- techcrunch.com
- www.bbc.co.uk - www.bbc.co.uk
- www.cnn.com - www.cnn.com
- www.egscomics.com - www.egscomics.com
- www.lemonde.fr - www.lemonde.fr
- www.numerama.com - www.numerama.com
- www.slate.fr - www.slate.fr
- www.theguardian.com

View File

@ -0,0 +1,15 @@
<?php
// Content downloader rule set for smallhousebliss.com.
// The file defines no symbols: it only returns a plain array, so whatever
// include/require call loads it receives the array as its return value.
// Every string inside 'body' and 'strip' is an XPath expression.
return array(
// Sample article on the target site.
// NOTE(review): presumably used to check the rules against a real page -- confirm with the consumer.
'test_url' => 'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/',
// XPath expression(s) selecting the element that holds the article content.
'body' => array(
'//div[@class="single-entry-content"]',
),
// XPath expressions for unwanted nodes.
// NOTE(review): presumably these nodes are removed from the selected body by the consumer -- confirm.
'strip' => array(
'//style',
'//script',
// contains() is a substring test on the whole class attribute, so each
// pattern below also matches longer class names containing the word
// (for example "gallery" matches "gallery-item").
'//*[contains(@class, "gallery")]',
'//*[contains(@class, "share")]',
'//*[contains(@class, "wpcnt")]',
'//*[contains(@class, "entry-meta")]',
)
);

View File

@ -8,6 +8,7 @@ return array(
'//script', '//script',
'//form', '//form',
'//style', '//style',
'//*[@class="warning"]',
'//*[@class="story-date"]', '//*[@class="story-date"]',
'//*[@class="story-header"]', '//*[@class="story-header"]',
'//*[@class="story-related"]', '//*[@class="story-related"]',