Add content grabber rules and improve filtering

This commit is contained in:
Frédéric Guillot 2013-09-08 18:29:27 -04:00
parent 517ac8dcf0
commit 2348f316f4
5 changed files with 43 additions and 8 deletions

View File

@ -14,13 +14,15 @@ Features
- Import/Export of OPML feeds
- Feed updates via a cronjob or with the user interface with one click
- Protected by a login/password (only one possible user)
- Use secure headers (only external images and Youtube/Vimeo videos are allowed)
- Use secure headers (only external images and YouTube/Vimeo/Dailymotion videos are allowed)
- Open external links inside a new tab with a `rel="noreferrer"` attribute
- Mobile CSS (responsive design)
- Keyboard shortcuts (pressing '?' displays a pop-up listing the shortcuts; pressing 'q' closes it)
- Basic bookmarks
- Translated into English, French, German, Italian, Czech and Simplified Chinese
- Themes
- Themes support
- Alternative login with a Google Account or Mozilla Persona
- **Full article download for feeds that display only a summary** (website scraper based on Xpath rules)
Todo and known bugs
-------------------
@ -335,12 +337,15 @@ Don't forget to send a pull request or a ticket to share your contribution with
- *.slate.com
- *.wikipedia.org
- *.wsj.com
- github.com
- lifehacker.com
- rue89.com
- smallhousebliss.com
- techcrunch.com
- www.bbc.co.uk
- www.cnn.com
- www.egscomics.com
- www.forbes.com
- www.lemonde.fr
- www.numerama.com
- www.slate.fr

View File

@ -563,6 +563,11 @@ section li {
list-style-type: square;
}
video,
iframe {
max-width: 98%;
}
/* mobile design */
@media only screen and (max-width: 480px) {
@ -632,9 +637,4 @@ section li {
section li {
font-size: 0.85em;
}
video,
iframe {
max-width: 98%;
}
}

View File

@ -306,7 +306,7 @@ class Filter
$content = preg_replace('!\s+!', ' ', $content);
}
if (! $this->strip_content && trim($content) !== '') {
if (! $this->strip_content) {
$this->data .= htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
}
}

View File

@ -0,0 +1,14 @@
<?php

// Content grabber rules for lifehacker.com articles.
// NOTE(review): the 'body' and 'strip' keys are presumably read by the
// XPath-based content grabber (keep vs. remove) - confirm against the
// grabber implementation, which is not visible from this file.
return array(
    // Sample article URL matching this rule set.
    'test_url' => 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973',

    // XPath expressions for the nodes that make up the article content.
    'body' => array(
        // The predicate must be closed with "]" before the "/img" step,
        // otherwise the expression is not valid XPath and cannot be evaluated.
        '//div[contains(@class, "row")]/img',
        '//div[contains(@class, "content-column")]',
    ),

    // XPath expressions for the nodes to drop from the content.
    'strip' => array(
        '//*[contains(@class, "meta")]',
        '//span[contains(@class, "icon")]',
        '//h1',
        '//aside',
    )
);

View File

@ -0,0 +1,16 @@
<?php

// Content grabber rules for www.forbes.com articles.
// NOTE(review): the 'body' and 'strip' keys are presumably read by the
// XPath-based content grabber (keep vs. remove) - confirm against the
// grabber implementation, which is not visible from this file.
return array(
    // Sample article URL matching this rule set.
    'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/',

    // XPath expressions for the nodes that make up the article content.
    'body' => array(
        // "body" must be a quoted string literal: unquoted, XPath treats it as
        // a child-element node test, which yields an empty string when no such
        // child exists and makes contains() true for every div.
        '//div[@id="leftRail"]/div[contains(@class, "body")]',
    ),

    // XPath expressions for the nodes to drop from the content.
    'strip' => array(
        '//aside',
        '//div[contains(@class, "entity_block")]',
        // not() is an XPath function, so its argument needs parentheses.
        '//div[contains(@class, "vestpocket") and not(contains(@class, "body"))]',
        '//div[contains(@style, "display")]',
        '//div[contains(@id, "comment")]',
        '//div[contains(@class, "widget")]',
        '//div[contains(@class, "pagination")]',
    )
);