Add content grabber rules and improve filtering

This commit is contained in:
Frédéric Guillot 2013-09-08 18:29:27 -04:00
parent 517ac8dcf0
commit 2348f316f4
5 changed files with 43 additions and 8 deletions

View File

@ -14,13 +14,15 @@ Features
- Import/Export of OPML feeds - Import/Export of OPML feeds
- Feed updates via a cronjob or with the user interface with one click - Feed updates via a cronjob or with the user interface with one click
- Protected by a login/password (only one possible user) - Protected by a login/password (only one possible user)
- Use secure headers (only external images and Youtube/Vimeo videos are allowed) - Use secure headers (only external images and Youtube/Vimeo/Dailymotion videos are allowed)
- Open external links inside a new tab with a `rel="noreferrer"` attribute - Open external links inside a new tab with a `rel="noreferrer"` attribute
- Mobile CSS (responsive design) - Mobile CSS (responsive design)
- Keyboard shortcuts (pressing '?' displays a pop-up listing the shortcuts; pressing 'q' closes it) - Keyboard shortcuts (pressing '?' displays a pop-up listing the shortcuts; pressing 'q' closes it)
- Basic bookmarks - Basic bookmarks
- Translated in English, French, German, Italian, Czech and Simplified Chinese - Translated in English, French, German, Italian, Czech and Simplified Chinese
- Themes - Themes support
- Alternative login with a Google Account or Mozilla Persona
- **Full article download for feeds that display only a summary** (website scraper based on XPath rules)
Todo and known bugs Todo and known bugs
------------------- -------------------
@ -335,12 +337,15 @@ Don't forget to send a pull request or a ticket to share your contribution with
- *.slate.com - *.slate.com
- *.wikipedia.org - *.wikipedia.org
- *.wsj.com - *.wsj.com
- github.com
- lifehacker.com
- rue89.com - rue89.com
- smallhousebliss.com - smallhousebliss.com
- techcrunch.com - techcrunch.com
- www.bbc.co.uk - www.bbc.co.uk
- www.cnn.com - www.cnn.com
- www.egscomics.com - www.egscomics.com
- www.forbes.com
- www.lemonde.fr - www.lemonde.fr
- www.numerama.com - www.numerama.com
- www.slate.fr - www.slate.fr

View File

@ -563,6 +563,11 @@ section li {
list-style-type: square; list-style-type: square;
} }
video,
iframe {
max-width: 98%;
}
/* mobile design */ /* mobile design */
@media only screen and (max-width: 480px) { @media only screen and (max-width: 480px) {
@ -632,9 +637,4 @@ section li {
section li { section li {
font-size: 0.85em; font-size: 0.85em;
} }
video,
iframe {
max-width: 98%;
}
} }

View File

@ -306,7 +306,7 @@ class Filter
$content = preg_replace('!\s+!', ' ', $content); $content = preg_replace('!\s+!', ' ', $content);
} }
if (! $this->strip_content && trim($content) !== '') { if (! $this->strip_content) {
$this->data .= htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); $this->data .= htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
} }
} }

View File

@ -0,0 +1,14 @@
<?php
// Content grabber rules for lifehacker.com.
//
// Returns an array of XPath expressions. Judging by the key names, 'body'
// presumably selects the nodes that make up the article and 'strip' the
// nodes to remove from it -- NOTE(review): confirm against the grabber code.
return array(
    // Sample article URL for this rule set.
    'test_url' => 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973',
    'body' => array(
        // The predicate must be closed with "]" before the "/img" step;
        // without it the expression is not valid XPath and never matches.
        '//div[contains(@class, "row")]/img',
        '//div[contains(@class, "content-column")]',
    ),
    'strip' => array(
        '//*[contains(@class, "meta")]',
        '//span[contains(@class, "icon")]',
        '//h1',
        '//aside',
    )
);

View File

@ -0,0 +1,16 @@
<?php
// Content grabber rules for www.forbes.com.
//
// Returns an array of XPath expressions. Judging by the key names, 'body'
// presumably selects the nodes that make up the article and 'strip' the
// nodes to remove from it -- NOTE(review): confirm against the grabber code.
return array(
    // Sample article URL for this rule set.
    'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/',
    'body' => array(
        // "body" must be a quoted string literal. Unquoted, XPath treats it
        // as a child-element node-set whose string value is empty, so
        // contains() is always true and every child div is selected.
        '//div[@id="leftRail"]/div[contains(@class, "body")]',
    ),
    'strip' => array(
        '//aside',
        '//div[contains(@class, "entity_block")]',
        // not() is a function in XPath 1.0; "not contains(...)" is a syntax error.
        '//div[contains(@class, "vestpocket") and not(contains(@class, "body"))]',
        '//div[contains(@style, "display")]',
        '//div[contains(@id, "comment")]',
        '//div[contains(@class, "widget")]',
        '//div[contains(@class, "pagination")]',
    )
);