From 2348f316f4275576b603df534d20773c6ea38367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sun, 8 Sep 2013 18:29:27 -0400 Subject: [PATCH] Add content grabber rules and improve filtering --- README.markdown | 9 +++++++-- assets/css/app.css | 10 +++++----- vendor/PicoFeed/Filter.php | 2 +- vendor/PicoFeed/Rules/lifehacker.com.php | 14 ++++++++++++++ vendor/PicoFeed/Rules/www.forbes.com.php | 16 ++++++++++++++++ 5 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 vendor/PicoFeed/Rules/lifehacker.com.php create mode 100644 vendor/PicoFeed/Rules/www.forbes.com.php diff --git a/README.markdown b/README.markdown index 0a56a72..6a78892 100644 --- a/README.markdown +++ b/README.markdown @@ -14,13 +14,15 @@ Features - Import/Export of OPML feeds - Feed updates via a cronjob or with the user interface with one click - Protected by a login/password (only one possible user) -- Use secure headers (only external images and Youtube/Vimeo videos are allowed) +- Use secure headers (only external images and Youtube/Vimeo/Dailymotion videos are allowed) - Open external links inside a new tab with a `rel="noreferrer"` attribute - Mobile CSS (responsive design) - Keyboard shortcuts (pressing '?' 
displays a pop-up listing the shortcuts; pressing 'q' closes it) - Basic bookmarks - Translated in English, French, German, Italian, Czech and Simplified Chinese -- Themes +- Themes support +- Alternative login with a Google Account or Mozilla Persona +- **Full article download for feeds that display only a summary** (website scraper based on Xpath rules) Todo and known bugs ------------------- @@ -335,12 +337,15 @@ Don't forget to send a pull request or a ticket to share your contribution with - *.slate.com - *.wikipedia.org - *.wsj.com +- github.com +- lifehacker.com - rue89.com - smallhousebliss.com - techcrunch.com - www.bbc.co.uk - www.cnn.com - www.egscomics.com +- www.forbes.com - www.lemonde.fr - www.numerama.com - www.slate.fr diff --git a/assets/css/app.css b/assets/css/app.css index 695ccb3..a4c7a95 100644 --- a/assets/css/app.css +++ b/assets/css/app.css @@ -563,6 +563,11 @@ section li { list-style-type: square; } +video, +iframe { + max-width: 98%; +} + /* mobile design */ @media only screen and (max-width: 480px) { @@ -632,9 +637,4 @@ section li { section li { font-size: 0.85em; } - - video, - iframe { - max-width: 98%; - } } diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php index 971e0af..7bd7025 100644 --- a/vendor/PicoFeed/Filter.php +++ b/vendor/PicoFeed/Filter.php @@ -306,7 +306,7 @@ class Filter $content = preg_replace('!\s+!', ' ', $content); } - if (! $this->strip_content && trim($content) !== '') { + if (! 
$this->strip_content) { $this->data .= htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false); } } diff --git a/vendor/PicoFeed/Rules/lifehacker.com.php b/vendor/PicoFeed/Rules/lifehacker.com.php new file mode 100644 index 0000000..9b22995 --- /dev/null +++ b/vendor/PicoFeed/Rules/lifehacker.com.php @@ -0,0 +1,14 @@ +<?php +return array( + 'test_url' => 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973', + 'body' => array( + '//div[contains(@class, "row")]/img', + '//div[contains(@class, "content-column")]', + ), + 'strip' => array( + '//*[contains(@class, "meta")]', + '//span[contains(@class, "icon")]', + '//h1', + '//aside', + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.forbes.com.php b/vendor/PicoFeed/Rules/www.forbes.com.php new file mode 100644 index 0000000..0eff7a7 --- /dev/null +++ b/vendor/PicoFeed/Rules/www.forbes.com.php @@ -0,0 +1,16 @@ +<?php +return array( + 'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/', + 'body' => array( + '//div[@id="leftRail"]/div[contains(@class, "body")]', + ), + 'strip' => array( + '//aside', + '//div[contains(@class, "entity_block")]', + '//div[contains(@class, "vestpocket") and not(contains(@class, "body"))]', + '//div[contains(@style, "display")]', + '//div[contains(@id, "comment")]', + '//div[contains(@class, "widget")]', + '//div[contains(@class, "pagination")]', + ) +); \ No newline at end of file