diff --git a/common.php b/common.php index d815135..78289f5 100644 --- a/common.php +++ b/common.php @@ -12,7 +12,7 @@ require 'model.php'; if (file_exists('config.php')) require 'config.php'; defined('APP_VERSION') or define('APP_VERSION', 'master'); -defined('HTTP_TIMEOUT') or define('HTTP_TIMEOUT', 10); +defined('HTTP_TIMEOUT') or define('HTTP_TIMEOUT', 20); defined('DB_FILENAME') or define('DB_FILENAME', 'data/db.sqlite'); defined('DEBUG') or define('DEBUG', true); defined('DEBUG_FILENAME') or define('DEBUG_FILENAME', 'data/debug.log'); diff --git a/index.php b/index.php index 4cad2fe..a6eaab2 100644 --- a/index.php +++ b/index.php @@ -322,6 +322,38 @@ Router\get_action('refresh-all', function() { }); +// Disable content grabber for a feed +Router\get_action('disable-grabber-feed', function() { + + $id = Request\int_param('feed_id'); + + if ($id && Model\disable_grabber_feed($id)) { + Session\flash(t('The content grabber is disabled successfully.')); + } + else { + Session\flash_error(t('Unable to disable the content grabber for this subscription.')); + } + + Response\redirect('?action=feeds'); +}); + + +// Enable content grabber for a feed +Router\get_action('enable-grabber-feed', function() { + + $id = Request\int_param('feed_id'); + + if ($id && Model\enable_grabber_feed($id)) { + Session\flash(t('The content grabber is enabled successfully.')); + } + else { + Session\flash_error(t('Unable to activate the content grabber for this subscription.')); + } + + Response\redirect('?action=feeds'); +}); + + // Confirmation box to disable a feed Router\get_action('confirm-disable-feed', function() { @@ -467,7 +499,7 @@ Router\get_action('add', function() { // Add the feed Router\post_action('add', function() { - $result = Model\import_feed(trim($_POST['url'])); + $result = Model\import_feed(trim($_POST['url']), isset($_POST['download_content']) && $_POST['download_content'] == 1); if ($result) { @@ -590,11 +622,9 @@ Router\post_action('config', function() { if ($valid) { if (Model\save_config($values)) { - Session\flash(t('Your preferences are updated.')); } else { - Session\flash_error(t('Unable to update your preferences.')); } diff --git a/locales/fr_FR/translations.php b/locales/fr_FR/translations.php index c27c684..3798320 100644 --- a/locales/fr_FR/translations.php +++ b/locales/fr_FR/translations.php @@ -1,6 +1,13 @@ 'Le téléchargement de contenu est activé avec succès.', + 'Unable to activate the content grabber for this subscription.' => 'Impossible d\'activer le téléchargement de contenu pour cet abonnement.', + 'enable full content' => 'télécharger le contenu complet', + 'disable full content' => 'désactiver le téléchargement du contenu', + 'Download full content' => 'Télécharger le contenu complet', + 'Downloading full content is slower because Miniflux grab the content from the original website. You should use that for subscriptions that display only a summary. This feature doesn\'t work with all websites.' => + 'Le téléchargement complet du contenu est plus lent car Miniflux va chercher le contenu sur le site original. Vous devriez utiliser cela uniquement pour les abonnements qui affichent seulement un résumé. Cette fonctionnalité ne marche pas avec tous les sites web.', 'No message' => 'Aucun message', 'flush messages' => 'supprimer les messages', 'API endpoint:' => 'URL de l\'API : ', diff --git a/model.php b/model.php index 6d3087b..a53c636 100644 --- a/model.php +++ b/model.php @@ -2,7 +2,6 @@ namespace Model; -require_once 'vendor/PicoFeed/Encoding.php'; require_once 'vendor/PicoFeed/Filter.php'; require_once 'vendor/PicoFeed/Client.php'; require_once 'vendor/PicoFeed/Export.php'; @@ -25,8 +24,9 @@ use PicoFeed\Reader; use PicoFeed\Export; -const DB_VERSION = 14; +const DB_VERSION = 15; const HTTP_USERAGENT = 'Miniflux - http://miniflux.net'; +const HTTP_FAKE_USERAGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36'; const LIMIT_ALL = -1; @@ -169,7 +169,7 @@ function import_feeds($content) } -function import_feed($url) +function import_feed($url, $grabber = false) { $reader = new Reader; $resource = $reader->download($url, '', '', HTTP_TIMEOUT, HTTP_USERAGENT); @@ -178,6 +178,7 @@ function import_feed($url) if ($parser !== false) { + $parser->grabber = $grabber; $feed = $parser->execute(); if ($feed === false || ! $feed->title || ! $feed->url) { @@ -193,13 +194,14 @@ function import_feed($url) $rs = $db->table('feeds')->save(array( 'title' => $feed->title, 'site_url' => $feed->url, - 'feed_url' => $reader->getUrl() + 'feed_url' => $reader->getUrl(), + 'download_content' => $grabber ? 1 : 0 )); if ($rs) { $feed_id = $db->getConnection()->getLastId(); - update_items($feed_id, $feed->items); + update_items($feed_id, $feed->items, $grabber); write_debug(); return (int) $feed_id; @@ -255,12 +257,25 @@ function update_feed($feed_id) if ($parser !== false) { - $feed = $parser->execute(); + if ($feed['download_content']) { - if ($feed !== false) { + // Don't fetch previous items, only new one + $parser->grabber_ignore_urls = \PicoTools\singleton('db') + ->table('items') + ->eq('feed_id', $feed_id) + ->findAllByColumn('url'); + + $parser->grabber = true; + $parser->grabber_timeout = HTTP_TIMEOUT; + $parser->grabber_user_agent = HTTP_FAKE_USERAGENT; + } + + $result = $parser->execute(); + + if ($result !== false) { update_feed_cache_infos($feed_id, $resource->getLastModified(), $resource->getEtag()); - update_items($feed_id, $feed->items); + update_items($feed_id, $result->items, $parser->grabber); write_debug(); return true; @@ -349,52 +364,82 @@ function update_feed_cache_infos($feed_id, $last_modified, $etag) } -function download_item($item_id) +function parse_content_with_readability($content, $url) { require_once 'vendor/Readability/Readability.php'; - - $item = get_item($item_id); - - $client = \PicoFeed\Client::create(); - $client->url = $item['url']; - $client->timeout = HTTP_TIMEOUT; - $client->user_agent = HTTP_USERAGENT; - $client->execute(); - - $content = $client->getContent(); + require_once 'vendor/PicoFeed/Encoding.php'; if (! empty($content)) { $content = \PicoFeed\Encoding::toUTF8($content); - - $readability = new \Readability($content, $item['url']); + $readability = new \Readability($content, $url); if ($readability->init()) { - - // Get relevant content - $content = $readability->getContent()->innerHTML; - - // Filter content - $filter = new \PicoFeed\Filter($content, $item['url']); - $content = $filter->execute(); - - $nocontent = (bool) get_config_value('nocontent'); - if ($nocontent === false) { - - // Save content - \PicoTools\singleton('db') - ->table('items') - ->eq('id', $item['id']) - ->save(array('content' => $content)); - } - - return array( - 'result' => true, - 'content' => $content - ); + return $readability->getContent()->innerHTML; } } + return ''; +} + + +function download_content($url) +{ + require_once 'vendor/PicoFeed/Grabber.php'; + + $client = \PicoFeed\Client::create(); + $client->url = $url; + $client->timeout = HTTP_TIMEOUT; + $client->user_agent = HTTP_FAKE_USERAGENT; + $client->execute(); + + $html = $client->getContent(); + + if (! empty($html)) { + + // Try first with PicoFeed grabber and with Readability after + $grabber = new \PicoFeed\Grabber($url); + $grabber->html = $html; + + if ($grabber->parse()) { + $content = $grabber->content; + } + + if (empty($content)) { + $content = parse_content_with_readability($html, $url); + } + + // Filter content + $filter = new \PicoFeed\Filter($content, $url); + return $filter->execute(); + } + + return ''; +} + + +function download_item($item_id) +{ + $item = get_item($item_id); + $content = download_content($item['url']); + + if (! empty($content)) { + + if (! get_config_value('nocontent')) { + + // Save content + \PicoTools\singleton('db') + ->table('items') + ->eq('id', $item['id']) + ->save(array('content' => $content)); + } + + return array( + 'result' => true, + 'content' => $content + ); + } + return array( 'result' => false, 'content' => '' @@ -427,6 +472,18 @@ function disable_feed($feed_id) } +function enable_grabber_feed($feed_id) +{ + return \PicoTools\singleton('db')->table('feeds')->eq('id', $feed_id)->save((array('download_content' => 1))); +} + + +function disable_grabber_feed($feed_id) +{ + return \PicoTools\singleton('db')->table('feeds')->eq('id', $feed_id)->save((array('download_content' => 0))); +} + + function get_items($status, $offset = null, $limit = null) { return \PicoTools\singleton('db') @@ -727,7 +784,7 @@ function autoflush() } -function update_items($feed_id, array $items) +function update_items($feed_id, array $items, $grabber = false) { $nocontent = (bool) get_config_value('nocontent'); @@ -744,6 +801,10 @@ function update_items($feed_id, array $items) // Insert only new item if ($db->table('items')->eq('id', $item->id)->count() !== 1) { + if (! $item->content && ! $nocontent && $grabber) { + $item->content = download_content($item->url); + } + $db->table('items')->save(array( 'id' => $item->id, 'title' => $item->title, diff --git a/schema.php b/schema.php index f953050..4e0de33 100644 --- a/schema.php +++ b/schema.php @@ -3,6 +3,12 @@ namespace Schema; +function version_15($pdo) +{ + $pdo->exec('ALTER TABLE feeds ADD COLUMN download_content INTEGER DEFAULT 0'); +} + + function version_14($pdo) { $pdo->exec('ALTER TABLE config ADD COLUMN feed_token TEXT DEFAULT "'.\Model\generate_token().'"'); diff --git a/templates/add.php b/templates/add.php index 2b51fe7..291f9a8 100644 --- a/templates/add.php +++ b/templates/add.php @@ -10,6 +10,8 @@
+
+

diff --git a/templates/feeds.php b/templates/feeds.php index 9c96457..460dc02 100644 --- a/templates/feeds.php +++ b/templates/feeds.php @@ -28,7 +28,7 @@ - + @@ -47,14 +47,18 @@ | - - | - | + + | - | + | - + + | + + + +

diff --git a/vendor/PicoDb/Table.php b/vendor/PicoDb/Table.php index d592eaa..e4cf018 100644 --- a/vendor/PicoDb/Table.php +++ b/vendor/PicoDb/Table.php @@ -138,6 +138,7 @@ class Table public function findAllByColumn($column) { + $this->columns = array($column); $rq = $this->db->execute($this->buildSelectQuery(), $this->values); if (false === $rq) return false; diff --git a/vendor/PicoFeed/Clients/Curl.php b/vendor/PicoFeed/Clients/Curl.php index cbb935c..103fcde 100644 --- a/vendor/PicoFeed/Clients/Curl.php +++ b/vendor/PicoFeed/Clients/Curl.php @@ -64,6 +64,8 @@ class Curl extends \PicoFeed\Client curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false); // For auto-signed certificates... curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, 'readBody')); curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders')); + curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory'); + curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory'); curl_exec($ch); Logging::log(\get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME)); diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php index c66d99f..74e9147 100644 --- a/vendor/PicoFeed/Filter.php +++ b/vendor/PicoFeed/Filter.php @@ -454,4 +454,14 @@ class Filter { return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\s*~i', '', $data); } + + + public static function stripXmlTag($data) + { + if (strpos($data, '') + 2); + } + + return $data; + } } diff --git a/vendor/PicoFeed/Grabber.php b/vendor/PicoFeed/Grabber.php new file mode 100644 index 0000000..ec1e00f --- /dev/null +++ b/vendor/PicoFeed/Grabber.php @@ -0,0 +1,241 @@ +url = $url; + } + + + public function parse() + { + if ($this->html) { + + Logging::log(\get_called_class().' HTML fetched'); + + $rules = $this->getRules(); + + \libxml_use_internal_errors(true); + $dom = new \DOMDocument; + $dom->loadHTML($this->html); + + if (is_array($rules)) { + Logging::log(\get_called_class().' Parse content with rules'); + $this->parseContentWithRules($dom, $rules); + } + else { + + Logging::log(\get_called_class().' Parse content with candidates'); + $this->parseContentWithCandidates($dom); + + if (strlen($this->content) < 50) { + Logging::log(\get_called_class().' No enought content fetched, get the full body'); + $this->content = $dom->saveXML($dom->firstChild); + } + + Logging::log(\get_called_class().' Strip garbage'); + $this->stripGarbage(); + } + } + else { + + Logging::log(\get_called_class().' No content fetched'); + } + + Logging::log(\get_called_class().' Grabber done'); + + return $this->content !== ''; + } + + + public function download($timeout = 5, $user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36') + { + $client = Client::create(); + $client->url = $this->url; + $client->timeout = $timeout; + $client->user_agent = $user_agent; + $client->execute(); + $this->html = $client->getContent(); + + return $this->html; + } + + + public function getRules() + { + $hostname = parse_url($this->url, PHP_URL_HOST); + $files = array($hostname); + + if (substr($hostname, 0, 4) == 'www.') $files[] = substr($hostname, 4); + if (($pos = strpos($hostname, '.')) !== false) $files[] = substr($hostname, $pos); + + foreach ($files as $file) { + + $filename = __DIR__.'/Rules/'.$file.'.php'; + + if (file_exists($filename)) { + return include $filename; + } + } + + return false; + } + + + public function parseContentWithRules($dom, array $rules) + { + $xpath = new \DOMXPath($dom); + + if (isset($rules['strip']) && is_array($rules['strip'])) { + + foreach ($rules['strip'] as $pattern) { + + $nodes = $xpath->query($pattern); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + } + } + } + + if (isset($rules['strip_id_or_class']) && is_array($rules['strip_id_or_class'])) { + + foreach ($rules['strip_id_or_class'] as $pattern) { + + $pattern = strtr($pattern, array("'" => '', '"' => '')); + $nodes = $xpath->query("//*[contains(@class, '$pattern') or contains(@id, '$pattern')]"); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + } + } + } + + if (isset($rules['body']) && is_array($rules['body'])) { + + foreach ($rules['body'] as $pattern) { + + $nodes = $xpath->query($pattern); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $this->content .= $dom->saveXML($node); + } + } + } + } + } + + + public function parseContentWithCandidates($dom) + { + $xpath = new \DOMXPath($dom); + + // Try to fetch
+ $nodes = $xpath->query('//article'); + + if ($nodes !== false && $nodes->length > 0) { + $this->content = $dom->saveXML($nodes->item(0)); + return; + } + + // Try to lookup in each
+ foreach ($this->candidatesAttributes as $candidate) { + + $nodes = $xpath->query('//div[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]'); + + if ($nodes !== false && $nodes->length > 0) { + $this->content = $dom->saveXML($nodes->item(0)); + return; + } + } + } + + + public function stripGarbage() + { + \libxml_use_internal_errors(true); + $dom = new \DOMDocument; + $dom->loadXML($this->content); + $xpath = new \DOMXPath($dom); + + foreach ($this->stripTags as $tag) { + + $nodes = $xpath->query('//'.$tag); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + } + } + + foreach ($this->stripAttributes as $attribute) { + + $nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]'); + + if ($nodes !== false && $nodes->length > 0) { + foreach ($nodes as $node) { + $node->parentNode->removeChild($node); + } + } + } + + $this->content = ''; + + foreach($dom->childNodes as $node) { + $this->content .= $dom->saveXML($node); + } + } +} diff --git a/vendor/PicoFeed/Parser.php b/vendor/PicoFeed/Parser.php index cb58de1..f33e745 100644 --- a/vendor/PicoFeed/Parser.php +++ b/vendor/PicoFeed/Parser.php @@ -5,6 +5,7 @@ namespace PicoFeed; require_once __DIR__.'/Logging.php'; require_once __DIR__.'/Filter.php'; require_once __DIR__.'/Encoding.php'; +require_once __DIR__.'/Grabber.php'; abstract class Parser { @@ -15,6 +16,10 @@ abstract class Parser public $title = ''; public $updated = ''; public $items = array(); + public $grabber = false; + public $grabber_ignore_urls = array(); + public $grabber_timeout = 5; + public $grabber_user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)'; abstract public function execute(); @@ -23,7 +28,7 @@ abstract class Parser public function __construct($content) { // Strip XML tag to avoid multiple encoding/decoding in next XML processing - $this->content = $this->stripXmlTag($content); + $this->content = Filter::stripXmlTag($content); // Encode everything in UTF-8 $this->content = Encoding::toUTF8($this->content); @@ -33,13 +38,19 @@ abstract class Parser } - public function filterHtml($str, $item_url) + public function filterHtml($item_content, $item_url) { $content = ''; - if ($str) { + if ($this->grabber && ! in_array($item_url, $this->grabber_ignore_urls)) { + $grabber = new Grabber($item_url); + $grabber->download($this->grabber_timeout, $this->grabber_user_agent); + $grabber->parse(); + if ($grabber->content) $item_content = $grabber->content; + } - $filter = new Filter($str, $item_url); + if ($item_content) { + $filter = new Filter($item_content, $item_url); $content = $filter->execute(); } @@ -72,17 +83,6 @@ abstract class Parser } - public function stripXmlTag($data) - { - if (strpos($data, '') + 2); - } - - return $data; - } - - // Trim whitespace from the begining, the end and inside a string and don't break utf-8 string public function stripWhiteSpace($value) { diff --git a/vendor/PicoFeed/Reader.php b/vendor/PicoFeed/Reader.php index a4720c8..c76baee 100644 --- a/vendor/PicoFeed/Reader.php +++ b/vendor/PicoFeed/Reader.php @@ -5,6 +5,7 @@ namespace PicoFeed; require_once __DIR__.'/Logging.php'; require_once __DIR__.'/Parser.php'; require_once __DIR__.'/Client.php'; +require_once __DIR__.'/Filter.php'; class Reader { @@ -59,25 +60,20 @@ class Reader $data = preg_replace('//Uis', '', $data); /* Strip Doctype: - * Doctype needs to be within the first 500 characters. (Ideally the first!) + * Doctype needs to be within the first 100 characters. (Ideally the first!) * If it's not found by then, we need to stop looking to prevent PREG * from reaching max backtrack depth and crashing. */ - $data = preg_replace('/^.{0,500}]*)>/Uis', '', $data); + $data = preg_replace('/^.{0,100}]*)>/Uis', '', $data); - // Find ') + 2); + // Find the first tag + $open_tag = strpos($data, '<'); + $close_tag = strpos($data, '>'); - // Find the first tag - $open_tag = strpos($data, '<'); - $close_tag = strpos($data, '>'); - - return substr($data, $open_tag, $close_tag); - } - - return $data; + return substr($data, $open_tag, $close_tag); } diff --git a/vendor/PicoFeed/Rules/.blog.lemonde.fr.php b/vendor/PicoFeed/Rules/.blog.lemonde.fr.php new file mode 100644 index 0000000..a6f35f5 --- /dev/null +++ b/vendor/PicoFeed/Rules/.blog.lemonde.fr.php @@ -0,0 +1,10 @@ + 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208', + 'body' => array( + '//div[@class="entry-content"]', + ), + 'strip' => array( + '//*[contains(@class, "fb-like") or contains(@class, "social")]' + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/.blogs.nytimes.com.php b/vendor/PicoFeed/Rules/.blogs.nytimes.com.php new file mode 100644 index 0000000..58673ee --- /dev/null +++ b/vendor/PicoFeed/Rules/.blogs.nytimes.com.php @@ -0,0 +1,13 @@ + '//header/h1', + 'test_url' => 'http://opinionator.blogs.nytimes.com/2011/02/03/lost-and-gone-forever/', + 'test_url' => 'http://krugman.blogs.nytimes.com/2012/09/12/a-vote-of-confidence/', + 'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/', + 'body' => array( + '//div[@class="postContent"]', + ), + 'strip' => array( + '//*[@class="shareToolsBox"]', + ), +); diff --git a/vendor/PicoFeed/Rules/.nytimes.com.php b/vendor/PicoFeed/Rules/.nytimes.com.php new file mode 100644 index 0000000..31f4d78 --- /dev/null +++ b/vendor/PicoFeed/Rules/.nytimes.com.php @@ -0,0 +1,8 @@ + 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html', + 'title' => '//h1[@class="articleHeadline"]', + 'body' => array( + '//div[@class="articleBody"]', + ), +); diff --git a/vendor/PicoFeed/Rules/.slate.com.php b/vendor/PicoFeed/Rules/.slate.com.php new file mode 100644 index 0000000..ad6f9c9 --- /dev/null +++ b/vendor/PicoFeed/Rules/.slate.com.php @@ -0,0 +1,16 @@ + 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html', + 'body' => array( + '//div[@class="sl-art-body"]', + ), + 'strip' => array( + '//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]', + '//*[@id="mys_slate_logged_in"]', + '//*[@id="sl_article_tools_myslate_bottom"]', + '//*[@id="mys_myslate"]', + '//*[@class="sl-viral-container"]', + '//*[@class="sl-art-creds-cntr"]', + '//*[@class="sl-art-ad-midflex"]', + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/.wsj.com.php b/vendor/PicoFeed/Rules/.wsj.com.php new file mode 100644 index 0000000..113feb5 --- /dev/null +++ b/vendor/PicoFeed/Rules/.wsj.com.php @@ -0,0 +1,11 @@ + 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html', + 'body' => array( + '//div[@class="articlePage"]', + ), + 'strip' => array( + '//*[@id="articleThumbnail_2"]', + '//*[@class="socialByline"]', + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/rue89.feedsportal.com.php b/vendor/PicoFeed/Rules/rue89.feedsportal.com.php new file mode 100644 index 0000000..bf3c8d8 --- /dev/null +++ b/vendor/PicoFeed/Rules/rue89.feedsportal.com.php @@ -0,0 +1,9 @@ + 'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm', + 'body' => array( + '//*[@id="article"]/div[contains(@class, "content")]', + ), + 'strip' => array( + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.bbc.co.uk.php b/vendor/PicoFeed/Rules/www.bbc.co.uk.php new file mode 100644 index 0000000..978458b --- /dev/null +++ b/vendor/PicoFeed/Rules/www.bbc.co.uk.php @@ -0,0 +1,20 @@ + 'http://www.bbc.co.uk/news/world-middle-east-23911833', + 'body' => array( + '//div[@class="story-body"]', + ), + 'strip' => array( + '//script', + '//form', + '//style', + '//*[@class="story-date"]', + '//*[@class="story-header"]', + '//*[@class="story-related"]', + '//*[contains(@class, "byline")]', + '//*[contains(@class, "story-feature")]', + '//*[@id="video-carousel-container"]', + '//*[@id="also-related-links"]', + '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.cnn.com.php b/vendor/PicoFeed/Rules/www.cnn.com.php new file mode 100644 index 0000000..4ac468e --- /dev/null +++ b/vendor/PicoFeed/Rules/www.cnn.com.php @@ -0,0 +1,8 @@ + 'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1', + 'body' => array( + '//*[contains(@class, "cnn_storypgraphtxt")]]', + '//*[contains(@class, "cnnvideo_wrapper")]]', + ), +); diff --git a/vendor/PicoFeed/Rules/www.egscomics.com.php b/vendor/PicoFeed/Rules/www.egscomics.com.php new file mode 100644 index 0000000..9c9b73f --- /dev/null +++ b/vendor/PicoFeed/Rules/www.egscomics.com.php @@ -0,0 +1,8 @@ + 'http://www.egscomics.com/index.php?id=1690', + 'title' => '/html/head/title', + 'body' => array( + '//img[@id="comic"]' + ) +); diff --git a/vendor/PicoFeed/Rules/www.lemonde.fr.php b/vendor/PicoFeed/Rules/www.lemonde.fr.php new file mode 100644 index 0000000..ce2a950 --- /dev/null +++ b/vendor/PicoFeed/Rules/www.lemonde.fr.php @@ -0,0 +1,9 @@ + 'http://www.lemonde.fr/societe/article/2013/08/30/boris-boillon-ancien-ambassadeur-de-sarkozy-arrete-avec-350-000-euros-en-liquide_3469109_3224.html', + 'body' => array( + '//div[@id="articleBody"]', + ), + 'strip' => array( + ), +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.numerama.com.php b/vendor/PicoFeed/Rules/www.numerama.com.php new file mode 100644 index 0000000..5149c69 --- /dev/null +++ b/vendor/PicoFeed/Rules/www.numerama.com.php @@ -0,0 +1,10 @@ + 'http://www.numerama.com/magazine/26857-bientot-des-robots-dans-les-cuisines-de-mcdo.html', + 'body' => array( + '//*[@id="general_content"]/table/tbody/tr/td[1]/div/div/div[6]/h2', + '//div[@id="newstext"]', + ), + 'strip' => array( + ) +); \ No newline at end of file diff --git a/vendor/PicoFeed/Rules/www.slate.fr.php b/vendor/PicoFeed/Rules/www.slate.fr.php new file mode 100644 index 0000000..d2dc500 --- /dev/null +++ b/vendor/PicoFeed/Rules/www.slate.fr.php @@ -0,0 +1,17 @@ + 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne', + 'body' => array( + '//div[@class="article_content"]', + ), + 'strip' => array( + '//script', + '//style', + '//*[@id="slate_associated_bn"]', + '//*[@id="ligatus-article"]', + '//*[@id="article_sidebar"]', + '//div[contains(@id, "reseaux")]', + '//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]', + '//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]', + ) +); \ No newline at end of file