diff --git a/README.markdown b/README.markdown
index 44b8aa3..e2d103e 100644
--- a/README.markdown
+++ b/README.markdown
@@ -38,7 +38,8 @@ Features
 ### Awesome features
 
 - Keyboard shortcuts
-- Full article download for feeds that display only a summary (website scraper based on Xpath rules)
+- Full article download for feeds that display only a summary
+- Enclosure support (videos and podcasts)
 - Feed updates via a cronjob or with the user interface with one click
 
 ### More
diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php
index 6253bdf..bbf5b7c 100644
--- a/vendor/PicoFeed/Filter.php
+++ b/vendor/PicoFeed/Filter.php
@@ -711,16 +711,25 @@ class Filter
     }
 
     /**
-     * Strip meta tags from the HTML content
+     * Strip head tag from the HTML content
      *
      * @static
      * @access public
      * @param  string  $data  Input data
      * @return string
      */
-    public static function stripMetaTags($data)
+    public static function stripHeadTags($data)
     {
-        return preg_replace('/<meta\s.*?\/>/is', '', $data);
+        $start = strpos($data, '<head>');
+        $end = strpos($data, '</head>');
+
+        if ($start !== false && $end !== false) {
+            $before = substr($data, 0, $start);
+            $after = substr($data, $end + 7);
+            $data = $before.$after;
+        }
+
+        return $data;
     }
 
     /**
diff --git a/vendor/PicoFeed/Grabber.php b/vendor/PicoFeed/Grabber.php
index dde60d8..329d291 100644
--- a/vendor/PicoFeed/Grabber.php
+++ b/vendor/PicoFeed/Grabber.php
@@ -83,7 +83,7 @@ class Grabber
             Logging::log(\get_called_class().' Fix encoding');
             Logging::log(\get_called_class().': HTTP Encoding "'.$this->encoding.'"');
 
-            $this->html = Filter::stripMetaTags($this->html);
+            $this->html = Filter::stripHeadTags($this->html);
 
             if ($this->encoding == 'windows-1251') {
                 $this->html = Encoding::cp1251ToUtf8($this->html);
diff --git a/vendor/PicoFeed/Rules/www.nextinpact.com.php b/vendor/PicoFeed/Rules/www.nextinpact.com.php
new file mode 100644
index 0000000..fc45ef2
--- /dev/null
+++ b/vendor/PicoFeed/Rules/www.nextinpact.com.php
@@ -0,0 +1,9 @@
+<?php
+return array(
+    'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
+    'body' => array(
+        '//div[contains(@id, "actu_content")]',
+    ),
+    'strip' => array(
+    ),
+);
\ No newline at end of file
diff --git a/vendor/PicoFeed/Rules/www.pseudo-sciences.org.php b/vendor/PicoFeed/Rules/www.pseudo-sciences.org.php
index 11073a5..bfb9303 100644
--- a/vendor/PicoFeed/Rules/www.pseudo-sciences.org.php
+++ b/vendor/PicoFeed/Rules/www.pseudo-sciences.org.php
@@ -5,5 +5,8 @@ return array(
         '//div[@id="art_main"]',
     ),
     'strip' => array(
+        '//div[@id="art_print"]',
+        '//div[@id="art_chapo"]',
+        '//img[@class="puce"]',
     ),
 );