From bc2b5e7c3d3538eb397dcc99f627092e4b703add Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Mon, 30 Sep 2013 22:15:18 -0400 Subject: [PATCH] Add new content grabber rules --- README.markdown | 4 ++++ vendor/PicoFeed/Grabber.php | 18 ++++++++++++++++-- vendor/PicoFeed/Rules/.phoronix.com.php | 9 +++++++++ vendor/PicoFeed/Rules/lists.php | 9 +++++++++ vendor/PicoFeed/Rules/medium.com.php | 9 +++++++++ vendor/PicoFeed/Rules/pastebin.com.php | 9 +++++++++ 6 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 vendor/PicoFeed/Rules/.phoronix.com.php create mode 100644 vendor/PicoFeed/Rules/lists.php create mode 100644 vendor/PicoFeed/Rules/medium.com.php create mode 100644 vendor/PicoFeed/Rules/pastebin.com.php diff --git a/README.markdown b/README.markdown index cf1723d..8f5429a 100644 --- a/README.markdown +++ b/README.markdown @@ -344,6 +344,7 @@ Don't forget to send a pull request or a ticket to share your contribution with - *.blog.lemonde.fr - *.blog.nytimes.com - *.nytimes.com +- *.phoronix.com - *.slate.com - *.theguardian.com - *.wikipedia.org @@ -351,6 +352,9 @@ Don't forget to send a pull request or a ticket to share your contribution with - *.wsj.com - github.com - lifehacker.com +- lists.* +- medium.com +- pastebin.com - plus.google.com - rue89.com - smallhousebliss.com diff --git a/vendor/PicoFeed/Grabber.php b/vendor/PicoFeed/Grabber.php index 5483d82..32a6a9a 100644 --- a/vendor/PicoFeed/Grabber.php +++ b/vendor/PicoFeed/Grabber.php @@ -25,6 +25,8 @@ class Grabber 'post_content', 'entry-content', 'main-content', + 'story_content', + 'storycontent', 'entryBox', 'entrytext', 'comic', @@ -46,7 +48,12 @@ class Grabber 'nav', 'header', 'social', + 'tag', + 'metadata', 'entry-utility', + 'related-posts', + 'tweet', + 'categories', ); public $stripTags = array( @@ -56,6 +63,7 @@ class Grabber 'header', 'footer', 'aside', + 'form', ); @@ -114,8 +122,14 @@ class Grabber $hostname = parse_url($this->url, PHP_URL_HOST); 
$files = array($hostname); - if (substr($hostname, 0, 4) == 'www.') $files[] = substr($hostname, 4); - if (($pos = strpos($hostname, '.')) !== false) $files[] = substr($hostname, $pos); + if (substr($hostname, 0, 4) == 'www.') { + $files[] = substr($hostname, 4); + } + + if (($pos = strpos($hostname, '.')) !== false) { + $files[] = substr($hostname, $pos); + $files[] = substr($hostname, 0, $pos); + } foreach ($files as $file) { diff --git a/vendor/PicoFeed/Rules/.phoronix.com.php b/vendor/PicoFeed/Rules/.phoronix.com.php new file mode 100644 index 0000000..0d10eff --- /dev/null +++ b/vendor/PicoFeed/Rules/.phoronix.com.php @@ -0,0 +1,9 @@ +<?php +return array( + 'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1', + 'body' => array( + '//article[@class="KonaBody"]', + ), + 'strip' => array( + ) +); diff --git a/vendor/PicoFeed/Rules/lists.php b/vendor/PicoFeed/Rules/lists.php new file mode 100644 index 0000000..fb9c8d0 --- /dev/null +++ b/vendor/PicoFeed/Rules/lists.php @@ -0,0 +1,9 @@ +<?php +return array( + 'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html', + 'body' => array( + '//pre', + ), + 'strip' => array( + ) +); diff --git a/vendor/PicoFeed/Rules/medium.com.php b/vendor/PicoFeed/Rules/medium.com.php new file mode 100644 index 0000000..79ed5bc --- /dev/null +++ b/vendor/PicoFeed/Rules/medium.com.php @@ -0,0 +1,9 @@ +<?php +return array( + 'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e', + 'body' => array( + '//div[contains(@class, "post-field body")]', + ), + 'strip' => array( + ) +); diff --git a/vendor/PicoFeed/Rules/pastebin.com.php b/vendor/PicoFeed/Rules/pastebin.com.php new file mode 100644 index 0000000..9a576f7 --- /dev/null +++ b/vendor/PicoFeed/Rules/pastebin.com.php @@ -0,0 +1,9 @@ +<?php +return array( + 'test_url' => 'http://pastebin.com/ed1pP9Ak', + 'body' => array( + '//div[@class="text"]', + ), + 'strip' => array( + ) +);