update picoFeed to f3ed9fe and use icon from atom feeds as favicon

This commit is contained in:
Mathias Kresin 2015-01-18 15:20:36 +01:00
parent 1c68dd4727
commit 7c31b9fff0
20 changed files with 557 additions and 1379 deletions

View File

@ -29,12 +29,12 @@ function store_favicon($feed_id, $link, $icon)
} }
// Download favicon // Download favicon
function fetch_favicon($feed_id, $site_url) function fetch_favicon($feed_id, $site_url, $icon_link)
{ {
if (Config\get('favicons') == 1 && ! has_favicon($feed_id)) { if (Config\get('favicons') == 1 && ! has_favicon($feed_id)) {
$favicon = new Favicon; $favicon = new Favicon;
$link = $favicon->find($site_url); $link = $favicon->find($site_url, $icon_link);
$icon = $favicon->getDataUri(); $icon = $favicon->getDataUri();
if ($icon !== '') { if ($icon !== '') {
@ -189,7 +189,7 @@ function create($url, $enable_grabber = false, $force_rtl = false)
$feed_id = $db->getConnection()->getLastId(); $feed_id = $db->getConnection()->getLastId();
Item\update_all($feed_id, $feed->getItems()); Item\update_all($feed_id, $feed->getItems());
fetch_favicon($feed_id, $feed->getSiteUrl()); fetch_favicon($feed_id, $feed->getSiteUrl(), $feed->getIcon());
Config\write_debug(); Config\write_debug();
@ -261,7 +261,7 @@ function refresh($feed_id)
update_cache($feed_id, $resource->getLastModified(), $resource->getEtag()); update_cache($feed_id, $resource->getLastModified(), $resource->getEtag());
Item\update_all($feed_id, $feed->getItems()); Item\update_all($feed_id, $feed->getItems());
fetch_favicon($feed_id, $feed->getSiteUrl()); fetch_favicon($feed_id, $feed->getSiteUrl(), $feed->getIcon());
} }
update_parsing_error($feed_id, 0); update_parsing_error($feed_id, 0);

2
vendor/autoload.php vendored
View File

@ -4,4 +4,4 @@
require_once __DIR__ . '/composer' . '/autoload_real.php'; require_once __DIR__ . '/composer' . '/autoload_real.php';
return ComposerAutoloaderInit1aea6e0e97930d7617e83f53a5287aca::getLoader(); return ComposerAutoloaderInitdd123afa0ab8d569c051c35ab70311cb::getLoader();

View File

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer // autoload_real.php @generated by Composer
class ComposerAutoloaderInit1aea6e0e97930d7617e83f53a5287aca class ComposerAutoloaderInitdd123afa0ab8d569c051c35ab70311cb
{ {
private static $loader; private static $loader;
@ -19,9 +19,9 @@ class ComposerAutoloaderInit1aea6e0e97930d7617e83f53a5287aca
return self::$loader; return self::$loader;
} }
spl_autoload_register(array('ComposerAutoloaderInit1aea6e0e97930d7617e83f53a5287aca', 'loadClassLoader'), true, true); spl_autoload_register(array('ComposerAutoloaderInitdd123afa0ab8d569c051c35ab70311cb', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader(); self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInit1aea6e0e97930d7617e83f53a5287aca', 'loadClassLoader')); spl_autoload_unregister(array('ComposerAutoloaderInitdd123afa0ab8d569c051c35ab70311cb', 'loadClassLoader'));
$map = require __DIR__ . '/autoload_namespaces.php'; $map = require __DIR__ . '/autoload_namespaces.php';
foreach ($map as $namespace => $path) { foreach ($map as $namespace => $path) {
@ -42,14 +42,14 @@ class ComposerAutoloaderInit1aea6e0e97930d7617e83f53a5287aca
$includeFiles = require __DIR__ . '/autoload_files.php'; $includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) { foreach ($includeFiles as $file) {
composerRequire1aea6e0e97930d7617e83f53a5287aca($file); composerRequiredd123afa0ab8d569c051c35ab70311cb($file);
} }
return $loader; return $loader;
} }
} }
function composerRequire1aea6e0e97930d7617e83f53a5287aca($file) function composerRequiredd123afa0ab8d569c051c35ab70311cb($file)
{ {
require $file; require $file;
} }

View File

@ -162,12 +162,12 @@
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/fguillot/picoFeed.git", "url": "https://github.com/fguillot/picoFeed.git",
"reference": "d3785fc54d0bf9d521fd85e369cc5600f66099cc" "reference": "f3ed9fef18b4cd0d25a1fd389c8a2e2370bcb51b"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d3785fc54d0bf9d521fd85e369cc5600f66099cc", "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/d3785fc54d0bf9d521fd85e369cc5600f66099cc",
"reference": "d3785fc54d0bf9d521fd85e369cc5600f66099cc", "reference": "f3ed9fef18b4cd0d25a1fd389c8a2e2370bcb51b",
"shasum": "" "shasum": ""
}, },
"require": { "require": {

View File

@ -21,6 +21,21 @@ PicoFeed will try first to find the favicon from the meta tags and fallback to t
When the HTML page is parsed, relative links and protocol relative links are converted to absolute url. When the HTML page is parsed, relative links and protocol relative links are converted to absolute url.
Download a known favicon
------------------------
It's possible to download a known favicon using the second optional parameter of Favicon::find(). The link to the favicon can be a relative or protocol relative url as well, but it has to be relative to the specified website.
If the requested favicon could not be found, the HTML of the website is parsed instead, with the fallback to the `favicon.ico` located in the website's root.
```php
use PicoFeed\Reader\Favicon;
$favicon = new Favicon;
$icon_link = $favicon->find('https://en.wikipedia.org/','https://bits.wikimedia.org/favicon/wikipedia.ico');
$icon_content = $favicon->getContent();
```
Get Favicon file type Get Favicon file type
--------------------- ---------------------

View File

@ -183,6 +183,7 @@ class Curl extends Client
curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders')); curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders'));
curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory'); curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory');
curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory'); curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory');
curl_setopt($ch, CURLOPT_SSLVERSION, 1); // Enforce TLS v1
$ch = $this->prepareProxyContext($ch); $ch = $this->prepareProxyContext($ch);
$ch = $this->prepareAuthContext($ch); $ch = $this->prepareAuthContext($ch);

View File

@ -171,6 +171,35 @@ class Grabber
return $this; return $this;
} }
/**
* Get the URL currently set for download.
*
* @access public
* @return string Value of the url property
*/
public function getUrl()
{
return $this->url;
}
/**
* Set URL to download and reset object to use for another grab.
*
* Replaces the stored URL, clears the html, content and encoding
* properties, then calls handleFiles() and handleStreamingVideos().
* The method contains no return statement.
*
* @access public
* @param string $url URL
* @return void
*/
public function setUrl($url)
{
$this->url = $url;
// Drop whatever a previous grab left behind so it cannot be mixed with the new URL
$this->html = "";
$this->content = "";
$this->encoding = "";
// NOTE(review): presumably these re-evaluate URL-dependent handling for the new URL - confirm against their definitions
$this->handleFiles();
$this->handleStreamingVideos();
}
/** /**
* Get relevant content * Get relevant content
* *
@ -284,7 +313,7 @@ class Grabber
*/ */
public function download() public function download()
{ {
if (! $this->skip_processing) { if (! $this->skip_processing && $this->url != '') {
try { try {

View File

@ -74,6 +74,18 @@ class Atom extends Parser
$feed->logo = (string) $xml->logo; $feed->logo = (string) $xml->logo;
} }
/**
* Find the feed icon
*
* Stores the string value of the feed's "icon" child element in $feed->icon.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
{
// The string cast yields '' when the feed has no icon element
$feed->icon = (string) $xml->icon;
}
/** /**
* Find the feed title * Find the feed title
* *

View File

@ -75,13 +75,21 @@ class Feed
public $language = ''; public $language = '';
/** /**
* Feed logo URL (not the same as icon) * Feed logo URL
* *
* @access public * @access public
* @var string * @var string
*/ */
public $logo = ''; public $logo = '';
/**
* Feed icon URL
*
* @access public
* @var string
*/
public $icon = '';
/** /**
* Return feed information * Return feed information
* *
@ -140,6 +148,17 @@ class Feed
return $this->logo; return $this->logo;
} }
/**
* Get the icon url
*
* @access public
* @return string
*/
public function getIcon()
{
return $this->icon;
}
/** /**
* Get feed url * Get feed url
* *

View File

@ -149,6 +149,7 @@ abstract class Parser
$this->findFeedId($xml, $feed); $this->findFeedId($xml, $feed);
$this->findFeedDate($xml, $feed); $this->findFeedDate($xml, $feed);
$this->findFeedLogo($xml, $feed); $this->findFeedLogo($xml, $feed);
$this->findFeedIcon($xml, $feed);
foreach ($this->getItemsTree($xml) as $entry) { foreach ($this->getItemsTree($xml) as $entry) {
@ -549,6 +550,15 @@ abstract class Parser
*/ */
public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed); public abstract function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed icon
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public abstract function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
/** /**
* Get the path to the items XML tree * Get the path to the items XML tree
* *

View File

@ -76,6 +76,18 @@ class Rss20 extends Parser
} }
} }
/**
* Find the feed icon
*
* This parser does not read any icon from the feed xml: the feed's icon
* is always set to an empty string.
*
* @access public
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
{
// $xml is not consulted here; the icon is unconditionally empty
$feed->icon = '';
}
/** /**
* Find the feed title * Find the feed title
* *

View File

@ -157,17 +157,21 @@ class Favicon
* *
* @access public * @access public
* @param string $website_link URL * @param string $website_link URL
* @param string $favicon_link optional URL
* @return string * @return string
*/ */
public function find($website_link) public function find($website_link, $favicon_link = '')
{ {
$website = new Url($website_link); $website = new Url($website_link);
if ($favicon_link !== '') {
$icons = array($favicon_link);
} else {
$icons = $this->extract($this->download($website->getBaseUrl('/'))->getContent()); $icons = $this->extract($this->download($website->getBaseUrl('/'))->getContent());
$icons[] = $website->getBaseUrl('/favicon.ico'); $icons[] = $website->getBaseUrl('/favicon.ico');
}
foreach ($icons as $icon_link) { foreach ($icons as $icon_link) {
$icon_link = $this->convertLink($website, new Url($icon_link)); $icon_link = $this->convertLink($website, new Url($icon_link));
$resource = $this->download($icon_link); $resource = $this->download($icon_link);
$this->content = $resource->getContent(); $this->content = $resource->getContent();
@ -175,6 +179,8 @@ class Favicon
if ($this->content !== '') { if ($this->content !== '') {
return $icon_link; return $icon_link;
} elseif ($favicon_link !== '') {
return $this->find($website_link);
} }
} }

View File

@ -1,17 +1,20 @@
<?php <?php
return array( return array(
'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/zweimal-totalschaden-nach-unfaellen-auf-eisglatten-fahrbahnen-bei-mainz-und-bei-bad-sobernheim-mit-baeumen-kollidiert_14904737.htm', 'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm',
'body' => array( 'body' => array(
'//div[contains(@class, "article")][1]', '//div[contains(@class, "article")][1]',
), ),
'strip' => array( 'strip' => array(
'//read/h1', '//read/h1',
'//*[@id="t-map"]',
'//*[contains(@class, "modules")]',
'//*[contains(@class, "adsense")]', '//*[contains(@class, "adsense")]',
'//*[contains(@class, "linkbox")]', '//*[contains(@class, "linkbox")]',
'//*[contains(@class, "info")]', '//*[contains(@class, "info")]',
'//*[@class="skip"]', '//*[@class="skip"]',
'//*[@class="funcs"]', '//*[@class="funcs"]',
'//span[@class="nd address"]',
'//a[contains(@href, "abo-und-services")]' '//a[contains(@href, "abo-und-services")]'
) )
); );

View File

@ -36,6 +36,14 @@ class GrabberTest extends PHPUnit_Framework_TestCase
$this->assertTrue(is_array($grabber->getRules())); $this->assertTrue(is_array($grabber->getRules()));
} }
// 01net.com - https://github.com/fguillot/miniflux/issues/267
public function testGetRules_afterRedirection()
{
$grabber = new Grabber('http://rss.feedsportal.com/c/629/f/502199/s/422f8c8a/sc/44/l/0L0S0A1net0N0Ceditorial0C640A3130Cces0E20A150Eimprimer0Eune0Epizza0Eet0Edes0Ebiscuits0Evideo0C0T0Dxtor0FRSS0E16/story01.htm');
$grabber->download();
$this->assertTrue(is_array($grabber->getRules()));
}
public function testGrabContent() public function testGrabContent()
{ {
$grabber = new Grabber('http://www.egscomics.com/index.php?id=1690'); $grabber = new Grabber('http://www.egscomics.com/index.php?id=1690');

View File

@ -14,6 +14,10 @@ class FilterTest extends PHPUnit_Framework_TestCase
$expected = '<html><body><h1>boo</h1></body>'; $expected = '<html><body><h1>boo</h1></body>';
$this->assertEquals($expected, Filter::stripHeadTags($input)); $this->assertEquals($expected, Filter::stripHeadTags($input));
$input = file_get_contents('tests/fixtures/html4_page.html');
$expected = file_get_contents('tests/fixtures/html4_head_stripped_page.html');
$this->assertEquals($expected, Filter::stripHeadTags($input));
$input = file_get_contents('tests/fixtures/html_page.html'); $input = file_get_contents('tests/fixtures/html_page.html');
$expected = file_get_contents('tests/fixtures/html_head_stripped_page.html'); $expected = file_get_contents('tests/fixtures/html_head_stripped_page.html');
$this->assertEquals($expected, Filter::stripHeadTags($input)); $this->assertEquals($expected, Filter::stripHeadTags($input));

View File

@ -47,6 +47,17 @@ class AtomParserTest extends PHPUnit_Framework_TestCase
$this->assertEquals('http://www.bbc.co.uk/urdu/images/gel/rss_logo.gif', $feed->getLogo()); $this->assertEquals('http://www.bbc.co.uk/urdu/images/gel/rss_logo.gif', $feed->getLogo());
} }
public function testFeedIcon()
{
$parser = new Atom(file_get_contents('tests/fixtures/atom.xml'));
$feed = $parser->execute();
$this->assertEquals('', $feed->getIcon());
$parser = new Atom(file_get_contents('tests/fixtures/lagrange.xml'));
$feed = $parser->execute();
$this->assertEquals('http://www.la-grange.net/favicon.png', $feed->getIcon());
}
public function testFeedUrl() public function testFeedUrl()
{ {
$parser = new Atom(file_get_contents('tests/fixtures/atom.xml')); $parser = new Atom(file_get_contents('tests/fixtures/atom.xml'));

View File

@ -11,137 +11,218 @@ class FaviconTest extends PHPUnit_Framework_TestCase
{ {
$favicon = new Favicon; $favicon = new Favicon;
$html = '<!DOCTYPE html><html><head>
<link rel="shortcut icon" href="http://example.com/myicon.ico" />
</head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/myicon.ico'), $favicon->extract($html));
$html = '<!DOCTYPE html><html><head> $html = '<!DOCTYPE html><html><head>
<link rel="icon" href="http://example.com/myicon.ico" /> <link rel="icon" href="http://example.com/myicon.ico" />
</head><body><p>boo</p></body></html>'; </head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/myicon.ico'), $favicon->extract($html)); $this->assertEquals(array('http://example.com/myicon.ico'), $favicon->extract($html));
// multiple values in rel attribute
$html = '<!DOCTYPE html><html><head>
<link rel="shortcut icon" href="http://example.com/myicon.ico" />
</head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/myicon.ico'), $favicon->extract($html));
// icon part of another string
$html = '<!DOCTYPE html><html><head>
<link rel="fluid-icon" href="http://example.com/myicon.ico" />
</head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/myicon.ico'), $favicon->extract($html));
// with other attributes present
$html = '<!DOCTYPE html><html><head> $html = '<!DOCTYPE html><html><head>
<link rel="icon" type="image/vnd.microsoft.icon" href="http://example.com/image.ico" /> <link rel="icon" type="image/vnd.microsoft.icon" href="http://example.com/image.ico" />
</head><body><p>boo</p></body></html>'; </head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/image.ico'), $favicon->extract($html)); $this->assertEquals(array('http://example.com/image.ico'), $favicon->extract($html));
// ignore icon in other attribute
$html = '<!DOCTYPE html><html><head> $html = '<!DOCTYPE html><html><head>
<link type="icon" href="http://example.com/image.ico" />
</head><body><p>boo</p></body></html>';
// ignores apple icon
$html = '<!DOCTYPE html><html><head>
<link rel="apple-touch-icon" href="assets/img/touch-icon-iphone.png">
<link rel="icon" type="image/png" href="http://example.com/image.png" /> <link rel="icon" type="image/png" href="http://example.com/image.png" />
</head><body><p>boo</p></body></html>'; </head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/image.png'), $favicon->extract($html)); $this->assertEquals(array('http://example.com/image.png'), $favicon->extract($html));
// allows multiple icons
$html = '<!DOCTYPE html><html><head> $html = '<!DOCTYPE html><html><head>
<link rel="icon" type="image/gif" href="http://example.com/image.gif" />
</head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/image.gif'), $favicon->extract($html));
$html = '<!DOCTYPE html><html><head>
<link rel="icon" type="image/x-icon" href="http://example.com/image.ico"/>
</head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/image.ico'), $favicon->extract($html));
$html = '<!DOCTYPE html><html><head>
<link rel="apple-touch-icon" href="assets/img/touch-icon-iphone.png">
<link rel="icon" type="image/png" href="http://example.com/image.png" /> <link rel="icon" type="image/png" href="http://example.com/image.png" />
<link rel="icon" type="image/x-icon" href="http://example.com/image.ico"/> <link rel="icon" type="image/x-icon" href="http://example.com/image.ico"/>
</head><body><p>boo</p></body></html>'; </head><body><p>boo</p></body></html>';
$this->assertEquals(array('http://example.com/image.png', 'http://example.com/image.ico'), $favicon->extract($html)); $this->assertEquals(array('http://example.com/image.png', 'http://example.com/image.ico'), $favicon->extract($html));
// empty array with broken html
$html = '!DOCTYPE html html head
link rel="icon" type="image/png" href="http://example.com/image.png" /
link rel="icon" type="image/x-icon" href="http://example.com/image.ico"/
/head body /p boo /p body /html';
$this->assertEquals(array(), $favicon->extract($html));
// empty array on no input
$this->assertEquals(array(), $favicon->extract(''));
// empty array on no icon found
$html = '<!DOCTYPE html><html><head>
</head><body><p>boo</p></body></html>';
$this->assertEquals(array(), $favicon->extract($html));
} }
/*
public function testHasFile() public function testExists()
{ {
$favicon = new Favicon; $favicon = new Favicon;
$this->assertTrue($favicon->exists('https://en.wikipedia.org/favicon.ico')); $this->assertTrue($favicon->exists('https://en.wikipedia.org/favicon.ico'));
$this->assertFalse($favicon->exists('http://minicoders.com/favicon.ico')); $this->assertFalse($favicon->exists('http://minicoders.com/favicon.ico'));
$this->assertFalse($favicon->exists('http://blabla')); $this->assertFalse($favicon->exists('http://blabla'));
$this->assertFalse($favicon->exists(''));
} }
*/
public function testConvertLink() public function testConvertLink()
{ {
$favicon = new Favicon; $favicon = new Favicon;
// relative link
$this->assertEquals( $this->assertEquals(
'http://miniflux.net/assets/img/favicon.png', 'http://miniflux.net/assets/img/favicon.png',
$favicon->convertLink(new Url('http://miniflux.net'), new Url('assets/img/favicon.png')) $favicon->convertLink(new Url('http://miniflux.net'), new Url('assets/img/favicon.png'))
); );
// relative link + HTTPS
$this->assertEquals( $this->assertEquals(
'https://miniflux.net/assets/img/favicon.png', 'https://miniflux.net/assets/img/favicon.png',
$favicon->convertLink(new Url('https://miniflux.net'), new Url('assets/img/favicon.png')) $favicon->convertLink(new Url('https://miniflux.net'), new Url('assets/img/favicon.png'))
); );
// absolute link
$this->assertEquals(
'http://miniflux.net/assets/img/favicon.png',
$favicon->convertLink(new Url('http://miniflux.net'), new Url('/assets/img/favicon.png'))
);
// absolute link + HTTPS
$this->assertEquals(
'https://miniflux.net/assets/img/favicon.png',
$favicon->convertLink(new Url('https://miniflux.net'), new Url('/assets/img/favicon.png'))
);
// Protocol relative link
$this->assertEquals( $this->assertEquals(
'http://google.com/assets/img/favicon.png', 'http://google.com/assets/img/favicon.png',
$favicon->convertLink(new Url('http://miniflux.net'), new Url('//google.com/assets/img/favicon.png')) $favicon->convertLink(new Url('http://miniflux.net'), new Url('//google.com/assets/img/favicon.png'))
); );
// Protocol relative link + HTTPS
$this->assertEquals( $this->assertEquals(
'https://google.com/assets/img/favicon.png', 'https://google.com/assets/img/favicon.png',
$favicon->convertLink(new Url('https://miniflux.net'), new Url('//google.com/assets/img/favicon.png')) $favicon->convertLink(new Url('https://miniflux.net'), new Url('//google.com/assets/img/favicon.png'))
); );
// URL same fqdn
$this->assertEquals(
'http://miniflux.net/assets/img/favicon.png',
$favicon->convertLink(new Url('https://miniflux.net'), new Url('http://miniflux.net/assets/img/favicon.png'))
);
// URL different fqdn
$this->assertEquals(
'https://www.google.com/assets/img/favicon.png',
$favicon->convertLink(new Url('https://miniflux.net'), new Url('https://www.google.com/assets/img/favicon.png'))
);
// HTTPS URL
$this->assertEquals(
'https://miniflux.net/assets/img/favicon.png',
$favicon->convertLink(new Url('https://miniflux.net'), new Url('https://miniflux.net/assets/img/favicon.png'))
);
// empty string on missing website parameter
$this->assertEquals(
'',
$favicon->convertLink(new Url(''), new Url('favicon.png'))
);
// website only on missing icon parameter
$this->assertEquals(
'https://miniflux.net/',
$favicon->convertLink(new Url('https://miniflux.net'), new Url(''))
);
// empty string on missing website and icon parameter
$this->assertEquals(
'',
$favicon->convertLink(new Url(''), new Url(''))
);
} }
public function testFind() public function testFind_inMeta()
{ {
$favicon = new Favicon; $favicon = new Favicon;
// Relative favicon in html // favicon in meta
$this->assertEquals( $this->assertEquals(
'http://miniflux.net/assets/img/favicon.png', 'http://miniflux.net/assets/img/favicon.png',
$favicon->find('http://miniflux.net') $favicon->find('http://miniflux.net')
); );
$this->assertNotEmpty($favicon->getContent()); $this->assertNotEmpty($favicon->getContent());
}
// Absolute html favicon // public function testFind_inRootDir()
$this->assertEquals( // {
'http://php.net/favicon.ico', // // favicon not in meta, only in website root (need example page)
$favicon->find('http://php.net/parse_url') // $favicon = new Favicon;
); //
// $this->assertEquals(
// 'http://minicoders.com/favicon.ico',
// $favicon->find('http://minicoders.com')
// );
// }
$this->assertNotEmpty($favicon->getContent()); public function testFind_noIcons()
{
$favicon = new Favicon;
// Protocol relative favicon
$this->assertEquals(
'https://bits.wikimedia.org/favicon/wikipedia.ico',
$favicon->find('https://en.wikipedia.org/')
);
$this->assertNotEmpty($favicon->getContent());
// fluid-icon + https
$this->assertEquals(
'https://github.com/fluidicon.png',
$favicon->find('https://github.com')
);
$this->assertNotEmpty($favicon->getContent());
// favicon in meta
$this->assertEquals(
'http://www.microsoft.com/favicon.ico?v2',
$favicon->find('http://www.microsoft.com')
);
$this->assertNotEmpty($favicon->getContent());
// no icon
$this->assertEquals( $this->assertEquals(
'', '',
$favicon->find('http://minicoders.com/favicon.ico') $favicon->find('http://minicoders.com')
); );
$this->assertEmpty($favicon->getContent()); $this->assertEmpty($favicon->getContent());
} }
public function testFind_directLinkFirst()
{
$favicon = new Favicon;
$this->assertEquals(
'http://miniflux.net/assets/img/touch-icon-ipad.png',
$favicon->find('http://miniflux.net', '/assets/img/touch-icon-ipad.png')
);
$this->assertNotEmpty($favicon->getContent());
}
public function testFind_fallsBackToExtract()
{
$favicon = new Favicon;
$this->assertEquals(
'http://miniflux.net/assets/img/favicon.png',
$favicon->find('http://miniflux.net','/nofavicon.ico')
);
$this->assertNotEmpty($favicon->getContent());
}
public function testDataUri() public function testDataUri()
{ {
$favicon = new Favicon; $favicon = new Favicon;
@ -156,7 +237,7 @@ class FaviconTest extends PHPUnit_Framework_TestCase
$this->assertEquals($expected, $favicon->getDataUri()); $this->assertEquals($expected, $favicon->getDataUri());
} }
public function testDataUriWithBadContentType() public function testDataUri_withBadContentType()
{ {
$favicon = new Favicon; $favicon = new Favicon;
$this->assertNotEmpty($favicon->find('http://www.lemonde.fr/')); $this->assertNotEmpty($favicon->find('http://www.lemonde.fr/'));

View File

@ -15,14 +15,21 @@ class ReaderTest extends PHPUnit_Framework_TestCase
$this->assertEquals('https://google.com', $reader->prependScheme('https://google.com')); $this->assertEquals('https://google.com', $reader->prependScheme('https://google.com'));
} }
public function testDownload() public function testDownload_withHTTP()
{ {
$reader = new Reader; $reader = new Reader;
$feed = $reader->download('http://wordpress.org/news/feed/')->getContent(); $feed = $reader->download('http://wordpress.org/news/feed/')->getContent();
$this->assertNotEmpty($feed); $this->assertNotEmpty($feed);
} }
public function testDownloadWithCache() public function testDownload_withHTTPS()
{
$reader = new Reader;
$feed = $reader->download('https://wordpress.org/news/feed/')->getContent();
$this->assertNotEmpty($feed);
}
public function testDownload_withCache()
{ {
$reader = new Reader; $reader = new Reader;
$resource = $reader->download('http://linuxfr.org/robots.txt'); $resource = $reader->download('http://linuxfr.org/robots.txt');
@ -78,30 +85,129 @@ class ReaderTest extends PHPUnit_Framework_TestCase
$this->assertEquals('Rss20', $reader->detectFormat($content)); $this->assertEquals('Rss20', $reader->detectFormat($content));
} }
public function testFind() public function testFind_rssFeed()
{ {
$reader = new Reader; $reader = new Reader;
$resource = $reader->download('http://miniflux.net/');
$feeds = $reader->find($resource->getUrl(), $resource->getContent());
$this->assertTrue(is_array($feeds));
$this->assertNotEmpty($feeds);
$this->assertEquals('http://miniflux.net/feed', $feeds[0]);
$reader = new Reader; $html = '<!DOCTYPE html><html><head>
$resource = $reader->download('http://www.bbc.com/news/'); <link type="application/rss+xml" href="http://miniflux.net/feed">
$feeds = $reader->find($resource->getUrl(), $resource->getContent()); </head><body><p>boo</p></body></html>';
$this->assertTrue(is_array($feeds));
$this->assertNotEmpty($feeds);
$this->assertEquals('http://feeds.bbci.co.uk/news/rss.xml', $feeds[0]);
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array('http://miniflux.net/feed'), $feeds);
}
public function testFind_atomFeed()
{
$reader = new Reader; $reader = new Reader;
$resource = $reader->download('http://www.cnn.com/services/rss/');
$feeds = $reader->find($resource->getUrl(), $resource->getContent()); $html = '<!DOCTYPE html><html><head>
$this->assertTrue(is_array($feeds)); <link type="application/atom+xml" href="http://miniflux.net/feed">
$this->assertNotEmpty($feeds); </head><body><p>boo</p></body></html>';
$this->assertTrue(count($feeds) > 1);
$this->assertEquals('http://rss.cnn.com/rss/cnn_topstories.rss', $feeds[0]); $feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals('http://rss.cnn.com/rss/cnn_world.rss', $feeds[1]); $this->assertEquals(array('http://miniflux.net/feed'), $feeds);
}
public function testFind_feedNotInHead()
{
$reader = new Reader;
$html = '<!DOCTYPE html><html><head></head>
<body>
<link type="application/atom+xml" href="http://miniflux.net/feed">
<p>boo</p></body></html>';
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array('http://miniflux.net/feed'), $feeds);
}
public function testFind_noFeedPresent()
{
$reader = new Reader;
$html = '<!DOCTYPE html><html><head>
</head><body><p>boo</p></body></html>';
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array(), $feeds);
}
public function testFind_ignoreUnknownType()
{
$reader = new Reader;
$html = '<!DOCTYPE html><html><head>
<link type="application/flux+xml" href="http://miniflux.net/feed">
</head><body><p>boo</p></body></html>';
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array(), $feeds);
}
public function testFind_ignoreTypeInOtherAttribute()
{
$reader = new Reader;
$html = '<!DOCTYPE html><html><head>
<link rel="application/rss+xml" href="http://miniflux.net/feed">
</head><body><p>boo</p></body></html>';
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array(), $feeds);
}
public function testFind_withOtherAttributesPresent()
{
$reader = new Reader;
$html = '<!DOCTYPE html><html><head>
<link rel="alternate" type="application/rss+xml" title="RSS" href="http://miniflux.net/feed">
</head><body><p>boo</p></body></html>';
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array('http://miniflux.net/feed'), $feeds);
}
public function testFind_multipleFeeds()
{
$reader = new Reader;
$html = '<!DOCTYPE html><html><head>
<link rel="alternate" type="application/rss+xml" title="CNN International: Top Stories" href="http://rss.cnn.com/rss/edition.rss"/>
<link rel="alternate" type="application/rss+xml" title="Connect The World" href="http://rss.cnn.com/rss/edition_connecttheworld.rss"/>
<link rel="alternate" type="application/rss+xml" title="World Sport" href="http://rss.cnn.com/rss/edition_worldsportblog.rss"/>
</head><body><p>boo</p></body></html>';
$feeds = $reader->find('http://www.cnn.com/services/rss/', $html);
$this->assertEquals(
array(
'http://rss.cnn.com/rss/edition.rss',
'http://rss.cnn.com/rss/edition_connecttheworld.rss',
'http://rss.cnn.com/rss/edition_worldsportblog.rss'
),
$feeds
);
}
// Malformed markup (no angle brackets) must not yield any feed links
public function testFind_withInvalidHTML()
{
$reader = new Reader;
$html = '!DOCTYPE html html head
link type="application/rss+xml" href="http://miniflux.net/feed"
/head body /p boo /p body /html';
// Pass the broken markup itself; passing '' would only repeat
// testFind_withHtmlParamEmptyString and leave $html unused
$feeds = $reader->find('http://miniflux.net/', $html);
$this->assertEquals(array(), $feeds);
}
public function testFind_withHtmlParamEmptyString()
{
$reader = new Reader;
$feeds = $reader->find('http://miniflux.net/', '');
$this->assertEquals(array(), $feeds);
} }
public function testDiscover() public function testDiscover()

View File

@ -1,6 +1,6 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html> <html>
<head> <head profile="http://gmpg.org/xfn/11">
<meta http-equiv="content-type" content="text/html;charset=utf-8"> <meta http-equiv="content-type" content="text/html;charset=utf-8">
<title>Un bilan des plantes génétiquement modifiées aux USA - Résumé d'un rapport américain - Afis - Association française pour l'information scientifique</title> <title>Un bilan des plantes génétiquement modifiées aux USA - Résumé d'un rapport américain - Afis - Association française pour l'information scientifique</title>
<meta name="Keywords" lang="fr" content="OGM, Afis, pseudo-sciences"> <meta name="Keywords" lang="fr" content="OGM, Afis, pseudo-sciences">

File diff suppressed because it is too large Load Diff