getRulesFolders();
$this->assertNotEmpty($dirs);
$this->assertCount(1, $dirs);
$this->assertTrue(strpos($dirs[0], '/../Rules') !== false);
// Custom path
$config = new Config;
$config->setGrabberRulesFolder('/foobar/rules');
$grabber = new Grabber('');
$grabber->setConfig($config);
$dirs = $grabber->getRulesFolders();
$this->assertNotEmpty($dirs);
$this->assertCount(2, $dirs);
$this->assertTrue(strpos($dirs[0], '/../Rules') !== false);
$this->assertEquals('/foobar/rules', $dirs[1]);
// No custom path with empty config object
$grabber = new Grabber('');
$grabber->setConfig(new Config);
$dirs = $grabber->getRulesFolders();
$this->assertNotEmpty($dirs);
$this->assertCount(1, $dirs);
$this->assertTrue(strpos($dirs[0], '/../Rules') !== false);
}
/**
 * Checks loadRuleFile() against the first rules folder: a candidate list
 * containing only "test" must yield an empty result, while a list that
 * also contains "xkcd.com" must yield a non-empty one.
 */
public function testLoadRuleFile()
{
    $grabber = new Grabber('');
    $folders = $grabber->getRulesFolders();
    $firstFolder = $folders[0];

    // Only "test" as a candidate: the result is expected to be empty.
    $withoutMatch = $grabber->loadRuleFile($firstFolder, array('test'));
    $this->assertEmpty($withoutMatch);

    // With "xkcd.com" among the candidates the result is expected to be non-empty.
    $withMatch = $grabber->loadRuleFile($firstFolder, array('test', 'xkcd.com'));
    $this->assertNotEmpty($withMatch);
}
/**
 * Checks the list of rule file names getRulesFileList() returns for
 * several hostnames (with subdomain, bare domain, deeply nested, and a
 * single-label host).
 */
public function testGetRulesFileList()
{
    // Hostname => expected candidate list, in the exact order returned.
    $expectations = array(
        'www.google.ca' => array('www.google.ca', 'google.ca', '.google.ca', 'www'),
        'google.ca' => array('google.ca', '.google.ca', 'google'),
        'a.b.c.d' => array('a.b.c.d', 'b.c.d', '.b.c.d', 'a'),
        'localhost' => array('localhost'),
    );

    foreach ($expectations as $hostname => $expectedFiles) {
        // A fresh grabber per case, so no state is shared between lookups.
        $grabber = new Grabber('');
        $this->assertEquals($expectedFiles, $grabber->getRulesFileList($hostname));
    }
}
/**
 * Checks that getRules() returns a non-empty result for an egscomics.com
 * URL and an empty result for a localhost URL.
 */
public function testGetRules()
{
    $knownSite = new Grabber('http://www.egscomics.com/index.php?id=1690');
    $knownRules = $knownSite->getRules();
    $this->assertNotEmpty($knownRules);

    $unknownSite = new Grabber('http://localhost/foobar');
    $unknownRules = $unknownSite->getRules();
    $this->assertEmpty($unknownRules);
}
/**
 * Downloads a series of article pages and checks that parse() reports
 * success for each of them.
 *
 * @group online
 */
public function testGrabContentWithCandidates()
{
    // Processed in order; the first failing page aborts the test.
    $articleUrls = array(
        'http://theonion.com.feedsportal.com/c/34529/f/632231/s/309a7fe4/sc/20/l/0L0Stheonion0N0Carticles0Cobama0Ethrows0Eup0Eright0Ethere0Eduring0Esyria0Emeeting0H336850C/story01.htm',
        'http://www.lemonde.fr/proche-orient/article/2013/08/30/la-france-nouvelle-plus-ancienne-alliee-des-etats-unis_3469218_3218.html',
        'http://www.rue89.com/2013/08/30/faisait-boris-boillon-ex-sarko-boy-350-000-euros-gare-nord-245315',
        'http://www.inc.com/suzanne-lucas/why-employee-turnover-is-so-costly.html',
        'http://arstechnica.com/information-technology/2013/08/sysadmin-security-fail-nsa-finds-snowden-hijacked-officials-logins/',
    );

    foreach ($articleUrls as $articleUrl) {
        $grabber = new Grabber($articleUrl);
        $grabber->download();
        $parsed = $grabber->parse();
        $this->assertTrue($parsed);
    }
}
/**
 * Downloads a feedsportal.com URL and checks that getRules() returns an
 * array once the download has completed.
 *
 * @group online
 */
public function testGetRules_afterRedirection()
{
    $url = 'http://rss.feedsportal.com/c/629/f/502199/s/422f8c8a/sc/44/l/0L0S0A1net0N0Ceditorial0C640A3130Cces0E20A150Eimprimer0Eune0Epizza0Eet0Edes0Ebiscuits0Evideo0C0T0Dxtor0FRSS0E16/story01.htm';

    $grabber = new Grabber($url);
    $grabber->download();

    $rules = $grabber->getRules();
    $this->assertTrue(is_array($rules));
}
/**
 * Downloads an egscomics.com page, checks that parse() reports success,
 * and compares the grabbed content with the expected value.
 *
 * @group online
 */
public function testGrabContent()
{
    $grabber = new Grabber('http://www.egscomics.com/index.php?id=1690');
    $grabber->download();

    $parsed = $grabber->parse();
    $this->assertTrue($parsed);

    // NOTE(review): an empty expected string after a successful parse looks
    // odd -- confirm the expected markup was not lost from this literal.
    $content = $grabber->getContent();
    $this->assertEquals('', $content);
}
/**
 * Downloads the egscomics.com RSS feed, parses it with the content
 * grabber enabled, and checks that the first item's content contains the
 * expected markup.
 *
 * @group online
 */
public function testRssGrabContent()
{
    $reader = new Reader;
    $client = $reader->download('http://www.egscomics.com/rss.php');
    $parser = $reader->getParser($client->getUrl(), $client->getContent(), $client->getEncoding());
    $parser->enableContentGrabber();
    $feed = $parser->execute();

    $this->assertTrue(is_array($feed->items));

    // Guard the index access below so an empty feed fails with a clear message.
    $this->assertNotEmpty($feed->items);

    // NOTE(review): the original needle was lost (the string literal was left
    // unterminated); '<img' is a reconstruction -- confirm against history.
    // strpos() returns false when the needle is absent and `false >= 0` is
    // true in PHP, so the check must be a strict comparison with false.
    $this->assertTrue(strpos($feed->items[0]->content, '<img') !== false);
}
}