108 lines
2.6 KiB
PHP
Raw Normal View History

2015-04-28 18:08:42 +02:00
<?php
namespace PicoFeed\Scraper;
use PicoFeed\Base;
2015-04-28 18:08:42 +02:00
use PicoFeed\Logging\Logger;
/**
 * RuleLoader class.
 *
 * Resolves the scraper rules that apply to a URL: the URL's hostname is
 * turned into a list of candidate file names, and each rules folder is
 * searched for the first candidate that exists as a "<name>.php" file.
 *
 * @author Frederic Guillot
 * @author Bernhard Posselt
 */
class RuleLoader extends Base
{
    /**
     * Get the rules for an URL.
     *
     * Folders are tried in the order returned by getRulesFolders(); the
     * first non-empty rule set wins.
     *
     * @param string $url the URL that should be looked up
     *
     * @return array the array containing the rules, or an empty array when
     *               the URL has no usable hostname or no rule file matches
     */
    public function getRules($url)
    {
        $hostname = parse_url($url, PHP_URL_HOST);

        // parse_url() returns false for a seriously malformed URL and null
        // when the URL simply has no host component (e.g. a relative path).
        // Neither can match a rule file, so stop before building candidates.
        if (!is_string($hostname) || $hostname === '') {
            return array();
        }

        $files = $this->getRulesFileList($hostname);

        foreach ($this->getRulesFolders() as $folder) {
            $rule = $this->loadRuleFile($folder, $files);

            if (!empty($rule)) {
                return $rule;
            }
        }

        return array();
    }

    /**
     * Get the list of possible rules file names for a given hostname.
     *
     * Candidates are returned from most to least specific. The full
     * hostname always comes first; what follows depends on the number of
     * dot-separated labels:
     *  - more than two: the hostname without its first label, the same
     *    prefixed with a dot, then the first label alone;
     *  - exactly two: the hostname prefixed with a dot, then the first label;
     *  - otherwise: nothing else.
     *
     * @param string $hostname Hostname
     *
     * @return array
     */
    public function getRulesFileList($hostname)
    {
        $files = array($hostname);                  // subdomain.domain.tld
        $parts = explode('.', $hostname);
        $len = count($parts);

        if ($len > 2) {
            $subdomain = array_shift($parts);
            $domain = implode('.', $parts);

            $files[] = $domain;                     // domain.tld
            $files[] = '.'.$domain;                 // .domain.tld
            $files[] = $subdomain;                  // subdomain
        } elseif ($len === 2) {
            $files[] = '.'.implode('.', $parts);    // .domain.tld
            $files[] = $parts[0];                   // domain
        }

        return $files;
    }

    /**
     * Load a rule file from the defined folder.
     *
     * The first candidate for which "<folder>/<name>.php" exists is included
     * and whatever that file returns is handed back to the caller.
     *
     * @param string $folder Rule directory
     * @param array  $files  List of possible file names (without extension)
     *
     * @return array the value returned by the rule file, or an empty array
     *               when none of the candidates exists in the folder
     */
    public function loadRuleFile($folder, array $files)
    {
        foreach ($files as $file) {
            // Candidate names are derived from a URL host and are joined
            // into an include path. Skip anything that is empty or carries a
            // directory separator / NUL byte so a crafted host (for example
            // "..\..\name") can never point the include outside $folder.
            if (!is_string($file) || $file === '' || strpbrk($file, "/\\\0") !== false) {
                continue;
            }

            $filename = $folder.'/'.$file.'.php';

            if (file_exists($filename)) {
                Logger::setMessage(get_called_class().' Load rule: '.$file);

                return include $filename;
            }
        }

        return array();
    }

    /**
     * Get the list of folders that contains rules.
     *
     * The folder configured through getGrabberRulesFolder(), when there is
     * one, is listed first so it takes precedence over the "Rules" directory
     * located one level above this file's directory.
     *
     * @return array
     */
    public function getRulesFolders()
    {
        $folders = array();

        if ($this->config !== null) {
            $customFolder = $this->config->getGrabberRulesFolder();

            if ($customFolder !== null) {
                $folders[] = $customFolder;
            }
        }

        $folders[] = __DIR__ . '/../Rules';

        return $folders;
    }
}