content = $content; return $this; } public function download($url, $last_modified = '', $etag = '', $timeout = 5, $user_agent = 'PicoFeed (https://github.com/fguillot/picoFeed)') { if (strpos($url, 'http') !== 0) { $url = 'http://'.$url; } $resource = new RemoteResource($url, $timeout, $user_agent); $resource->setLastModified($last_modified); $resource->setEtag($etag); $resource->execute(); $this->content = $resource->getContent(); $this->url = $resource->getUrl(); return $resource; } public function getContent() { return $this->content; } public function getUrl() { return $this->url; } public function getFirstTag($data) { // Strip HTML comments (max of 5,000 characters long to prevent crashing) $data = preg_replace('//Uis', '', $data); /* Strip Doctype: * Doctype needs to be within the first 500 characters. (Ideally the first!) * If it's not found by then, we need to stop looking to prevent PREG * from reaching max backtrack depth and crashing. */ $data = preg_replace('/^.{0,500}]*)>/Uis', '', $data); // Find ') + 2); // Find the first tag $open_tag = strpos($data, '<'); $close_tag = strpos($data, '>'); return substr($data, $open_tag, $close_tag); } return $data; } public function getParser($discover = false) { $first_tag = $this->getFirstTag($this->content); if (strpos($first_tag, 'content); } else if (strpos($first_tag, 'content); } else if (strpos($first_tag, 'content); } else if (strpos($first_tag, 'content); } else if (strpos($first_tag, 'content); } else if ($discover === true) { return false; } else if ($this->discover()) { return $this->getParser(true); } return false; } public function discover() { if (! $this->content) { return false; } \libxml_use_internal_errors(true); $dom = new \DOMDocument; $dom->loadHTML($this->content); $xpath = new \DOMXPath($dom); $queries = array( "//link[@type='application/atom+xml']", "//link[@type='application/rss+xml']" ); foreach ($queries as $query) { $nodes = $xpath->query($query); if ($nodes->length !== 0) { $link = $nodes->item(0)->getAttribute('href'); // Relative links if (strpos($link, 'http') !== 0) { if ($link{0} === '/') $link = substr($link, 1); if ($this->url{strlen($this->url) - 1} !== '/') $this->url .= '/'; $link = $this->url.$link; } $this->download($link); return true; } } return false; } }