rules = $rules; $this->dom = XmlParser::getHtmlDocument(''.$html); $this->xpath = new DOMXPath($this->dom); }

    /**
     * Get the relevant content with predefined rules
     *
     * Runs stripTags() first so that unwanted nodes are gone from the
     * document before findContent() serializes the matching nodes.
     *
     * @access public
     * @return string
     */
    public function execute()
    {
        $this->stripTags();

        return $this->findContent();
    }

    /**
     * Remove HTML tags
     *
     * Each entry of $this->rules['strip'] is evaluated as an XPath expression
     * against the document, and every matched node is detached from its parent.
     * Nothing happens when the "strip" rule is missing or is not an array.
     *
     * @access public
     */
    public function stripTags()
    {
        if (! isset($this->rules['strip']) || ! is_array($this->rules['strip'])) {
            return;
        }

        foreach ($this->rules['strip'] as $pattern) {
            $nodes = $this->xpath->query($pattern);

            // query() yields false when the expression cannot be evaluated
            if ($nodes === false) {
                continue;
            }

            foreach ($nodes as $node) {
                // A matched node may have no parent (e.g. the pattern "/" matches
                // the document node itself); there is nothing to detach it from,
                // and dereferencing a null parentNode would be a fatal error.
                if ($node->parentNode === null) {
                    continue;
                }

                $node->parentNode->removeChild($node);
            }
        }
    }

    /**
     * Fetch content based on Xpath rules
     *
     * Each entry of $this->rules['body'] is evaluated as an XPath expression;
     * the XML serialization of every matched node is appended, in rule order
     * and then in result order, to the returned string.
     *
     * @access public
     * @return string  Concatenated markup, or an empty string when the "body"
     *                 rule is missing, is not an array, or matches nothing
     */
    public function findContent()
    {
        $content = '';

        if (! isset($this->rules['body']) || ! is_array($this->rules['body'])) {
            return $content;
        }

        foreach ($this->rules['body'] as $pattern) {
            $nodes = $this->xpath->query($pattern);

            // query() yields false when the expression cannot be evaluated
            if ($nodes === false) {
                continue;
            }

            foreach ($nodes as $node) {
                $content .= $this->dom->saveXML($node);
            }
        }

        return $content;
    }
}