2014-12-23 21:28:26 -05:00
< ? php
namespace PicoFeed\Parser ;
use DOMDocument ;
use PHPUnit_Framework_TestCase ;
class XmlParserTest extends PHPUnit_Framework_TestCase
{
2015-03-01 19:56:11 +01:00
public function testEmpty ()
{
$this -> assertFalse ( XmlParser :: getDomDocument ( '' ));
$this -> assertFalse ( XmlParser :: getSimpleXml ( '' ));
$this -> assertNotFalse ( XmlParser :: getHtmlDocument ( '' ));
}
public function testGetEncodingFromMetaTag ()
{
2015-03-25 19:59:41 -04:00
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"/>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" />' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\'/>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\' />' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1/>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1 />' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\' >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1 >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;charset=\'iso-8859-1\'">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="\'text/html;charset=iso-8859-1\'">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="\'text/html\';charset=\'iso-8859-1\'">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;charset="iso-8859-1"\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'"text/html;charset=iso-8859-1"\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'"text/html";charset="iso-8859-1"\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;;;charset=iso-8859-1">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;;;charset=\'iso-8859-1\'">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="\'text/html;;;charset=iso-8859-1\'">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="\'text/html\';;;charset=\'iso-8859-1\'">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;;;charset=iso-8859-1\'>' ));
$this -> assertEquals ( 'windows-1251' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'text/html;;;charset="windows-1251"\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'"text/html;;;charset=iso-8859-1"\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv=\'Content-Type\' content=\'"text/html";;;charset="iso-8859-1"\'>' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv = Content-Type content = text/html;charset=iso-8859-1 >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta content = text/html;charset=iso-8859-1 http-equiv = Content-Type >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv = Content-Type content = text/html ; charset = iso-8859-1 >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta content = text/html ; charset = iso-8859-1 http-equiv = Content-Type >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv = Content-Type content = text/html ;;; charset = iso-8859-1 >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta content = text/html ;;; charset = iso-8859-1 http-equiv = Content-Type >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv = Content-Type content = text/html ; ; ; charset = iso-8859-1 >' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta content = text/html ; ; ; charset = iso-8859-1 http-equiv = Content-Type >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset="uTf-8"/>' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset="utf-8" />' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=\'Utf-8\'/>' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=\'utf-8\' />' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=utf-8/>' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=utf-8 />' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset="utf-8">' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset="utf-8" >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=\'utf-8\'>' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=\'utf-8\' >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=utf-8>' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset=utf-8 >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = " utf-8 " >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = \' utf-8 \' >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = " utf-8 \' >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = \' utf-8 " >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = " utf-8 >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = \' utf-8 >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = utf-8 \' >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = utf-8 " >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = utf-8 >' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta charset = utf-8 />' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta name="title" value="charset=utf-8 — is it really useful (yep)?">' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta value="charset=utf-8 — is it really useful (yep)?" name="title">' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta name="title" content="charset=utf-8 — is it really useful (yep)?">' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta name="charset=utf-8" content="charset=utf-8 — is it really useful (yep)?">' ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromMetaTag ( '<meta content="charset=utf-8 — is it really useful (nope, not here, but gotta admit pretty robust otherwise)?" name="title">' ));
$this -> assertEquals ( 'iso-8859-1' , XmlParser :: getEncodingFromMetaTag ( '<meta http-equiv="Content-Type" content="text/html;charset=iSo-8859-1"/><meta charset="invalid" />' ));
2015-03-01 19:56:11 +01:00
}
2014-12-23 21:28:26 -05:00
public function testGetEncodingFromXmlTag ()
{
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromXmlTag ( " <?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet " ));
$this -> assertEquals ( 'utf-8' , XmlParser :: getEncodingFromXmlTag ( '<?xml version="1.0" encoding="UTF-8"?><feed xml:' ));
$this -> assertEquals ( 'windows-1251' , XmlParser :: getEncodingFromXmlTag ( '<?xml version="1.0" encoding="Windows-1251"?><rss version="2.0">' ));
2015-01-27 20:13:16 -05:00
$this -> assertEquals ( '' , XmlParser :: getEncodingFromXmlTag ( " <?xml version='1.0'?><?xml-stylesheet " ));
2014-12-23 21:28:26 -05:00
}
public function testScanForXEE ()
{
$xml = <<< XML
< ? xml version = " 1.0 " ?>
<! DOCTYPE results [ <! ENTITY harmless " completely harmless " > ] >
< results >
< result > This result is & harmless ; </ result >
</ results >
XML ;
$this -> assertFalse ( XmlParser :: getDomDocument ( $xml ));
}
public function testScanForXXE ()
{
$file = tempnam ( sys_get_temp_dir (), 'PicoFeed_XmlParser' );
file_put_contents ( $file , 'Content Injection' );
$xml = <<< XML
< ? xml version = " 1.0 " ?>
<! DOCTYPE root
[
<! ENTITY foo SYSTEM " file:// $file " >
] >
< results >
< result >& foo ; </ result >
</ results >
XML ;
$this -> assertFalse ( XmlParser :: getDomDocument ( $xml ));
unlink ( $file );
}
public function testScanSimpleXML ()
{
return <<< XML
< ? xml version = " 1.0 " ?>
< results >
< result > test </ result >
</ results >
XML ;
$result = XmlParser :: getSimpleXml ( $xml );
$this -> assertTrue ( $result instanceof SimpleXMLElement );
$this -> assertEquals ( $result -> result , 'test' );
}
public function testScanDomDocument ()
{
return <<< XML
< ? xml version = " 1.0 " ?>
< results >
< result > test </ result >
</ results >
XML ;
$result = XmlParser :: getDomDocument ( $xml );
$this -> assertTrue ( $result instanceof DOMDocument );
$node = $result -> getElementsByTagName ( 'result' ) -> item ( 0 );
$this -> assertEquals ( $node -> nodeValue , 'test' );
}
public function testScanInvalidXml ()
{
$xml = <<< XML
< foo > test </ bar >
XML ;
$this -> assertFalse ( XmlParser :: getDomDocument ( $xml ));
$this -> assertFalse ( XmlParser :: getSimpleXml ( $xml ));
}
public function testScanXmlWithDTD ()
{
$xml = <<< XML
< ? xml version = " 1.0 " ?>
<! DOCTYPE results [
<! ELEMENT results ( result + ) >
<! ELEMENT result ( #PCDATA)>
] >
< results >
< result > test </ result >
</ results >
XML ;
$result = XmlParser :: getDomDocument ( $xml );
$this -> assertTrue ( $result instanceof DOMDocument );
$this -> assertTrue ( $result -> validate ());
}
2015-07-19 11:19:26 -04:00
public function testReplaceXPathPrefixWithNamespaceURI ()
{
$ns = array ( 'lorem' => 'https://en.wikipedia.org/wiki/Lorem' );
$query = '//lorem:title' ;
$expected = '//*[namespace-uri()="https://en.wikipedia.org/wiki/Lorem" and local-name()="title"]' ;
$this -> assertEquals ( $expected , XmlParser :: replaceXPathPrefixWithNamespaceURI ( $query , $ns ));
$ns = array ( 'lorem' => 'https://en.wikipedia.org/wiki/Lorem' , 'ipsum' => 'https://en.wikipedia.org/wiki/Ipsum' );
$query = '//lorem:title/ipsum:name' ;
$expected = '//*[namespace-uri()="https://en.wikipedia.org/wiki/Lorem" and local-name()="title"]/*[namespace-uri()="https://en.wikipedia.org/wiki/Ipsum" and local-name()="name"]' ;
$this -> assertEquals ( $expected , XmlParser :: replaceXPathPrefixWithNamespaceURI ( $query , $ns ));
$ns = array ( 'lorem' => 'https://en.wikipedia.org/wiki/Lorem' , 'ipsum' => 'https://en.wikipedia.org/wiki/Ipsum' );
$query = '//lorem:title/ipsum:name/@xml:lang' ;
$expected = '//*[namespace-uri()="https://en.wikipedia.org/wiki/Lorem" and local-name()="title"]/*[namespace-uri()="https://en.wikipedia.org/wiki/Ipsum" and local-name()="name"]/@xml:lang' ;
$this -> assertEquals ( $expected , XmlParser :: replaceXPathPrefixWithNamespaceURI ( $query , $ns ));
}
2014-12-23 21:28:26 -05:00
}