Update picofeed

This commit is contained in:
Frederic Guillot 2015-03-25 19:59:41 -04:00
parent c73e3ff9ba
commit c795d46669
8 changed files with 158 additions and 39 deletions

2
vendor/autoload.php vendored
View File

@ -4,4 +4,4 @@
require_once __DIR__ . '/composer' . '/autoload_real.php'; require_once __DIR__ . '/composer' . '/autoload_real.php';
return ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6::getLoader(); return ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a::getLoader();

View File

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer // autoload_real.php @generated by Composer
class ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6 class ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a
{ {
private static $loader; private static $loader;
@ -19,9 +19,9 @@ class ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6
return self::$loader; return self::$loader;
} }
spl_autoload_register(array('ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6', 'loadClassLoader'), true, true); spl_autoload_register(array('ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader(); self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6', 'loadClassLoader')); spl_autoload_unregister(array('ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a', 'loadClassLoader'));
$map = require __DIR__ . '/autoload_namespaces.php'; $map = require __DIR__ . '/autoload_namespaces.php';
foreach ($map as $namespace => $path) { foreach ($map as $namespace => $path) {
@ -42,14 +42,14 @@ class ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6
$includeFiles = require __DIR__ . '/autoload_files.php'; $includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) { foreach ($includeFiles as $file) {
composerRequirea56cecf18737d4c6655b021e5f21a1a6($file); composerRequire8ccf24e95a95febb275803014c1c9a9a($file);
} }
return $loader; return $loader;
} }
} }
function composerRequirea56cecf18737d4c6655b021e5f21a1a6($file) function composerRequire8ccf24e95a95febb275803014c1c9a9a($file)
{ {
require $file; require $file;
} }

View File

@ -162,12 +162,12 @@
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/fguillot/picoFeed.git", "url": "https://github.com/fguillot/picoFeed.git",
"reference": "acc16f1a0854fdaeae2416f1b12ee51a9c150b52" "reference": "8973f403ff6c16fb5200cfac44a58111c564b60d"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/acc16f1a0854fdaeae2416f1b12ee51a9c150b52", "url": "https://api.github.com/repos/fguillot/picoFeed/zipball/8973f403ff6c16fb5200cfac44a58111c564b60d",
"reference": "acc16f1a0854fdaeae2416f1b12ee51a9c150b52", "reference": "8973f403ff6c16fb5200cfac44a58111c564b60d",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -181,7 +181,7 @@
"suggest": { "suggest": {
"ext-curl": "PicoFeed will use cURL if present" "ext-curl": "PicoFeed will use cURL if present"
}, },
"time": "2015-03-19 22:19:36", "time": "2015-03-25 18:09:25",
"bin": [ "bin": [
"picofeed" "picofeed"
], ],

View File

@ -205,6 +205,45 @@ $feed->items[0]->getContent(); // Item content (filtered or raw)
$feed->items[0]->isRTL(); // Return true if the item language is Right-To-Left $feed->items[0]->isRTL(); // Return true if the item language is Right-To-Left
``` ```
Get raw XML tags/attributes or non-standard tags for items
----------------------------------------------------------
Get the original `guid` tag for RSS 2.0 feeds:
```php
echo $feed->items[0]->getTag('guid');
```
Get a specific attribute value:
```php
echo $feed->items[1]->getTag('category', 'term');
```
Get the value of a namespaced tag:
```php
echo $feed->items[1]->getTag('wfw:commentRss');
```
Get attribute value of a namespaced tag:
```php
echo $feed->items[0]->getTag('media:content', 'url');
```
Get the XML of the item (returns a SimpleXMLElement instance):
```php
$simplexml = $feed->items[0]->xml;
```
Get the list of namespaces:
```php
print_r($feed->items[0]->namespaces);
```
RTL language detection RTL language detection
---------------------- ----------------------

View File

@ -99,6 +99,47 @@ class Item
*/ */
public $language = ''; public $language = '';
/**
 * Raw XML element of the item, read by getTag() to look up tags and attributes
 *
 * @access public
 * @var \SimpleXMLElement
 */
public $xml;
/**
 * List of namespaces, passed by getTag() to XmlParser::getNamespaceValue() for namespaced tags
 *
 * @access public
 * @var array
 */
public $namespaces = array();
/**
 * Read a tag's text content, or one of its attributes, from the item's raw XML
 *
 * A tag name containing a colon (for example media:content) is handled as a
 * namespaced tag: only the segment following the first colon is kept, and the
 * lookup is delegated to XmlParser::getNamespaceValue() together with the
 * item's namespaces.
 *
 * @access public
 * @param  string  $tag         Tag name (examples: guid, media:content)
 * @param  string  $attribute   Tag attribute (optional)
 * @return string
 */
public function getTag($tag, $attribute = '')
{
    $isNamespaced = strpos($tag, ':') !== false;

    if ($isNamespaced) {
        // Drop the prefix, keep the segment right after the first colon
        $segments = explode(':', $tag);

        return XmlParser::getNamespaceValue($this->xml, $this->namespaces, $segments[1], $attribute);
    }

    $element = $this->xml->{$tag};

    // Without an attribute, the element itself is cast to obtain its text content
    return (string) (empty($attribute) ? $element : $element[$attribute]);
}
/** /**
* Return item information * Return item information
* *

View File

@ -153,6 +153,9 @@ abstract class Parser
foreach ($this->getItemsTree($xml) as $entry) { foreach ($this->getItemsTree($xml) as $entry) {
$item = new Item; $item = new Item;
$item->xml = $entry;
$item->namespaces = $this->namespaces;
$this->findItemAuthor($xml, $entry, $item); $this->findItemAuthor($xml, $entry, $item);
$this->findItemUrl($entry, $item); $this->findItemUrl($entry, $item);

View File

@ -212,21 +212,7 @@ class XmlParser
} }
/** /**
* Extract charset from meta tag * Get the charset from a meta tag
*
* @static
* @access public
* @param string $data meta tag content
* @return string
*/
public static function findCharset($data)
{
$result = explode('charset=', $data);
return isset($result[1]) ? $result[1] : $data;
}
/**
* Get the encoding from a xml tag
* *
* @static * @static
* @access public * @access public
@ -237,18 +223,8 @@ class XmlParser
{ {
$encoding = ''; $encoding = '';
$dom = static::getHtmlDocument($data); if (preg_match('/<meta.*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]+)/i', $data, $match) === 1) {
$xpath = new DOMXPath($dom); $encoding = strtolower($match[1]);
$tags = array(
'/html/head/meta[translate(@http-equiv, "CENOPTY", "cenopty")="content-type"]/@content', //HTML4, convert upper to lower-case
'/html/head/meta/@charset', //HTML5
);
$nodes = $xpath->query(implode(' | ', $tags));
foreach ($nodes as $node) {
$encoding = static::findCharset($node->nodeValue);
} }
return $encoding; return $encoding;

View File

@ -17,8 +17,68 @@ class XmlParserTest extends PHPUnit_Framework_TestCase
public function testGetEncodingFromMetaTag() public function testGetEncodingFromMetaTag()
{ {
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<html><head><meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type"/></head></html>')); $this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"/>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<html><head><meta charset="iso-8859-1"></head></html>')); $this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" />'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\'/>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\' />'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1/>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1 />'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\' >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html;charset=iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html\';charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset="iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html;charset=iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html";charset="iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;;;charset=iso-8859-1">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;;;charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html;;;charset=iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html\';;;charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;;;charset=iso-8859-1\'>'));
$this->assertEquals('windows-1251', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;;;charset="windows-1251"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html;;;charset=iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html";;;charset="iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html;charset=iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html;charset=iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html ; charset = iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html ; charset = iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html ;;; charset = iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html ;;; charset = iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html ; ; ; charset = iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html ; ; ; charset = iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="uTf-8"/>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="utf-8" />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'Utf-8\'/>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'utf-8\' />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8/>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8 />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="utf-8">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="utf-8" >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'utf-8\'>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'utf-8\' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = " utf-8 " >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = \' utf-8 \' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = " utf-8 \' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = \' utf-8 " >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = " utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = \' utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 \' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 " >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta name="title" value="charset=utf-8 — is it really useful (yep)?">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta value="charset=utf-8 — is it really useful (yep)?" name="title">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta name="title" content="charset=utf-8 — is it really useful (yep)?">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta name="charset=utf-8" content="charset=utf-8 — is it really useful (yep)?">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta content="charset=utf-8 — is it really useful (nope, not here, but gotta admit pretty robust otherwise)?" name="title">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iSo-8859-1"/><meta charset="invalid" />'));
} }
public function testGetEncodingFromXmlTag() public function testGetEncodingFromXmlTag()