Update picofeed

This commit is contained in:
Frederic Guillot 2015-03-25 19:59:41 -04:00
parent c73e3ff9ba
commit c795d46669
8 changed files with 158 additions and 39 deletions

2
vendor/autoload.php vendored
View File

@ -4,4 +4,4 @@
require_once __DIR__ . '/composer' . '/autoload_real.php';
return ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6::getLoader();
return ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a::getLoader();

View File

@ -2,7 +2,7 @@
// autoload_real.php @generated by Composer
class ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6
class ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a
{
private static $loader;
@ -19,9 +19,9 @@ class ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6
return self::$loader;
}
spl_autoload_register(array('ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInit8ccf24e95a95febb275803014c1c9a9a', 'loadClassLoader'));
$map = require __DIR__ . '/autoload_namespaces.php';
foreach ($map as $namespace => $path) {
@ -42,14 +42,14 @@ class ComposerAutoloaderInita56cecf18737d4c6655b021e5f21a1a6
$includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) {
composerRequirea56cecf18737d4c6655b021e5f21a1a6($file);
composerRequire8ccf24e95a95febb275803014c1c9a9a($file);
}
return $loader;
}
}
function composerRequirea56cecf18737d4c6655b021e5f21a1a6($file)
function composerRequire8ccf24e95a95febb275803014c1c9a9a($file)
{
require $file;
}

View File

@ -162,12 +162,12 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "acc16f1a0854fdaeae2416f1b12ee51a9c150b52"
"reference": "8973f403ff6c16fb5200cfac44a58111c564b60d"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/acc16f1a0854fdaeae2416f1b12ee51a9c150b52",
"reference": "acc16f1a0854fdaeae2416f1b12ee51a9c150b52",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/8973f403ff6c16fb5200cfac44a58111c564b60d",
"reference": "8973f403ff6c16fb5200cfac44a58111c564b60d",
"shasum": ""
},
"require": {
@ -181,7 +181,7 @@
"suggest": {
"ext-curl": "PicoFeed will use cURL if present"
},
"time": "2015-03-19 22:19:36",
"time": "2015-03-25 18:09:25",
"bin": [
"picofeed"
],

View File

@ -205,6 +205,45 @@ $feed->items[0]->getContent(); // Item content (filtered or raw)
$feed->items[0]->isRTL(); // Return true if the item language is Right-To-Left
```
Get raw XML tags/attributes or non standard tags for items
----------------------------------------------------------
Get the original `guid` tag for RSS 2.0 feeds:
```php
echo $feed->items[0]->getTag('guid');
```
Get a specific attribute value:
```php
echo $feed->items[1]->getTag('category', 'term');
```
Get value of namespaced tag:
```php
echo $feed->items[1]->getTag('wfw:commentRss');
```
Get attribute value of a namespaced tag:
```php
echo $feed->items[0]->getTag('media:content', 'url');
```
Get the xml of the item (returns a SimpleXMLElement instance):
```php
$simplexml = $feed->items[0]->xml;
```
Get the list of namespaces:
```php
print_r($feed->items[0]->namespaces);
```
RTL language detection
----------------------

View File

@ -99,6 +99,47 @@ class Item
*/
public $language = '';
/**
* Raw XML
*
* @access public
* @var \SimpleXMLElement
*/
public $xml;
/**
* List of namespaces
*
* @access public
* @var array
*/
public $namespaces = array();
/**
* Get specific XML tag or attribute value
*
* @access public
* @param string $tag Tag name (examples: guid, media:content)
* @param string $attribute Tag attribute
* @return string
*/
public function getTag($tag, $attribute = '')
{
// Get namespaced value
if (strpos($tag, ':') !== false) {
list(,$tag) = explode(':', $tag);
return XmlParser::getNamespaceValue($this->xml, $this->namespaces, $tag, $attribute);
}
// Return attribute value
if (! empty($attribute)) {
return (string) $this->xml->{$tag}[$attribute];
}
// Return tag content
return (string) $this->xml->$tag;
}
/**
* Return item information
*

View File

@ -153,6 +153,9 @@ abstract class Parser
foreach ($this->getItemsTree($xml) as $entry) {
$item = new Item;
$item->xml = $entry;
$item->namespaces = $this->namespaces;
$this->findItemAuthor($xml, $entry, $item);
$this->findItemUrl($entry, $item);

View File

@ -212,21 +212,7 @@ class XmlParser
}
/**
* Extract charset from meta tag
*
* @static
* @access public
* @param string $data meta tag content
* @return string
*/
public static function findCharset($data)
{
$result = explode('charset=', $data);
return isset($result[1]) ? $result[1] : $data;
}
/**
* Get the encoding from a xml tag
* Get the charset from a meta tag
*
* @static
* @access public
@ -237,18 +223,8 @@ class XmlParser
{
$encoding = '';
$dom = static::getHtmlDocument($data);
$xpath = new DOMXPath($dom);
$tags = array(
'/html/head/meta[translate(@http-equiv, "CENOPTY", "cenopty")="content-type"]/@content', //HTML4, convert upper to lower-case
'/html/head/meta/@charset', //HTML5
);
$nodes = $xpath->query(implode(' | ', $tags));
foreach ($nodes as $node) {
$encoding = static::findCharset($node->nodeValue);
if (preg_match('/<meta.*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]+)/i', $data, $match) === 1) {
$encoding = strtolower($match[1]);
}
return $encoding;

View File

@ -17,8 +17,68 @@ class XmlParserTest extends PHPUnit_Framework_TestCase
public function testGetEncodingFromMetaTag()
{
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<html><head><meta content="text/html; charset=iso-8859-1" http-equiv="Content-Type"/></head></html>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<html><head><meta charset="iso-8859-1"></head></html>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"/>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" />'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\'/>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\' />'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1/>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1 />'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=ISO-8859-1">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1" >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset=iso-8859-1\' >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=Content-Type content=text/html;charset=iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html;charset=iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html\';charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;charset="iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html;charset=iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html";charset="iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;;;charset=iso-8859-1">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;;;charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html;;;charset=iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="\'text/html\';;;charset=\'iso-8859-1\'">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;;;charset=iso-8859-1\'>'));
$this->assertEquals('windows-1251', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'text/html;;;charset="windows-1251"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html;;;charset=iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv=\'Content-Type\' content=\'"text/html";;;charset="iso-8859-1"\'>'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html;charset=iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html;charset=iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html ; charset = iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html ; charset = iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html ;;; charset = iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html ;;; charset = iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv = Content-Type content = text/html ; ; ; charset = iso-8859-1 >'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta content = text/html ; ; ; charset = iso-8859-1 http-equiv = Content-Type >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="uTf-8"/>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="utf-8" />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'Utf-8\'/>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'utf-8\' />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8/>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8 />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="utf-8">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset="utf-8" >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'utf-8\'>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=\'utf-8\' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8>'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset=utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = " utf-8 " >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = \' utf-8 \' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = " utf-8 \' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = \' utf-8 " >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = " utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = \' utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 \' >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 " >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 >'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta charset = utf-8 />'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta name="title" value="charset=utf-8 — is it really useful (yep)?">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta value="charset=utf-8 — is it really useful (yep)?" name="title">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta name="title" content="charset=utf-8 — is it really useful (yep)?">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta name="charset=utf-8" content="charset=utf-8 — is it really useful (yep)?">'));
$this->assertEquals('utf-8', XmlParser::getEncodingFromMetaTag('<meta content="charset=utf-8 — is it really useful (nope, not here, but gotta admit pretty robust otherwise)?" name="title">'));
$this->assertEquals('iso-8859-1', XmlParser::getEncodingFromMetaTag('<meta http-equiv="Content-Type" content="text/html;charset=iSo-8859-1"/><meta charset="invalid" />'));
}
public function testGetEncodingFromXmlTag()