Bug fixes: charset encoding/decoding
This commit is contained in:
parent
35e07a8903
commit
c6f5606070
@ -609,6 +609,12 @@ a.bookmark-icon {
|
|||||||
text-decoration: none;
|
text-decoration: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#item-content-enclosure {
|
||||||
|
border-bottom: 1px dashed #ccc;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
padding-bottom: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
/* other pages */
|
/* other pages */
|
||||||
section li {
|
section li {
|
||||||
margin-left: 15px;
|
margin-left: 15px;
|
||||||
|
@ -102,6 +102,8 @@ function relative_time($timestamp, $fallback_date_format = '%e %B %Y %k:%M')
|
|||||||
{
|
{
|
||||||
$diff = time() - $timestamp;
|
$diff = time() - $timestamp;
|
||||||
|
|
||||||
|
if ($diff < 0) return \dt($fallback_date_format, $timestamp);
|
||||||
|
|
||||||
if ($diff < 60) return \t('%d second'.($diff > 1 ? 's' : '').' ago', $diff);
|
if ($diff < 60) return \t('%d second'.($diff > 1 ? 's' : '').' ago', $diff);
|
||||||
|
|
||||||
$diff = floor($diff / 60);
|
$diff = floor($diff / 60);
|
||||||
|
@ -90,9 +90,9 @@
|
|||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
<div id="item-content" <?= Helper\isRTL($item['language']) ? 'dir="rtl"' : '' ?>>
|
<div id="item-content" <?= Helper\isRTL($item['language']) ? 'dir="rtl"' : '' ?>>
|
||||||
<?= $item['content'] ?>
|
|
||||||
|
|
||||||
<?php if ($item['enclosure']): ?>
|
<?php if ($item['enclosure']): ?>
|
||||||
|
<div id="item-content-enclosure">
|
||||||
<?php if (strpos($item['enclosure_type'], 'audio') !== false): ?>
|
<?php if (strpos($item['enclosure_type'], 'audio') !== false): ?>
|
||||||
<audio controls>
|
<audio controls>
|
||||||
<source src="<?= $item['enclosure'] ?>" type="<?= $item['enclosure_type'] ?>">
|
<source src="<?= $item['enclosure'] ?>" type="<?= $item['enclosure_type'] ?>">
|
||||||
@ -101,8 +101,13 @@
|
|||||||
<video controls>
|
<video controls>
|
||||||
<source src="<?= $item['enclosure'] ?>" type="<?= $item['enclosure_type'] ?>">
|
<source src="<?= $item['enclosure'] ?>" type="<?= $item['enclosure_type'] ?>">
|
||||||
</video>
|
</video>
|
||||||
|
<?php elseif (strpos($item['enclosure_type'], 'image') !== false): ?>
|
||||||
|
<img src="<?= $item['enclosure'] ?>" alt="enclosure"/>
|
||||||
<?php endif ?>
|
<?php endif ?>
|
||||||
|
</div>
|
||||||
<?php endif ?>
|
<?php endif ?>
|
||||||
|
|
||||||
|
<?= $item['content'] ?>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<?php if (isset($item_nav)): ?>
|
<?php if (isset($item_nav)): ?>
|
||||||
|
73
vendor/PicoFeed/Encoding.php
vendored
73
vendor/PicoFeed/Encoding.php
vendored
@ -42,36 +42,6 @@ class Encoding
|
|||||||
159 => "\xc5\xb8"
|
159 => "\xc5\xb8"
|
||||||
);
|
);
|
||||||
|
|
||||||
protected static $utf8ToWin1252 = array(
|
|
||||||
"\xe2\x82\xac" => "\x80",
|
|
||||||
"\xe2\x80\x9a" => "\x82",
|
|
||||||
"\xc6\x92" => "\x83",
|
|
||||||
"\xe2\x80\x9e" => "\x84",
|
|
||||||
"\xe2\x80\xa6" => "\x85",
|
|
||||||
"\xe2\x80\xa0" => "\x86",
|
|
||||||
"\xe2\x80\xa1" => "\x87",
|
|
||||||
"\xcb\x86" => "\x88",
|
|
||||||
"\xe2\x80\xb0" => "\x89",
|
|
||||||
"\xc5\xa0" => "\x8a",
|
|
||||||
"\xe2\x80\xb9" => "\x8b",
|
|
||||||
"\xc5\x92" => "\x8c",
|
|
||||||
"\xc5\xbd" => "\x8e",
|
|
||||||
"\xe2\x80\x98" => "\x91",
|
|
||||||
"\xe2\x80\x99" => "\x92",
|
|
||||||
"\xe2\x80\x9c" => "\x93",
|
|
||||||
"\xe2\x80\x9d" => "\x94",
|
|
||||||
"\xe2\x80\xa2" => "\x95",
|
|
||||||
"\xe2\x80\x93" => "\x96",
|
|
||||||
"\xe2\x80\x94" => "\x97",
|
|
||||||
"\xcb\x9c" => "\x98",
|
|
||||||
"\xe2\x84\xa2" => "\x99",
|
|
||||||
"\xc5\xa1" => "\x9a",
|
|
||||||
"\xe2\x80\xba" => "\x9b",
|
|
||||||
"\xc5\x93" => "\x9c",
|
|
||||||
"\xc5\xbe" => "\x9e",
|
|
||||||
"\xc5\xb8" => "\x9f"
|
|
||||||
);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function Encoding::toUTF8
|
* Function Encoding::toUTF8
|
||||||
*
|
*
|
||||||
@ -127,9 +97,7 @@ class Encoding
|
|||||||
$i++;
|
$i++;
|
||||||
}
|
}
|
||||||
else { //not valid UTF8. Convert it.
|
else { //not valid UTF8. Convert it.
|
||||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
$buf .= self::convertInvalidCharacter($c1);
|
||||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
|
||||||
$buf .= $cc1 . $cc2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ($c1 >= "\xe0" & $c1 <= "\xef") { //looks like 3 bytes UTF8
|
else if ($c1 >= "\xe0" & $c1 <= "\xef") { //looks like 3 bytes UTF8
|
||||||
@ -139,9 +107,7 @@ class Encoding
|
|||||||
$i = $i + 2;
|
$i = $i + 2;
|
||||||
}
|
}
|
||||||
else { //not valid UTF8. Convert it.
|
else { //not valid UTF8. Convert it.
|
||||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
$buf .= self::convertInvalidCharacter($c1);
|
||||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
|
||||||
$buf .= $cc1 . $cc2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ($c1 >= "\xf0" & $c1 <= "\xf7") { //looks like 4 bytes UTF8
|
else if ($c1 >= "\xf0" & $c1 <= "\xf7") { //looks like 4 bytes UTF8
|
||||||
@ -151,15 +117,11 @@ class Encoding
|
|||||||
$i = $i + 2;
|
$i = $i + 2;
|
||||||
}
|
}
|
||||||
else { //not valid UTF8. Convert it.
|
else { //not valid UTF8. Convert it.
|
||||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
$buf .= self::convertInvalidCharacter($c1);
|
||||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
|
||||||
$buf .= $cc1 . $cc2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { //doesn't look like UTF8, but should be converted
|
else { //doesn't look like UTF8, but should be converted
|
||||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
$buf .= self::convertInvalidCharacter($c1);
|
||||||
$cc2 = (($c1 & "\x3f") | "\x80");
|
|
||||||
$buf .= $cc1 . $cc2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
elseif (($c1 & "\xc0") == "\x80") { // needs conversion
|
elseif (($c1 & "\xc0") == "\x80") { // needs conversion
|
||||||
@ -168,12 +130,10 @@ class Encoding
|
|||||||
$buf .= self::$win1252ToUtf8[ord($c1)];
|
$buf .= self::$win1252ToUtf8[ord($c1)];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
$buf .= self::convertInvalidCharacter($c1);
|
||||||
$cc2 = (($c1 & "\x3f") | "\x80");
|
|
||||||
$buf .= $cc1 . $cc2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { // it doesn't need convesion
|
else { // it doesn't need conversion
|
||||||
$buf .= $c1;
|
$buf .= $c1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,8 +145,27 @@ class Encoding
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function cp1251ToUtf8($input)
|
public static function convertInvalidCharacter($c1)
|
||||||
|
{
|
||||||
|
$cc1 = chr(ord($c1) / 64) | "\xc0";
|
||||||
|
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||||
|
return $cc1.$cc2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static function convert_CP_1251($input)
|
||||||
{
|
{
|
||||||
return iconv('CP1251', 'UTF-8//TRANSLIT', $input);
|
return iconv('CP1251', 'UTF-8//TRANSLIT', $input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static function convert($input, $encoding)
|
||||||
|
{
|
||||||
|
if ($encoding === 'windows-1251') {
|
||||||
|
return self::convert_CP_1251($input);
|
||||||
|
}
|
||||||
|
else if ($encoding === '' || $encoding !== 'utf-8') {
|
||||||
|
return self::toUTF8($input);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $input;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
27
vendor/PicoFeed/Filter.php
vendored
27
vendor/PicoFeed/Filter.php
vendored
@ -733,33 +733,6 @@ class Filter
|
|||||||
return $data;
|
return $data;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the encoding from a xml tag
|
|
||||||
*
|
|
||||||
* @static
|
|
||||||
* @access public
|
|
||||||
* @param string $data Input data
|
|
||||||
* @return string
|
|
||||||
*/
|
|
||||||
public static function getEncodingFromXmlTag($data)
|
|
||||||
{
|
|
||||||
$encoding = '';
|
|
||||||
|
|
||||||
if (strpos($data, '<?xml') !== false) {
|
|
||||||
|
|
||||||
$data = substr($data, 0, strrpos($data, '?>'));
|
|
||||||
$data = str_replace("'", '"', $data);
|
|
||||||
|
|
||||||
$p1 = strpos($data, 'encoding=');
|
|
||||||
$p2 = strpos($data, '"', $p1 + 10);
|
|
||||||
|
|
||||||
$encoding = substr($data, $p1 + 10, $p2 - $p1 - 10);
|
|
||||||
$encoding = strtolower($encoding);
|
|
||||||
}
|
|
||||||
|
|
||||||
return $encoding;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set whitelisted tags and attributes for each tag
|
* Set whitelisted tags and attributes for each tag
|
||||||
*
|
*
|
||||||
|
8
vendor/PicoFeed/Grabber.php
vendored
8
vendor/PicoFeed/Grabber.php
vendored
@ -191,13 +191,7 @@ class Grabber
|
|||||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
||||||
|
|
||||||
$this->html = Filter::stripHeadTags($this->html);
|
$this->html = Filter::stripHeadTags($this->html);
|
||||||
|
$this->html = Encoding::convert($this->html, $this->encoding);
|
||||||
if ($this->encoding == 'windows-1251') {
|
|
||||||
$this->html = Encoding::cp1251ToUtf8($this->html);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$this->html = Encoding::toUTF8($this->html);
|
|
||||||
}
|
|
||||||
|
|
||||||
Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
|
Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
|
||||||
$rules = $this->getRules();
|
$rules = $this->getRules();
|
||||||
|
11
vendor/PicoFeed/Parser.php
vendored
11
vendor/PicoFeed/Parser.php
vendored
@ -86,19 +86,14 @@ abstract class Parser
|
|||||||
*/
|
*/
|
||||||
public function __construct($content, $http_encoding = '')
|
public function __construct($content, $http_encoding = '')
|
||||||
{
|
{
|
||||||
$xml_encoding = Filter::getEncodingFromXmlTag($content);
|
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
|
||||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
|
||||||
|
|
||||||
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing
|
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing
|
||||||
$this->content = Filter::stripXmlTag($content);
|
$this->content = Filter::stripXmlTag($content);
|
||||||
|
|
||||||
// Encode everything in UTF-8
|
// Encode everything in UTF-8
|
||||||
if ($xml_encoding == 'windows-1251' || $http_encoding == 'windows-1251') {
|
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||||
$this->content = Encoding::cp1251ToUtf8($this->content);
|
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
|
||||||
}
|
|
||||||
else {
|
|
||||||
$this->content = Encoding::toUTF8($this->content);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Workarounds
|
// Workarounds
|
||||||
$this->content = $this->normalizeData($this->content);
|
$this->content = $this->normalizeData($this->content);
|
||||||
|
27
vendor/PicoFeed/XmlParser.php
vendored
27
vendor/PicoFeed/XmlParser.php
vendored
@ -133,4 +133,31 @@ class XmlParser
|
|||||||
|
|
||||||
return implode(', ', $errors);
|
return implode(', ', $errors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the encoding from a xml tag
|
||||||
|
*
|
||||||
|
* @static
|
||||||
|
* @access public
|
||||||
|
* @param string $data Input data
|
||||||
|
* @return string
|
||||||
|
*/
|
||||||
|
public static function getEncodingFromXmlTag($data)
|
||||||
|
{
|
||||||
|
$encoding = '';
|
||||||
|
|
||||||
|
if (strpos($data, '<?xml') !== false) {
|
||||||
|
|
||||||
|
$data = substr($data, 0, strrpos($data, '?>'));
|
||||||
|
$data = str_replace("'", '"', $data);
|
||||||
|
|
||||||
|
$p1 = strpos($data, 'encoding=');
|
||||||
|
$p2 = strpos($data, '"', $p1 + 10);
|
||||||
|
|
||||||
|
$encoding = substr($data, $p1 + 10, $p2 - $p1 - 10);
|
||||||
|
$encoding = strtolower($encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
return $encoding;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user