Bug fixes: charset encoding/decoding
This commit is contained in:
parent
35e07a8903
commit
c6f5606070
@ -609,6 +609,12 @@ a.bookmark-icon {
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
#item-content-enclosure {
|
||||
border-bottom: 1px dashed #ccc;
|
||||
margin-bottom: 20px;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
|
||||
/* other pages */
|
||||
section li {
|
||||
margin-left: 15px;
|
||||
|
@ -102,6 +102,8 @@ function relative_time($timestamp, $fallback_date_format = '%e %B %Y %k:%M')
|
||||
{
|
||||
$diff = time() - $timestamp;
|
||||
|
||||
if ($diff < 0) return \dt($fallback_date_format, $timestamp);
|
||||
|
||||
if ($diff < 60) return \t('%d second'.($diff > 1 ? 's' : '').' ago', $diff);
|
||||
|
||||
$diff = floor($diff / 60);
|
||||
|
@ -90,9 +90,9 @@
|
||||
</ul>
|
||||
|
||||
<div id="item-content" <?= Helper\isRTL($item['language']) ? 'dir="rtl"' : '' ?>>
|
||||
<?= $item['content'] ?>
|
||||
|
||||
<?php if ($item['enclosure']): ?>
|
||||
<div id="item-content-enclosure">
|
||||
<?php if (strpos($item['enclosure_type'], 'audio') !== false): ?>
|
||||
<audio controls>
|
||||
<source src="<?= $item['enclosure'] ?>" type="<?= $item['enclosure_type'] ?>">
|
||||
@ -101,8 +101,13 @@
|
||||
<video controls>
|
||||
<source src="<?= $item['enclosure'] ?>" type="<?= $item['enclosure_type'] ?>">
|
||||
</video>
|
||||
<?php elseif (strpos($item['enclosure_type'], 'image') !== false): ?>
|
||||
<img src="<?= $item['enclosure'] ?>" alt="enclosure"/>
|
||||
<?php endif ?>
|
||||
</div>
|
||||
<?php endif ?>
|
||||
|
||||
<?= $item['content'] ?>
|
||||
</div>
|
||||
|
||||
<?php if (isset($item_nav)): ?>
|
||||
|
73
vendor/PicoFeed/Encoding.php
vendored
73
vendor/PicoFeed/Encoding.php
vendored
@ -42,36 +42,6 @@ class Encoding
|
||||
159 => "\xc5\xb8"
|
||||
);
|
||||
|
||||
protected static $utf8ToWin1252 = array(
|
||||
"\xe2\x82\xac" => "\x80",
|
||||
"\xe2\x80\x9a" => "\x82",
|
||||
"\xc6\x92" => "\x83",
|
||||
"\xe2\x80\x9e" => "\x84",
|
||||
"\xe2\x80\xa6" => "\x85",
|
||||
"\xe2\x80\xa0" => "\x86",
|
||||
"\xe2\x80\xa1" => "\x87",
|
||||
"\xcb\x86" => "\x88",
|
||||
"\xe2\x80\xb0" => "\x89",
|
||||
"\xc5\xa0" => "\x8a",
|
||||
"\xe2\x80\xb9" => "\x8b",
|
||||
"\xc5\x92" => "\x8c",
|
||||
"\xc5\xbd" => "\x8e",
|
||||
"\xe2\x80\x98" => "\x91",
|
||||
"\xe2\x80\x99" => "\x92",
|
||||
"\xe2\x80\x9c" => "\x93",
|
||||
"\xe2\x80\x9d" => "\x94",
|
||||
"\xe2\x80\xa2" => "\x95",
|
||||
"\xe2\x80\x93" => "\x96",
|
||||
"\xe2\x80\x94" => "\x97",
|
||||
"\xcb\x9c" => "\x98",
|
||||
"\xe2\x84\xa2" => "\x99",
|
||||
"\xc5\xa1" => "\x9a",
|
||||
"\xe2\x80\xba" => "\x9b",
|
||||
"\xc5\x93" => "\x9c",
|
||||
"\xc5\xbe" => "\x9e",
|
||||
"\xc5\xb8" => "\x9f"
|
||||
);
|
||||
|
||||
/**
|
||||
* Function Encoding::toUTF8
|
||||
*
|
||||
@ -127,9 +97,7 @@ class Encoding
|
||||
$i++;
|
||||
}
|
||||
else { //not valid UTF8. Convert it.
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||
$buf .= $cc1 . $cc2;
|
||||
$buf .= self::convertInvalidCharacter($c1);
|
||||
}
|
||||
}
|
||||
else if ($c1 >= "\xe0" & $c1 <= "\xef") { //looks like 3 bytes UTF8
|
||||
@ -139,9 +107,7 @@ class Encoding
|
||||
$i = $i + 2;
|
||||
}
|
||||
else { //not valid UTF8. Convert it.
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||
$buf .= $cc1 . $cc2;
|
||||
$buf .= self::convertInvalidCharacter($c1);
|
||||
}
|
||||
}
|
||||
else if ($c1 >= "\xf0" & $c1 <= "\xf7") { //looks like 4 bytes UTF8
|
||||
@ -151,15 +117,11 @@ class Encoding
|
||||
$i = $i + 2;
|
||||
}
|
||||
else { //not valid UTF8. Convert it.
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||
$buf .= $cc1 . $cc2;
|
||||
$buf .= self::convertInvalidCharacter($c1);
|
||||
}
|
||||
}
|
||||
else { //doesn't look like UTF8, but should be converted
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = (($c1 & "\x3f") | "\x80");
|
||||
$buf .= $cc1 . $cc2;
|
||||
$buf .= self::convertInvalidCharacter($c1);
|
||||
}
|
||||
}
|
||||
elseif (($c1 & "\xc0") == "\x80") { // needs conversion
|
||||
@ -168,12 +130,10 @@ class Encoding
|
||||
$buf .= self::$win1252ToUtf8[ord($c1)];
|
||||
}
|
||||
else {
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = (($c1 & "\x3f") | "\x80");
|
||||
$buf .= $cc1 . $cc2;
|
||||
$buf .= self::convertInvalidCharacter($c1);
|
||||
}
|
||||
}
|
||||
else { // it doesn't need convesion
|
||||
else { // it doesn't need conversion
|
||||
$buf .= $c1;
|
||||
}
|
||||
}
|
||||
@ -185,8 +145,27 @@ class Encoding
|
||||
}
|
||||
}
|
||||
|
||||
public static function cp1251ToUtf8($input)
|
||||
/**
 * Encode a single byte as a two-byte UTF-8 sequence
 *
 * The byte is treated as a code point in the range 0x00-0xFF: its two high
 * bits go into the lead byte (0xC0 | bits 7-6) and its six low bits go into
 * the continuation byte (0x80 | bits 5-0).
 *
 * @static
 * @access public
 * @param  string  $c1  One byte
 * @return string       Two-byte sequence
 */
public static function convertInvalidCharacter($c1)
{
    // Integer shift instead of "/ 64": the division yields a float, and passing
    // a fractional float to chr() is deprecated since PHP 8.1
    $lead = chr(ord($c1) >> 6) | "\xc0";
    $continuation = ($c1 & "\x3f") | "\x80";

    return $lead.$continuation;
}
|
||||
|
||||
/**
 * Convert a CP1251 (Windows-1251) string to UTF-8 with iconv
 *
 * Transliteration is requested through the "//TRANSLIT" suffix of the
 * target charset. The iconv() result is returned as-is.
 *
 * @static
 * @access public
 * @param  string  $input  CP1251 encoded data
 * @return string
 */
public static function convert_CP_1251($input)
{
    $source_charset = 'CP1251';
    $target_charset = 'UTF-8//TRANSLIT';

    return iconv($source_charset, $target_charset, $input);
}
|
||||
|
||||
/**
 * Convert data to UTF-8 according to its declared encoding
 *
 * The encoding label is compared case-insensitively and without surrounding
 * whitespace, because labels coming from HTTP headers are not guaranteed to
 * be lowercase ("Windows-1251" must not be handled as Windows-1252).
 *
 * - "windows-1251" / "cp1251": converted with convert_CP_1251()
 * - "utf-8": returned untouched
 * - anything else, including an empty label: passed through toUTF8()
 *
 * @static
 * @access public
 * @param  string  $input     Raw data
 * @param  string  $encoding  Declared encoding name (may be empty)
 * @return string
 */
public static function convert($input, $encoding)
{
    $encoding = strtolower(trim((string) $encoding));

    if ($encoding === 'windows-1251' || $encoding === 'cp1251') {
        return self::convert_CP_1251($input);
    }

    if ($encoding === 'utf-8') {
        return $input;
    }

    // Unknown or missing encoding
    return self::toUTF8($input);
}
|
||||
}
|
||||
|
27
vendor/PicoFeed/Filter.php
vendored
27
vendor/PicoFeed/Filter.php
vendored
@ -733,33 +733,6 @@ class Filter
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the encoding from a xml tag
|
||||
*
|
||||
* @static
|
||||
* @access public
|
||||
* @param string $data Input data
|
||||
* @return string
|
||||
*/
|
||||
public static function getEncodingFromXmlTag($data)
|
||||
{
|
||||
$encoding = '';
|
||||
|
||||
if (strpos($data, '<?xml') !== false) {
|
||||
|
||||
$data = substr($data, 0, strrpos($data, '?>'));
|
||||
$data = str_replace("'", '"', $data);
|
||||
|
||||
$p1 = strpos($data, 'encoding=');
|
||||
$p2 = strpos($data, '"', $p1 + 10);
|
||||
|
||||
$encoding = substr($data, $p1 + 10, $p2 - $p1 - 10);
|
||||
$encoding = strtolower($encoding);
|
||||
}
|
||||
|
||||
return $encoding;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set whitelisted tags and attributes for each tag
|
||||
*
|
||||
|
8
vendor/PicoFeed/Grabber.php
vendored
8
vendor/PicoFeed/Grabber.php
vendored
@ -191,13 +191,7 @@ class Grabber
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');
|
||||
|
||||
$this->html = Filter::stripHeadTags($this->html);
|
||||
|
||||
if ($this->encoding == 'windows-1251') {
|
||||
$this->html = Encoding::cp1251ToUtf8($this->html);
|
||||
}
|
||||
else {
|
||||
$this->html = Encoding::toUTF8($this->html);
|
||||
}
|
||||
$this->html = Encoding::convert($this->html, $this->encoding);
|
||||
|
||||
Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
|
||||
$rules = $this->getRules();
|
||||
|
11
vendor/PicoFeed/Parser.php
vendored
11
vendor/PicoFeed/Parser.php
vendored
@ -86,19 +86,14 @@ abstract class Parser
|
||||
*/
|
||||
public function __construct($content, $http_encoding = '')
|
||||
{
|
||||
$xml_encoding = Filter::getEncodingFromXmlTag($content);
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
|
||||
|
||||
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing
|
||||
$this->content = Filter::stripXmlTag($content);
|
||||
|
||||
// Encode everything in UTF-8
|
||||
if ($xml_encoding == 'windows-1251' || $http_encoding == 'windows-1251') {
|
||||
$this->content = Encoding::cp1251ToUtf8($this->content);
|
||||
}
|
||||
else {
|
||||
$this->content = Encoding::toUTF8($this->content);
|
||||
}
|
||||
Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
|
||||
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
|
||||
|
||||
// Workarounds
|
||||
$this->content = $this->normalizeData($this->content);
|
||||
|
27
vendor/PicoFeed/XmlParser.php
vendored
27
vendor/PicoFeed/XmlParser.php
vendored
@ -133,4 +133,31 @@ class XmlParser
|
||||
|
||||
return implode(', ', $errors);
|
||||
}
|
||||
|
||||
/**
 * Get the encoding declared in the XML declaration
 *
 * Only the first <?xml ... ?> declaration is inspected, so later processing
 * instructions or an "encoding=" string in the document body cannot
 * influence the result. Single or double quotes and whitespace around "="
 * are accepted.
 *
 * @static
 * @access public
 * @param  string  $data  Input data
 * @return string         Lowercased encoding name, or '' when none is declared
 */
public static function getEncodingFromXmlTag($data)
{
    // Isolate the attributes of the declaration itself
    if (preg_match('/<\?xml\s([^>]*?)\?>/', $data, $declaration) !== 1) {
        return '';
    }

    // A declaration without an encoding attribute must yield an empty string,
    // not a slice taken at an arbitrary offset
    if (preg_match('/\bencoding\s*=\s*(["\'])(.*?)\1/', $declaration[1], $matches) !== 1) {
        return '';
    }

    return strtolower(trim($matches[2]));
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user