From c6f56060703cabeb635fd78299a41005280a5504 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sun, 25 May 2014 08:47:03 -0400 Subject: [PATCH] Bug fixes: charset encoding/decoding --- assets/css/app.css | 6 +++ lib/helpers.php | 2 + templates/show_item.php | 7 +++- vendor/PicoFeed/Encoding.php | 73 +++++++++++++---------------------- vendor/PicoFeed/Filter.php | 27 ------------- vendor/PicoFeed/Grabber.php | 8 +--- vendor/PicoFeed/Parser.php | 11 ++---- vendor/PicoFeed/XmlParser.php | 27 +++++++++++++ 8 files changed, 71 insertions(+), 90 deletions(-) diff --git a/assets/css/app.css b/assets/css/app.css index 0c8cf26..09c2b80 100644 --- a/assets/css/app.css +++ b/assets/css/app.css @@ -609,6 +609,12 @@ a.bookmark-icon { text-decoration: none; } +#item-content-enclosure { + border-bottom: 1px dashed #ccc; + margin-bottom: 20px; + padding-bottom: 10px; +} + /* other pages */ section li { margin-left: 15px; diff --git a/lib/helpers.php b/lib/helpers.php index 62afba8..a0c7b57 100644 --- a/lib/helpers.php +++ b/lib/helpers.php @@ -102,6 +102,8 @@ function relative_time($timestamp, $fallback_date_format = '%e %B %Y %k:%M') { $diff = time() - $timestamp; + if ($diff < 0) return \dt($fallback_date_format, $timestamp); + if ($diff < 60) return \t('%d second'.($diff > 1 ? 's' : '').' ago', $diff); $diff = floor($diff / 60); diff --git a/templates/show_item.php b/templates/show_item.php index e71165a..0b0ba20 100644 --- a/templates/show_item.php +++ b/templates/show_item.php @@ -90,9 +90,9 @@
> - +
+ +
diff --git a/vendor/PicoFeed/Encoding.php b/vendor/PicoFeed/Encoding.php index 1f87c30..b93bfcf 100644 --- a/vendor/PicoFeed/Encoding.php +++ b/vendor/PicoFeed/Encoding.php @@ -42,36 +42,6 @@ class Encoding 159 => "\xc5\xb8" ); - protected static $utf8ToWin1252 = array( - "\xe2\x82\xac" => "\x80", - "\xe2\x80\x9a" => "\x82", - "\xc6\x92" => "\x83", - "\xe2\x80\x9e" => "\x84", - "\xe2\x80\xa6" => "\x85", - "\xe2\x80\xa0" => "\x86", - "\xe2\x80\xa1" => "\x87", - "\xcb\x86" => "\x88", - "\xe2\x80\xb0" => "\x89", - "\xc5\xa0" => "\x8a", - "\xe2\x80\xb9" => "\x8b", - "\xc5\x92" => "\x8c", - "\xc5\xbd" => "\x8e", - "\xe2\x80\x98" => "\x91", - "\xe2\x80\x99" => "\x92", - "\xe2\x80\x9c" => "\x93", - "\xe2\x80\x9d" => "\x94", - "\xe2\x80\xa2" => "\x95", - "\xe2\x80\x93" => "\x96", - "\xe2\x80\x94" => "\x97", - "\xcb\x9c" => "\x98", - "\xe2\x84\xa2" => "\x99", - "\xc5\xa1" => "\x9a", - "\xe2\x80\xba" => "\x9b", - "\xc5\x93" => "\x9c", - "\xc5\xbe" => "\x9e", - "\xc5\xb8" => "\x9f" - ); - /** * Function Encoding::toUTF8 * @@ -127,9 +97,7 @@ class Encoding $i++; } else { //not valid UTF8. Convert it. - $cc1 = (chr(ord($c1) / 64) | "\xc0"); - $cc2 = ($c1 & "\x3f") | "\x80"; - $buf .= $cc1 . $cc2; + $buf .= self::convertInvalidCharacter($c1); } } else if ($c1 >= "\xe0" & $c1 <= "\xef") { //looks like 3 bytes UTF8 @@ -139,9 +107,7 @@ class Encoding $i = $i + 2; } else { //not valid UTF8. Convert it. - $cc1 = (chr(ord($c1) / 64) | "\xc0"); - $cc2 = ($c1 & "\x3f") | "\x80"; - $buf .= $cc1 . $cc2; + $buf .= self::convertInvalidCharacter($c1); } } else if ($c1 >= "\xf0" & $c1 <= "\xf7") { //looks like 4 bytes UTF8 @@ -151,15 +117,11 @@ class Encoding $i = $i + 2; } else { //not valid UTF8. Convert it. - $cc1 = (chr(ord($c1) / 64) | "\xc0"); - $cc2 = ($c1 & "\x3f") | "\x80"; - $buf .= $cc1 . 
$cc2; + $buf .= self::convertInvalidCharacter($c1); } } else { //doesn't look like UTF8, but should be converted - $cc1 = (chr(ord($c1) / 64) | "\xc0"); - $cc2 = (($c1 & "\x3f") | "\x80"); - $buf .= $cc1 . $cc2; + $buf .= self::convertInvalidCharacter($c1); } } elseif (($c1 & "\xc0") == "\x80") { // needs conversion @@ -168,12 +130,10 @@ class Encoding $buf .= self::$win1252ToUtf8[ord($c1)]; } else { - $cc1 = (chr(ord($c1) / 64) | "\xc0"); - $cc2 = (($c1 & "\x3f") | "\x80"); - $buf .= $cc1 . $cc2; + $buf .= self::convertInvalidCharacter($c1); } } - else { // it doesn't need convesion + else { // it doesn't need conversion $buf .= $c1; } } @@ -185,8 +145,27 @@ class Encoding } } - public static function cp1251ToUtf8($input) + public static function convertInvalidCharacter($c1) + { + $cc1 = chr(ord($c1) / 64) | "\xc0"; + $cc2 = ($c1 & "\x3f") | "\x80"; + return $cc1.$cc2; + } + + public static function convert_CP_1251($input) { return iconv('CP1251', 'UTF-8//TRANSLIT', $input); } + + public static function convert($input, $encoding) + { + if ($encoding === 'windows-1251') { + return self::convert_CP_1251($input); + } + else if ($encoding === '' || $encoding !== 'utf-8') { + return self::toUTF8($input); + } + + return $input; + } } diff --git a/vendor/PicoFeed/Filter.php b/vendor/PicoFeed/Filter.php index af1a877..bbfd97a 100644 --- a/vendor/PicoFeed/Filter.php +++ b/vendor/PicoFeed/Filter.php @@ -733,33 +733,6 @@ class Filter return $data; } - /** - * Get the encoding from a xml tag - * - * @static - * @access public - * @param string $data Input data - * @return string - */ - public static function getEncodingFromXmlTag($data) - { - $encoding = ''; - - if (strpos($data, '<?xml') !== false) { - - $data = substr($data, 0, strrpos($data, '?>')); - $data = str_replace("'", '"', $data); - - $p1 = strpos($data, 'encoding='); - $p2 = strpos($data, '"', $p1 + 10); - - $encoding = substr($data, $p1 + 10, $p2 - $p1 - 10); - $encoding = strtolower($encoding); - } - - return $encoding; - } - /** * Set whitelisted tags adn attributes for each 
tag * diff --git a/vendor/PicoFeed/Grabber.php b/vendor/PicoFeed/Grabber.php index 33244cd..d2ce0c8 100644 --- a/vendor/PicoFeed/Grabber.php +++ b/vendor/PicoFeed/Grabber.php @@ -191,13 +191,7 @@ class Grabber Logging::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"'); $this->html = Filter::stripHeadTags($this->html); - - if ($this->encoding == 'windows-1251') { - $this->html = Encoding::cp1251ToUtf8($this->html); - } - else { - $this->html = Encoding::toUTF8($this->html); - } + $this->html = Encoding::convert($this->html, $this->encoding); Logging::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes'); $rules = $this->getRules(); diff --git a/vendor/PicoFeed/Parser.php b/vendor/PicoFeed/Parser.php index 52c6057..00977b0 100644 --- a/vendor/PicoFeed/Parser.php +++ b/vendor/PicoFeed/Parser.php @@ -86,19 +86,14 @@ abstract class Parser */ public function __construct($content, $http_encoding = '') { - $xml_encoding = Filter::getEncodingFromXmlTag($content); - Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"'); + $xml_encoding = XmlParser::getEncodingFromXmlTag($content); // Strip XML tag to avoid multiple encoding/decoding in the next XML processing $this->content = Filter::stripXmlTag($content); // Encode everything in UTF-8 - if ($xml_encoding == 'windows-1251' || $http_encoding == 'windows-1251') { - $this->content = Encoding::cp1251ToUtf8($this->content); - } - else { - $this->content = Encoding::toUTF8($this->content); - } + Logging::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"'); + $this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding); // Workarounds $this->content = $this->normalizeData($this->content); diff --git a/vendor/PicoFeed/XmlParser.php b/vendor/PicoFeed/XmlParser.php index be063a1..0f0620c 100644 --- a/vendor/PicoFeed/XmlParser.php +++ 
b/vendor/PicoFeed/XmlParser.php @@ -133,4 +133,31 @@ class XmlParser return implode(', ', $errors); } + + /** + * Get the encoding from a xml tag + * + * @static + * @access public + * @param string $data Input data + * @return string + */ + public static function getEncodingFromXmlTag($data) + { + $encoding = ''; + + if (strpos($data, '<?xml') !== false) { + + $data = substr($data, 0, strrpos($data, '?>')); + $data = str_replace("'", '"', $data); + + $p1 = strpos($data, 'encoding='); + $p2 = strpos($data, '"', $p1 + 10); + + $encoding = substr($data, $p1 + 10, $p2 - $p1 - 10); + $encoding = strtolower($encoding); + } + + return $encoding; + } }