miniflux-legacy/vendor/fguillot/picofeed/lib/PicoFeed/Filter/Filter.php

133 lines
2.9 KiB
PHP
Raw Normal View History

2013-02-18 03:48:21 +01:00
<?php
namespace PicoFeed\Filter;
2014-05-20 20:20:27 +02:00
/**
 * Filter class
 *
 * Stateless helpers used to clean up HTML/XML strings.
 *
 * @author  Frederic Guillot
 * @package Filter
 */
class Filter
{
    /**
     * Get the Html filter instance
     *
     * @static
     * @access public
     * @param  string  $html      HTML content
     * @param  string  $website   Site URL (used to build absolute URL)
     * @return Html
     */
    public static function html($html, $website)
    {
        return new Html($html, $website);
    }

    /**
     * Escape HTML content
     *
     * Quotes (single and double) are converted and entities that are
     * already present in the input are not encoded a second time.
     *
     * @static
     * @access public
     * @param  string  $content   Raw content
     * @return string
     */
    public static function escape($content)
    {
        // Diagnostics are deliberately silenced (best-effort escaping).
        return @htmlspecialchars($content, ENT_QUOTES, 'UTF-8', false);
    }

    /**
     * Remove the doctype and the opening/closing html, head and body tags
     *
     * Only the tags themselves (and the whitespace that follows them) are
     * removed, the content between them is kept.
     *
     * Declared static like the other helpers of this class; calling it on
     * an instance still works.
     *
     * @static
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public static function removeHTMLTags($data)
    {
        return preg_replace('~<(?:!DOCTYPE|/?(?:html|head|body))[^>]*>\s*~i', '', $data);
    }

    /**
     * Remove the XML tag from a document
     *
     * Everything up to the end of the XML declaration is dropped, then the
     * leading "xml-stylesheet" processing instructions are dropped as well.
     * An instruction without its closing "?>" is left untouched.
     *
     * @static
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public static function stripXmlTag($data)
    {
        $start = strpos($data, '<?xml');

        if ($start !== false) {
            // Look for the terminator after the declaration itself and do
            // nothing when it is missing (strpos() would return false and
            // "false + 2" would silently cut the first two bytes).
            $end = strpos($data, '?>', $start);

            if ($end !== false) {
                $data = ltrim(substr($data, $end + 2));
            }
        }

        do {
            $pos = strpos($data, '<?xml-stylesheet ');

            if ($pos === false) {
                break;
            }

            $end = strpos($data, '?>', $pos);

            if ($end === false) {
                // Unterminated instruction: nothing can be stripped safely
                break;
            }

            $data = ltrim(substr($data, $end + 2));

            // Keep going only while the instruction just removed started
            // within the first 200 bytes of the remaining data
        } while ($pos < 200);

        return $data;
    }

    /**
     * Strip head tag from the HTML content
     *
     * The head element is removed together with its content.
     *
     * @static
     * @access public
     * @param  string  $data  Input data
     * @return string
     */
    public static function stripHeadTags($data)
    {
        // The pattern is plain ASCII, so the "u" modifier is not needed; with
        // it, preg_replace() returns null for input that is not valid UTF-8.
        $stripped = preg_replace('@<head[^>]*?>.*?</head>@si', '', $data);

        // On a PCRE failure keep the input instead of returning null
        return $stripped === null ? $data : $stripped;
    }

    /**
     * Replace carriage returns, tabs and line feeds by spaces and trim the
     * beginning and the end of the string, without breaking utf-8 strings
     *
     * Consecutive spaces inside the string are not collapsed.
     *
     * @static
     * @access public
     * @param  string  $value    Raw data
     * @return string            Normalized data
     */
    public static function stripWhiteSpace($value)
    {
        // A regex such as /\s+/ is not used here because it breaks utf-8
        $value = str_replace(array("\r", "\t", "\n"), ' ', $value);

        return trim($value);
    }

    /**
     * Dirty quickfixes before XML parsing
     *
     * Removes a fixed list of byte sequences from the data.
     *
     * @static
     * @access public
     * @param  string  $data    Raw data
     * @return string           Normalized data
     */
    public static function normalizeData($data)
    {
        $invalid_chars = array(
            "\x10",
            "\xc3\x20",
            "&#x1F;",
            "\xe2\x80\x9c\x08",
        );

        // Needles are applied one after the other, in the order listed
        return str_replace($invalid_chars, '', $data);
    }
}