Download full content of articles (like Readability)

This commit is contained in:
Frederic Guillot 2013-07-28 20:10:07 -04:00
parent f9c140e69e
commit 01f7dd9802
10 changed files with 1402 additions and 21 deletions

View File

@ -507,11 +507,21 @@ nav .active a {
.infos {
padding-bottom: 20px;
color: #ddd;
color: #ccc;
}
.item .infos a {
color: #ddd;
color: #ccc;
}
/* Spinner image injected next to the status message: keep it on the text line */
.downloading img {
    padding: 0;
    margin: 0;
    display: inline;
}

/* Status message that replaces the download link */
.downloading {
    color: #000;
}
#items-paging {

View File

@ -5,13 +5,66 @@
var queue_length = 5;
/**
 * Ask the server to fetch the full content of the displayed item and
 * replace the article body with the result.
 *
 * The "#download-item" element provides everything needed through its
 * data attributes: the item id and the messages displayed before the
 * request, after a success and after a failure.
 * Does nothing when the element is not present on the page.
 */
function download_item()
{
    var container = document.getElementById("download-item");
    if (! container) return;

    // A request is already running for this item (link clicked twice or
    // "d" pressed repeatedly): do not send another one
    if (container.getAttribute("data-in-progress") === "1") return;
    container.setAttribute("data-in-progress", "1");

    var item_id = container.getAttribute("data-item-id");

    // Replace the container content by the message stored in the given
    // data attribute, optionally preceded by the loading image
    var show_message = function(attribute, with_spinner) {
        var message = container.getAttribute(attribute);

        container.innerHTML = "";

        if (with_spinner) {
            var img = document.createElement("img");
            img.src = "assets/img/refresh.gif";
            container.appendChild(img);
        }

        container.appendChild(document.createTextNode(" " + message));
    };

    // Common exit path for every kind of failure
    var fail = function() {
        container.removeAttribute("data-in-progress");
        show_message("data-failure-message", false);
    };

    container.className = "downloading";
    show_message("data-before-message", true);

    var request = new XMLHttpRequest();

    request.onload = function() {

        var response = null;

        try {
            response = JSON.parse(request.responseText);
        }
        catch (e) {
            // The body is not JSON (server error page, proxy message...),
            // handled below as a failure
        }

        if (! response || ! response.result) {
            fail();
            return;
        }

        var content = document.getElementById("item-content");
        if (content) content.innerHTML = response.content;

        container.removeAttribute("data-in-progress");
        show_message("data-after-message", false);
    };

    // Network level failure: onload is never called in that case, without
    // this handler the loading image would stay displayed forever
    request.onerror = fail;

    request.open("POST", "?action=download-item&id=" + encodeURIComponent(item_id), true);
    request.send();
}
function switch_status(item_id, hide)
{
var request = new XMLHttpRequest();
request.onreadystatechange = function() {
request.onload = function() {
if (request.readyState === 4 && is_listing()) {
if (is_listing()) {
var response = JSON.parse(request.responseText);
@ -100,7 +153,7 @@
if (container) {
var img = document.createElement("img");
img.src = "./assets/img/refresh.gif";
img.src = "assets/img/refresh.gif";
container.appendChild(img);
}
@ -444,6 +497,10 @@
var item_id = e.target.getAttribute("data-item-id");
mark_as_read(item_id);
break;
case 'download-item':
e.preventDefault();
download_item();
break;
}
}
};
@ -451,6 +508,9 @@
document.onkeypress = function(e) {
switch (e.keyCode || e.which) {
case 100: // d
download_item();
break;
case 112: // p
case 107: // k
open_previous_item();

View File

@ -162,6 +162,13 @@ Router\get_action('mark-item-removed', function() {
});
// Ajax endpoint: download the full content of an item from the original
// website and send the outcome back as JSON
Router\post_action('download-item', function() {

    $item_id = Request\param('id');
    $result = Model\download_item($item_id);

    Response\json($result);
});
// Ajax call to mark item read
Router\post_action('mark-item-read', function() {

View File

@ -1,6 +1,11 @@
<?php
return array(
'content downloaded' => 'contenu téléchargé',
'in progress...' => 'en cours...',
'unable to fetch content' => 'impossible de récupérer l\'article',
'Download content' => 'Télécharger le contenu',
'download content' => 'télécharger le contenu',
'Help' => 'Aide',
'Theme' => 'Thème',
'No item' => 'Aucun élément',

View File

@ -2,6 +2,9 @@
namespace Model;
require_once 'vendor/PicoFeed/Encoding.php';
require_once 'vendor/PicoFeed/Filter.php';
require_once 'vendor/PicoFeed/Client.php';
require_once 'vendor/PicoFeed/Export.php';
require_once 'vendor/PicoFeed/Import.php';
require_once 'vendor/PicoFeed/Reader.php';
@ -310,6 +313,55 @@ function update_feed_cache_infos($feed_id, $last_modified, $etag)
}
/**
 * Download the original page of an item, extract the relevant part with
 * Readability, filter it and store it as the new item content.
 *
 * @param  mixed  $item_id  Item id, passed as is to get_item()
 * @return array            array('result' => bool, 'content' => string),
 *                          'content' is the filtered HTML on success and
 *                          an empty string on failure
 */
function download_item($item_id)
{
    require_once 'vendor/Readability/Readability.php';

    // Payload returned for every failure case
    $failure = array(
        'result' => false,
        'content' => ''
    );

    $item = get_item($item_id);

    // Unknown item or item without link: there is nothing to fetch, do not
    // send a request with an empty url
    if (empty($item['url'])) {
        return $failure;
    }

    $client = \PicoFeed\Client::create();
    $client->url = $item['url'];
    $client->timeout = HTTP_TIMEOUT;
    $client->user_agent = HTTP_USERAGENT;
    $client->execute();

    $content = $client->getContent();

    if (empty($content)) {
        return $failure;
    }

    $content = \PicoFeed\Encoding::toUTF8($content);

    $readability = new \Readability($content, $item['url']);

    if (! $readability->init()) {
        return $failure;
    }

    // Get relevant content
    $content = $readability->getContent()->innerHTML;

    // Filter content
    $filter = new \PicoFeed\Filter($content, $item['url']);
    $content = $filter->execute();

    // Save content
    \PicoTools\singleton('db')
        ->table('items')
        ->eq('id', $item['id'])
        ->save(array('content' => $content));

    return array(
        'result' => true,
        'content' => $content
    );
}
function remove_feed($feed_id)
{
// Items are removed by a sql constraint

View File

@ -1,6 +1,7 @@
<div class="alert alert-normal" id="shortcuts">
<h3><?= t('Keyboard shortcuts') ?></h3>
<ul>
<li><?= t('Download content') ?> = <strong>d</strong></li>
<li><?= t('Previous item') ?> = <strong>p</strong> <?= t('or') ?> <strong>j</strong></li>
<li><?= t('Next item') ?> = <strong>n</strong> <?= t('or') ?> <strong>k</strong></li>
<li><?= t('Mark as read or unread') ?> = <strong>m</strong></li>

View File

@ -13,14 +13,27 @@
<p class="infos">
<?= Helper\escape($feed['title']) ?> |
<span class="hide-mobile"><?= dt('%A %e %B %Y %k:%M', $item['updated']) ?> |</span>
<?php if ($item['bookmark']): ?>
<a href="?action=bookmark&amp;value=0&amp;id=<?= $item['id'] ?>&amp;redirect=<?= $menu ?>"><?= t('remove bookmark') ?></a>
<?php else: ?>
<a href="?action=bookmark&amp;value=1&amp;id=<?= $item['id'] ?>&amp;redirect=<?= $menu ?>"><?= t('bookmark') ?></a>
<?php endif ?>
<?php endif ?> |
<span id="download-item"
data-item-id="<?= $item['id'] ?>"
data-failure-message="<?= t('unable to fetch content') ?>"
data-before-message="<?= t('in progress...') ?>"
data-after-message="<?= t('content downloaded') ?>">
<a href="#" data-action="download-item">
<?= t('download content') ?>
</a>
</span>
</p>
<?= $item['content'] ?>
<div id="item-content">
<?= $item['content'] ?>
</div>
<?php if (isset($item_nav)): ?>
<nav>

View File

@ -167,25 +167,12 @@ class Filter
{
$this->url = $site_url;
// Workaround for old libxml2 (Debian Lenny)
if (LIBXML_DOTTED_VERSION === '2.6.32') {
$entities = get_html_translation_table(HTML_ENTITIES, ENT_NOQUOTES|ENT_XHTML, 'UTF-8');
unset($entities['&']);
unset($entities['>']);
unset($entities['<']);
$data = str_replace(array_values($entities), array_keys($entities), $data);
}
\libxml_use_internal_errors(true);
// Convert bad formatted documents to XML
$dom = new \DOMDocument;
$dom->loadHTML('<?xml version="1.0" encoding="UTF-8">'.$data);
$this->input = $dom->saveXML($dom->getElementsByTagName('body')->item(0));
// Workaround for old libxml2 (Debian Lenny)
if (LIBXML_DOTTED_VERSION === '2.6.32') $this->input = utf8_decode($this->input);
}

109
vendor/Readability/JSLikeHTMLElement.php vendored Executable file
View File

@ -0,0 +1,109 @@
<?php
/**
* JavaScript-like HTML DOM Element
*
* This class extends PHP's DOMElement to allow
* users to get and set the innerHTML property of
* HTML elements in the same way it's done in
* JavaScript.
*
* Example usage:
* @code
* require_once 'JSLikeHTMLElement.php';
* header('Content-Type: text/plain');
* $doc = new DOMDocument();
* $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
* $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>');
* $elem = $doc->getElementsByTagName('div')->item(0);
*
* // print innerHTML
* echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>'
* echo "\n\n";
*
* // set innerHTML
* $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>';
* echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>'
* echo "\n\n";
*
* // print document (with our changes)
* echo $doc->saveXML();
* @endcode
*
* @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net
* @see http://fivefilters.org (the project this was written for)
*/
class JSLikeHTMLElement extends DOMElement
{
    /**
     * Magic setter, only the "innerHTML" property is supported:
     * @code
     * $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
     * @endcode
     *
     * The current children are always removed first. The new markup is
     * parsed as XML and, when that fails, as HTML. If both attempts fail
     * the element stays empty. Any other property name raises an
     * E_USER_NOTICE.
     */
    public function __set($name, $value)
    {
        if ($name != 'innerHTML') {
            $trace = debug_backtrace();
            trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
            return;
        }

        // Drop the existing children, starting from the last one
        while ($this->hasChildNodes()) {
            $this->removeChild($this->lastChild);
        }

        // Nothing to insert, the element stays empty
        if ($value == '') {
            return;
        }

        // First attempt: appendXML() only accepts well-formed markup (XHTML)
        $fragment = $this->ownerDocument->createDocumentFragment();
        $parsed = @$fragment->appendXML($value); // @ to suppress PHP warnings

        if ($parsed) {
            if ($fragment->hasChildNodes()) {
                $this->appendChild($fragment);
            }

            return;
        }

        // Second attempt: the markup is probably ill-formed, let the HTML
        // parser deal with it in a temporary document
        $document = new DOMDocument();
        $value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');

        // The unknown <htmlfragment> tag generates a warning (suppressed),
        // but it gives a wrapper to read the nodes from, since loadHTML()
        // surrounds a fragment with <html><body> tags we do not want to keep
        $parsed = @$document->loadHTML('<htmlfragment>'.$value.'</htmlfragment>');

        if (! $parsed) {
            // Both parsers failed, the element is left empty
            return;
        }

        $wrapper = $document->getElementsByTagName('htmlfragment')->item(0);

        foreach ($wrapper->childNodes as $node) {
            $this->appendChild($this->ownerDocument->importNode($node, true));
        }
    }

    /**
     * Magic getter, only the "innerHTML" property is supported:
     * @code
     * $string = $div->innerHTML;
     * @endcode
     *
     * Returns the serialization (saveXML) of every child node, concatenated.
     * Any other property name raises an E_USER_NOTICE and returns null.
     */
    public function __get($name)
    {
        if ($name != 'innerHTML') {
            $trace = debug_backtrace();
            trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
            return null;
        }

        $markup = '';
        $children = $this->childNodes;

        for ($i = 0; $i < $children->length; $i++) {
            $markup .= $this->ownerDocument->saveXML($children->item($i));
        }

        return $markup;
    }

    /**
     * String representation: the tag name between square brackets
     */
    public function __toString()
    {
        return '['.$this->tagName.']';
    }
}

1137
vendor/Readability/Readability.php vendored Executable file

File diff suppressed because it is too large Load Diff