2014-12-24 03:28:26 +01:00
|
|
|
Feed parsing
|
|
|
|
============
|
|
|
|
|
|
|
|
Parsing a subscription
|
|
|
|
----------------------
|
|
|
|
|
|
|
|
```php
|
|
|
|
use PicoFeed\Reader\Reader;
|
|
|
|
use PicoFeed\PicoFeedException;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
$reader = new Reader;
|
|
|
|
|
|
|
|
// Return a resource
|
|
|
|
$resource = $reader->download('http://linuxfr.org/news.atom');
|
|
|
|
|
|
|
|
// Return the right parser instance according to the feed format
|
|
|
|
$parser = $reader->getParser(
|
|
|
|
$resource->getUrl(),
|
|
|
|
$resource->getContent(),
|
|
|
|
$resource->getEncoding()
|
|
|
|
);
|
|
|
|
|
|
|
|
// Return a Feed object
|
|
|
|
$feed = $parser->execute();
|
|
|
|
|
|
|
|
// Print the feed properties with the magic method __toString()
|
|
|
|
echo $feed;
|
|
|
|
}
|
|
|
|
catch (PicoFeedException $e) {
|
|
|
|
// Do Something...
|
|
|
|
}
|
|
|
|
```
|
|
|
|
|
|
|
|
- The Reader class is the entry point for feed reading
|
|
|
|
- The method `download()` fetches the remote content and returns a resource, an instance of `PicoFeed\Client\Client`
|
|
|
|
- The method `getParser()` returns a Parser instance according to the feed format (Atom, RSS 2.0, ...)
|
|
|
|
- The parser itself returns a `Feed` object that contains feed and item properties
|
|
|
|
|
|
|
|
Output:
|
|
|
|
|
|
|
|
```bash
|
|
|
|
Feed::id = tag:linuxfr.org,2005:/news
|
|
|
|
Feed::title = LinuxFr.org : les dépêches
|
|
|
|
Feed::feed_url = http://linuxfr.org/news.atom
|
|
|
|
Feed::site_url = http://linuxfr.org/news
|
|
|
|
Feed::language = en-US
|
|
|
|
Feed::description =
|
|
|
|
Feed::logo =
|
2015-03-01 19:56:11 +01:00
|
|
|
Feed::date = Thu, 26 Feb 15 09:33:08 +0100
|
2014-12-24 03:28:26 +01:00
|
|
|
Feed::isRTL() = false
|
2015-03-01 19:56:11 +01:00
|
|
|
Feed::items = 15 items
|
2014-12-24 03:28:26 +01:00
|
|
|
----
|
2015-03-01 19:56:11 +01:00
|
|
|
Item::id = 56198c98ae852d21c369bfb5ffbc2ad13db2f3227236dde3e21ca1a9eb943faf
|
|
|
|
Item::title = Les brevets logiciels : un frein à l'innovation et la recherche (un nouvel exemple aux États-Unis)
|
|
|
|
Item::url = http://linuxfr.org/news/les-brevets-logiciels-un-frein-a-l-innovation-et-la-recherche-un-nouvel-exemple-aux-etats-unis
|
2014-12-24 03:28:26 +01:00
|
|
|
Item::language = en-US
|
2015-03-01 19:56:11 +01:00
|
|
|
Item::author = alenvers
|
2014-12-24 03:28:26 +01:00
|
|
|
Item::enclosure_url =
|
|
|
|
Item::enclosure_type =
|
2015-03-01 19:56:11 +01:00
|
|
|
Item::date = Thu, 26 Feb 15 09:33:08 +0100
|
2014-12-24 03:28:26 +01:00
|
|
|
Item::isRTL() = false
|
2015-03-01 19:56:11 +01:00
|
|
|
Item::content = 6452 bytes
|
2014-12-24 03:28:26 +01:00
|
|
|
....
|
|
|
|
```
|
|
|
|
|
|
|
|
Get the list of available subscriptions for a website
|
|
|
|
-----------------------------------------------------
|
|
|
|
|
|
|
|
The example below will return all available subscriptions for the website:
|
|
|
|
|
|
|
|
```php
|
|
|
|
use PicoFeed\Reader\Reader;
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
$reader = new Reader;
|
|
|
|
$resource = $reader->download('http://www.cnn.com');
|
|
|
|
|
|
|
|
$feeds = $reader->find(
|
|
|
|
$resource->getUrl(),
|
|
|
|
$resource->getContent()
|
|
|
|
);
|
|
|
|
|
|
|
|
print_r($feeds);
|
|
|
|
}
|
|
|
|
catch (PicoFeedException $e) {
|
|
|
|
// Do something...
|
|
|
|
}
|
|
|
|
```
|
|
|
|
|
|
|
|
Output:
|
|
|
|
|
|
|
|
```php
|
|
|
|
Array
|
|
|
|
(
|
|
|
|
[0] => http://rss.cnn.com/rss/cnn_topstories.rss
|
|
|
|
[1] => http://rss.cnn.com/rss/cnn_latest.rss
|
|
|
|
)
|
|
|
|
```
|
|
|
|
|
|
|
|
Feed discovery and parsing
|
|
|
|
--------------------------
|
|
|
|
|
|
|
|
This example will automatically discover the subscription and parse the feed:
|
|
|
|
|
|
|
|
```php
|
|
|
|
try {
|
|
|
|
|
|
|
|
$reader = new Reader;
|
|
|
|
$resource = $reader->discover('http://linuxfr.org');
|
|
|
|
|
|
|
|
$parser = $reader->getParser(
|
|
|
|
$resource->getUrl(),
|
|
|
|
$resource->getContent(),
|
|
|
|
$resource->getEncoding()
|
|
|
|
);
|
|
|
|
|
|
|
|
$feed = $parser->execute();
|
|
|
|
echo $feed;
|
|
|
|
}
|
|
|
|
catch (PicoFeedException $e) {
|
|
|
|
}
|
|
|
|
```
|
|
|
|
|
|
|
|
HTTP caching
|
|
|
|
------------
|
|
|
|
|
|
|
|
PicoFeed supports HTTP caching to avoid unnecessary processing.
|
|
|
|
|
|
|
|
1. After the first download, save the values of the `ETag` and `Last-Modified` HTTP headers in your database
|
|
|
|
2. For the next requests, provide those values to the `download()` method and check if the feed was modified or not
|
|
|
|
|
|
|
|
Here is an example:
|
|
|
|
|
|
|
|
```php
|
|
|
|
try {
|
|
|
|
|
|
|
|
// Fetch from your database the previous values of the Etag and LastModified headers
|
|
|
|
$etag = '...';
|
|
|
|
$last_modified = '...';
|
|
|
|
|
|
|
|
$reader = new Reader;
|
|
|
|
|
|
|
|
// Provide those values to the download method
|
|
|
|
$resource = $reader->download('http://linuxfr.org/news.atom', $last_modified, $etag);
|
|
|
|
|
|
|
|
// Return true if the remote content has changed
|
|
|
|
if ($resource->isModified()) {
|
|
|
|
|
|
|
|
$parser = $reader->getParser(
|
|
|
|
$resource->getUrl(),
|
|
|
|
$resource->getContent(),
|
|
|
|
$resource->getEncoding()
|
|
|
|
);
|
|
|
|
|
|
|
|
$feed = $parser->execute();
|
|
|
|
|
|
|
|
// Save your feed in your database
|
|
|
|
// ...
|
|
|
|
|
|
|
|
// Store the Etag and the LastModified headers in your database for the next requests
|
|
|
|
$etag = $resource->getEtag();
|
|
|
|
$last_modified = $resource->getLastModified();
|
|
|
|
|
|
|
|
// ...
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
|
|
|
|
echo 'Not modified, nothing to do!';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (PicoFeedException $e) {
|
|
|
|
// Do something...
|
|
|
|
}
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
|
|
Feed and item properties
|
|
|
|
------------------------
|
|
|
|
|
|
|
|
```php
|
|
|
|
// Feed object
|
|
|
|
$feed->getId(); // Unique feed id
|
|
|
|
$feed->getTitle(); // Feed title
|
|
|
|
$feed->getFeedUrl(); // Feed url
|
|
|
|
$feed->getSiteUrl(); // Website url
|
2015-03-01 19:56:11 +01:00
|
|
|
$feed->getDate(); // Feed last updated date (DateTime object)
|
2014-12-24 03:28:26 +01:00
|
|
|
$feed->getLanguage(); // Feed language
|
|
|
|
$feed->getDescription(); // Feed description
|
|
|
|
$feed->getLogo(); // Feed logo (can be a large image, different from icon)
|
|
|
|
$feed->getItems(); // List of item objects
|
|
|
|
|
|
|
|
// Item object
|
|
|
|
$feed->items[0]->getId(); // Item unique id (hash)
|
|
|
|
$feed->items[0]->getTitle(); // Item title
|
|
|
|
$feed->items[0]->getUrl(); // Item url
|
2015-03-01 19:56:11 +01:00
|
|
|
$feed->items[0]->getDate(); // Item published date (DateTime object)
|
2014-12-24 03:28:26 +01:00
|
|
|
$feed->items[0]->getLanguage(); // Item language
|
|
|
|
$feed->items[0]->getAuthor(); // Item author
|
|
|
|
$feed->items[0]->getEnclosureUrl(); // Enclosure url
|
|
|
|
$feed->items[0]->getEnclosureType(); // Enclosure mime-type (audio/mp3, image/png...)
|
|
|
|
$feed->items[0]->getContent(); // Item content (filtered or raw)
|
|
|
|
$feed->items[0]->isRTL(); // Return true if the item language is Right-To-Left
|
|
|
|
```
|
|
|
|
|
2015-03-26 00:59:41 +01:00
|
|
|
Get raw XML tags/attributes or non standard tags for items
|
|
|
|
----------------------------------------------------------
|
|
|
|
|
|
|
|
Get the original `guid` tag for RSS 2.0 feeds:
|
|
|
|
|
|
|
|
```php
|
|
|
|
echo $feed->items[0]->getTag('guid');
|
|
|
|
```
|
|
|
|
|
|
|
|
Get a specific attribute value:
|
|
|
|
|
|
|
|
```php
|
|
|
|
echo $feed->items[1]->getTag('category', 'term');
|
|
|
|
```
|
|
|
|
|
|
|
|
Get the value of a namespaced tag:
|
|
|
|
|
|
|
|
```php
|
|
|
|
echo $feed->items[1]->getTag('wfw:commentRss');
|
|
|
|
```
|
|
|
|
|
|
|
|
Get the attribute value of a namespaced tag:
|
|
|
|
|
|
|
|
```php
|
|
|
|
echo $feed->items[0]->getTag('media:content', 'url');
|
|
|
|
```
|
|
|
|
|
|
|
|
Get the XML of the item (returns a SimpleXMLElement instance):
|
|
|
|
|
|
|
|
```php
|
|
|
|
$simplexml = $feed->items[0]->xml;
|
|
|
|
```
|
|
|
|
|
|
|
|
Get the list of namespaces:
|
|
|
|
|
|
|
|
```php
|
|
|
|
print_r($feed->items[0]->namespaces);
|
|
|
|
```
|
|
|
|
|
2014-12-24 03:28:26 +01:00
|
|
|
RTL language detection
|
|
|
|
----------------------
|
|
|
|
|
|
|
|
Use the method `Item::isRTL()` to test if an item is RTL or not:
|
|
|
|
|
|
|
|
```php
|
|
|
|
var_dump($item->isRTL()); // true or false
|
|
|
|
```
|
|
|
|
|
|
|
|
Known RTL languages are:
|
|
|
|
|
|
|
|
- Arabic (ar-**)
|
|
|
|
- Farsi (fa-**)
|
|
|
|
- Urdu (ur-**)
|
|
|
|
- Pashto (ps-**)
|
|
|
|
- Syriac (syr-**)
|
|
|
|
- Divehi (dv-**)
|
|
|
|
- Hebrew (he-**)
|
|
|
|
- Yiddish (yi-**)
|