diff --git a/epub.php b/epub.php
index e5c8eb2..da221b6 100644
--- a/epub.php
+++ b/epub.php
@@ -22,7 +22,7 @@
$epub = new TPEpubCreator();
$epub->temp_folder = 'temp/';
- $epub->epub_file = 'epub/lemonde' . $date . '.epub';
+ $epub->epub_file = 'epub/lemonde.epub';
$epub->title = 'Le Monde ' . $date ;
if ($lm_includecover)
@@ -78,12 +78,12 @@
{
$imageid = $image->ContentItemId;
$imageurl = preg_replace('/GetPublicationContentItems-.*\.json/', 'Image-MEDIUM-' . $imageid . '.jpg', $url);
-
+
$tempcontent = file_get_contents($imageurl);
file_put_contents('temp/' . $imageid, $tempcontent);
-
+
//$epub->AddImage('temp/' . $imageid, false, false );
-
+
$pagecontent .= '
';
if ($image->HtmlText)
{
@@ -116,21 +116,19 @@
// Mediapart
if (isset($_POST['mediapart']) && $_POST['mediapart'])
{
- $sessionid = $_POST['mpsessid'];
-
$feedurl = 'https://www.mediapart.fr/articles/feed';
$opts = [
'http' => [
'method' => "GET",
- 'header' => "Accept-language: en\nCookie: MPSESSID=" . $sessionid,
+ 'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid,
]
];
$context = stream_context_create($opts);
$epub = new TPEpubCreator();
$epub->temp_folder = 'temp/';
- $epub->epub_file = 'epub/mediapart' . $date . '.epub';
- $epub->title = 'Mediapart';
+ $epub->epub_file = 'epub/mediapart.epub';
+ $epub->title = 'Mediapart ' . $date;
// load feeds
$feed = file_get_contents($feedurl);
@@ -213,22 +211,129 @@
} else {
echo $epub->error;
- }
+ }
}
+ // New York Times: build epub/newyorktimes.epub with one page per item of the home page RSS feed.
+ // $nyt_sessionid and $date are not defined in this section; they are expected to be set earlier in the script.
+ if (isset($_POST['nyt']) && $_POST['nyt'])
+ {
+     $feedurl = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml';
+     $opts = [
+         'http' => [
+             'method' => "GET",
+             // Array form: each element is sent as its own header line, so no manual line separators are needed.
+             'header' => [
+                 'Accept-language: en',
+                 'Cookie: NYT-S=' . $nyt_sessionid,
+             ],
+         ]
+     ];
+     $context = stream_context_create($opts);
+
+     // Collect libxml parse problems internally instead of raising a PHP warning for each one;
+     // the previous setting is restored at the end of this section.
+     $previouslibxml = libxml_use_internal_errors(true);
+
+     // load feeds (the feed itself is fetched without the session cookie)
+     $items = [];
+     $feed = file_get_contents($feedurl);
+     if ($feed !== false)
+     {
+         // simplexml_load_string() returns false on malformed XML instead of throwing
+         $xml = simplexml_load_string($feed);
+         if ($xml !== false)
+         {
+             // xpath() can return false/null on failure; normalise to an array
+             $items = $xml->xpath("/rss/channel/item") ?: [];
+         }
+     }
+     libxml_clear_errors();
+
+     if (!$items)
+     {
+         // Nothing to put in the book: report it rather than crash or write an empty epub.
+         echo 'error: could not load any article from ' . $feedurl . "\n";
+     }
+     else
+     {
+         $epub = new TPEpubCreator();
+         $epub->temp_folder = 'temp/';
+         $epub->epub_file = 'epub/newyorktimes.epub';
+         $epub->title = 'The New York Times ' . $date;
+
+         foreach ($items as $item)
+         {
+             $title = $item->title;
+
+             // dc:creator is optional in a feed item; fall back to an empty author
+             $creators = $item->xpath('dc:creator');
+             $author = $creators ? $creators[0] : '';
+             $summary = $item->description;
+
+             // The article page is fetched with the session cookie.
+             $article = file_get_contents((string) $item->link, false, $context);
+             if ($article === false || $article === '')
+             {
+                 // DOMDocument::loadHTML() rejects an empty document, so skip this item
+                 echo 'warning: could not download "' . $title . '"' . "\n";
+                 continue;
+             }
+
+             $doc = new DOMDocument();
+             $doc->loadHTML($article);
+             libxml_clear_errors();
+             $finder = new DomXPath($doc);
+
+             // strip images (currently disabled)
+             /*$toremove = $finder->query('//svg');
+             foreach ($toremove as $elt)
+             {
+                 $elt->parentNode->removeChild($elt);
+             }
+             $toremove = $finder->query('//figure');
+             foreach ($toremove as $elt)
+             {
+                 $elt->parentNode->removeChild($elt);
+             }
+             $toremove = $finder->query('//span[@class="screen-reader-only"]');
+             foreach ($toremove as $elt)
+             {
+                 $elt->parentNode->removeChild($elt);
+             }*/
+
+             // page header: title, author, publication date, summary
+             $result = '' . $title . '
';
+             $result .= '' . $author . '
';
+             $result .= '' . $item->pubDate . '
';
+             $result .= '' . $summary . '
';
+
+             $nodes = $finder->query('//section[@name="articleBody"]');
+
+             if (!$nodes->length)
+             {
+                 echo 'warning: could not get content of "' . $title . '"
';
+             }
+             else
+             {
+                 $node = $nodes->item(0);
+
+                 // serialise the children of the article body (its inner HTML)
+                 $innerHTML = '';
+                 foreach ($node->childNodes as $childNode){
+                     $innerHTML .= $childNode->ownerDocument->saveHTML($childNode);
+                 }
+
+                 $result .= '' . strip_tags($innerHTML, '
') . '
';
+
+                 $epub->AddPage($result, false, $title);
+             }
+         }
+
+         // Only write the file when adding pages raised no error, then report the final state.
+         if ( ! $epub->error ) {
+             $epub->CreateEPUB();
+         }
+
+         if ( ! $epub->error ) {
+             echo 'Success: ' . $epub->epub_file . ' created.
';
+         }
+         else
+         {
+             echo $epub->error;
+         }
+     }
+
+     libxml_use_internal_errors($previouslibxml);
+ }
+
+
// list existing files
$files = glob('epub/*');
foreach ($files as $file)
{
- echo '' . $file . '
';
+ echo '' . $file . ' ' . date('F d Y H:i:s', filemtime($file)) . '
';
}
?>
+
+Generate epub:
+