diff --git a/epub.php b/epub.php index e5c8eb2..da221b6 100644 --- a/epub.php +++ b/epub.php @@ -22,7 +22,7 @@ $epub = new TPEpubCreator(); $epub->temp_folder = 'temp/'; - $epub->epub_file = 'epub/lemonde' . $date . '.epub'; + $epub->epub_file = 'epub/lemonde.epub'; $epub->title = 'Le Monde ' . $date ; if ($lm_includecover) @@ -78,12 +78,12 @@ { $imageid = $image->ContentItemId; $imageurl = preg_replace('/GetPublicationContentItems-.*\.json/', 'Image-MEDIUM-' . $imageid . '.jpg', $url); - + $tempcontent = file_get_contents($imageurl); file_put_contents('temp/' . $imageid, $tempcontent); - + //$epub->AddImage('temp/' . $imageid, false, false ); - + $pagecontent .= '
'; if ($image->HtmlText) { @@ -116,21 +116,19 @@ // Mediapart if (isset($_POST['mediapart']) && $_POST['mediapart']) { - $sessionid = $_POST['mpsessid']; - $feedurl = 'https://www.mediapart.fr/articles/feed'; $opts = [ 'http' => [ 'method' => "GET", - 'header' => "Accept-language: en\nCookie: MPSESSID=" . $sessionid, + 'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid, ] ]; $context = stream_context_create($opts); $epub = new TPEpubCreator(); $epub->temp_folder = 'temp/'; - $epub->epub_file = 'epub/mediapart' . $date . '.epub'; - $epub->title = 'Mediapart'; + $epub->epub_file = 'epub/mediapart.epub'; + $epub->title = 'Mediapart ' . $date; // load feeds $feed = file_get_contents($feedurl); @@ -213,22 +211,129 @@ } else { echo $epub->error; - } + } } + // New York Times + if (isset($_POST['nyt']) && $_POST['nyt']) + { + + $feedurl = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'; + $opts = [ + 'http' => [ + 'method' => "GET", + 'header' => "Accept-language: en\nCookie: NYT-S=" . $nyt_sessionid, + ] + ]; + $context = stream_context_create($opts); + + $epub = new TPEpubCreator(); + $epub->temp_folder = 'temp/'; + $epub->epub_file = 'epub/newyorktimes.epub'; + $epub->title = 'The New York Times ' . $date; + + // load feeds + $feed = file_get_contents($feedurl); + $xml = new SimpleXMLElement($feed); + $items = $xml->xpath("/rss/channel/item"); + + foreach ($items as $item) + { + $title = $item->title; + + $category = $item->xpath('dc:subject')[0]; + $author = $item->xpath('dc:creator')[0]; + $summary = $item->description; + + $article = file_get_contents($item->link, false, $context); + $doc = new DOMDocument(); + $doc->loadHTML($article); + $finder = new DomXPath($doc); + + // strip images + /*$toremove = $finder->query('//svg'); + foreach ($toremove as $elt) + { + $elt->parentNode->removeChild($elt); + } + $toremove = $finder->query('//figure'); + foreach ($toremove as $elt) + { + $elt->parentNode->removeChild($elt); + } + $toremove = $finder->query('//span[@class="screen-reader-only"]'); + foreach ($toremove as $elt) + { + $elt->parentNode->removeChild($elt); + }*/ + + $result = '

' . $title . '

'; + $result .= '

' . $author . '

'; + $result .= '

' . $item->pubDate . '

'; + $result .= '

' . $summary . '

'; + + $nodes = $finder->query('//section[@name="articleBody"]'); + /*if (!$nodes->length) + { + // articles accès libre + $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]'); + }*/ + + if (!$nodes->length) + { + echo 'warning: could not get content of "' . $title . '"
'; + } + else + { + $node = $nodes->item(0); + + $innerHTML = ''; + foreach ($node->childNodes as $childNode){ + $innerHTML .= $childNode->ownerDocument->saveHTML($childNode); + } + $outerHTML = $node->ownerDocument->saveHTML($node); + $textcontent = $node->textContent; + + $result .= '
' . strip_tags($innerHTML, '

') . '

'; + + $epub->AddPage($result, false, $title); + } + } + + if ( ! $epub->error ) { + $epub->CreateEPUB(); + + if ( ! $epub->error ) { + echo 'Success: ' . $epub->epub_file . ' created.
'; + } + else + { + echo $epub->error; + } + + } else { + echo $epub->error; + } + } + + // list existing files $files = glob('epub/*'); foreach ($files as $file) { - echo '' . $file . '
'; + echo '' . $file . ' ' . date('F d Y H:i:s', filemtime($file)) . '
'; } ?> +
+Generate epub:
Le Monde. GetPublicationContentItems url:
Mediapart.
+ The New York Times.
+
\ No newline at end of file diff --git a/settings.php.sample b/settings.php.sample index 655d666..556f958 100644 --- a/settings.php.sample +++ b/settings.php.sample @@ -2,4 +2,5 @@ $lm_includeimages = false; $lm_includecover = false; $mp_sessionid = ''; +$nyt_sessionid = '' ?> \ No newline at end of file