From 1135e412031c99000bca21a0f1ea883848862d09 Mon Sep 17 00:00:00 2001 From: quenousimporte Date: Tue, 4 Jun 2024 17:19:23 +0200 Subject: [PATCH] add nyt today's paper add epub link on index --- epub.php | 116 ++++++++++++++++++++++++++++++++++++++++++++--------- index.html | 2 +- 2 files changed, 98 insertions(+), 20 deletions(-) diff --git a/epub.php b/epub.php index 3175417..fe2566b 100644 --- a/epub.php +++ b/epub.php @@ -205,9 +205,6 @@ foreach ($node->childNodes as $childNode){ $innerHTML .= $childNode->ownerDocument->saveHTML($childNode); } - $outerHTML = $node->ownerDocument->saveHTML($node); - $textcontent = $node->textContent; - $result .= '
' . strip_tags($innerHTML, '

') . '

'; $epub->AddPage($result, false, $title); @@ -220,7 +217,6 @@ // New York Times if (isset($_POST['nyt']) && $_POST['nyt']) { - $feedurl = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'; $opts = [ 'http' => [ @@ -276,11 +272,6 @@ $result .= '

' . $summary . '

'; $nodes = $finder->query('//section[@name="articleBody"]'); - /*if (!$nodes->length) - { - // articles accès libre - $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]'); - }*/ if (!$nodes->length) { @@ -294,8 +285,6 @@ foreach ($node->childNodes as $childNode){ $innerHTML .= $childNode->ownerDocument->saveHTML($childNode); } - $outerHTML = $node->ownerDocument->saveHTML($node); - $textcontent = $node->textContent; $result .= '
' . strip_tags($innerHTML, '

') . '

'; @@ -306,23 +295,112 @@ write_epub($epub); } + if (isset($_POST['nyttoday']) && $_POST['nyttoday']) + { + $todaypage = file_get_contents("https://www.nytimes.com/section/todayspaper"); + $opts = [ + 'http' => [ + 'method' => "GET", + 'header' => "Accept-language: en\nCookie: NYT-S=" . $nyt_sessionid, + ] + ]; + $context = stream_context_create($opts); - // list existing files - echo '
'; + $epub = new TPEpubCreator(); + $epub->temp_folder = 'temp/'; + $epub->epub_file = 'epub/newyorktimestoday.epub'; + $epub->title = 'The New York Times today\'s paper ' . $date; + + // grab today's paper + $start = strpos($todaypage, "window.__preloadedData = ") + 25; + $end = strpos($todaypage, "};", $start) + 1; + $json = str_replace(":undefined", ":null", substr($todaypage, $start, $end - $start)); + $data = json_decode($json); + + foreach($data->initialState as $item) + { + if ($item->__typename == "Article") + { + $headline = ($data->initialState->{ $item->headline->id }); + + $title = $headline->default; + $summary = $item->summary; + + $result = '

' . $title . '

'; + + // todo + //$result .= '

' . $author . '

'; + //$result .= '

' . $item->pubDate . '

'; + + $result .= '

' . $summary . '

'; + + $article = file_get_contents($item->url, false, $context); + $doc = new DOMDocument(); + $doc->loadHTML($article); + $finder = new DomXPath($doc); + $nodes = $finder->query('//section[@name="articleBody"]'); + + if (!$nodes->length) + { + echo 'warning: could not get content of "' . $title . '"
'; + } + else + { + $node = $nodes->item(0); + + $innerHTML = ''; + foreach ($node->childNodes as $childNode){ + $innerHTML .= $childNode->ownerDocument->saveHTML($childNode); + } + + $result .= '
' . strip_tags($innerHTML, '

') . '

'; + + $epub->AddPage($result, false, $title); + } + } + } + write_epub($epub); + } +?> + +

Existing Files

+
+' . str_replace('epub/', '', $file) . ' ' . date('F d Y H:i:s', filemtime($file)) . '
'; + echo date('F d Y H:i:s', filemtime($file)) . ' ' . str_replace('epub/', '', $file) . '
'; } - echo '
'; ?> +
+

Generate epub

-Generate epub:
- Le Monde - GetPublicationContentItems url:
- Mediapart
- The New York Times
+ + Parameters: +
+ GetPublicationContentItems url:
+
+ + Newspapers: +
+ + +
+ + + +
+ + + +
+ + + +
+
diff --git a/index.html b/index.html index 4bb0d14..169c68b 100644 --- a/index.html +++ b/index.html @@ -52,7 +52,7 @@ - Autre publications
Cairn - Écologie & politique
- Fichiers epub + Fichiers epub