From 1eb8d51d3c11222b3e651b0ee2b707e580759137 Mon Sep 17 00:00:00 2001
From: quenousimporte
Date: Mon, 3 Jun 2024 17:38:46 +0200
Subject: [PATCH] merge le monde and mediapart
---
lemonde.php => epub.php | 107 +++++++++++++++++++++++++++++++++-
mediapart.php | 126 ----------------------------------------
2 files changed, 105 insertions(+), 128 deletions(-)
rename lemonde.php => epub.php (53%)
delete mode 100644 mediapart.php
diff --git a/lemonde.php b/epub.php
similarity index 53%
rename from lemonde.php
rename to epub.php
index aa9df19..e5c8eb2 100644
--- a/lemonde.php
+++ b/epub.php
@@ -113,6 +113,109 @@
}
}
+ // Mediapart: build an EPUB from the articles listed in the Mediapart RSS feed.
+ if (isset($_POST['mediapart']) && $_POST['mediapart'])
+ {
+ // The session id is pasted into a raw HTTP header below: strip control
+ // characters (CR/LF included) so it cannot inject extra header lines.
+ $sessionid = (isset($_POST['mpsessid']) && is_string($_POST['mpsessid']))
+ ? preg_replace('/[\x00-\x1F\x7F]/', '', $_POST['mpsessid'])
+ : '';
+
+ $feedurl = 'https://www.mediapart.fr/articles/feed';
+ $opts = [
+ 'http' => [
+ 'method' => "GET",
+ 'header' => "Accept-language: en\nCookie: MPSESSID=" . $sessionid,
+ ]
+ ];
+ $context = stream_context_create($opts);
+
+ $epub = new TPEpubCreator();
+ $epub->temp_folder = 'temp/';
+ $epub->epub_file = 'epub/mediapart' . $date . '.epub';
+ $epub->title = 'Mediapart';
+
+ // load feeds; false marks a failed download or an unparsable document
+ $feed = file_get_contents($feedurl);
+ $xml = ($feed === false) ? false : simplexml_load_string($feed);
+ $items = ($xml === false) ? false : $xml->xpath("/rss/channel/item");
+
+ if (!is_array($items))
+ {
+ echo 'warning: could not load the Mediapart feed' . "\n";
+ }
+ else
+ {
+ foreach ($items as $item)
+ {
+ $title = $item->title;
+ $creators = $item->xpath('dc:creator');
+ $author = $creators ? $creators[0] : '';
+ $summary = $item->description;
+
+ $article = file_get_contents((string) $item->link, false, $context);
+ if ($article === false || $article === '')
+ {
+ // DOMDocument::loadHTML() refuses an empty source, so skip this item
+ echo 'warning: could not download "' . $title . '"' . "\n";
+ continue;
+ }
+
+ $doc = new DOMDocument();
+ $doc->loadHTML($article);
+ $finder = new DomXPath($doc);
+
+ // strip images and screen-reader-only labels
+ $toremove = $finder->query('//svg | //figure | //span[@class="screen-reader-only"]');
+ foreach ($toremove as $elt)
+ {
+ $elt->parentNode->removeChild($elt);
+ }
+
+ $result = '' . $title . '
';
+ $result .= '' . $author . '
';
+ $result .= '' . $item->pubDate . '
';
+ $result .= '' . $summary . '
';
+
+ $nodes = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
+ if (!$nodes->length)
+ {
+ // open-access articles
+ $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]');
+ }
+
+ if (!$nodes->length)
+ {
+ echo 'warning: could not get content of "' . $title . '"
';
+ }
+ else
+ {
+ $node = $nodes->item(0);
+
+ $innerHTML = '';
+ foreach ($node->childNodes as $childNode){
+ $innerHTML .= $childNode->ownerDocument->saveHTML($childNode);
+ }
+
+ $result .= '' . strip_tags($innerHTML, '
') . '
';
+
+ $epub->AddPage($result, false, $title);
+ }
+ }
+
+ if ( ! $epub->error ) {
+ $epub->CreateEPUB();
+ }
+
+ if ( ! $epub->error ) {
+ echo 'Success: ' . $epub->epub_file . ' created.
';
+ } else {
+ echo $epub->error;
+ }
+ }
+ }
+
// list existing files
$files = glob('epub/*');
foreach ($files as $file)
@@ -123,8 +226,8 @@
?>
-
-';
-}
-
-/*ini_set('display_errors', '1');
-ini_set('display_startup_errors', '1');
-error_reporting(E_ALL);*/
-
-require('TPEpubCreator.php');
-$feedurl = 'https://www.mediapart.fr/articles/feed';
-$opts = [
- 'http' => [
- 'method' => "GET",
- 'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid,
- ]
-];
-$context = stream_context_create($opts);
-
-$epub = new TPEpubCreator();
-$epub->temp_folder = 'temp/';
-$epub->epub_file = 'mediapart.epub';
-$epub->title = 'Mediapart';
-
-// load feeds
-$feed = file_get_contents($feedurl);
-$xml = new SimpleXMLElement($feed);
-$items = $xml->xpath("/rss/channel/item");
-
-setlocale(LC_ALL, 'fr_FR.UTF8', 'fr_FR','fr','fr','fra','fr_FR@euro');
-$localedate = strftime("%A %d %B %Y");
-
-foreach ($items as $item)
-{
- $title = $item->title;
-
- $category = $item->xpath('dc:subject')[0];
- $author = $item->xpath('dc:creator')[0];
- $summary = $item->description;
-
- $article = file_get_contents($item->link, false, $context);
- $doc = new DOMDocument();
- $doc->loadHTML($article);
- $finder = new DomXPath($doc);
-
- // strip images
- $toremove = $finder->query('//svg');
- foreach ($toremove as $elt)
- {
- $elt->parentNode->removeChild($elt);
- }
- $toremove = $finder->query('//figure');
- foreach ($toremove as $elt)
- {
- $elt->parentNode->removeChild($elt);
- }
- $toremove = $finder->query('//span[@class="screen-reader-only"]');
- foreach ($toremove as $elt)
- {
- $elt->parentNode->removeChild($elt);
- }
-
- $result = '' . $title . '
';
- $result .= '' . $author . '
';
- $result .= '' . $item->pubDate . '
';
- $result .= '' . $summary . '
';
-
- $nodes = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
- if (!$nodes->length)
- {
- // articles accès libre
- $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]');
- }
-
- if (!$nodes->length)
- {
- echo 'warning: could not get content of "' . $title . '"
';
- }
- else
- {
- $node = $nodes->item(0);
-
- $innerHTML = '';
- foreach ($node->childNodes as $childNode){
- $innerHTML .= $childNode->ownerDocument->saveHTML($childNode);
- }
- $outerHTML = $node->ownerDocument->saveHTML($node);
- $textcontent = $node->textContent;
-
- $result .= '' . strip_tags($innerHTML, '
') . '
';
-
- $epub->AddPage($result, false, $title);
- }
-
-
- //echo $result;
- //break;
-}
-
-if ( ! $epub->error )
-{
- $epub->CreateEPUB();
-
- if ( ! $epub->error ) {
- echo 'Success: Download your book here.';
- }
-
- } else {
- echo $epub->error;
-}
-?>
-
-
-
-