diff --git a/epub.php b/epub.php
index c02857f..26475c0 100644
--- a/epub.php
+++ b/epub.php
@@ -157,7 +157,7 @@
$content = '
' . $article->title . '
';
//$content .= '' . $article->abstract . '';
$content .= 'Page ' . $article->page . '
';
-
+
if (count($article->rubrics))
{
$content .= 'Rubriques: ' . implode(',', $article->rubrics) . '
';
@@ -175,19 +175,19 @@
{
if ($item->class == 'quote')
{
- $content .= '' .$item->content . '
';
+ $content .= '' .$item->content . '
';
}
else if ($item->class == 'paragraphTitle')
{
- $content .= '' .$item->content . '
';
+ $content .= '' .$item->content . '
';
}
else if ($item->class == 'introduction')
{
- $content .= '' .$item->content . '
';
+ $content .= '' .$item->content . '
';
}
else
{
- $content .= '' .$item->content . '
';
+ $content .= '' .$item->content . '
';
}
}
}
@@ -206,11 +206,11 @@
else
{
$epub->AddPage('', false, 'Page ' . $page, true);
- }
+ }
$page++;
}
$epub->AddPage($content, false, $article->title);
-
+
}
write_epub($epub);
}
@@ -291,19 +291,39 @@
write_epub($epub);
}
- // Mediapart
+ // Refresh the Mediapart session id on request, whichever Mediapart export
+ // is selected, and persist it by replacing the old value in settings.php.
+ if (isset($_POST['forcempcookies']) && $_POST['forcempcookies'])
+ {
+     $old = (string) $mp_sessionid;
+     $new = get_mp_cookie();
+     $settings = file_get_contents('settings.php');
+
+     if (!is_string($new) || $new === '')
+     {
+         // Keep the current id: storing an empty value would leave nothing
+         // to search for the next time the id has to be replaced.
+         echo "warning: could not get a new mpsessid, keeping the current one.\n";
+     }
+     else if ($settings === false || $old === '' || strpos($settings, $old) === false)
+     {
+         // Use the fresh id for this run, but do not write the file back:
+         // str_replace() on an unreadable file (false) yields an empty string
+         // that would wipe settings.php, and without a match nothing changes.
+         $mp_sessionid = $new;
+         echo "warning: could not update mpsessid in settings.\n";
+     }
+     else
+     {
+         $mp_sessionid = $new;
+         file_put_contents('settings.php', str_replace($old, $new, $settings));
+         echo 'Updated mpsessid in settings.
';
+     }
+ }
+
+ // Mediapart RSS
if (isset($_POST['mediapart']) && $_POST['mediapart'])
{
- if (isset($_POST['forcempcookies']) && $_POST['forcempcookies'])
- {
- $old = $mp_sessionid;
- $mp_sessionid = get_mp_cookie();
- $settings = file_get_contents('settings.php');
- $settings = str_replace($old, $mp_sessionid, $settings);
- file_put_contents('settings.php', $settings);
- echo 'Updated mpsessid in settings.
';
- }
-
$feedurl = 'https://www.mediapart.fr/articles/feed';
$opts = [
'http' => [
@@ -386,6 +386,117 @@
write_epub($epub);
}
+ // Mediapart home page: builds an epub with one page per article linked from the home page.
+ if (isset($_POST['mphome']) && $_POST['mphome'])
+ {
+     $homeurl = 'https://www.mediapart.fr';
+     $opts = [
+         'http' => [
+             'method' => "GET",
+             'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid,
+         ]
+     ];
+     $context = stream_context_create($opts);
+
+     $epub = new TPEpubCreator();
+     $epub->temp_folder = 'temp/';
+     $epub->epub_file = 'epub/mediaparthome.epub';
+     $epub->title = 'Mediapart home ' . $date;
+
+     // Text content of the $index-th node matching $query, or '' when the
+     // page has no such node (changed markup, error page, ...).
+     $nodeText = function ($finder, $query, $index = 0)
+     {
+         $node = $finder->query($query)->item($index);
+         return $node ? $node->textContent : '';
+     };
+
+     // The extracted values are plain text: escape them before embedding them in the page markup.
+     $escape = function ($text)
+     {
+         return htmlspecialchars($text, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
+     };
+
+     $links = [];
+     $home = file_get_contents($homeurl, false, $context);
+     if ($home === false || $home === '')
+     {
+         // DOMDocument::loadHTML() rejects an empty source (warning or ValueError depending on the PHP version).
+         echo "warning: could not get the Mediapart home page\n";
+     }
+     else
+     {
+         $doc = new DOMDocument();
+         $doc->loadHTML($home);
+         $finder = new DomXPath($doc);
+
+         $xpath = '//a[@data-js = "teaser-link" and contains(@href, "/journal/") and not(contains(@href, "/dossier/")) and not(contains(@href, "/studio/")) and not(contains(@href, "fil-")) and (@href != "/journal/series")]/@href';
+
+         $links = $finder->query($xpath);
+     }
+
+     foreach ($links as $link)
+     {
+         $url = $homeurl . $link->value;
+
+         $article = file_get_contents($url, false, $context);
+         if ($article === false || $article === '')
+         {
+             echo 'warning: could not get "' . $url . '"' . "\n";
+             continue;
+         }
+         $doc = new DOMDocument();
+         $doc->loadHTML($article);
+         $finder = new DomXPath($doc);
+
+         $title = $nodeText($finder, '//h1');
+         $author = $nodeText($finder, '//a[contains(@href, "biographie")]');
+         // Deliberately not named $date: the outer $date feeds the epub title
+         // above and must stay intact for any code that runs after this section.
+         $published = $nodeText($finder, '//time', 1);
+         $summary = $nodeText($finder, '//p[contains(@class, "news__heading__top__intro")]');
+
+         // strip images and screen-reader-only labels
+         $unwanted = ['//svg', '//figure', '//span[@class="screen-reader-only"]'];
+         foreach ($unwanted as $query)
+         {
+             foreach ($finder->query($query) as $elt)
+             {
+                 $elt->parentNode->removeChild($elt);
+             }
+         }
+
+         $result = '' . $escape($title) . '
';
+         $result .= '' . $escape($author) . '
';
+         $result .= '' . $escape($published) . '
';
+         $result .= '' . $escape($summary) . '
';
+
+         $nodes = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
+         if (!$nodes->length)
+         {
+             // free-access articles
+             $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]');
+         }
+
+         if (!$nodes->length)
+         {
+             echo 'warning: could not get content of "' . $title . '"
';
+             continue;
+         }
+
+         $innerHTML = '';
+         foreach ($nodes->item(0)->childNodes as $childNode)
+         {
+             $innerHTML .= $childNode->ownerDocument->saveHTML($childNode);
+         }
+         $result .= '' . strip_tags($innerHTML, '
') . '
';
+
+         $epub->AddPage($result, false, $title);
+     }
+
+     write_epub($epub);
+ }
+
// New York Times
if (isset($_POST['nyt']) && $_POST['nyt'])
{
@@ -565,6 +655,9 @@
+
+
+