<html>
<body>
<?php
/*
 * Downloads the Mediapart RSS feed, fetches every article listed in it using
 * the MPSESSID session cookie, strips non-text elements, and collects the
 * articles as pages of an EPUB written to "mediapart.epub".
 *
 * The session id defaults to the placeholder below and can be overridden by
 * posting the "mpsessid" field of the form at the bottom of the page.
 */

// Placeholder: replace with the value of the MPSESSID cookie.
$sessionid = 'value of cookie MPSESSID';

if (isset($_POST['mpsessid']) && is_string($_POST['mpsessid']) && $_POST['mpsessid'] !== '') {
    // The posted value ends up in an HTTP request header, so control
    // characters (CR/LF in particular), spaces and ';' must be rejected to
    // prevent header/cookie injection.
    if (preg_match('/[\x00-\x20\x7F;]/', $_POST['mpsessid'])) {
        echo 'Ignoring invalid MPSESSID<br>';
    } else {
        $sessionid = $_POST['mpsessid'];
        // Escape before echoing user input back into the page.
        echo 'Using ' . htmlspecialchars($sessionid, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '<br>';
    }
}

/* Debug switches, disabled by default:
ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');
error_reporting(E_ALL);*/

require('TPEpubCreator.php');

$feedurl = 'https://www.mediapart.fr/articles/feed';

// HTTP context used for the article requests; it carries the session cookie.
// Headers are given as an array so PHP joins them with proper line endings.
$opts = [
    'http' => [
        'method' => 'GET',
        'header' => [
            'Accept-language: en',
            'Cookie: MPSESSID=' . $sessionid,
        ],
    ],
];
$context = stream_context_create($opts);

$epub = new TPEpubCreator();
$epub->temp_folder = 'temp/';
$epub->epub_file = 'mediapart.epub';
$epub->title = 'Mediapart';

// Collect libxml parse errors internally instead of printing them as PHP
// warnings into the page (real-world HTML5 markup triggers many of them).
libxml_use_internal_errors(true);

// Load the feed. Note that the feed itself is requested without the cookie context.
$items = [];
$feedLoaded = false;
$feed = file_get_contents($feedurl);
if ($feed === false || $feed === '') {
    echo 'error: could not download the feed<br>';
} else {
    try {
        $xml = new SimpleXMLElement($feed);
        $found = $xml->xpath('/rss/channel/item');
        if (is_array($found)) {
            $items = $found;
        }
        $feedLoaded = true;
    } catch (Exception $e) {
        // SimpleXMLElement throws when the payload is not well-formed XML.
        echo 'error: could not parse the feed<br>';
    }
    libxml_clear_errors();
}

// NOTE(review): nothing in this script formats locale-dependent output any
// more (the unused, deprecated strftime() call was dropped); the locale is
// still set in case TPEpubCreator relies on it -- confirm and remove if not.
setlocale(LC_ALL, 'fr_FR.UTF8', 'fr_FR', 'fr', 'fr', 'fra', 'fr_FR@euro');

foreach ($items as $item) {
    $title = (string) $item->title;
    $safeTitle = htmlspecialchars($title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    // dc:creator may be absent; fall back to an empty author instead of
    // indexing into an empty result.
    $creators = $item->xpath('dc:creator');
    $author = !empty($creators) ? (string) $creators[0] : '';

    $summary = (string) $item->description;

    $article = file_get_contents((string) $item->link, false, $context);
    if ($article === false || $article === '') {
        echo 'warning: could not get content of "' . $safeTitle . '"<br>';
        continue;
    }

    // NOTE(review): loadHTML() falls back to ISO-8859-1 when the document
    // declares no charset it recognises -- verify accented characters survive.
    $doc = new DOMDocument();
    $doc->loadHTML($article);
    libxml_clear_errors();
    $finder = new DOMXPath($doc);

    // Strip images and screen-reader-only helper text.
    $toremove = $finder->query('//svg | //figure | //span[@class="screen-reader-only"]');
    foreach ($toremove as $elt) {
        if ($elt->parentNode !== null) {
            $elt->parentNode->removeChild($elt);
        }
    }

    // Page header. Feed text is plain text once SimpleXML has decoded it, so
    // it must be re-escaped before being placed into markup.
    $result = '<h1>' . $safeTitle . '</h1>';
    $result .= '<p>' . htmlspecialchars($author, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</p>';
    $result .= '<p>' . htmlspecialchars((string) $item->pubDate, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '</p>';
    // NOTE(review): the description is inserted as-is because feed
    // descriptions may carry their own markup -- confirm for this feed.
    $result .= '<p><b>' . $summary . '</b></p>';

    // Subscriber-only articles keep their body in the paywall container...
    $nodes = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
    if (!$nodes->length) {
        // ...free-access articles use the regular article container.
        $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]');
    }

    if (!$nodes->length) {
        echo 'warning: could not get content of "' . $safeTitle . '"<br>';
        continue;
    }

    // Serialise the children of the content node (its inner HTML).
    $node = $nodes->item(0);
    $innerHTML = '';
    foreach ($node->childNodes as $childNode) {
        $innerHTML .= $childNode->ownerDocument->saveHTML($childNode);
    }

    // Keep only basic text formatting tags in the article body.
    $result .= '<div>' . strip_tags($innerHTML, '<p><b><h2><i>') . '</div>';
    $epub->AddPage($result, false, $title);
}

if ($feedLoaded) {
    if (!$epub->error) {
        $epub->CreateEPUB();
    }

    // Report the error whether it was set while adding pages or while
    // building the EPUB file.
    if ($epub->error) {
        echo $epub->error;
    } else {
        echo 'Success: Download your book <a href="' . $epub->epub_file . '">here</a>.';
    }
}
?>
<form method="post">
Force MPSESSID: <input name="mpsessid"> <input type="submit">
</form>
</body>
</html>