diff --git a/mediapart.php b/mediapart.php index 19b9fe3..f45cc87 100644 --- a/mediapart.php +++ b/mediapart.php @@ -1,133 +1,84 @@ -textContent; - $content = str_replace('Partager sur Facebook', '', $content); - $content = str_replace('Partager sur X (ex Twitter)', '', $content); - - $content = str_replace('Lire + tard', '', $content); - $content = str_replace('Offrir l’article', '', $content); - $content = str_replace(' PDF', '', $content); - $content = str_replace('Cette fonctionnalité est actuellement en maintenance.La génération de PDF reste disponible via la fonctionnalité «Imprimer».', '', $content); - - $content = str_replace("Fermer\n", '', $content); - $content = str_replace("Recommander\n", '', $content); - $content = str_replace("Commenter\n", '', $content); - - - $content = str_replace(' ', '', $content); - $content = str_replace("\n", "\n\n", $content); - for ($i = 0; $i < 15; $i++) - { - $content = str_replace("\n\n\n", "\n\n", $content); - } - - return $content; -} - -$sessionid = 'value of cookie MPSESSID'; -$feedurl = 'https://www.mediapart.fr/articles/feed'; -$opts = [ - 'http' => [ - 'method' => "GET", - 'header' => "Accept-language: en\nCookie: MPSESSID=" . $sessionid, - ] -]; -$context = stream_context_create($opts); - -// load feeds -$feed = file_get_contents($feedurl); -$xml = new SimpleXMLElement($feed); -$items = $xml->xpath("/rss/channel/item"); - -setlocale(LC_ALL, 'fr_FR.UTF8', 'fr_FR','fr','fr','fra','fr_FR@euro'); -$localedate = strftime("%A %d %B %Y"); - -$header = "Mediapart - " . $localedate . "\n\nSommaire:\n"; -$result = ''; - -foreach ($items as $item) -{ - if (str_starts_with($item->pubDate, date("D, j M Y"))) - { - $title = $item->title; - - // Add to content - $header .= '- ' . $title . "\n"; - - $article = file_get_contents($item->link, false, $context); - $doc = new DOMDocument(); - $doc->loadHTML($article); - $finder = new DomXPath($doc); - - $category = trim($finder->query('//p[@class="news__heading__top__kicker _default"]')->item(0)->textContent); - $summary = trim($finder->query('//p[@class="news__heading__top__intro margin-top:500"]')->item(0)->textContent); - $author = trim($finder->query('//a[@class="focus color:text-brand heading-product:100"]')->item(0)->textContent); - - // clean images - $figures = $finder->query('//figure'); - foreach ($figures as $figure) - { - $figure->parentNode->removeChild($figure); - } - - // clean "à lire aussi" and inline quotes - $asides = $finder->query('//aside'); - foreach ($asides as $aside) - { - $aside->parentNode->removeChild($aside); - } - - // improve section titles - $asides = $finder->query('//h2'); - foreach ($asides as $aside) - { - $aside->textContent = '*' . $aside->textContent . '*'; - } - - // Output - - // old: add header as one block - //$part1 = $finder->query('//div[@class="news__heading grid"]'); - //$result .= cleantext($part1->item(0)); - - $result .= "\n------\n\n"; - $result .= "Article : " . $item->title . "\n"; - $result .= 'Catégorie: ' . str_replace("\n", '', $category) . "\n"; - $result .= 'Auteur.ice : ' . $author . "\n"; - $result .= "\n"; - $result .= "Introduction :\n" . $summary . "\n"; - - // article main text - $maintext = $finder->query('//div[@class="news__body__center"]'); - $result .= cleantext($maintext->item(0)); - } - //break; -} - -// web version: - -/* -file_put_contents('mediapart.txt', $header . "\n" . $result); - -echo '