diff --git a/index.html b/index.html index 451cb90..64e737a 100644 --- a/index.html +++ b/index.html @@ -17,7 +17,7 @@

Quotidiens

Mediapart - - Edition du jour en version texte
+ - 10 derniers articles en version texte
Les jours
Reporterre
Basta!
diff --git a/mediapart.php b/mediapart.php index f45cc87..5888692 100644 --- a/mediapart.php +++ b/mediapart.php @@ -23,56 +23,54 @@ $result = ''; foreach ($items as $item) { - if (str_starts_with($item->pubDate, date("D, j M Y"))) - { - $title = $item->title; - //$category = $item->category; - $category = $item->xpath('dc:subject')[0]; - $author = $item->xpath('dc:creator')[0]; - $summary = $item->description; - - // Add to content - $header .= '- ' . $title . "\n"; - - $article = file_get_contents($item->link, false, $context); - $doc = new DOMDocument(); - $doc->loadHTML($article); - - $finder = new DomXPath($doc); - - // clean images - $figures = $finder->query('//figure'); - foreach ($figures as $figure) - { - $figure->parentNode->removeChild($figure); - } - - // clean "à lire aussi" and inline quotes - $asides = $finder->query('//aside'); - foreach ($asides as $aside) - { - $aside->parentNode->removeChild($aside); - } - - // improve section titles - $asides = $finder->query('//h2'); - foreach ($asides as $aside) - { - $aside->textContent = "\n*" . $aside->textContent . "*\n"; - } - - // Output - $result .= "\n------\n\n"; - $result .= "Article : " . $item->title . "\n"; - $result .= 'Catégorie : ' . str_replace("\n", '', $category) . "\n"; - $result .= 'Auteur.ice : ' . $author . "\n"; - $result .= "Introduction : " . $summary . "\n"; - $result .= "\n"; + $title = $item->title; + //$category = $item->category; + $category = $item->xpath('dc:subject')[0]; + $author = $item->xpath('dc:creator')[0]; + $summary = $item->description; - // article main text - $maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]'); - $result .= $maintext->item(0)->textContent; + // Add to content + $header .= '- ' . $title . "\n"; + + $article = file_get_contents($item->link, false, $context); + $doc = new DOMDocument(); + $doc->loadHTML($article); + + $finder = new DomXPath($doc); + + // clean images + $figures = $finder->query('//figure'); + foreach ($figures as $figure) + { + $figure->parentNode->removeChild($figure); } + + // clean "à lire aussi" and inline quotes + $asides = $finder->query('//aside'); + foreach ($asides as $aside) + { + $aside->parentNode->removeChild($aside); + } + + // improve section titles + $asides = $finder->query('//h2'); + foreach ($asides as $aside) + { + $aside->textContent = "\n*" . $aside->textContent . "*\n"; + } + + // Output + $result .= "\n------\n\n"; + $result .= "Article : " . $item->title . "\n"; + $result .= "Date : " . $item->pubDate . "\n"; + $result .= 'Catégorie : ' . str_replace("\n", '', $category) . "\n"; + $result .= 'Auteur.ice : ' . $author . "\n"; + $result .= "Introduction : " . $summary . "\n"; + $result .= "\n"; + + // article main text + $maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]'); + $result .= $maintext->item(0)->textContent; } $filename = 'mediapart' . str_replace(' ', '', $localedate) . '.txt';