diff --git a/index.html b/index.html index 451cb90..64e737a 100644 --- a/index.html +++ b/index.html @@ -17,7 +17,7 @@
Mediapart
- - Edition du jour en version texte
+ - 10 derniers articles en version texte
Les jours
Reporterre
Basta!
diff --git a/mediapart.php b/mediapart.php
index f45cc87..5888692 100644
--- a/mediapart.php
+++ b/mediapart.php
@@ -23,56 +23,54 @@ $result = '';
foreach ($items as $item)
{
- if (str_starts_with($item->pubDate, date("D, j M Y")))
- {
- $title = $item->title;
- //$category = $item->category;
- $category = $item->xpath('dc:subject')[0];
- $author = $item->xpath('dc:creator')[0];
- $summary = $item->description;
-
- // Add to content
- $header .= '- ' . $title . "\n";
-
- $article = file_get_contents($item->link, false, $context);
- $doc = new DOMDocument();
- $doc->loadHTML($article);
-
- $finder = new DomXPath($doc);
-
- // clean images
- $figures = $finder->query('//figure');
- foreach ($figures as $figure)
- {
- $figure->parentNode->removeChild($figure);
- }
-
- // clean "à lire aussi" and inline quotes
- $asides = $finder->query('//aside');
- foreach ($asides as $aside)
- {
- $aside->parentNode->removeChild($aside);
- }
-
- // improve section titles
- $asides = $finder->query('//h2');
- foreach ($asides as $aside)
- {
- $aside->textContent = "\n*" . $aside->textContent . "*\n";
- }
-
- // Output
- $result .= "\n------\n\n";
- $result .= "Article : " . $item->title . "\n";
- $result .= 'Catégorie : ' . str_replace("\n", '', $category) . "\n";
- $result .= 'Auteur.ice : ' . $author . "\n";
- $result .= "Introduction : " . $summary . "\n";
- $result .= "\n";
+ $title = $item->title;
+ //$category = $item->category;
+ $category = $item->xpath('dc:subject')[0];
+ $author = $item->xpath('dc:creator')[0];
+ $summary = $item->description;
- // article main text
- $maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
- $result .= $maintext->item(0)->textContent;
+ // Add to content
+ $header .= '- ' . $title . "\n";
+
+ $article = file_get_contents($item->link, false, $context);
+ $doc = new DOMDocument();
+ $doc->loadHTML($article);
+
+ $finder = new DomXPath($doc);
+
+ // clean images
+ $figures = $finder->query('//figure');
+ foreach ($figures as $figure)
+ {
+ $figure->parentNode->removeChild($figure);
}
+
+ // clean "à lire aussi" and inline quotes
+ $asides = $finder->query('//aside');
+ foreach ($asides as $aside)
+ {
+ $aside->parentNode->removeChild($aside);
+ }
+
+ // improve section titles
+ $asides = $finder->query('//h2');
+ foreach ($asides as $aside)
+ {
+ $aside->textContent = "\n*" . $aside->textContent . "*\n";
+ }
+
+ // Output
+ $result .= "\n------\n\n";
+ $result .= "Article : " . $item->title . "\n";
+ $result .= "Date : " . $item->pubDate . "\n";
+ $result .= 'Catégorie : ' . str_replace("\n", '', $category) . "\n";
+ $result .= 'Auteur.ice : ' . $author . "\n";
+ $result .= "Introduction : " . $summary . "\n";
+ $result .= "\n";
+
+ // article main text
+ $maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
+ $result .= $maintext->item(0)->textContent;
}
$filename = 'mediapart' . str_replace(' ', '', $localedate) . '.txt';