82 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
			
		
		
	
	
			82 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			PHP
		
	
	
	
<?php
/**
 * Builds a plain-text digest of the Mediapart RSS feed and sends it as a
 * downloadable .txt file.
 *
 * For every item of the feed the script downloads the linked article page
 * (sending the MPSESSID session cookie), removes <figure> and <aside>
 * elements, wraps <h2> titles in asterisks and appends the text of the first
 * <div> whose class contains "paywall-restricted-content".
 *
 * The output starts with a summary ("Sommaire") listing every article title,
 * followed by one section per article.
 */

// Value of the MPSESSID cookie; it is sent with every article request.
$sessionid = 'value of cookie MPSESSID';
$feedurl = 'https://www.mediapart.fr/articles/feed';

$opts = [
	'http' => [
		'method' => "GET",
		// Passed as an array so that PHP joins the header lines itself
		// instead of relying on a bare "\n" separator.
		'header' => [
			"Accept-language: en",
			"Cookie: MPSESSID=" . $sessionid,
		],
	],
];
$context = stream_context_create($opts);

// libxml reports HTML5 elements such as <figure> or <aside> as parse errors.
// Collect them internally so that no warning is printed before the header()
// calls at the end of the script.
libxml_use_internal_errors(true);

// load feeds
$feed = file_get_contents($feedurl);
if ($feed === false)
{
	http_response_code(502);
	exit('Unable to download feed ' . $feedurl . "\n");
}

try
{
	$xml = new SimpleXMLElement($feed);
}
catch (Exception $e)
{
	http_response_code(502);
	exit('Unable to parse feed ' . $feedurl . ': ' . $e->getMessage() . "\n");
}

$items = $xml->xpath("/rss/channel/item");
if (!is_array($items))
{
	// xpath() does not return an array when the query fails.
	$items = [];
}

setlocale(LC_ALL, 'fr_FR.UTF8', 'fr_FR','fr','fr','fra','fr_FR@euro');

// strftime() is deprecated as of PHP 8.1, so prefer the intl extension when
// it is available. The ICU pattern mirrors "%A %d %B %Y".
$localedate = false;
if (class_exists('IntlDateFormatter'))
{
	$formatter = new IntlDateFormatter(
		'fr_FR',
		IntlDateFormatter::FULL,
		IntlDateFormatter::NONE,
		null,
		null,
		'EEEE dd MMMM y'
	);
	$localedate = $formatter->format(time());
}
if ($localedate === false)
{
	$localedate = strftime("%A %d %B %Y");
}

$header = "Mediapart - " . $localedate . "\n\nSommaire:\n";
$result = '';

foreach ($items as $item)
{
	$title = (string) $item->title;

	// dc:subject / dc:creator may be missing from an item; fall back to ''.
	$subjects = $item->xpath('dc:subject');
	$category = !empty($subjects) ? (string) $subjects[0] : '';
	$creators = $item->xpath('dc:creator');
	$author = !empty($creators) ? (string) $creators[0] : '';
	$summary = (string) $item->description;

	// Add to content
	$header .= '- ' . $title . "\n";

	// Output
	$result .= "\n------\n\n";
	$result .= "Article : " . $title . "\n";
	$result .= "Date : " . $item->pubDate . "\n";
	$result .= 'Catégorie : ' . str_replace("\n", '', $category) . "\n";
	$result .= 'Auteur.ice : ' . $author . "\n";
	$result .= "Introduction : " . $summary . "\n";
	$result .= "\n";

	// Download the article page; on failure keep the metadata written above
	// and move on to the next item.
	$link = (string) $item->link;
	$article = file_get_contents($link, false, $context);
	if ($article === false || trim($article) === '')
	{
		error_log('mediapart: unable to download article ' . $link);
		continue;
	}

	$doc = new DOMDocument();
	$loaded = $doc->loadHTML($article);
	// Drop the parse errors collected by libxml so they do not pile up
	// across articles.
	libxml_clear_errors();
	if (!$loaded)
	{
		error_log('mediapart: unable to parse article ' . $link);
		continue;
	}

	$finder = new DOMXPath($doc);

	// clean images, "à lire aussi" blocks and inline quotes
	foreach ($finder->query('//figure | //aside') as $node)
	{
		if ($node->parentNode !== null)
		{
			$node->parentNode->removeChild($node);
		}
	}

	// improve section titles
	foreach ($finder->query('//h2') as $heading)
	{
		$heading->textContent = "\n*" . $heading->textContent . "*\n";
	}

	// article main text
	$maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]');
	if ($maintext !== false && $maintext->length > 0)
	{
		$result .= $maintext->item(0)->textContent;
	}
	else
	{
		error_log('mediapart: no main text found in article ' . $link);
	}
}

$filename = 'mediapart' . str_replace(' ', '', $localedate) . '.txt';

header('Content-Type: application/text; charset=utf-8');
// The file name may contain accented month names: quote it and also provide
// the RFC 6266 "filename*" form with the UTF-8 bytes percent-encoded.
header(
	'Content-Disposition: attachment; filename="' . $filename . '"'
	. "; filename*=UTF-8''" . rawurlencode($filename)
);
echo $header . "\n" . $result;