add politis (rss)
This commit is contained in:
parent
2a0274068a
commit
a7a1095487
109
epub.php
109
epub.php
|
@ -545,6 +545,110 @@
|
|||
}
|
||||
write_epub($epub);
|
||||
}
|
||||
|
||||
// Politis
|
||||
// Build an EPUB from the Politis RSS feed, restricted to articles published
// during the last 7 days. Triggered by the "politis" checkbox of the form.
// Every network/parsing failure is reported as a 'warning: ...<br>' line and
// the offending feed or article is skipped instead of aborting the page.
if (isset($_POST['politis']) && $_POST['politis'])
{
    // NOTE: an authenticated variant (stream context sending a session cookie
    // via the 'http' => 'header' option) was sketched here but is not enabled;
    // articles are fetched anonymously.

    $epub = new TPEpubCreator();
    $epub->temp_folder = 'temp/';
    $epub->epub_file = 'epub/politis.epub';
    $epub->title = 'Politis - RSS ' . $today;

    // Escapes plain text before it is concatenated into the page markup:
    // textContent is entity-decoded, so a raw '&' or '<' would otherwise
    // produce malformed XHTML.
    $esc = function ($text)
    {
        return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    // Returns the trimmed text of the first node matching $query, or '' when
    // nothing matches (avoids reading ->textContent on null).
    $first_text = function ($xpath, $query)
    {
        $found = $xpath->query($query);
        if ($found === false || !$found->length)
        {
            return '';
        }
        return trim($found->item(0)->textContent);
    };

    // load feeds
    $feedurl = 'https://www.politis.fr/flux-rss-politis-fr/';
    $items = null;
    $feed = file_get_contents($feedurl);
    if ($feed !== false && $feed !== '')
    {
        try
        {
            $xml = new SimpleXMLElement($feed);
            $items = $xml->xpath("/rss/channel/item");
        }
        catch (Exception $e)
        {
            // Malformed XML: handled below as a feed failure.
            $items = null;
        }
    }

    // TODO (untested alternative): build from the issue's table of contents
    // instead of the RSS feed, via
    //   https://www.politis.fr/wp-admin/admin-ajax.php
    //   action=get_sommaire  id_produit=310761  nonce=89e48e1696

    if (!is_array($items))
    {
        echo 'warning: could not load feed "' . $esc($feedurl) . '"<br>';
    }
    else
    {
        // Loop-invariant: computed once instead of on every iteration.
        $cutoff = new DateTime('-7 days');

        foreach ($items as $item)
        {
            // restrict to last week
            try
            {
                $published = new DateTime(trim((string) $item->pubDate));
            }
            catch (Exception $e)
            {
                echo 'warning: could not parse date of "' . $esc((string) $item->link) . '"<br>';
                continue;
            }
            if ($published < $cutoff)
            {
                continue;
            }

            $url = trim((string) $item->link);
            $article = ($url === '') ? false : file_get_contents($url);
            if ($article === false || $article === '')
            {
                echo 'warning: could not download "' . $esc($url) . '"<br>';
                continue;
            }

            $doc = new DOMDocument();
            // Real-world HTML5 triggers many libxml warnings; silence them for
            // this call only rather than flooding the page.
            if (!$doc->loadHTML($article, LIBXML_NOWARNING | LIBXML_NOERROR))
            {
                echo 'warning: could not parse "' . $esc($url) . '"<br>';
                continue;
            }
            $finder = new DOMXPath($doc);

            $title = $first_text($finder, '//h1');
            if ($title === '')
            {
                // Fall back to the feed's own title when the page has no <h1>.
                $title = trim((string) $item->title);
            }
            $author = $first_text($finder, '//div[@class="auteur_date"]/span[@class="nom"]');
            $date = $first_text($finder, '//div[@class="auteur_date"]/span[@class="date"]');
            $summary = $first_text($finder, '//div[@class="extrait"]');

            // Drop <script> elements before the content is serialized.
            // (Stripping //svg, //figure and span.screen-reader-only was tried
            // here as well and is currently disabled.)
            $toremove = $finder->query('//script');
            foreach ($toremove as $elt)
            {
                if ($elt->parentNode !== null)
                {
                    $elt->parentNode->removeChild($elt);
                }
            }

            $result = '<h1>' . $esc($title) . '</h1>';
            $result .= '<p>' . $esc($author) . '</p>';
            $result .= '<p>' . $esc($date) . '</p>';
            $result .= '<p><b>' . $esc($summary) . '</b></p>';

            $nodes = $finder->query('//main/section[@class="section_top" or @class="section_content"]');
            if ($nodes === false || !$nodes->length)
            {
                echo 'warning: could not get content of "' . $esc($title) . '"<br>';
                continue;
            }

            // First match is the top section, second (when present) the body.
            // Only those two are used; a page with a single matching section
            // no longer passes null to get_clean_html().
            for ($i = 0; $i < 2; $i++)
            {
                $node = $nodes->item($i);
                if ($node !== null)
                {
                    $result .= get_clean_html($node);
                }
            }

            $epub->AddPage($result, false, $title);
        }

        write_epub($epub);
    }
}
|
||||
|
||||
?>
|
||||
|
||||
<h1>Generate epub</h1>
|
||||
|
@ -581,6 +685,11 @@
|
|||
<input id="nyttoday" name="nyttoday" type="checkbox">
|
||||
<label for="nyttoday">From today's paper embedded script</label>
|
||||
|
||||
<h2>Politis</h2>
|
||||
<input id="politis" name="politis" type="checkbox">
|
||||
<label for="politis">From RSS (last 7 days)</label>
|
||||
<br>
|
||||
|
||||
<h2>Admin</h2>
|
||||
<input id="emptycache" name="emptycache" type="checkbox" checked>
|
||||
<label for="emptycache">Empty cache</label>
|
||||
|
|
Loading…
Reference in New Issue