Compare commits
No commits in common. "a7a1095487700270b56b33c974f2638c30d53d53" and "c6f604c28b9fac6f2b74e3f15fadeae933c88f35" have entirely different histories.
a7a1095487
...
c6f604c28b
133
epub.php
133
epub.php
|
@ -150,6 +150,16 @@
|
||||||
return $temp_img_path;
|
return $temp_img_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function inner_html($node)
|
||||||
|
{
|
||||||
|
$innerHTML = '';
|
||||||
|
foreach ($node->childNodes as $childNode)
|
||||||
|
{
|
||||||
|
$innerHTML .= $childNode->ownerDocument->saveHTML($childNode);
|
||||||
|
}
|
||||||
|
return $innerHTML;
|
||||||
|
}
|
||||||
|
|
||||||
function get_json($url)
|
function get_json($url)
|
||||||
{
|
{
|
||||||
// cache json in case url expires
|
// cache json in case url expires
|
||||||
|
@ -168,7 +178,7 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
date_default_timezone_set('Europe/Paris');
|
date_default_timezone_set('Europe/Paris');
|
||||||
$today = (new DateTime('today'))->format('Ymd');
|
$date = (new DateTime('today'))->format('Ymd');
|
||||||
|
|
||||||
if (isset($_POST['emptycache']) && $_POST['emptycache'])
|
if (isset($_POST['emptycache']) && $_POST['emptycache'])
|
||||||
{
|
{
|
||||||
|
@ -201,7 +211,7 @@
|
||||||
$epub = new TPEpubCreator();
|
$epub = new TPEpubCreator();
|
||||||
$epub->temp_folder = 'temp/';
|
$epub->temp_folder = 'temp/';
|
||||||
$epub->epub_file = 'epub/' . $pubname . '.epub';
|
$epub->epub_file = 'epub/' . $pubname . '.epub';
|
||||||
$epub->title = $pubname . ' ' . $today ;
|
$epub->title = $pubname . ' ' . $date ;
|
||||||
|
|
||||||
$list = (array)$publication->articles;
|
$list = (array)$publication->articles;
|
||||||
$page = 1;
|
$page = 1;
|
||||||
|
@ -286,7 +296,7 @@
|
||||||
$epub = new TPEpubCreator();
|
$epub = new TPEpubCreator();
|
||||||
$epub->temp_folder = 'temp/';
|
$epub->temp_folder = 'temp/';
|
||||||
$epub->epub_file = 'epub/lemonde.epub';
|
$epub->epub_file = 'epub/lemonde.epub';
|
||||||
$epub->title = 'Le Monde ' . $today;
|
$epub->title = 'Le Monde ' . $date ;
|
||||||
|
|
||||||
$publication = get_json($url);
|
$publication = get_json($url);
|
||||||
$content = array_filter($publication->Content, function($item) { return $item->Category == 'Le Monde'; });
|
$content = array_filter($publication->Content, function($item) { return $item->Category == 'Le Monde'; });
|
||||||
|
@ -372,7 +382,7 @@
|
||||||
$epub = new TPEpubCreator();
|
$epub = new TPEpubCreator();
|
||||||
$epub->temp_folder = 'temp/';
|
$epub->temp_folder = 'temp/';
|
||||||
$epub->epub_file = 'epub/mediapart.epub';
|
$epub->epub_file = 'epub/mediapart.epub';
|
||||||
$epub->title = 'Mediapart - RSS ' . $today;
|
$epub->title = 'Mediapart - RSS ' . $date;
|
||||||
|
|
||||||
// load feeds
|
// load feeds
|
||||||
$feed = file_get_contents($feedurl);
|
$feed = file_get_contents($feedurl);
|
||||||
|
@ -402,7 +412,7 @@
|
||||||
$epub = new TPEpubCreator();
|
$epub = new TPEpubCreator();
|
||||||
$epub->temp_folder = 'temp/';
|
$epub->temp_folder = 'temp/';
|
||||||
$epub->epub_file = 'epub/mediaparthome.epub';
|
$epub->epub_file = 'epub/mediaparthome.epub';
|
||||||
$epub->title = 'Mediapart - A la une ' . $today;
|
$epub->title = 'Mediapart - A la une ' . $date;
|
||||||
|
|
||||||
$home = file_get_contents($homeurl, false, $context);
|
$home = file_get_contents($homeurl, false, $context);
|
||||||
$doc = new DOMDocument();
|
$doc = new DOMDocument();
|
||||||
|
@ -436,7 +446,7 @@
|
||||||
$epub = new TPEpubCreator();
|
$epub = new TPEpubCreator();
|
||||||
$epub->temp_folder = 'temp/';
|
$epub->temp_folder = 'temp/';
|
||||||
$epub->epub_file = 'epub/newyorktimes.epub';
|
$epub->epub_file = 'epub/newyorktimes.epub';
|
||||||
$epub->title = 'The New York Times ' . $today;
|
$epub->title = 'The New York Times ' . $date;
|
||||||
|
|
||||||
// load feeds
|
// load feeds
|
||||||
$feed = file_get_contents($feedurl);
|
$feed = file_get_contents($feedurl);
|
||||||
|
@ -492,7 +502,7 @@
|
||||||
$epub = new TPEpubCreator();
|
$epub = new TPEpubCreator();
|
||||||
$epub->temp_folder = 'temp/';
|
$epub->temp_folder = 'temp/';
|
||||||
$epub->epub_file = 'epub/newyorktimestoday.epub';
|
$epub->epub_file = 'epub/newyorktimestoday.epub';
|
||||||
$epub->title = 'The New York Times today\'s paper ' . $today;
|
$epub->title = 'The New York Times today\'s paper ' . $date;
|
||||||
|
|
||||||
// grab today's paper
|
// grab today's paper
|
||||||
$start = strpos($todaypage, "window.__preloadedData = ") + 25;
|
$start = strpos($todaypage, "window.__preloadedData = ") + 25;
|
||||||
|
@ -545,110 +555,6 @@
|
||||||
}
|
}
|
||||||
write_epub($epub);
|
write_epub($epub);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Politis
|
|
||||||
if (isset($_POST['politis']) && $_POST['politis'])
|
|
||||||
{
|
|
||||||
// cookies?
|
|
||||||
/*$opts = [
|
|
||||||
'http' => [
|
|
||||||
'method' => "GET",
|
|
||||||
'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid,
|
|
||||||
]
|
|
||||||
];
|
|
||||||
$context = stream_context_create($opts);*/
|
|
||||||
|
|
||||||
$epub = new TPEpubCreator();
|
|
||||||
$epub->temp_folder = 'temp/';
|
|
||||||
$epub->epub_file = 'epub/politis.epub';
|
|
||||||
$epub->title = 'Politis - RSS ' . $today;
|
|
||||||
|
|
||||||
// load feeds
|
|
||||||
$feedurl = 'https://www.politis.fr/flux-rss-politis-fr/';
|
|
||||||
$feed = file_get_contents($feedurl);
|
|
||||||
$xml = new SimpleXMLElement($feed);
|
|
||||||
$items = $xml->xpath("/rss/channel/item");
|
|
||||||
|
|
||||||
// toc-based to test
|
|
||||||
/*
|
|
||||||
action=get_sommaire
|
|
||||||
id_produit=310761
|
|
||||||
nonce=89e48e1696
|
|
||||||
https://www.politis.fr/wp-admin/admin-ajax.php
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
foreach ($items as $item)
|
|
||||||
{
|
|
||||||
if (new DateTime($item->pubDate) < new DateTime('-7 days'))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// restrict to last week
|
|
||||||
$url = $item->link;
|
|
||||||
|
|
||||||
$article = file_get_contents($url); //, false, $context);
|
|
||||||
$doc = new DOMDocument();
|
|
||||||
$doc->loadHTML($article);
|
|
||||||
$finder = new DomXPath($doc);
|
|
||||||
|
|
||||||
$title = $finder->query('//h1')->item(0)->textContent;
|
|
||||||
$author = $finder->query('//div[@class="auteur_date"]/span[@class="nom"]')->item(0)->textContent;
|
|
||||||
$date = $finder->query('//div[@class="auteur_date"]/span[@class="date"]')->item(0)->textContent;
|
|
||||||
$summary = $finder->query('//div[@class="extrait"]')->item(0)->textContent;
|
|
||||||
|
|
||||||
// strip images
|
|
||||||
/*$toremove = $finder->query('//svg');
|
|
||||||
foreach ($toremove as $elt)
|
|
||||||
{
|
|
||||||
$elt->parentNode->removeChild($elt);
|
|
||||||
}
|
|
||||||
$toremove = $finder->query('//figure');
|
|
||||||
foreach ($toremove as $elt)
|
|
||||||
{
|
|
||||||
$elt->parentNode->removeChild($elt);
|
|
||||||
}
|
|
||||||
$toremove = $finder->query('//span[@class="screen-reader-only"]');
|
|
||||||
foreach ($toremove as $elt)
|
|
||||||
{
|
|
||||||
$elt->parentNode->removeChild($elt);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
$toremove = $finder->query('//script');
|
|
||||||
foreach ($toremove as $elt)
|
|
||||||
{
|
|
||||||
$elt->parentNode->removeChild($elt);
|
|
||||||
}
|
|
||||||
|
|
||||||
$result = '<h1>' . $title . '</h1>';
|
|
||||||
$result .= '<p>' . $author . '</p>';
|
|
||||||
$result .= '<p>' . $date . '</p>';
|
|
||||||
$result .= '<p><b>' . $summary . '</b></p>';
|
|
||||||
|
|
||||||
$nodes = $finder->query('//main/section[@class="section_top" or @class="section_content"]');
|
|
||||||
if (!$nodes->length)
|
|
||||||
{
|
|
||||||
echo 'warning: could not get content of "' . $title . '"<br>';
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// top
|
|
||||||
$node = $nodes->item(0);
|
|
||||||
$nodehtml = get_clean_html($node);
|
|
||||||
$result .= $nodehtml;
|
|
||||||
|
|
||||||
// content
|
|
||||||
$node = $nodes->item(1);
|
|
||||||
$nodehtml = get_clean_html($node);
|
|
||||||
$result .= $nodehtml;
|
|
||||||
|
|
||||||
$epub->AddPage($result, false, $title);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
write_epub($epub);
|
|
||||||
}
|
|
||||||
|
|
||||||
?>
|
?>
|
||||||
|
|
||||||
<h1>Generate epub</h1>
|
<h1>Generate epub</h1>
|
||||||
|
@ -685,11 +591,6 @@
|
||||||
<input id="nyttoday" name="nyttoday" type="checkbox">
|
<input id="nyttoday" name="nyttoday" type="checkbox">
|
||||||
<label for="nyttoday">From today's paper embedded script</label>
|
<label for="nyttoday">From today's paper embedded script</label>
|
||||||
|
|
||||||
<h2>Politis</h2>
|
|
||||||
<input id="politis" name="politis" type="checkbox">
|
|
||||||
<label for="politis">From RSS (last 7 days)</label>
|
|
||||||
<br>
|
|
||||||
|
|
||||||
<h2>Admin</h2>
|
<h2>Admin</h2>
|
||||||
<input id="emptycache" name="emptycache" type="checkbox" checked>
|
<input id="emptycache" name="emptycache" type="checkbox" checked>
|
||||||
<label for="emptycache">Empty cache</label>
|
<label for="emptycache">Empty cache</label>
|
||||||
|
|
Loading…
Reference in New Issue