Compare commits

...

4 Commits

Author SHA1 Message Date
quenousimporte ee0f2fe47c add politis cover 2024-09-05 14:08:19 +02:00
quenousimporte 9a52832348 parse rss in reverse to respect timing 2024-09-05 14:08:06 +02:00
quenousimporte dc707dcf1e change cafeyn file name 2024-09-05 14:07:45 +02:00
quenousimporte 4b2424ae17 todos and comments 2024-09-05 14:07:27 +02:00
1 changed file with 16 additions and 9 deletions

View File

@ -14,6 +14,7 @@
function get_clean_html($node) function get_clean_html($node)
{ {
// todo check words with "-" (like "sous-traitance")
$innerHTM = ''; $innerHTM = '';
foreach ($node->childNodes as $childNode){ foreach ($node->childNodes as $childNode){
$nodeHTML = $childNode->ownerDocument->saveHTML($childNode); $nodeHTML = $childNode->ownerDocument->saveHTML($childNode);
@ -74,6 +75,7 @@
// articles accès libre // articles accès libre
$nodes = $finder->query('//div[contains(@class, "news__body__center__article")]'); $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]');
} }
// todo add "boite noire"
if (!$nodes->length) if (!$nodes->length)
{ {
@ -205,8 +207,8 @@
$epub = new TPEpubCreator(); $epub = new TPEpubCreator();
$epub->temp_folder = 'temp/'; $epub->temp_folder = 'temp/';
$epub->epub_file = 'epub/' . $pubname . '.epub'; $epub->epub_file = 'epub/' . $pubname . 'cafeyn.epub';
$epub->title = $pubname . ' ' . $today ; $epub->title = $pubname . ' Cafeyn - ' . $today ;
$list = (array)$publication->articles; $list = (array)$publication->articles;
$page = 1; $page = 1;
@ -394,7 +396,7 @@
$xml = new SimpleXMLElement($feed); $xml = new SimpleXMLElement($feed);
$items = $xml->xpath("/rss/channel/item"); $items = $xml->xpath("/rss/channel/item");
foreach ($items as $item) foreach (array_reverse($items) as $item)
{ {
add_mp_article($item->link, $context, $epub); add_mp_article($item->link, $context, $epub);
} }
@ -410,24 +412,29 @@
$opts = [ $opts = [
'http' => [ 'http' => [
'method' => "GET", 'method' => "GET",
//'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid, // todo //'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid, // todo?
] ]
]; ];
$context = stream_context_create($opts); $context = stream_context_create($opts);
// todo add cover!
$epub = new TPEpubCreator(); $epub = new TPEpubCreator();
$epub->temp_folder = 'temp/'; $epub->temp_folder = 'temp/';
$epub->epub_file = 'epub/politis' . $issuenb . '.epub'; $epub->epub_file = 'epub/politis' . $issuenb . '.epub';
$epub->title = 'Politis ' . $issuenb . ' - RSS ' . $today; $epub->title = 'Politis ' . $issuenb . ' - RSS ' . $today;
// find cover
$issues = file_get_contents('https://www.politis.fr/boutique/magazines-unite/');
$doc = new DOMDocument();
$doc->loadHTML($issues);
$finder = new DomXPath($doc);
$cover = $finder->query('//img[contains(@src, "' . $issuenb . '_")]')->item(0)->getAttribute('src');
$epub->AddImage($cover, 'image/jpeg', true);
// load feeds // load feeds
$feed = file_get_contents($feedurl); $feed = file_get_contents($feedurl);
$xml = new SimpleXMLElement($feed); $xml = new SimpleXMLElement($feed);
$items = $xml->xpath("/rss/channel/item"); $items = $xml->xpath("/rss/channel/item");
// reverse to respect timing
foreach (array_reverse($items) as $item) foreach (array_reverse($items) as $item)
{ {
$url = $item->link; $url = $item->link;
@ -497,7 +504,7 @@
write_epub($epub); write_epub($epub);
} }
// New York Times // New York Times RSS
if (isset($_POST['nyt']) && $_POST['nyt']) if (isset($_POST['nyt']) && $_POST['nyt'])
{ {
$feedurl = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'; $feedurl = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml';
@ -519,7 +526,7 @@
$xml = new SimpleXMLElement($feed); $xml = new SimpleXMLElement($feed);
$items = $xml->xpath("/rss/channel/item"); $items = $xml->xpath("/rss/channel/item");
foreach ($items as $item) foreach (array_reverse($items) as $item)
{ {
$title = $item->title; $title = $item->title;
$author = $item->xpath('dc:creator')[0]; $author = $item->xpath('dc:creator')[0];