diff --git a/epub.php b/epub.php index e5c8eb2..da221b6 100644 --- a/epub.php +++ b/epub.php @@ -22,7 +22,7 @@ $epub = new TPEpubCreator(); $epub->temp_folder = 'temp/'; - $epub->epub_file = 'epub/lemonde' . $date . '.epub'; + $epub->epub_file = 'epub/lemonde.epub'; $epub->title = 'Le Monde ' . $date ; if ($lm_includecover) @@ -78,12 +78,12 @@ { $imageid = $image->ContentItemId; $imageurl = preg_replace('/GetPublicationContentItems-.*\.json/', 'Image-MEDIUM-' . $imageid . '.jpg', $url); - + $tempcontent = file_get_contents($imageurl); file_put_contents('temp/' . $imageid, $tempcontent); - + //$epub->AddImage('temp/' . $imageid, false, false ); - + $pagecontent .= '
'; if ($image->HtmlText) { @@ -116,21 +116,19 @@ // Mediapart if (isset($_POST['mediapart']) && $_POST['mediapart']) { - $sessionid = $_POST['mpsessid']; - $feedurl = 'https://www.mediapart.fr/articles/feed'; $opts = [ 'http' => [ 'method' => "GET", - 'header' => "Accept-language: en\nCookie: MPSESSID=" . $sessionid, + 'header' => "Accept-language: en\nCookie: MPSESSID=" . $mp_sessionid, ] ]; $context = stream_context_create($opts); $epub = new TPEpubCreator(); $epub->temp_folder = 'temp/'; - $epub->epub_file = 'epub/mediapart' . $date . '.epub'; - $epub->title = 'Mediapart'; + $epub->epub_file = 'epub/mediapart.epub'; + $epub->title = 'Mediapart ' . $date; // load feeds $feed = file_get_contents($feedurl); @@ -213,22 +211,129 @@ } else { echo $epub->error; - } + } } + // New York Times + if (isset($_POST['nyt']) && $_POST['nyt']) + { + + $feedurl = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'; + $opts = [ + 'http' => [ + 'method' => "GET", + 'header' => "Accept-language: en\nCookie: NYT-S=" . $nyt_sessionid, + ] + ]; + $context = stream_context_create($opts); + + $epub = new TPEpubCreator(); + $epub->temp_folder = 'temp/'; + $epub->epub_file = 'epub/newyorktimes.epub'; + $epub->title = 'The New York Times ' . $date; + + // load feeds + $feed = file_get_contents($feedurl); + $xml = new SimpleXMLElement($feed); + $items = $xml->xpath("/rss/channel/item"); + + foreach ($items as $item) + { + $title = $item->title; + + $category = $item->xpath('dc:subject')[0]; + $author = $item->xpath('dc:creator')[0]; + $summary = $item->description; + + $article = file_get_contents($item->link, false, $context); + $doc = new DOMDocument(); + $doc->loadHTML($article); + $finder = new DomXPath($doc); + + // strip images + /*$toremove = $finder->query('//svg'); + foreach ($toremove as $elt) + { + $elt->parentNode->removeChild($elt); + } + $toremove = $finder->query('//figure'); + foreach ($toremove as $elt) + { + $elt->parentNode->removeChild($elt); + } + $toremove = $finder->query('//span[@class="screen-reader-only"]'); + foreach ($toremove as $elt) + { + $elt->parentNode->removeChild($elt); + }*/ + + $result = '' . $author . '
'; + $result .= '' . $item->pubDate . '
'; + $result .= '' . $summary . '
'; + + $nodes = $finder->query('//section[@name="articleBody"]'); + /*if (!$nodes->length) + { + // articles accès libre + $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]'); + }*/ + + if (!$nodes->length) + { + echo 'warning: could not get content of "' . $title . '"') . '