extracted url from curl command:
' . $url . '
'; } $epub = new TPEpubCreator(); $epub->temp_folder = 'temp/'; $epub->epub_file = 'epub/socialter.epub'; $epub->title = 'Socialter ' . $date ; // cache json in case url expires $tempjsonpath = 'temp/' . hash('md5', $url) . '.json'; $json = ''; if (file_exists($tempjsonpath)) { $json = file_get_contents($tempjsonpath); } else { $json = file_get_contents($url); file_put_contents($tempjsonpath, $json); } $publication = json_decode($json); $content = array_filter($publication->Content, function($item) { return $item->Category == 'Le Monde'; }); usort($content, function ($a, $b) { return $a->PageNumber - $b->PageNumber; }); if ($includepages) { $pageindex = json_decode(file_get_contents(str_replace('GetPublicationContentItems', 'GetPublicationPages', $url))); } $page = 0; foreach ($content as $article) { if ($includepages && $article->PageNumber > $page) { $page = $article->PageNumber; $pageobj = array_values(array_filter($pageindex->Page, function($p) { global $page; return $page == $p->PageNumber; }))[0]; $pageid = $pageobj->PublicationPageID; $path = lm_download_image($url, 'Preview-' . $imagesize . '-', $pageid); if ($page == 1) { // cover $epub->AddImage($path, 'image/jpeg', true); } else { $epub->AddPage('', false, 'Page ' . $page, true); } } $articlebody = array_filter($article->ContentItem, function($item) { return $item->ContentType == 'text/xml'; }); $articlebody = array_values($articlebody)[0]; if ($articlebody->Title && $articlebody->HtmlText) { $pagecontent = 'Page ' . $article->PageNumber . '
'; $author = array_filter($article->ContentItem, function($item) { return $item->ContentType == 'author/xml'; }); $author = array_values($author)[0]; if ($author->Author) { $pagecontent .= $author->Author; } if ($articlebody->Introduction) { $pagecontent .= '' . $articlebody->Introduction . ''; } if ($includeimages) { $images = array_values(array_filter($article->ContentItem, function($item) { return $item->ContentType == 'graphic/jpeg' || $item->ContentType == 'image/jpeg'; })); foreach ($images as $image) { $path = lm_download_image($url, 'Image-MEDIUM-', $image->ContentItemId); $pagecontent .= ''; } } $pagecontent .= $articlebody->HtmlText; if (!$imagesonly) { $epub->AddPage($pagecontent, false, strip_tags($articlebody->Title), true); } } } write_epub($epub); }*/

// Le Monde
//
// Builds epub/lemonde.epub from the publication JSON found at the URL posted in
// `lmurl` (either a bare URL or a pasted `curl '<url>' ...` command).
//
// Reads:  $_POST['lemonde'], $_POST['lmurl'], $date, $includepages
// Calls:  TPEpubCreator, lm_download_image(), write_epub()
// Writes: temp/<md5 of url>.json (cache of the publication JSON)
//
// NOTE(review): several string literals in this block are empty or contain only
// a newline; presumably they once held HTML markup - confirm against the
// original file before relying on the generated page content.
if (isset($_POST['lemonde']) && $_POST['lemonde']) {
    // Only accept a plain string; a missing or array-valued field becomes ''.
    $url = isset($_POST['lmurl']) && is_string($_POST['lmurl']) ? trim($_POST['lmurl']) : '';

    // extract url from curl command
    if (str_starts_with($url, 'curl ')) {
        // The URL is the first single-quoted argument; fall back to '' when the
        // pasted command contains no quote at all.
        $url = explode("'", $url)[1] ?? '';
        // The value comes straight from the request, so escape it before echoing.
        echo "extracted url from curl command:\n" . htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "\n";
    }

    $epub = new TPEpubCreator();
    $epub->temp_folder = 'temp/';
    $epub->epub_file = 'epub/lemonde.epub';
    $epub->title = 'Le Monde ' . $date;

    // Returns the first entry of $items whose ContentType equals $type, or null.
    $lmfirstitem = static function ($items, string $type): ?object {
        $list = is_array($items) ? $items : [];
        foreach ($list as $item) {
            if (($item->ContentType ?? null) === $type) {
                return $item;
            }
        }
        return null;
    };

    // cache json in case url expires
    $tempjsonpath = 'temp/' . hash('md5', $url) . '.json';
    $json = '';
    $publication = null;
    if (is_file($tempjsonpath)) {
        $json = file_get_contents($tempjsonpath);
        $publication = is_string($json) && $json !== '' ? json_decode($json) : null;
    }
    // Cache miss, or a cache file that does not decode to a publication (for
    // example an empty file left behind by an earlier failed download).
    $isremote = preg_match('#^https?://#i', $url) === 1;
    if (!isset($publication->Content) || !is_array($publication->Content)) {
        $publication = null;
        // Only fetch http(s) URLs: the value is user input and file_get_contents()
        // would otherwise also open local paths and other stream wrappers.
        if ($isremote) {
            $json = file_get_contents($url);
            $publication = is_string($json) ? json_decode($json) : null;
            if (isset($publication->Content) && is_array($publication->Content)) {
                // Cache successful, well-formed responses only.
                file_put_contents($tempjsonpath, $json);
            } else {
                $publication = null;
            }
        }
    }

    if ($publication === null) {
        echo "warning: could not load Le Monde publication data\n";
    } else {
        $content = array_filter($publication->Content, static function ($item) {
            return ($item->Category ?? null) === 'Le Monde';
        });
        usort($content, static function ($a, $b) {
            return $a->PageNumber <=> $b->PageNumber;
        });

        // The page index is only needed for page previews, so skip the extra
        // request otherwise. Index it once by page number (first entry wins)
        // instead of re-scanning the whole list for every article.
        $pageindex = null;
        $pagesbynumber = [];
        if ($includepages && $isremote) {
            $pageindexjson = file_get_contents(str_replace('GetPublicationContentItems', 'GetPublicationPages', $url));
            $pageindex = is_string($pageindexjson) ? json_decode($pageindexjson) : null;
            $pagelist = isset($pageindex->Page) && is_array($pageindex->Page) ? $pageindex->Page : [];
            foreach ($pagelist as $p) {
                if (isset($p->PageNumber) && !isset($pagesbynumber[$p->PageNumber])) {
                    $pagesbynumber[$p->PageNumber] = $p;
                }
            }
        }

        $page = 0;
        foreach ($content as $article) {
            // Articles are sorted by page, so a higher page number means the
            // first article of a new page: emit that page's preview first.
            if ($includepages && $article->PageNumber > $page) {
                $page = $article->PageNumber;
                $pageobj = $pagesbynumber[$page] ?? null;
                if ($pageobj === null) {
                    echo 'warning: could not find page ' . htmlspecialchars((string) $page, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . " in page index\n";
                } else {
                    $pageid = $pageobj->PublicationPageID;
                    $path = lm_download_image($url, 'Preview-MEDIUM-', $pageid);
                    // Loose comparison kept on purpose: the JSON type of
                    // PageNumber is not visible here.
                    if ($page == 1) {
                        // cover
                        $epub->AddImage($path, 'image/jpeg', true);
                    } else {
                        $epub->AddPage('', false, 'Page ' . $page, true);
                    }
                }
            }

            $items = $article->ContentItem ?? null;
            $articlebody = $lmfirstitem($items, 'text/xml');
            // Skip entries without a text part, a title or a body.
            if (empty($articlebody->Title) || empty($articlebody->HtmlText)) {
                continue;
            }

            $pagecontent = 'Page ' . $article->PageNumber . "\n";
            $author = $lmfirstitem($items, 'author/xml');
            if (!empty($author->Author)) {
                $pagecontent .= $author->Author;
            }
            if (!empty($articlebody->Introduction)) {
                $pagecontent .= '' . $articlebody->Introduction . '';
            }
            $pagecontent .= $articlebody->HtmlText;
            $epub->AddPage($pagecontent, false, strip_tags($articlebody->Title), true);
        }

        write_epub($epub);
    }
}

// Mediapart if (isset($_POST['mediapart']) && $_POST['mediapart']) { if (isset($_POST['forcempcookies']) && $_POST['forcempcookies']) { $old = $mp_sessionid; $mp_sessionid = get_mp_cookie(); $settings = file_get_contents('settings.php'); $settings = str_replace($old, $mp_sessionid, $settings); file_put_contents('settings.php', $settings); echo '' . $author . '
'; $result .= '' . $item->pubDate . '
'; $result .= '' . $summary . '
'; $nodes = $finder->query('//div[contains(@class, "paywall-restricted-content")]'); if (!$nodes->length) { // articles accès libre $nodes = $finder->query('//div[contains(@class, "news__body__center__article")]'); } if (!$nodes->length) { echo 'warning: could not get content of "' . $title . '"') . '
' . $author . '
'; $result .= '' . $item->pubDate . '
'; $result .= '' . $summary . '
'; $nodes = $finder->query('//section[@name="articleBody"]'); if (!$nodes->length) { echo 'warning: could not get content of "' . $title . '"') . '
'; $creators = ($data->initialState->{ $item->bylines[0]->id })->creators; foreach ($creators as $creator) { $author = $data->initialState->{ $creator->id }; $result .= $author->displayName . ' '; } $result .= '
'; $result .= '' . $item->lastMajorModification . '
'; $result .= '' . $summary . '
'; $article = file_get_contents($item->url, false, $context); $doc = new DOMDocument(); $doc->loadHTML($article); $finder = new DomXPath($doc); $nodes = $finder->query('//section[@name="articleBody"]'); if (!$nodes->length) { echo 'warning: could not get content of "' . $title . '"') . '