diff --git a/index.html b/index.html index 36d20f8..17bc439 100644 --- a/index.html +++ b/index.html @@ -17,7 +17,7 @@

Quotidiens

Mediapart - - 10 derniers articles en version texte
+ - Epub (10 derniers)
Les jours
Reporterre
Basta!
diff --git a/mediapart.php b/mediapart.php index 5888692..c64a638 100644 --- a/mediapart.php +++ b/mediapart.php @@ -1,6 +1,7 @@ [ @@ -10,6 +11,11 @@ $opts = [ ]; $context = stream_context_create($opts); +$epub = new TPEpubCreator(); +$epub->temp_folder = 'temp/'; +$epub->epub_file = 'mediapart.epub'; +$epub->title = 'Mediapart'; + // load feeds $feed = file_get_contents($feedurl); $xml = new SimpleXMLElement($feed); @@ -18,65 +24,66 @@ $items = $xml->xpath("/rss/channel/item"); setlocale(LC_ALL, 'fr_FR.UTF8', 'fr_FR','fr','fr','fra','fr_FR@euro'); $localedate = strftime("%A %d %B %Y"); -$header = "Mediapart - " . $localedate . "\n\nSommaire:\n"; -$result = ''; - foreach ($items as $item) { $title = $item->title; - //$category = $item->category; + $category = $item->xpath('dc:subject')[0]; $author = $item->xpath('dc:creator')[0]; $summary = $item->description; - - // Add to content - $header .= '- ' . $title . "\n"; - + $article = file_get_contents($item->link, false, $context); $doc = new DOMDocument(); $doc->loadHTML($article); - $finder = new DomXPath($doc); - + // clean images $figures = $finder->query('//figure'); foreach ($figures as $figure) { $figure->parentNode->removeChild($figure); } - + // clean "à lire aussi" and inline quotes $asides = $finder->query('//aside'); foreach ($asides as $aside) { $aside->parentNode->removeChild($aside); } - + // improve section titles $asides = $finder->query('//h2'); foreach ($asides as $aside) { $aside->textContent = "\n*" . $aside->textContent . "*\n"; + // todo keep h2 } - - // Output - $result .= "\n------\n\n"; - $result .= "Article : " . $item->title . "\n"; - $result .= "Date : " . $item->pubDate . "\n"; - $result .= 'Catégorie : ' . str_replace("\n", '', $category) . "\n"; - $result .= 'Auteur.ice : ' . $author . "\n"; - $result .= "Introduction : " . $summary . "\n"; - $result .= "\n"; - // article main text $maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]'); - $result .= $maintext->item(0)->textContent; + $result = '

' . $title . '

'; + $result .= '

' . $author . '

'; + $result .= '

' . $item->pubDate . '

'; + $result .= '

' . $summary . '

'; + + // todo use html instead + $result .= '
' . str_replace("\n", '
', $maintext->item(0)->textContent) . '
'; + + //echo $result; + + $epub->AddPage($result, false, $title); + + //break; } -$filename = 'mediapart' . str_replace(' ', '', $localedate) . '.txt'; +if ( ! $epub->error ) +{ + $epub->CreateEPUB(); -header('Content-Type: application/text; charset=utf-8'); -header('Content-Disposition: attachment; filename=' . $filename); -echo $header . "\n" . $result; + if ( ! $epub->error ) { + echo 'Success: Download your book here.'; + } + } else { + echo $epub->error; +} ?> \ No newline at end of file