mediapart: simplify content

This commit is contained in:
quenousimporte 2024-05-30 09:12:23 +02:00
parent 42fca272af
commit 5dea3734dd
1 changed file with 15 additions and 4 deletions

View File

@@ -51,10 +51,21 @@ foreach ($items as $item)
$doc->loadHTML($article); $doc->loadHTML($article);
$finder = new DomXPath($doc); $finder = new DomXPath($doc);
$svgs = $finder->query('//svg'); // strip images
foreach ($svgs as $svg) $toremove = $finder->query('//svg');
foreach ($toremove as $elt)
{ {
$svg->parentNode->removeChild($svg); $elt->parentNode->removeChild($elt);
}
$toremove = $finder->query('//figure');
foreach ($toremove as $elt)
{
$elt->parentNode->removeChild($elt);
}
$toremove = $finder->query('//span[@class="screen-reader-only"]');
foreach ($toremove as $elt)
{
$elt->parentNode->removeChild($elt);
} }
$result = '<h1>' . $title . '</h1>'; $result = '<h1>' . $title . '</h1>';
@@ -84,7 +95,7 @@ foreach ($items as $item)
$outerHTML = $node->ownerDocument->saveHTML($node); $outerHTML = $node->ownerDocument->saveHTML($node);
$textcontent = $node->textContent; $textcontent = $node->textContent;
$result .= '<div>' . $innerHTML . '</div>'; $result .= '<div>' . strip_tags($innerHTML, '<p><b><h2><i>') . '</div>';
$epub->AddPage($result, false, $title); $epub->AddPage($result, false, $title);
} }