mediapart: simplify content
This commit is contained in:
parent
42fca272af
commit
5dea3734dd
|
@ -51,10 +51,21 @@ foreach ($items as $item)
|
|||
$doc->loadHTML($article);
|
||||
$finder = new DomXPath($doc);
|
||||
|
||||
$svgs = $finder->query('//svg');
|
||||
foreach ($svgs as $svg)
|
||||
// strip images
|
||||
$toremove = $finder->query('//svg');
|
||||
foreach ($toremove as $elt)
|
||||
{
|
||||
$svg->parentNode->removeChild($svg);
|
||||
$elt->parentNode->removeChild($elt);
|
||||
}
|
||||
$toremove = $finder->query('//figure');
|
||||
foreach ($toremove as $elt)
|
||||
{
|
||||
$elt->parentNode->removeChild($elt);
|
||||
}
|
||||
$toremove = $finder->query('//span[@class="screen-reader-only"]');
|
||||
foreach ($toremove as $elt)
|
||||
{
|
||||
$elt->parentNode->removeChild($elt);
|
||||
}
|
||||
|
||||
$result = '<h1>' . $title . '</h1>';
|
||||
|
@ -84,7 +95,7 @@ foreach ($items as $item)
|
|||
$outerHTML = $node->ownerDocument->saveHTML($node);
|
||||
$textcontent = $node->textContent;
|
||||
|
||||
$result .= '<div>' . $innerHTML . '</div>';
|
||||
$result .= '<div>' . strip_tags($innerHTML, '<p><b><h2><i>') . '</div>';
|
||||
|
||||
$epub->AddPage($result, false, $title);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue