mediapart: simplify content
This commit is contained in:
parent
42fca272af
commit
5dea3734dd
|
@ -51,10 +51,21 @@ foreach ($items as $item)
|
||||||
$doc->loadHTML($article);
|
$doc->loadHTML($article);
|
||||||
$finder = new DomXPath($doc);
|
$finder = new DomXPath($doc);
|
||||||
|
|
||||||
$svgs = $finder->query('//svg');
|
// strip images and other non-text elements (svg, figure, screen-reader-only spans)
|
||||||
foreach ($svgs as $svg)
|
$toremove = $finder->query('//svg');
|
||||||
|
foreach ($toremove as $elt)
|
||||||
{
|
{
|
||||||
$svg->parentNode->removeChild($svg);
|
$elt->parentNode->removeChild($elt);
|
||||||
|
}
|
||||||
|
$toremove = $finder->query('//figure');
|
||||||
|
foreach ($toremove as $elt)
|
||||||
|
{
|
||||||
|
$elt->parentNode->removeChild($elt);
|
||||||
|
}
|
||||||
|
$toremove = $finder->query('//span[@class="screen-reader-only"]');
|
||||||
|
foreach ($toremove as $elt)
|
||||||
|
{
|
||||||
|
$elt->parentNode->removeChild($elt);
|
||||||
}
|
}
|
||||||
|
|
||||||
$result = '<h1>' . $title . '</h1>';
|
$result = '<h1>' . $title . '</h1>';
|
||||||
|
@ -84,7 +95,7 @@ foreach ($items as $item)
|
||||||
$outerHTML = $node->ownerDocument->saveHTML($node);
|
$outerHTML = $node->ownerDocument->saveHTML($node);
|
||||||
$textcontent = $node->textContent;
|
$textcontent = $node->textContent;
|
||||||
|
|
||||||
$result .= '<div>' . $innerHTML . '</div>';
|
$result .= '<div>' . strip_tags($innerHTML, '<p><b><h2><i>') . '</div>';
|
||||||
|
|
||||||
$epub->AddPage($result, false, $title);
|
$epub->AddPage($result, false, $title);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue