diff --git a/mediapart.php b/mediapart.php index c64a638..f470964 100644 --- a/mediapart.php +++ b/mediapart.php @@ -1,6 +1,10 @@ loadHTML($article); $finder = new DomXPath($doc); - // clean images - $figures = $finder->query('//figure'); - foreach ($figures as $figure) + $svgs = $finder->query('//svg'); + foreach ($svgs as $svg) { - $figure->parentNode->removeChild($figure); + $svg->parentNode->removeChild($svg); } - // clean "à lire aussi" and inline quotes - $asides = $finder->query('//aside'); - foreach ($asides as $aside) - { - $aside->parentNode->removeChild($aside); - } - - // improve section titles - $asides = $finder->query('//h2'); - foreach ($asides as $aside) - { - $aside->textContent = "\n*" . $aside->textContent . "*\n"; - // todo keep h2 - } - - $maintext = $finder->query('//div[contains(@class, "paywall-restricted-content")]'); $result = '
' . $author . '
'; $result .= '' . $item->pubDate . '
'; $result .= '' . $summary . '
'; - // todo use html instead - $result .= '