diff options
author | Simon Rettberg | 2023-11-17 17:30:19 +0100 |
---|---|---|
committer | Simon Rettberg | 2023-11-17 17:30:19 +0100 |
commit | ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90 (patch) | |
tree | 04f5210dcd1149e8ec566c03a3a209b83e014b48 /modules-available/sysconfig | |
parent | [inc/Download] Add user-agent string to avoid 403 from Wikipedia (diff) | |
download | slx-admin-ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90.tar.gz slx-admin-ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90.tar.xz slx-admin-ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90.zip |
[sysconfig] Update Wikipedia extraction logic
Diffstat (limited to 'modules-available/sysconfig')
-rw-r--r-- | modules-available/sysconfig/addmodule_branding.inc.php | 31 |
1 files changed, 24 insertions, 7 deletions
```diff
diff --git a/modules-available/sysconfig/addmodule_branding.inc.php b/modules-available/sysconfig/addmodule_branding.inc.php
index 4cbcb66f..54b2ad57 100644
--- a/modules-available/sysconfig/addmodule_branding.inc.php
+++ b/modules-available/sysconfig/addmodule_branding.inc.php
@@ -99,28 +99,44 @@ class Branding_ProcessFile extends AddModule_Base
 	private static function downloadSvg(string $svgName, string $url, &$title): bool
 	{
 		$title = false;
-		// [wikipedia] Did someone paste a link to a thumbnail of the svg? Let's fix that...
-		if (preg_match('#^(.*)/thumb/(.*\.svg)/.*\.svg#', $url, $out)) {
-			$url = $out[1] . '/' . $out[2];
-		}
 		for ($i = 0; $i < 5; ++$i) {
+			// [wikipedia] Did someone paste a link to a thumbnail of the svg? Let's fix that...
+			if (preg_match('#^(.*)/thumb/(.*\.svg)/.*\.svg#', $url, $out)) {
+				$url = $out[1] . '/' . $out[2];
+			}
 			$code = 400;
 			if (!Download::toFile($svgName, $url, 3, $code) || $code < 200 || $code > 299) {
 				Message::addError('remote-timeout', $url, $code);
 				return false;
 			}
-			$content = FileUtil::readFile($svgName, 25000);
+			$content = FileUtil::readFile($svgName, 250000);
 			// Is svg file?
 			if (strpos($content, '<svg') !== false)
 				return true; // Found an svg tag - don't try to find links to the actual image
 			// [wikipedia] Try to be nice and detect links that might give a hint where the svg can be found
-			if (preg_match_all('#href="([^"]*upload.wikimedia.org/[^"]*/[^"]*/[^"]*\.svg|[^"]+/[^"]+:[^"]+\.svg[^"]*)"#', $content, $out, PREG_PATTERN_ORDER)) {
+			$out1 = $out2 = $out3 = null;
+			if (preg_match_all('#href="([^"]*upload.wikimedia.org/[^"]*/[^"]*/[^"]*\.svg)"#', $content, $out1, PREG_PATTERN_ORDER)
+				|| preg_match_all('#src="([^"]*upload.wikimedia.org/[^"]*/thumb/[^"]*\.svg/[^"]+\.svg[^"]*)"#', $content, $out2, PREG_PATTERN_ORDER)
+				|| preg_match_all('#href="([^"]+/[^"]+:[^"]+\.svg)"#', $content, $out3, PREG_PATTERN_ORDER)) {
 				if ($title === false && preg_match('#<title>([^<]*)</title>#i', $content, $tout)) {
 					$title = trim(preg_replace('/\W*Wikipedia.*/', '', $tout[1]));
 				}
 				$new = false;
-				foreach ($out[1] as $res) {
+				$out = [];
+				if (isset($out1[1])) {
+					$out += $out1[1];
+				}
+				if (isset($out2[1])) {
+					$out += $out2[1];
+				}
+				if (isset($out3[1])) {
+					$out += $out3[1];
+				}
+				foreach ($out as $res) {
+					error_log("Match '$res'");
+					if (!preg_match('/hochschule|univers|logo|siegel/i', $res))
+						continue;
 					if (strpos($res, 'action=edit') !== false)
 						continue;
 					$new = Branding_ProcessFile::internetCombineUrl($url, html_entity_decode($res, ENT_COMPAT, 'UTF-8'));
@@ -129,6 +145,7 @@ class Branding_ProcessFile extends AddModule_Base
 				}
 				if ($new === $url || $new === false)
 					break;
+				error_log("New: '$new'");
 				$url = $new;
 				continue;
 			}
```