summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Rettberg, 2023-11-17 17:30:19 +0100
committer: Simon Rettberg, 2023-11-17 17:30:19 +0100
commit: ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90 (patch)
tree: 04f5210dcd1149e8ec566c03a3a209b83e014b48
parent: [inc/Download] Add user-agent string to avoid 403 from Wikipedia (diff)
download: slx-admin-ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90.tar.gz
slx-admin-ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90.tar.xz
slx-admin-ceb1ee698d13c5bbc50efa9b5c7d3bf268ce2d90.zip
[sysconfig] Update Wikipedia extraction logic
-rw-r--r-- modules-available/sysconfig/addmodule_branding.inc.php 31
1 files changed, 24 insertions, 7 deletions
diff --git a/modules-available/sysconfig/addmodule_branding.inc.php b/modules-available/sysconfig/addmodule_branding.inc.php
index 4cbcb66f..54b2ad57 100644
--- a/modules-available/sysconfig/addmodule_branding.inc.php
+++ b/modules-available/sysconfig/addmodule_branding.inc.php
@@ -99,28 +99,44 @@ class Branding_ProcessFile extends AddModule_Base
private static function downloadSvg(string $svgName, string $url, &$title): bool
{
$title = false;
- // [wikipedia] Did someone paste a link to a thumbnail of the svg? Let's fix that...
- if (preg_match('#^(.*)/thumb/(.*\.svg)/.*\.svg#', $url, $out)) {
- $url = $out[1] . '/' . $out[2];
- }
for ($i = 0; $i < 5; ++$i) {
+ // [wikipedia] Did someone paste a link to a thumbnail of the svg? Let's fix that...
+ if (preg_match('#^(.*)/thumb/(.*\.svg)/.*\.svg#', $url, $out)) {
+ $url = $out[1] . '/' . $out[2];
+ }
$code = 400;
if (!Download::toFile($svgName, $url, 3, $code) || $code < 200 || $code > 299) {
Message::addError('remote-timeout', $url, $code);
return false;
}
- $content = FileUtil::readFile($svgName, 25000);
+ $content = FileUtil::readFile($svgName, 250000);
// Is svg file?
if (strpos($content, '<svg') !== false)
return true; // Found an svg tag - don't try to find links to the actual image
// [wikipedia] Try to be nice and detect links that might give a hint where the svg can be found
- if (preg_match_all('#href="([^"]*upload.wikimedia.org/[^"]*/[^"]*/[^"]*\.svg|[^"]+/[^"]+:[^"]+\.svg[^"]*)"#', $content, $out, PREG_PATTERN_ORDER)) {
+ $out1 = $out2 = $out3 = null;
+ if (preg_match_all('#href="([^"]*upload.wikimedia.org/[^"]*/[^"]*/[^"]*\.svg)"#', $content, $out1, PREG_PATTERN_ORDER)
+ || preg_match_all('#src="([^"]*upload.wikimedia.org/[^"]*/thumb/[^"]*\.svg/[^"]+\.svg[^"]*)"#', $content, $out2, PREG_PATTERN_ORDER)
+ || preg_match_all('#href="([^"]+/[^"]+:[^"]+\.svg)"#', $content, $out3, PREG_PATTERN_ORDER)) {
if ($title === false && preg_match('#<title>([^<]*)</title>#i', $content, $tout)) {
$title = trim(preg_replace('/\W*Wikipedia.*/', '', $tout[1]));
}
$new = false;
- foreach ($out[1] as $res) {
+ $out = [];
+ if (isset($out1[1])) {
+ $out += $out1[1];
+ }
+ if (isset($out2[1])) {
+ $out += $out2[1];
+ }
+ if (isset($out3[1])) {
+ $out += $out3[1];
+ }
+ foreach ($out as $res) {
+ error_log("Match '$res'");
+ if (!preg_match('/hochschule|univers|logo|siegel/i', $res))
+ continue;
if (strpos($res, 'action=edit') !== false)
continue;
$new = Branding_ProcessFile::internetCombineUrl($url, html_entity_decode($res, ENT_COMPAT, 'UTF-8'));
@@ -129,6 +145,7 @@ class Branding_ProcessFile extends AddModule_Base
}
if ($new === $url || $new === false)
break;
+ error_log("New: '$new'");
$url = $new;
continue;
}