<?php

/*
 * tools/global-candidates.php -- find duplicate language tags.
 *
 * Origin: commit c27997e5662b8a9209fb487f735b1412ac600a91
 * (Simon Rettberg, Thu, 16 Apr 2020 13:47:16 +0200, "Add tool to find duplicate lang tags").
 *
 * Reads every ./modules-available/<module>/lang/<lang>/template-tags.json
 * (path is relative to the current working directory) and prints two reports:
 *
 *  1. Tag names that occur in at least MIN_MODULES_FOR_COMMON_TAG modules,
 *     together with the distinct strings found for them per language.
 *  2. Strings that are stored under at least MIN_TAG_NAMES_FOR_DUPLICATE
 *     different tag names, with the modules using each name.
 *
 * Usage: php global-candidates.php [langs [tag ...]]
 *   langs  comma separated list without spaces; it is inserted verbatim into
 *          a glob brace expression. Defaults to 'en'.
 *   tag    any number of tag names; for each one found, the collected
 *          strings are dumped in the first report.
 */

// A tag must be present in at least this many modules to be reported as common.
const MIN_MODULES_FOR_COMMON_TAG = 4;
// "Possible candidate" is printed when (distinct strings / modules) is below this.
const MAX_VARIANT_RATIO = 0.26;
// A string must be stored under at least this many tag names to be reported.
const MIN_TAG_NAMES_FOR_DUPLICATE = 3;

$langs = ($argc > 1 && $argv[1]) ? $argv[1] : 'en';

echo "Scanning for $langs... (pass as comma separated list, no spaces)\n";

// $tags[tagName] = ['modules' => [module => true], 'lang' => [lang => [string => true]]]
$tags = [];
// $strings[string][tagName][module] = number of files in which this combination was seen
$strings = [];

$files = glob('./modules-available/*/lang/{' . $langs . '}/template-tags.json', GLOB_NOSORT | GLOB_BRACE);
if ($files === false) {
    // glob() returns false on error; treat that like "nothing found"
    $files = [];
}

foreach ($files as $file) {
    // Capture the complete module and language directory names from the path
    if (!preg_match('#modules-available/([^/]+)/lang/([^/]+)/#', $file, $out)) {
        continue;
    }
    $module = $out[1];
    $lang = $out[2];

    $raw = file_get_contents($file);
    if ($raw === false) {
        continue;
    }
    $entries = json_decode($raw, true);
    if (!is_array($entries)) {
        continue;
    }

    foreach ($entries as $name => $text) {
        if (is_array($text)) {
            // Nested structures cannot be used as array keys; skip them
            continue;
        }
        $text = (string)$text;

        // Nested arrays are created on demand by the assignments below
        $tags[$name]['modules'][$module] = true;
        $tags[$name]['lang'][$lang][$text] = true;

        // Keep a real counter, since the second report prints it next to the module name
        if (!isset($strings[$text][$name][$module])) {
            $strings[$text][$name][$module] = 0;
        }
        $strings[$text][$name][$module]++;
    }
}

if ($argc > 1) {
    // Tag names to look up, flipped so they can be tested with isset()
    $find = array_flip(array_slice($argv, 2));
    print_r($find);
} else {
    $find = [];
}

echo "\n\nDUPLICATE TAG NAME ACROSS DIFFERENT MODULES:\n";
// Iterate by value: no reference is left behind that a later loop could write through
foreach ($tags as $name => $tag) {
    if (isset($find[$name])) {
        echo "## LOOKUP: '$name'\n";
        print_r($tag['lang']);
    }

    $modules = array_keys($tag['modules']);
    $moduleCount = count($modules);
    if ($moduleCount < MIN_MODULES_FOR_COMMON_TAG) {
        continue;
    }

    echo "## Common tag '$name'\n";
    echo " In " . $moduleCount . " modules: " . implode(', ', $modules) . "\n";
    foreach ($tag['lang'] as $lang => $variants) {
        $texts = array_keys($variants);
        echo " " . count($texts) . " in $lang: '" . implode("', '", $texts) . "'\n";
        // Few distinct strings compared to the number of modules using the tag
        if (count($texts) / $moduleCount < MAX_VARIANT_RATIO) {
            echo " +++ Possible candidate +++\n";
        }
    }
}

echo "\n\nDUPLICATE STRINGS WITH DIFFERENT NAMES:\n";
foreach ($strings as $text => $names) {
    if (count($names) < MIN_TAG_NAMES_FOR_DUPLICATE) {
        continue;
    }
    echo "## '$text' ##\n";
    foreach ($names as $name => $mods) {
        echo " As $name in";
        foreach ($mods as $mod => $count) {
            echo " {$mod}({$count}) ";
        }
        echo "\n";
    }
}