diff options
Diffstat (limited to 'contrib/syslinux/syslinux-4.03/codepage/cptable.pl')
-rwxr-xr-x | contrib/syslinux/syslinux-4.03/codepage/cptable.pl | 176 |
1 files changed, 0 insertions, 176 deletions
diff --git a/contrib/syslinux/syslinux-4.03/codepage/cptable.pl b/contrib/syslinux/syslinux-4.03/codepage/cptable.pl deleted file mode 100755 index e29cf00..0000000 --- a/contrib/syslinux/syslinux-4.03/codepage/cptable.pl +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/perl -# -# Produce a codepage matching table. For each 8-bit character, list -# a primary and an alternate match (the latter used for case-insensitive -# matching.) -# -# Usage: -# cptable.pl UnicodeData console-cp.txt filesystem-cp.txt output.cp -# -# Note: for the format of the UnicodeData file, see: -# http://www.unicode.org/Public/UNIDATA/UCD.html -# - -($ucd, $cpco, $cpfs, $cpout) = @ARGV; - -if (!defined($cpout)) { - die "Usage: $0 UnicodeData console-cp.txt fs-cp.txt output.cp\n"; -} - -%ucase = (); -%lcase = (); -%tcase = (); -%decomp = (); - -open(UCD, '<', $ucd) - or die "$0: could not open unicode data: $ucd: $!\n"; -while (defined($line = <UCD>)) { - chomp $line; - @f = split(/;/, $line); - $n = hex $f[0]; - $ucase{$n} = ($f[12] ne '') ? hex $f[12] : $n; - $lcase{$n} = ($f[13] ne '') ? hex $f[13] : $n; - $tcase{$n} = ($f[14] ne '') ? hex $f[14] : $n; - if ($f[5] =~ /^[0-9A-F\s]+$/) { - # This character has a canonical decomposition. - # The regular expression rejects angle brackets, so other - # decompositions aren't permitted. - $decomp{$n} = []; - foreach my $dch (split(' ', $f[5])) { - push(@{$decomp{$n}}, hex $dch); - } - } -} -close(UCD); - -# -# Filesystem and console codepages. The filesystem codepage is used -# for FAT shortnames, whereas the console codepage is whatever is used -# on the screen and keyboard. -# -@xtab = (undef) x 256; -%tabx = (); -open(CPFS, '<', $cpfs) - or die "$0: could not open fs codepage: $cpfs: $!\n"; -while (defined($line = <CPFS>)) { - $line =~ s/\s*(\#.*|)$//; - @f = split(/\s+/, $line); - next if (scalar @f != 2); - next if (hex $f[0] > 255); - $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode - $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage -} -close(CPFS); - -@ytab = (undef) x 256; -%taby = (); -open(CPCO, '<', $cpco) - or die "$0: could not open console codepage: $cpco: $!\n"; -while (defined($line = <CPCO>)) { - $line =~ s/\s*(\#.*|)$//; - @f = split(/\s+/, $line); - next if (scalar @f != 2); - next if (hex $f[0] > 255); - $ytab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode - $taby{hex $f[1]} = hex $f[0]; # Unicode -> Codepage -} -close(CPCO); - -open(CPOUT, '>', $cpout) - or die "$0: could not open output file: $cpout: $!\n"; -# -# Magic number, in anticipation of being able to load these -# files dynamically... -# -print CPOUT pack("VV", 0x58a8b3d4, 0x51d21eb1); - -# Header fields available for future use... -print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0); - -# -# Self (shortname) uppercase table. -# This depends both on the console codepage and the filesystem codepage; -# the logical transcoding operation is: -# -# $tabx{$ucase{$ytab[$i]}} -# -# ... where @ytab is console codepage -> Unicode and -# %tabx is Unicode -> filesystem codepage. -# -@uctab = (undef) x 256; -for ($i = 0; $i < 256; $i++) { - $uuc = $ucase{$ytab[$i]}; # Unicode upper case - if (defined($tabx{$uuc})) { - # Straight-forward conversion - $u = $tabx{$uuc}; - } elsif (defined($tabx{${$decomp{$uuc}}[0]})) { - # Upper case equivalent stripped of accents - $u = $tabx{${$decomp{$uuc}}[0]}; - } else { - # No equivalent at all found. Assume it is a lower-case-only - # character, like greek alpha in CP437. - $u = $i; - } - $uctab[$i] = $u; - print CPOUT pack("C", $u); -} - -# -# Self (shortname) lowercase table. -# This depends both on the console codepage and the filesystem codepage; -# the logical transcoding operation is: -# -# $taby{$lcase{$xtab[$i]}} -# -# ... where @ytab is console codepage -> Unicode and -# %tabx is Unicode -> filesystem codepage. -# -@lctab = (undef) x 256; -for ($i = 0; $i < 256; $i++) { - $llc = $lcase{$xtab[$i]}; # Unicode lower case - if (defined($l = $taby{$llc}) && $uctab[$l] == $i) { - # Straight-forward conversion - } elsif (defined($l = $tabx{${$decomp{$llc}}[0]}) && $uctab[$l] == $i) { - # Lower case equivalent stripped of accents - } else { - # No equivalent at all found. Find *anything* that matches the - # bijection criterion... - for ($l = 0; $l < 256; $l++) { - last if ($uctab[$l] == $i); - } - $l = $i if ($l == 256); # If nothing, we're screwed anyway... - } - $lctab[$i] = $l; - print CPOUT pack("C", $l); -} - -# -# Unicode (longname) matching table. -# This only depends on the console codepage. -# -$pp0 = ''; $pp1 = ''; -for ($i = 0; $i < 256; $i++) { - if (!defined($ytab[$i])) { - $p0 = $p1 = 0xffff; - } else { - $p0 = $ytab[$i]; - if ($ucase{$p0} != $p0) { - $p1 = $ucase{$p0}; - } elsif ($lcase{$p0} != $p0) { - $p1 = $lcase{$p0}; - } elsif ($tcase{$p0} != $p0) { - $p1 = $tcase{$p0}; - } else { - $p1 = $p0; - } - } - # Only the BMP is supported... - $p0 = 0xffff if ($p0 > 0xffff); - $p1 = 0xffff if ($p1 > 0xffff); - $pp0 .= pack("v", $p0); - $pp1 .= pack("v", $p1); -} -print CPOUT $pp0, $pp1; -close (CPOUT); - - |