summary refs log tree commit diff stats
path: root/contrib/syslinux/syslinux-4.03/codepage/cptable.pl
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/syslinux/syslinux-4.03/codepage/cptable.pl')
-rwxr-xr-x contrib/syslinux/syslinux-4.03/codepage/cptable.pl 176
1 files changed, 0 insertions, 176 deletions
diff --git a/contrib/syslinux/syslinux-4.03/codepage/cptable.pl b/contrib/syslinux/syslinux-4.03/codepage/cptable.pl
deleted file mode 100755
index e29cf00..0000000
--- a/contrib/syslinux/syslinux-4.03/codepage/cptable.pl
+++ /dev/null
@@ -1,176 +0,0 @@
-#!/usr/bin/perl
-#
-# Produce a codepage matching table. For each 8-bit character, list
-# a primary and an alternate match (the latter used for case-insensitive
-# matching.)
-#
-# Usage:
-# cptable.pl UnicodeData console-cp.txt filesystem-cp.txt output.cp
-#
-# Note: for the format of the UnicodeData file, see:
-# http://www.unicode.org/Public/UNIDATA/UCD.html
-#
-
-($ucd, $cpco, $cpfs, $cpout) = @ARGV;
-
-if (!defined($cpout)) {
- die "Usage: $0 UnicodeData console-cp.txt fs-cp.txt output.cp\n";
-}
-
-%ucase = ();
-%lcase = ();
-%tcase = ();
-%decomp = ();
-
-open(UCD, '<', $ucd)
- or die "$0: could not open unicode data: $ucd: $!\n";
-while (defined($line = <UCD>)) {
- chomp $line;
- @f = split(/;/, $line);
- $n = hex $f[0];
- $ucase{$n} = ($f[12] ne '') ? hex $f[12] : $n;
- $lcase{$n} = ($f[13] ne '') ? hex $f[13] : $n;
- $tcase{$n} = ($f[14] ne '') ? hex $f[14] : $n;
- if ($f[5] =~ /^[0-9A-F\s]+$/) {
- # This character has a canonical decomposition.
- # The regular expression rejects angle brackets, so other
- # decompositions aren't permitted.
- $decomp{$n} = [];
- foreach my $dch (split(' ', $f[5])) {
- push(@{$decomp{$n}}, hex $dch);
- }
- }
-}
-close(UCD);
-
-#
-# Filesystem and console codepages. The filesystem codepage is used
-# for FAT shortnames, whereas the console codepage is whatever is used
-# on the screen and keyboard.
-#
-@xtab = (undef) x 256;
-%tabx = ();
-open(CPFS, '<', $cpfs)
- or die "$0: could not open fs codepage: $cpfs: $!\n";
-while (defined($line = <CPFS>)) {
- $line =~ s/\s*(\#.*|)$//;
- @f = split(/\s+/, $line);
- next if (scalar @f != 2);
- next if (hex $f[0] > 255);
- $xtab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode
- $tabx{hex $f[1]} = hex $f[0]; # Unicode -> Codepage
-}
-close(CPFS);
-
-@ytab = (undef) x 256;
-%taby = ();
-open(CPCO, '<', $cpco)
- or die "$0: could not open console codepage: $cpco: $!\n";
-while (defined($line = <CPCO>)) {
- $line =~ s/\s*(\#.*|)$//;
- @f = split(/\s+/, $line);
- next if (scalar @f != 2);
- next if (hex $f[0] > 255);
- $ytab[hex $f[0]] = hex $f[1]; # Codepage -> Unicode
- $taby{hex $f[1]} = hex $f[0]; # Unicode -> Codepage
-}
-close(CPCO);
-
-open(CPOUT, '>', $cpout)
- or die "$0: could not open output file: $cpout: $!\n";
-#
-# Magic number, in anticipation of being able to load these
-# files dynamically...
-#
-print CPOUT pack("VV", 0x58a8b3d4, 0x51d21eb1);
-
-# Header fields available for future use...
-print CPOUT pack("VVVVVV", 0, 0, 0, 0, 0, 0);
-
-#
-# Self (shortname) uppercase table.
-# This depends both on the console codepage and the filesystem codepage;
-# the logical transcoding operation is:
-#
-# $tabx{$ucase{$ytab[$i]}}
-#
-# ... where @ytab is console codepage -> Unicode and
-# %tabx is Unicode -> filesystem codepage.
-#
-@uctab = (undef) x 256;
-for ($i = 0; $i < 256; $i++) {
- $uuc = $ucase{$ytab[$i]}; # Unicode upper case
- if (defined($tabx{$uuc})) {
- # Straight-forward conversion
- $u = $tabx{$uuc};
- } elsif (defined($tabx{${$decomp{$uuc}}[0]})) {
- # Upper case equivalent stripped of accents
- $u = $tabx{${$decomp{$uuc}}[0]};
- } else {
- # No equivalent at all found. Assume it is a lower-case-only
- # character, like greek alpha in CP437.
- $u = $i;
- }
- $uctab[$i] = $u;
- print CPOUT pack("C", $u);
-}
-
-#
-# Self (shortname) lowercase table.
-# This depends both on the console codepage and the filesystem codepage;
-# the logical transcoding operation is:
-#
-# $taby{$lcase{$xtab[$i]}}
-#
-# ... where @ytab is console codepage -> Unicode and
-# %tabx is Unicode -> filesystem codepage.
-#
-@lctab = (undef) x 256;
-for ($i = 0; $i < 256; $i++) {
- $llc = $lcase{$xtab[$i]}; # Unicode lower case
- if (defined($l = $taby{$llc}) && $uctab[$l] == $i) {
- # Straight-forward conversion
- } elsif (defined($l = $tabx{${$decomp{$llc}}[0]}) && $uctab[$l] == $i) {
- # Lower case equivalent stripped of accents
- } else {
- # No equivalent at all found. Find *anything* that matches the
- # bijection criterion...
- for ($l = 0; $l < 256; $l++) {
- last if ($uctab[$l] == $i);
- }
- $l = $i if ($l == 256); # If nothing, we're screwed anyway...
- }
- $lctab[$i] = $l;
- print CPOUT pack("C", $l);
-}
-
-#
-# Unicode (longname) matching table.
-# This only depends on the console codepage.
-#
-$pp0 = ''; $pp1 = '';
-for ($i = 0; $i < 256; $i++) {
- if (!defined($ytab[$i])) {
- $p0 = $p1 = 0xffff;
- } else {
- $p0 = $ytab[$i];
- if ($ucase{$p0} != $p0) {
- $p1 = $ucase{$p0};
- } elsif ($lcase{$p0} != $p0) {
- $p1 = $lcase{$p0};
- } elsif ($tcase{$p0} != $p0) {
- $p1 = $tcase{$p0};
- } else {
- $p1 = $p0;
- }
- }
- # Only the BMP is supported...
- $p0 = 0xffff if ($p0 > 0xffff);
- $p1 = 0xffff if ($p1 > 0xffff);
- $pp0 .= pack("v", $p0);
- $pp1 .= pack("v", $p1);
-}
-print CPOUT $pp0, $pp1;
-close (CPOUT);
-
-