diff options
Diffstat (limited to 'contrib/syslinux/syslinux-4.03/codepage/gensubset.pl')
-rwxr-xr-x | contrib/syslinux/syslinux-4.03/codepage/gensubset.pl | 57 |
1 files changed, 0 insertions, 57 deletions
#!/usr/bin/perl
#
# Generate a subset of the UnicodeData.txt file, available from
# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
#
# Usage:
#   gensubset.pl [subset files] < UnicodeData.txt > MiniUCD.txt
#
# Every code point mentioned in the subset files is kept, together with the
# case variants UnicodeData.txt lists for it.  UnicodeData.txt is read from
# standard input exactly once and held in memory, so the input may be a
# pipe as well as a regular file.
#

use strict;
use warnings;

# Read the subset files named in the argument list and return a reference
# to a hash whose keys are the (numeric) code points they mention.
#
# After '#' comments and trailing blanks are stripped, only lines made of
# exactly two whitespace-separated fields are used; the second field is
# the code point in hexadecimal.  Dies if a file cannot be opened.
sub read_needed {
    my @files = @_;
    my %need_these;

    foreach my $file (@files) {
        open(my $fh, '<', $file)
            or die "$0: cannot open $file: $!\n";
        while (defined(my $line = <$fh>)) {
            $line =~ s/\s*(\#.*|)$//;   # Remove comments and final blanks
            my @f = split(/\s+/, $line);
            next if (scalar @f != 2);
            $need_these{hex $f[1]}++;
        }
        close($fh);
    }

    return \%need_these;
}

# Mark as needed the case variants of every code point already marked.
#
#   $need  - hash ref of needed code points; updated in place
#   $lines - array ref holding the raw lines of UnicodeData.txt
#
# Fields 12, 13 and 14 of a record are its simple uppercase, lowercase and
# titlecase mappings (see UAX #44).
# (Note: this doesn't necessarily provide the full transitive closure,
# but we shouldn't need it.)
sub add_case_variants {
    my ($need, $lines) = @_;

    foreach my $line (@$lines) {
        # Strip the line terminator first: otherwise the last field is
        # never empty and hex() turns it into a bogus U+0000 entry.
        (my $record = $line) =~ s/[\r\n]+$//;

        # LIMIT -1 keeps trailing empty fields so indices stay stable.
        my @f = split(/;/, $record, -1);
        next unless (defined($f[0]) && $f[0] =~ /^[0-9a-f]+$/i);
        next unless ($need->{hex $f[0]});

        foreach my $idx (12, 13, 14) {
            my $variant = $f[$idx];
            next unless (defined($variant) && $variant =~ /^[0-9a-f]+$/i);
            $need->{hex $variant}++;
        }
    }

    return;
}

# Return the list of output lines for the subset: one "XXXX;rest-of-record"
# line for every needed code point covered by a record in $lines.
#
#   $need  - hash ref of needed code points
#   $lines - array ref holding the raw lines of UnicodeData.txt
sub subset_lines {
    my ($need, $lines) = @_;
    my @out;

    foreach my $line (@$lines) {
        my ($v, $rest) = split(/;/, $line, 2);
        next unless (defined($v) && defined($rest));

        my ($r1, $r2);
        if ($v =~ /^([0-9a-f]+)\-([0-9a-f]+)$/i) {
            # This isn't actually the format... fix that if it ever matters
            $r1 = hex $1;
            $r2 = hex $2;
        } elsif ($v =~ /^([0-9a-f]+)$/i) {
            $r1 = $r2 = hex $1;
        } else {
            next;
        }

        for (my $r = $r1; $r <= $r2; $r++) {
            push(@out, sprintf("%04X;%s", $r, $rest)) if ($need->{$r});
        }
    }

    return @out;
}

# Entry point: subset files come from @ARGV, UnicodeData.txt from STDIN,
# and the subset is written to STDOUT.
sub main {
    my $need = read_needed(@ARGV);

    # Slurp the input rather than seek()ing back to the start for the
    # second pass: seek fails (silently, if unchecked) on a pipe.
    my @ucd = <STDIN>;

    add_case_variants($need, \@ucd);
    print subset_lines($need, \@ucd);
    return;
}

main() unless caller;

1;