Newer
Older
alignement-pascal-francis / 03-dedoublonnage / weedTei.pl
@besagni besagni on 10 Nov 2021 30 KB Renommage des répertoires
#!/usr/bin/perl


# Déclaration des pragmas
use strict;
use utf8;
use open qw/:std :utf8/;

# Appel des modules externes de base
use Encode;
use Getopt::Long;

# Appel des modules spécifiques à l'application
use HTML::Entities qw(decode_entities %entity2char);
## use HTTP::CookieJar::LWP;
use JSON;
## use LWP::UserAgent;
use Text::Unidecode;
## use URI::Encode qw(uri_encode uri_decode);

# Program name (without its directory part) and version information, both
# shown by the help screen.
my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
my $Version     = "1.5.1";
my $dateModif   = "10 Mars 2021";

# Synopsis printed by usage() and by the help screen.
my $usage = join("",
    "Usage : \n",
    "    $programme -f fichier[,fichier]* -r répertoire [ -l log ] [ -x ] \n",
    "    $programme -h \n",
);

# Command-line settings, filled in by GetOptions() below.
my ($aide, $log, $rep, $xclam);
my @fichiers;
my %dejaVu;

# Getopt::Long reports invalid options through warn(): while the options
# are parsed, any warning prints the synopsis and exits with status 1.
eval {
    $SIG{__WARN__} = sub { usage(1) };
    GetOptions(
        "fichier=s"    => \@fichiers,
        "help"         => \$aide,
        "log=s"        => \$log,
        "repertoire=s" => \$rep,
        "xclam"        => \$xclam,
    );
};
# From here on, warnings are simply re-emitted.
$SIG{__WARN__} = sub { warn $_[0] };

# Option -h: print the help screen on standard output, then stop.
if ( $aide ) {
    my @entete = (
        " \n",
        "Programme : \n",
        "    “$programme”, version $Version ($dateModif)\n",
        "    Permet de créer les fichiers d’enrichissement avec les codes de classement \n",
        "    et les mots-clés Pascal ou Francis au format TEI StandOff \n",
        "\n",
    );
    my @options = (
        "\nOptions : \n",
        "    -f  indique le nom du ou des fichiers d’entrée (qui peuvent être des fichiers \n",
        "        compressés avec “gzip” ou “bzip2”). L’option est répétitive et il est possible \n",
        "        d’indiquer plusieurs noms de fichier en les séparant par des virgules (mais \n",
        "        sans espace entre eux) \n",
        "    -h  affiche cette aide \n",
        "    -l  indique le nom du fichier “log” contenant la liste des appariements \n",
        "        supprimés \n",
        "    -r  indique le nom du répertoire où seront créés les fichiers de sortie \n",
        "        portant le même nom que les fichiers d’entrée \n",
        "    -x  accepte comme valides les appariements lorsque la valeur du score est \n",
        "        suivie d’un point d’exclamation (“!”) \n",
        " \n",
    );

    # Heading, synopsis, then the description of each option
    print @entete;
    print $usage;
    print @options;

    exit 0;
}

# Expand the comma-separated values given to -f into individual names and
# drop repeated names, keeping the first occurrence of each.
@fichiers = grep { not $dejaVu{$_} ++ } split(/,/, join(",", @fichiers));

# Both an input file and an output directory are mandatory
usage(2) if not @fichiers;
usage(2) if not $rep;

# The special name "-" is removed from the list and replaced by the file
# names read from standard input (one per line), appended at the end.
if ( grep { $_ eq '-' } @fichiers ) {
    @fichiers = grep { $_ ne '-' } @fichiers;
    while ( my $ligne = <STDIN> ) {
        chomp $ligne;
        $ligne =~ s/\r//;
        # Blank lines and lines holding only "-" are ignored
        next if $ligne =~ /^\s*$/;
        next if $ligne =~ /^\s*-\s*$/;
        # Strip surrounding white space
        $ligne =~ s/^\s+//;
        $ligne =~ s/\s+$//;
        push(@fichiers, $ligne);
    }
}

# State shared by the passes below
my ($info, $inist, $nb, $score);
my %groupe;     # $groupe{$inist}        : list of "id:score" items read from " ~~> " lines
my %info;       # $info{$inist}          : counter, later replaced by fields 4..16 of the record
my %istex;      # $istex{$inist}{$id}    : score of the match
my %match;      # $match{$id}{$inist}    : score of the match
my %rang;       # $rang{$id}{$inist}     : arrival order of the match for $id
my %rejetD;     # $rejetD{$inist}        : records whose score gets a "-" suffix on output
my %rejetI;     # $rejetI{$inist}        : records whose " ~~> " lines become " ::> " on output

# Complete the HTML entity table with the names listed after __DATA__
# (code point, tabulation, entity name); existing entries are kept.
while ( my $ligne = <DATA> ) {
    next if $ligne =~ /^\s*$/;
    next if $ligne =~ /^#/;
    chomp $ligne;
    my ($num, $sgml) = split(/\t+/, $ligne);
    $entity2char{$sgml} = chr($num) if not $entity2char{$sgml};
}
close DATA;

# Open the "log" file; without -l the messages go to /dev/null
open(LOG, ">:utf8", ($log ? $log : "/dev/null")) or die "$!,";

# First pass over the input files: record every accepted match, then try to
# attach the identifiers found on " ~~> " lines to the record they follow.
#   $match{$id}{$inist}  score of the match between $id and $inist
#   $rang{$id}{$inist}   arrival order of $inist among the matches of $id
#   $istex{$inist}{$id}  same score, indexed the other way round
#   $groupe{$inist}      list of "id:score" items read from " ~~> " lines
foreach my $fichier (@fichiers) {
        # Compressed files are read through a decompressor.  The list form
        # of the pipe open bypasses the shell, so a file name containing
        # spaces or shell metacharacters is handed over verbatim.
        if ( $fichier =~ /\.gz\z/o ) {
                open(INP, "-|", "gzip", "-cd", "--", $fichier) or die "$!,";
                binmode(INP, ":utf8");
                }
        elsif ( $fichier =~ /\.bz2\z/o ) {
                open(INP, "-|", "bzip2", "-cd", "--", $fichier) or die "$!,";
                binmode(INP, ":utf8");
                }
        else    {
                open(INP, "<:utf8", $fichier) or die "$!,";
                }

        while(<INP>) {
                # Record line: marker, tabulation, score optionally followed
                # by "!", tabulation
                if (/^([_.0\*\+]+\x{00A0}*)\t(\d\.\d+)(!?)\t/o) {
                        $score  = $2;
                        my $statut = $3;
                        # The "!" flag is only honoured with option -x
                        $statut = "" if not $xclam;
                        # Scores under 3.490 are ignored unless flagged "!"
                        if ( $score < 3.490 and $statut ne '!' ) {
                                $inist = undef;
                                next;
                                }
                        chomp;
                        my @champs = split(/\t/);
                        $inist     = $champs[3];
                        my $id     = $champs[17];
                        $match{$id}{$inist} = $score;
                        # Number of matches known for $id at this point
                        $rang{$id}{$inist} = keys %{$match{$id}};
                        $istex{$inist}{$id} = $score;
                        }
                # Record line whose score carries another non-word suffix:
                # the following " ~~> " lines are not collected
                elsif (/^([_.0\*\+]+\x{00A0}*)\t(\d\.\d+)\W\t/o) {
                        $inist = undef;
                        }
                # Group line attached to the last accepted record
                elsif ( /^ ~~> \t/o ) {
                        next if not $inist;
                        chomp;
                        my @champs = split(/\t/);
                        my $id     = $champs[5];
                        # NB: this test autovivifies $match{$id} as an empty
                        # hash; the later loops cope with such empty entries
                        next if defined $match{$id}{$inist};
                        push(@{$groupe{$inist}}, "$id:$score");
                        }
                }
        # A decompressor failure (missing or corrupt file) only shows up
        # when the pipe is closed
        close INP or warn "Erreur à la lecture de \"$fichier\" : $! (statut $?)\n";

        # NOTE(review): %groupe is not reset between files, so the groups of
        # the previous files are examined again here -- confirm this is wanted.
        foreach $inist (sort keys %groupe) {
                my @tmp = @{$groupe{$inist}};
                if ( $#tmp == 0 ) {
                        my ($id, $valeur) = split(/:/, $tmp[0]);
                        # A lone item already matched with this record needs
                        # nothing more.  NOTE(review): the original evaluated
                        # $rejetI{$inist} here without modifying it (a no-op);
                        # restore a "++" if a rejection was intended.
                        next if $match{$id}{$inist};
                        }
                # The group is rejected as soon as one of its identifiers
                # already has a match
                my $erreur = 0;
                foreach my $item (@tmp) {
                        my ($id, $valeur) = split(/:/, $item);
                        if ( defined $match{$id} ) {
                                my @inist = keys %{$match{$id}};
                                $erreur ++ if @inist;
                                }
                        }
                if ( $erreur ) {
                        $rejetI{$inist} ++;
                        }
                else    {
                        # Otherwise every identifier of the group inherits
                        # the match with its score
                        foreach my $item (@tmp) {
                                my ($id, $valeur) = split(/:/, $item);
                                $match{$id}{$inist} = $valeur;
                                }
                        }
                }
        }

# Identifiers matched with exactly one record are not duplicates and are
# dropped; for the others, every record involved is flagged in %info.
foreach my $id (keys %match) {
    my @nums = keys %{$match{$id}};
    if ( @nums == 1 ) {
        delete $match{$id};
        next;
    }
    $nb ++;
    $info{$_} ++ foreach @nums;
}

# print LOG "Nombre de doublons : $nb \n";
# foreach my $id (sort keys %match) {
#         print LOG " -> $id \n";
#         foreach my $num (sort keys %{$match{$id}}) {
#                 print LOG "\t$num => $match{$id}{$num} \n";
#                 }
#         }

# Second pass over the input files: for every record flagged in %info,
# replace the flag by the tab-joined fields 4 to 16 of its record line
# (the values later compared by compare()).
foreach my $fichier (@fichiers) {
        # The list form of the pipe open bypasses the shell, so a file name
        # containing spaces or shell metacharacters is handed over verbatim.
        if ( $fichier =~ /\.gz\z/o ) {
                open(INP, "-|", "gzip", "-cd", "--", $fichier) or die "$!,";
                binmode(INP, ":utf8");
                }
        elsif ( $fichier =~ /\.bz2\z/o ) {
                open(INP, "-|", "bzip2", "-cd", "--", $fichier) or die "$!,";
                binmode(INP, ":utf8");
                }
        else    {
                open(INP, "<:utf8", $fichier) or die "$!,";
                }

        while(<INP>) {
                # Record lines only (whatever their score)
                if (/^([_.0\*\+]+\x{00A0}*)\t/o) {
                        chomp;
                        my @champs = split(/\t/);
                        $inist     = $champs[3];
                        $info      = join("\t", @champs[4 .. 16]);
#                       my $id     = $champs[17];
                        $info{$inist} = $info if $info{$inist};
                        }
                }
        # A decompressor failure (missing or corrupt file) only shows up
        # when the pipe is closed
        close INP or warn "Erreur à la lecture de \"$fichier\" : $! (statut $?)\n";
        }

# For every identifier matched with several records, decide which records
# are rejected (flagged in %rejetD).
foreach my $id (sort keys %match) {
        # %tmp groups the records matched with $id by score
        my %tmp = ();
        next if not defined $match{$id};
        foreach my $num (sort keys %{$match{$id}}) {
                push(@{$tmp{$match{$id}{$num}}}, $num);
                }
        # Scores in decreasing order; entries without any match are skipped
        my @tmp = sort {$b <=> $a} keys %tmp;
        next if $#tmp < 0;
        my $max = $tmp[0];
        if ( $#{$tmp{$max}} == 0 ) {
                # A single record holds the best score: all the others
                # are rejected
                my $correct = $tmp{$max}->[0];
                foreach my $num (sort keys %{$match{$id}}) {
                        next if $num eq $correct;
                        $rejetD{$num} ++;
                        }
                }
        else    {
                # Several records share the best score: @tmp now holds them
                # in arrival order and each is compared with the first one
                @tmp = sort {$rang{$id}{$a} <=> $rang{$id}{$b}} @{$tmp{$max}};
                my $total = 0;
                for (my $nb = 1 ; $nb <= $#tmp ; $nb ++) {
                        # Identical information, or equivalent according
                        # to compare()
                        if ( $info{$tmp[0]} eq $info{$tmp[$nb]} ) {
                                $total ++;
                                next;
                                }
                        if ( compare($tmp[0], $tmp[$nb]) ) {
                                $total ++;
                                next;
                                }
                        # Stop at the first record that differs
                        last;
                        }
                if ( $total == $#tmp ) {
                        # All of them agree with the first one, which is
                        # the only record kept
                        foreach my $num (sort keys %{$match{$id}}) {
                                next if $num eq $tmp[0];
                                $rejetD{$num} ++;
                                }
                        }
                elsif ( $total > 1 and $total == $#tmp - 1 ) {
                        # Only the last one differs: every record with the
                        # best score is rejected and logged (marker " => ")
                        print LOG " => $id [$max] \n";
                        foreach my $num (@{$tmp{$max}}) {
                                $rejetD{$num} ++;
                                print LOG "\t$num\t$info{$num}\n";
                                }
                        print LOG "\n";
                        }
                else    {
                        # Any other case: same rejection, logged with the
                        # marker " -> "
                        print LOG " -> $id [$max] \n";
                        foreach my $num (@{$tmp{$max}}) {
                                $rejetD{$num} ++;
                                print LOG "\t$num\t$info{$num}\n";
                                }
                        print LOG "\n";
                        }
                }
        }

# Third pass: copy every input file into the output directory (same name,
# same compression), marking the rejected items on the way:
#   - the score of a record flagged in %rejetD gets a "-" suffix;
#   - the " ~~> " lines of a record flagged in %rejetI become " ::> ".
foreach my $fichier (@fichiers) {
        my $compression = undef;
        # The list form of the pipe open bypasses the shell, so a file name
        # containing spaces or shell metacharacters is handed over verbatim.
        if ( $fichier =~ /\.gz\z/o ) {
                $compression = "gzip";
                open(INP, "-|", "gzip", "-cd", "--", $fichier) or die "$!,";
                binmode(INP, ":utf8");
                }
        elsif ( $fichier =~ /\.bz2\z/o ) {
                $compression = "bzip2";
                open(INP, "-|", "bzip2", "-cd", "--", $fichier) or die "$!,";
                binmode(INP, ":utf8");
                }
        else    {
                open(INP, "<:utf8", $fichier) or die "$!,";
                }

        if ( $compression ) {
                # The redirection still needs a shell, but the command and
                # the output path are given as positional parameters ($0 and
                # $1) instead of being pasted into the command line.
                open(OUT, "|-", "sh", "-c", 'exec "$0" -c > "$1"', $compression, "$rep/$fichier") or die "$!,";
                binmode(OUT, ":utf8");
                }
        else    {
                open(OUT, ">:utf8", "$rep/$fichier") or die "$!,";
                }

        while(<INP>) {
                # Record line with a plain score (optionally followed by "!")
                if (/^([_.0\*\+]+\x{00A0}*)\t(\d\.\d+)(!?)\t/o) {
                        my $score = $2;
                        # Limit -1 keeps trailing empty fields, so the line is
                        # rebuilt unchanged even without a final newline
                        my @champs = split(/\t/, $_, -1);
                        $inist     = $champs[3];
                        if ( $rejetD{$inist} ) {
                                $champs[1] = $score . '-';
                                $_ = join("\t", @champs);
                                }
                        }
                # Record line whose score already carries another suffix
                elsif (/^([_.0\*\+]+\x{00A0}*)\t(\d\.\d+)(\W?)\t/o) {
                        $inist = undef;
                        }
                # Group line of the current record
                elsif ( /^ ~~> \t/o ) {
                        if ( $inist and $rejetI{$inist} ) {
                                s/^ ~~> \t/ ::> \t/o;
                                }
                        }
                print OUT;
                }
        # A decompressor failure only shows up when the pipe is closed
        close INP or warn "Erreur à la lecture de \"$fichier\" : $! (statut $?)\n";
        # Buffered write errors and compressor failures surface here
        close OUT or die "Erreur à l'écriture de \"$rep/$fichier\" : $! (statut $?)\n";
        }

exit 0;


# Prints the synopsis on the standard error output, then terminates the
# program with the exit status given as argument.
sub usage
{
my ($statut) = @_;

print STDERR "\n", $usage, "\n";

exit $statut;
}

# Numeric equality restricted to values that both start with a digit: two
# non-numeric strings both evaluate to 0 and must not be declared equal.
sub _egal_numerique
{
my ($v1, $v2) = @_;

return 0 if $v1 !~ /^\s*\d/ or $v2 !~ /^\s*\d/;
return $v1 == $v2 ? 1 : 0;
}

# Compares the bibliographic information of two records.
#   $n1, $n2 : keys of %info, whose values are tab-separated field lists
# Returns 1 when the two records are considered equivalent, 0 otherwise.
# A field is tested only when it is filled in on both sides.
sub compare
{
my ($n1, $n2) = @_;

my $test   = 0;         # number of fields that could be tested
my %trouve = ();        # fields found to be equivalent

my @i1 = split(/\t/, $info{$n1});
my @i2 = split(/\t/, $info{$n2});
# 0: title ; 1: journal ; 2: book ; 3: ISSN ; 4: ISBN ; 5: date
# 6: volume ; 7: issue ; 8: first page ; 9: last page ; 10: first author name
# 11: first author forename ; 12: others

# ISSN
if ( $i1[3] and $i2[3] ) {
        $test ++;
        if ( $i1[3] eq $i2[3] or uc($i1[3]) eq uc($i2[3]) ) {
                $trouve{'ISSN'} ++;
                }
        }

# Journal
if ( $i1[1] and $i2[1] ) {
        $test ++;
        $trouve{'revue'} ++ if revue($i1[1], $i2[1]);
        }

# Date
if ( $i1[5] and $i2[5] ) {
        $test ++;
        if ( $i1[5] eq $i2[5] or _egal_numerique($i1[5], $i2[5]) ) {
                $trouve{'date'} ++;
                }
        }

# Volume
if ( $i1[6] and $i2[6] ) {
        $test ++;
        if ( $i1[6] eq $i2[6] or biniou($i1[6], $i2[6], "VOLUME", $n1, $n2) ) {
                $trouve{'volume'} ++;
                }
        elsif ( unidecode($i1[6]) eq unidecode($i2[6]) ) {
                $trouve{'volume'} ++;
                }
        }

# Issue
if ( $i1[7] and $i2[7] ) {
        $test ++;
        if ( $i1[7] eq $i2[7] or biniou($i1[7], $i2[7], "FASCICULE", $n1, $n2) ) {
                $trouve{'fascicule'} ++;
                }
        elsif ( unidecode($i1[7]) eq unidecode($i2[7]) ) {
                $trouve{'fascicule'} ++;
                }
        }

# First page
if ( $i1[8] and $i2[8] ) {
        $test ++;
        $trouve{'pagedebut'} ++ if $i1[8] eq $i2[8] or _egal_numerique($i1[8], $i2[8]);
        }

# Last page
if ( $i1[9] and $i2[9] ) {
        $test ++;
        $trouve{'pagefin'} ++ if $i1[9] eq $i2[9] or _egal_numerique($i1[9], $i2[9]);
        }

# Document title: the looser comparison (third argument set to 1) is only
# allowed when source, date/volume and pagination already agree
if ( $i1[0] and $i2[0] ) {
        $test ++;
        my $souple = ( ($trouve{'ISSN'} or $trouve{'revue'}) and
                       ($trouve{'date'} or $trouve{'volume'}) and
                       ($trouve{'pagedebut'} or $trouve{'pagefin'}) ) ? 1 : 0;
        my $resultat = titre($i1[0], $i2[0], $souple);
        # Only a successful comparison is recorded: a key holding 0 would
        # otherwise be counted below as a found field
        $trouve{'titre'} = $resultat if $resultat;
        }

# First author
if ( $i1[10] and $i2[10] ) {
        $test ++;
        if ( $i1[10] eq $i2[10] or
             uc($i1[10]) eq uc($i2[10]) ) {
                $trouve{'auteur'} ++;
                }
        }

my $trouve = scalar keys %trouve;

# More than four fields tested and all of them equivalent
if ( $test > 4 and $trouve == $test ) {
        return 1;
        }

# Otherwise: same source, same date or volume, same pagination and same
# author or title
if ( ($trouve{'ISSN'} or $trouve{'revue'}) and
     ($trouve{'date'} or $trouve{'volume'}) and
     ($trouve{'pagedebut'} or $trouve{'pagefin'}) and
     ($trouve{'auteur'} or $trouve{'titre'}) ) {
        return 1;
        }

return 0;
}

# Compares a journal title with one or several candidate titles.
#   $title : reference title
#   @liste : candidate titles (HTML entities are decoded before comparison)
# Returns the number of successful comparisons (0 when nothing matches).
sub revue
{
my ($title, @liste) = @_;

my $match = 0;

# Reference title reduced to its lowercase words
my $rv = lc(join(" ", ($title =~ /(\w+)/go)));
foreach my $item (@liste) {
        my $jn = decode_entities($item);
        if ( $jn eq $title or lc($jn) eq lc($title) ) {
                $match ++;
                }
        else    {
                my $tmp = lc(join(" ", ($jn =~ /(\w+)/go)));
                if ( $tmp eq $rv ) {
                        $match ++;
                        }
                # One title ends with a single letter and is the beginning
                # of the other one
                elsif ( $tmp =~ / [a-z]\z/o and $rv =~ /^$tmp / ) {
                        $match ++;
                        }
                elsif ( $rv =~ / [a-z]\z/o and $tmp =~ /^$rv / ) {
                        $match ++;
                        }
                else    {
                        # Ignore the conjunctions, then a leading article
                        my $tmp1 = join(" ", grep(!/^(and|et|und|e|y)\z/, split(/ +/, $tmp)));
                        my $tmp2 = join(" ", grep(!/^(and|et|und|e|y)\z/, split(/ +/, $rv)));
                        if ( $tmp1 eq $tmp2 ) {
                                $match ++;
                                }
                        else    {
                                $tmp1 =~  s/^(the|die|das|les?|la?|du|[ei]l) //o;
                                $tmp2 =~  s/^(the|die|das|les?|la?|du|[ei]l) //o;
                                $match ++ if $tmp1 eq $tmp2;
                                }
                        }
                }
        # Last resort: compare one title with the part of the other one
        # placed before the colon
        if ( not $match ) {
                if ( $jn =~ /\s*: /o ) {
                        my ($tmp) = ($jn =~ /^(.+)\s*:/o);
                        if ( defined $tmp ) {
                                # The greedy capture keeps the blanks placed
                                # before the colon: remove them
                                $tmp =~ s/\s+\z//o;
                                if ( $tmp eq $title or lc($tmp) eq lc($title) ) {
                                        $match ++;
                                        }
                                }
                        }
                if ( $title =~ /\s*: /o ) {
                        my ($tmp) = ($title =~ /^(.+)\s*:/o);
                        if ( defined $tmp ) {
                                $tmp =~ s/\s+\z//o;
                                if ( $jn eq $tmp or lc($jn) eq lc($tmp) ) {
                                        $match ++;
                                        }
                                }
                        }
                }
        }

return $match;
}

# Tells whether the lettered value ($vl, $vn), e.g. "A5", lies inside the
# lettered range $infl$infn .. $supl$supn, e.g. "A1-A9".
# Returns 1 (inside), 0 (outside) or -1 when the letters of the range are
# in decreasing order.
sub _biniou_intervalle
{
my ($vl, $vn, $infl, $infn, $supl, $supn) = @_;

if ( $infl eq $supl ) {
        # Same letter on both bounds: compare the numbers (the upper bound
        # is the number $supn, not the letter $supl)
        return ( $vl eq $infl and $vn >= $infn and $vn <= $supn ) ? 1 : 0;
        }
return -1 if $infl gt $supl;

return 1 if $vl eq $infl and $vn >= $infn;
return 1 if $vl eq $supl and $vn <= $supn;
return 1 if $vl gt $infl and $vl lt $supl;

return 0;
}

# Compares two volume or issue values when one of them may be a range.
#   $val1, $val2 : values to compare
#   $champ       : name of the field, used in the alert messages
#   $num1, $num2 : identifiers of the two records, used in the alert messages
# Returns 1 when one value lies inside the range given by the other one (or
# when both are the same numeric range), 0 otherwise.  Unexpected formats
# are reported through alerte().
sub biniou
{
my ($val1, $val2, $champ, $num1, $num2) = @_;

# Number against a numeric range
if ( $val1 =~ /^\d+\z/o ) {
        if ( $val2 =~ /^(\d+)[-\x{2010}-\x{2015}\x{2212}](\d+)\z/o ) {
                my $inf = $1;
                my $sup = $2;
                if ( $inf < $sup and $val1 >= $inf and $val1 <= $sup ) {
                        return 1;
                        }
                }
        }
# Numeric range against a number or another numeric range
elsif ( $val1 =~ m|^(\d+)[-\x{2010}-\x{2015}\x{2212}/](\d+)\z|o ) {
        my $inf = $1;
        my $sup = $2;
        if ( $val2 =~ /^\d+\z/o ) {
                if ( $inf < $sup and $val2 >= $inf and $val2 <= $sup ) {
                        return 1;
                        }
                }
        elsif ( $val2 =~ m|^(\d+)[-\x{2010}-\x{2015}\x{2212}/](\d+)\z|o ) {
                my $inf2 = $1;
                my $sup2 = $2;
                if ( $inf == $inf2 and $sup == $sup2 ) {
                        return 1;
                        }
                }
        }
# Lettered value against a lettered range
elsif ( $val1 =~ /^([A-Za-z]+)(\d+)\z/o ) {
        my $vl = $1;
        my $vn = $2;
        if ( $val2 =~ /^([A-Za-z])(\d+)[-\x{2010}-\x{2015}\x{2212}]([A-Za-z])(\d+)\z/o ) {
                my $dedans = _biniou_intervalle($vl, $vn, $1, $2, $3, $4);
                return 1 if $dedans == 1;
                alerte("FORMAT $champ \"$num2\" BIZARRE \"$val2\"") if $dedans < 0;
                }
        }
# Lettered range against a lettered value
elsif ( $val1 =~ /^([A-Za-z])(\d+)[-\x{2010}-\x{2015}\x{2212}]([A-Za-z])(\d+)\z/o ) {
        my $infl = $1;
        my $infn = $2;
        my $supl = $3;
        my $supn = $4;
        if ( $val2 =~ /^([A-Za-z]+)(\d+)\z/o ) {
                my $dedans = _biniou_intervalle($1, $2, $infl, $infn, $supl, $supn);
                return 1 if $dedans == 1;
                alerte("FORMAT $champ \"$num1\" BIZARRE \"$val1\"") if $dedans < 0;
                }
        }
# Unknown format: report the first value, and the second one as well when
# it matches none of the expected formats
else    {
        alerte("FORMAT $champ \"$num1\" INATTENDU \"$val1\"");
        if ( $val2 !~ /^\d+\z/o and
             $val2 !~ /^([A-Za-z]+)(\d+)\z/o and
             $val2 !~ /^(\d+)[-\x{2010}-\x{2015}\x{2212}](\d+)\z/o and
             $val2 !~ /^([A-Za-z])(\d+)[-\x{2010}-\x{2015}\x{2212}]([A-Za-z])(\d+)\z/o ) {
                alerte("FORMAT $champ \"$num2\" INATTENDU \"$val2\"");
                }
        }

return 0;
}

# Compares two document titles.
#   $t1, $t2 : titles to compare
#   $sb      : when true, the inclusion of one normalised title in the
#              other one is also accepted
# Returns 1 for equivalent titles, 0.5 for a partial match (one title is
# included in, or is the first sentence of, the other one), 0 otherwise.
sub titre
{
my ($t1, $t2, $sb) = @_;

if ( $t1 eq $t2 or
     lc($t1) eq lc($t2) or
     lc(unidecode($t1)) eq lc(unidecode($t2))) {
        return 1;
        }

# Subscript and superscript tags are removed from the second title only
$t2 =~ s|</?su[bp]>||go;

# Titles reduced to their lowercase words
my $tmp1 = lc(join(" ", ($t1 =~ /(\w+)/go)));
my $tmp2 = lc(join(" ", ($t2 =~ /(\w+)/go)));

if ( $tmp1 eq $tmp2 ) {
        return 1;
        }
elsif ( unidecode($tmp1) eq unidecode($tmp2) ) {
        return 1;
        }
else    {
        # A small edit distance is tolerated on long titles
        $tmp1 = unidecode($tmp1);
        $tmp2 = unidecode($tmp2);
        my $ld = levenshtein_damereau($tmp1, $tmp2);
        if ( $ld == 1 and length($tmp2) > 20 ) {
                return 1;
                }
        elsif ( $ld == 2 and length($tmp2) > 40 ) {
                return 1;
                }
        }

# Inclusion of one title in the other.  An empty string would match any
# title, and the transliterated text is quoted so that it is never read
# as a regular expression.
if ( $sb and length($tmp1) and length($tmp2) ) {
        if ( $tmp1 =~ /\b\Q$tmp2\E\b/ or $tmp2 =~ /\b\Q$tmp1\E\b/ ) {
                return 0.5;
                }
        }

# Comparison of the parts placed before the first ": ".  Both parts must
# exist: two missing parts must not be declared identical.
if ( $t1 =~ /: /o or $t2 =~ /: /o ) {
        my ($deb1) = $t1 =~ /^(.+?)\s*: /o;
        my ($deb2) = $t2 =~ /^(.+?)\s*: /o;
        if ( defined $deb1 and defined $deb2 ) {
                if ( $deb1 eq $deb2 ) {
                        return 1;
                        }
                $deb1 = lc(join(" ", ($deb1 =~ /(\w+)/go)));
                $deb2 = lc(join(" ", ($deb2 =~ /(\w+)/go)));
                # Parts without any word character are not comparable
                if ( length($deb1) and length($deb2) ) {
                        if ( $deb1 eq $deb2 ) {
                                return 1;
                                }
                        elsif ( unidecode($deb1) eq unidecode($deb2) ) {
                                return 1;
                                }
                        }
                }
        }

## Title being the first sentence of the other one
if ( length($t1) > length($t2) ) {
        if ( $t1 =~ /^\Q$t2\E\. .+/i ) {
                return 0.5;
                }
        }
elsif ( length($t1) < length($t2) ) {
        if ( $t2 =~ /^\Q$t1\E\. .+/i ) {
                return 0.5;
                }
        }

return 0;
}

# Edit distance between two strings, counting insertions, deletions,
# substitutions and transpositions of two adjacent characters.
#   $s1, $s2 : strings to compare
# Returns the distance as an integer.
sub levenshtein_damereau
{
my ($s1, $s2) = @_;

my $n1 = length $s1;
my $n2 = length $s2;

# An empty string lies at a distance equal to the length of the other one
return $n2 if $n1 == 0;
return $n1 if $n2 == 0;

my @c1 = split(//, $s1);
my @c2 = split(//, $s2);

# $dist[$i][$j] is the distance between the first $i characters of $s1
# and the first $j characters of $s2
my @dist = ([0 .. $n2]);
$dist[$_][0] = $_ foreach 1 .. $n1;

foreach my $i (1 .. $n1) {
        foreach my $j (1 .. $n2) {
                my $cout = $c1[$i-1] eq $c2[$j-1] ? 0 : 1;

                # Cheapest of deletion, insertion and substitution
                my $mieux = $dist[$i-1][$j] + 1;
                my $insertion = $dist[$i][$j-1] + 1;
                $mieux = $insertion if $insertion < $mieux;
                my $substitution = $dist[$i-1][$j-1] + $cout;
                $mieux = $substitution if $substitution < $mieux;

                # Transposition of the last two characters
                if ( $i > 1 and $j > 1 and
                     $c1[$i-1] eq $c2[$j-2] and
                     $c1[$i-2] eq $c2[$j-1] ) {
                        my $transposition = $dist[$i-2][$j-2] + $cout;
                        $mieux = $transposition if $transposition < $mieux;
                        }

                $dist[$i][$j] = $mieux;
                }
        }

return $dist[$n1][$n2];
}

# Returns the smallest number of the list given by reference (undef for
# an empty list).
sub min
{
my ($valeurs) = @_;

my ($plusPetit, @autres) = @{$valeurs};

for my $candidat (@autres) {
        next if not $candidat < $plusPetit;
        $plusPetit = $candidat;
        }

return $plusPetit;
}

# Writes an alert message, prefixed with "ALERTE : ", to the LOG handle.
sub alerte
{
my ($message) = @_;

print LOG "ALERTE : ", $message, "\n";
}

__DATA__

##
##  Liste d’entités caractères
##

##
##  NE PAS MODIFIER !
##
##  DO NOT EDIT!
##

33	excl
34	dquot
35	num
36	dollar
37	percnt
40	lpar
41	rpar
42	ast
43	plus
44	comma
45	hyphen
46	period
47	sol
58	colon
59	semi
61	equals
63	quest
64	commat
91	lsqb
92	bsol
93	rsqb
95	lowbar
96	grave
123	lcub
124	verbar
256	Amacr;
257	amacr;
258	Abreve;
259	abreve;
260	Aogon;
261	aogon;
262	Cacute;
263	cacute;
264	Ccirc;
265	ccirc;
266	Cdot;
267	cdot;
268	Ccaron;
269	ccaron;
270	Dcaron;
271	dcaron;
272	Dstrok;
273	dstrok;
274	Emacr;
275	emacr;
278	Edot;
279	edot;
280	Eogon;
281	eogon;
282	Ecaron;
283	ecaron;
284	Gcirc;
285	gcirc;
286	Gbreve;
287	gbreve;
288	Gdot;
289	gdot;
290	Gcedil;
291	gcedil;
292	Hcirc;
293	hcirc;
294	Hstrok;
295	hstrok;
296	Itilde;
297	itilde;
298	Imacr;
299	imacr;
302	Iogon;
303	iogon;
304	Idot;
305	inodot;
306	IJlig;
307	ijlig;
308	Jcirc;
309	jcirc;
310	Kcedil;
311	kcedil;
312	kgreen;
313	Lacute;
314	lacute;
315	Lcedil;
316	lcedil;
317	Lcaron;
318	lcaron;
319	Lmidot;
320	lmidot;
321	Lstrok;
322	lstrok;
323	Nacute;
324	nacute;
325	Ncedil;
326	ncedil;
327	Ncaron;
328	ncaron;
329	napos;
330	ENG;
331	eng;
332	Omacr;
333	omacr;
336	Odblac;
337	odblac;
340	Racute;
341	racute;
342	Rcedil;
343	rcedil;
344	Rcaron;
345	rcaron;
346	Sacute;
347	sacute;
348	Scirc;
349	scirc;
350	Scedil;
351	scedil;
354	Tcedil;
355	tcedil;
356	Tcaron;
357	tcaron;
358	Tstrok;
359	tstrok;
360	Utilde;
361	utilde;
362	Umacr;
363	umacr;
364	Ubreve;
365	ubreve;
366	Uring;
367	uring;
368	Udblac;
369	udblac;
370	Uogon;
371	uogon;
372	Wcirc;
373	wcirc;
374	Ycirc;
375	ycirc;
377	Zacute;
378	zacute;
379	Zdot;
380	zdot;
381	Zcaron;
382	zcaron;
501	gacute;
711	caron;
728	breve;
729	dot;
730	ring;
731	ogon;
733	dblac;
902	Aacgr;
904	Eacgr;
905	EEacgr;
906	Iacgr;
908	Oacgr;
910	Uacgr;
911	OHacgr;
912	idiagr;
913	Agr;
914	Bgr;
915	Ggr;
916	Dgr;
917	Egr;
918	Zgr;
919	EEgr;
920	THgr;
921	Igr;
922	Kgr;
923	Lgr;
924	Mgr;
925	Ngr;
926	Xgr;
927	Ogr;
928	Pgr;
929	Rgr;
931	Sgr;
932	Tgr;
933	Ugr;
934	PHgr;
935	KHgr;
936	PSgr;
937	OHgr;
938	Idigr;
939	Udigr;
940	aacgr;
941	eacgr;
942	eeacgr;
943	iacgr;
944	udiagr;
945	agr;
946	bgr;
947	ggr;
948	dgr;
949	egr;
950	zgr;
951	eegr;
952	thgr;
953	igr;
954	kgr;
955	lgr;
956	mgr;
957	ngr;
958	xgr;
959	ogr;
960	pgr;
961	rgr;
962	sfgr;
963	sgr;
964	tgr;
965	ugr;
966	phgr;
967	khgr;
968	psgr;
969	ohgr;
970	idigr;
971	udigr;
972	oacgr;
973	uacgr;
974	ohacgr;
977	thetav;
981	phiv;
988	gammad;
1008	kappav;
1009	rhov;
1025	IOcy;
1026	DJcy;
1027	GJcy;
1028	Jukcy;
1029	DScy;
1030	Iukcy;
1031	YIcy;
1032	Jsercy;
1033	LJcy;
1034	NJcy;
1035	TSHcy;
1036	KJcy;
1038	Ubrcy;
1039	DZcy;
1040	Acy;
1041	Bcy;
1042	Vcy;
1043	Gcy;
1044	Dcy;
1045	IEcy;
1046	ZHcy;
1047	Zcy;
1048	Icy;
1049	Jcy;
1050	Kcy;
1051	Lcy;
1052	Mcy;
1053	Ncy;
1054	Ocy;
1055	Pcy;
1056	Rcy;
1057	Scy;
1058	Tcy;
1059	Ucy;
1060	Fcy;
1061	KHcy;
1062	TScy;
1063	CHcy;
1064	SHcy;
1065	SHCHcy;
1066	HARDcy;
1067	Ycy;
1068	SOFTcy;
1069	Ecy;
1070	YUcy;
1071	YAcy;
1072	acy;
1073	bcy;
1074	vcy;
1075	gcy;
1076	dcy;
1077	iecy;
1078	zhcy;
1079	zcy;
1080	icy;
1081	jcy;
1082	kcy;
1083	lcy;
1084	mcy;
1085	ncy;
1086	ocy;
1087	pcy;
1088	rcy;
1089	scy;
1090	tcy;
1091	ucy;
1092	fcy;
1093	khcy;
1094	tscy;
1095	chcy;
1096	shcy;
1097	shchcy;
1098	hardcy;
1099	ycy;
1100	softcy;
1101	ecy;
1102	yucy;
1103	yacy;
1105	iocy;
1106	djcy;
1107	gjcy;
1108	jukcy;
1109	dscy;
1110	iukcy;
1111	yicy;
1112	jsercy;
1113	ljcy;
1114	njcy;
1115	tshcy;
1116	kjcy;
1118	ubrcy;
1119	dzcy;
8196	emsp13;
8197	emsp14;
8199	numsp;
8200	puncsp;
8202	hairsp;
8208	dash;
8211	ndash;
8212	mdash;
8213	horbar;
8214	Verbar;
8229	nldr;
8244	tprime;
8245	bprime;
8257	caret;
8259	hybull;
8411	tdot;
8412	DotDot;
8453	incare;
8459	hamilt;
8463	planck;
8466	lagran;
8467	ell;
8470	numero;
8471	copysr;
8478	rx;
8486	ohm;
8491	angst;
8492	bernou;
8499	phmmat;
8500	order;
8502	beth;
8503	gimel;
8504	daleth;
8531	frac13;
8532	frac23;
8533	frac15;
8534	frac25;
8535	frac35;
8536	frac45;
8537	frac16;
8538	frac56;
8539	frac18;
8540	frac38;
8541	frac58;
8542	frac78;
8597	varr;
8598	nwarr;
8599	nearr;
8600	drarr;
8601	dlarr;
8602	nlarr;
8603	nrarr;
8605	rarrw;
8606	Larr;
8608	Rarr;
8610	larrtl;
8611	rarrtl;
8614	map;
8617	larrhk;
8618	rarrhk;
8619	larrlp;
8620	rarrlp;
8621	harrw;
8622	nharr;
8624	lsh;
8625	rsh;
8630	cularr;
8631	curarr;
8634	olarr;
8635	orarr;
8636	lharu;
8637	lhard;
8638	uharr;
8639	uharl;
8640	rharu;
8641	rhard;
8642	dharr;
8643	dharl;
8644	rlarr2;
8646	lrarr2;
8647	larr2;
8648	uarr2;
8649	rarr2;
8650	darr2;
8651	lrhar2;
8652	rlhar2;
8653	nlArr;
8654	nhArr;
8655	nrArr;
8661	vArr;
8666	lAarr;
8667	rAarr;
8705	comp;
8708	nexist;
8714	epsis;
8717	bepsi;
8720	coprod;
8722	minus;
8723	mnplus;
8724	plusdo;
8726	setmn;
8728	compfn;
8735	ang90;
8737	angmsd;
8738	angsph;
8739	mid;
8740	nmid;
8741	par;
8742	npar;
8750	conint;
8757	becaus;
8765	bsim;
8768	wreath;
8769	nsim;
8771	sime;
8772	nsime;
8775	ncong;
8777	nap;
8778	ape;
8780	bcong;
8782	bump;
8783	bumpe;
8784	esdot;
8785	eDot;
8786	efDot;
8787	erDot;
8788	colone;
8789	ecolon;
8790	ecir;
8791	cire;
8793	wedgeq;
8796	trie;
8802	nequiv;
8806	lE;
8807	gE;
8808	lne;
8809	gne;
8810	Lt;
8811	Gt;
8812	twixt;
8814	nlt;
8815	ngt;
8816	nle;
8817	nge;
8818	lsim;
8819	gsim;
8822	lg;
8823	gl;
8826	pr;
8827	sc;
8828	pre;
8829	sce;
8830	prsim;
8831	scsim;
8832	npr;
8833	nsc;
8837	nsup;
8840	nsube;
8841	nsupe;
8842	subne;
8843	supne;
8846	uplus;
8847	sqsub;
8848	sqsup;
8849	sqsube;
8850	sqsupe;
8851	sqcap;
8852	sqcup;
8854	ominus;
8856	osol;
8857	odot;
8858	ocir;
8859	oast;
8861	odash;
8862	plusb;
8863	minusb;
8864	timesb;
8865	sdotb;
8866	vdash;
8867	dashv;
8868	top;
8871	models;
8872	vDash;
8873	Vdash;
8874	Vvdash;
8876	nvdash;
8877	nvDash;
8878	nVdash;
8879	nVDash;
8882	vltri;
8883	vrtri;
8884	ltrie;
8885	rtrie;
8888	mumap;
8890	intcal;
8891	veebar;
8892	barwed;
8900	diam;
8902	sstarf;
8903	divonx;
8904	bowtie;
8905	ltimes;
8906	rtimes;
8907	lthree;
8908	rthree;
8909	bsime;
8910	cuvee;
8911	cuwed;
8912	Sub;
8913	Sup;
8914	Cap;
8915	Cup;
8916	fork;
8918	ldot;
8919	gsdot;
8920	Ll;
8921	Gg;
8922	leg;
8923	gel;
8924	els;
8925	egs;
8926	cuepr;
8927	cuesc;
8928	npre;
8929	nsce;
8934	lnsim;
8935	gnsim;
8936	prnsim;
8937	scnsim;
8938	nltri;
8939	nrtri;
8940	nltrie;
8941	nrtrie;
8942	vellip;
8966	Barwed;
8972	drcrop;
8973	dlcrop;
8974	urcrop;
8975	ulcrop;
8981	telrec;
8982	target;
8988	ulcorn;
8989	urcorn;
8990	dlcorn;
8991	drcorn;
8994	frown;
8995	smile;
9251	blank;
9416	oS;
9472	boxh;
9474	boxv;
9484	boxdr;
9488	boxdl;
9492	boxur;
9496	boxul;
9500	boxvr;
9508	boxvl;
9516	boxhd;
9524	boxhu;
9532	boxvh;
9552	boxH;
9553	boxV;
9554	boxdR;
9555	boxDr;
9556	boxDR;
9557	boxdL;
9558	boxDl;
9559	boxDL;
9560	boxuR;
9561	boxUr;
9562	boxUR;
9563	boxuL;
9564	boxUl;
9565	boxUL;
9566	boxvR;
9567	boxVr;
9568	boxVR;
9569	boxvL;
9570	boxVl;
9571	boxVL;
9572	boxHd;
9573	boxhD;
9574	boxHD;
9575	boxHu;
9576	boxhU;
9577	boxHU;
9578	boxvH;
9579	boxVh;
9580	boxVH;
9600	uhblk;
9604	lhblk;
9608	block;
9617	blk14;
9618	blk12;
9619	blk34;
9633	squ;
9642	squf;
9645	rect;
9646	marker;
9651	xutri;
9652	utrif;
9653	utri;
9656	rtrif;
9657	rtri;
9661	xdtri;
9662	dtrif;
9663	dtri;
9666	ltrif;
9667	ltri;
9675	cir;
9733	starf;
9734	star;
9742	phone;
9792	female;
9794	male;
9834	sung;
9837	flat;
9838	natur;
9839	sharp;
10003	check;
10007	cross;
10016	malt;
10022	lozf;
10038	sext;
64256	fflig;
64257	filig;
64258	fllig;
64259	ffilig;

##
## The End!
##