diff --git a/README.md b/README.md
index fb77ff8..c59505a 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,37 @@
 calcule-offsets
 ===============
 
-Script Perl pour calculer les offsets des entités nommées annotées dans le fichier original non annoté
\ No newline at end of file
+Calculer des offsets des entités nommées 
+
+Permet de générer un fichier JSON avec les offsets des entités nommées dans le fichier en entrée en utilisant un fichier où ces entitées ont été balisées.
+
+### Prérequis
+
+Le programme `calculeOffsetsEN.pl` fonctionne sous Unix/Linux ainsi qu’avec Cygwin 
+sous Windows. Il utilise plusieurs modules dont la plupart sont présents dans la 
+distribution standard de **Perl**. Normalement, les seuls modules à installer sont :
+ - Digest::MD5
+ - XML::TokeParser
+ - XML::Twig
+
+### Usage
+```
+    calculeOffsetsEN.pl -i input_file -e entities_file -o output_file
+                        [ -n new_input_file ] [ -d ]
+    calculeOffsetsEN.pl -h
+```
+
+
+### Options
+```
+    -d  active le mode “débogage”
+    -e  indique le nom du fichier au format TEI où les entitées nommées 
+        sont balisées
+    -h  affiche cette aide.
+    -i  indique le nom du fichier d’entrée, au format TEI, qui doit être 
+        le fichier présent sur le serveur Istex
+    -n  crée un nouveau fichier d’entrée, identique à celui indiqué par 
+        l’option “-i”, mais où les entitées nommées sont balisées
+    -o  indique le nom du fichier de sortie,  au format JSON, avec les 
+        offsets des entitées nommées
+```
diff --git a/calculeOffsetsEN.pl b/calculeOffsetsEN.pl
new file mode 100755
index 0000000..50590ec
--- /dev/null
+++ b/calculeOffsetsEN.pl
@@ -0,0 +1,833 @@
+#!/usr/bin/perl
+
+# TEST AVEC 5D991C2718453AF12941533C23C84AF4A8B0F399.tei !!!!!
+
+# Declaration of pragmas
+use strict;
+use utf8;
+use open qw/:std :utf8/;
+
+# Call of basic external modules
+use Encode qw(decode_utf8 encode_utf8 is_utf8);
+use Getopt::Long;
+
+# Call of modules specific to the application
+use Digest::MD5;
+use XML::TokeParser;
+use XML::Twig;
+
+my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
+my  $substitut  = " " x length($programme);
+my $usage = "Usage : \n" .
+            "    $programme -i input_file -e entities_file -o output_file \n" .
+            "    $substitut [ -n new_input_file ] [ -d ] \n" .
+            "    $programme -h \n\n";
+
+my $version     = "0.6.2";
+my $dateModif   = "14 Août 2020";
+
+# Initialisation of global variables
+# needed for the options
+my $debug      = 0;
+my $entities   = "";
+my $input      = "";
+my $help       = 0;
+my $newfile    = "";
+my $output     = "";
+
+eval    {
+        $SIG{__WARN__} = sub {usage(1);};
+        GetOptions(
+                "debug"             => \$debug,
+                "entities=s"        => \$entities,
+                "input=s"           => \$input,
+                "help"              => \$help,
+                "newfile=s"         => \$newfile,
+                "output=s"          => \$output,
+                );
+        };
+$SIG{__WARN__} = sub {warn $_[0];};
+
+if ( $help ) {
+        print "\nProgramme : \n";
+        print "   “$programme”, version $version ($dateModif)\n";
+        print "   Permet de générer un fichier JSON avec les offsets des entités \n";
+        print "   nommées dans le fichier en entrée en utilisant un fichier où \n";
+        print "   ces entitées ont été balisées. \n";
+        print "\n";
+        print $usage;
+        print "\nOptions : \n";
+        print "    -d  active le mode “débogage” \n";
+        print "    -e  indique le nom du fichier au format TEI où les entitées \n";
+        print "        nommées sont balisées \n";
+        print "    -h  affiche cette aide. \n";
+        print "    -i  indique le nom du fichier d’entrée, au format TEI, qui doit \n";
+        print "        être le fichier présent sur le serveur Istex \n";
+        print "    -n  crée un nouveau fichier d’entrée, identique à celui indiqué \n";
+        print "        par l’option “-i”, mais où les entitées nommées sont balisées \n";
+        print "    -o  indique le nom du fichier de sortie,  au format JSON, avec \n";
+        print "        les offsets des entitées nommées \n";
+        print " \n";
+
+        exit 0;
+        }
+
+usage(2) if not $input or not $entities or not $output;
+
+# Variables needed for processing the data
+my $doi    = "";
+my $header = "";
+my $istex  = "";
+my $length = 0;
+my $path   = "";
+my $pii    = "";
+my $pmid   = "";
+my $raw    = "";
+my $start  = 0;
+my $status = 'out';
+my $tei    = "";
+my $text   = "";
+my @lines  = ();
+my @offset = ();
+my %dejavu = ();
+my %limite = ();
+
+# Inirtializing MD5
+my $md5 = Digest::MD5->new;
+
+open(INP, "<:raw", $input) or die "$!,";
+while(<INP>) {
+        $md5->add($_);
+        my $line = decode_utf8($_);
+        push @lines, $line;
+        push @offset, $length;
+        $length += length($line);
+        $line =~ s/[\s\r]/ /go;
+        $text .= $line;
+        }
+push @offset, $length;
+close INP;
+
+my $digest = $md5->hexdigest;
+
+if ( $text =~ /<TEI\s/i ) {
+        $header = $`;
+        $tei = $& . $';
+        $start = length($header);
+        }
+else    {
+        print STDERR "$programme: Warning: no “TEI” element in file “$input”\n";
+        exit 3;
+        }
+
+
+# Parsing original file
+my $parser = XML::TokeParser->new(\$tei);
+while( my $token = $parser->get_token() ) {
+        $raw .= $token->raw;
+
+        if ( $token->is_start_tag ) {
+                my $tag = $token->tag;
+                # Updating the path
+                $path .= "/$tag";
+                # We get the attributes from a hashref
+                my %attributes = %{$token->attr};
+
+                if ( $tag eq "titleStmt" ) {
+                        $limite{'title'}{'start'} = $start + length(decode_utf8($raw)) + 1;
+                        print STDERR "Début titre  : $limite{'title'}{'start'}\n" if $debug;
+                        $status = 'in';
+                        }
+                elsif ( $tag eq "abstract" ) {
+                        $limite{'abstract'}{'start'} = $start + length(decode_utf8($raw)) - length($token->raw);
+                        print STDERR "Début résumé : $limite{'abstract'}{'start'}\n" if $debug;
+                        $status = 'in';
+                        }
+                elsif ( $tag eq "body" ) {
+                        $limite{'body'}{'start'} = $start + length(decode_utf8($raw)) + 1;
+                        print STDERR "Début texte  : $limite{'body'}{'start'}\n" if $debug;
+                        $status = 'in';
+                        }
+                elsif ( $tag eq "idno" ) {
+                        if ( defined $attributes{'type'} ) {
+                                my $id = $parser->get_text();
+                                $istex = $id if $attributes{'type'} eq 'istex';
+                                $doi = $id if $attributes{'type'} eq 'DOI';
+                                $pii = $id if $attributes{'type'} eq 'PII';
+                                $pmid = $id if $attributes{'type'} eq 'PMID';
+                                }
+                        }
+                elsif ( $status eq 'in' ) {
+                        my $string = $token->raw;
+                        my $position = $start + length($raw) - length($token->raw);
+                        push(@{$dejavu{"S:$tag"}{$string}}, $position);
+                        }
+                }
+
+        elsif ( $token->is_end_tag ) {
+                my $tag = $token->tag;
+
+                if ( $tag eq "titleStmt" ) {
+                        $limite{'title'}{'end'} = $start + length(decode_utf8($raw)) - length($token->raw) + 1;
+                        print STDERR "Fin titre    : $limite{'title'}{'end'}\n" if $debug;
+                        $status = 'out';
+                        }
+                elsif ( $tag eq "abstract" ) {
+                        $limite{'abstract'}{'end'} = $start + length(decode_utf8($raw)) + 1;
+                        print STDERR "Fin résumé   : $limite{'abstract'}{'end'}\n" if $debug;
+                        $status = 'out';
+                        }
+                elsif ( $tag eq "body" ) {
+                        $limite{'body'}{'end'} = $start + length(decode_utf8($raw)) - length($token->raw) + 1;
+                        print STDERR "Fin texte    : $limite{'body'}{'end'}\n" if $debug;
+                        $status = 'out';
+                        }
+                elsif ( $status eq 'in' ) {
+                        my $string = $token->raw;
+                        my $position = $start + length($raw) - length($token->raw);
+                        push(@{$dejavu{"E:$tag"}{$string}}, $position);
+                        }
+
+                $path =~ s|/\w+\z||o;
+                }
+        }
+
+# Other variables
+my $element  = "";
+my $endtag   = "";
+my $entity   = "";
+my $erase    = "";
+my $ok       = undef;
+my $original = validateUtf8($text);;
+my $starttag = "";
+my $string   = "";
+my $type     = "";
+my @errors   = ();
+my %offset   = ();
+
+open(ORG, ">:utf8", "original.txt") or die "$!,";
+print ORG "$original\n";
+close ORG;
+
+# Reinitializing variables
+$path = $raw = $text = "";
+$status = 'out';
+
+# Getting the title
+my $title = substr($original, $limite{'title'}{'start'} - 1, $limite{'title'}{'end'} - $limite{'title'}{'start'});
+print STDERR "Titre : \"$title\"\n" if $debug;
+my $twig = XML::Twig->new(
+                   twig_handlers => {
+                        'title[@type="main"]'         => sub {$string = $_->sprint;},
+                        'title[@level="a"]'         => sub {$string = $_->sprint;},
+                        },
+                 );
+$twig->parse("<text>$title</text>");
+$twig->purge;
+
+$title = $string;
+$title =~ s|^<title [^>]+>||o;
+$title =~ s|</title>||o;
+$title =~ s/\s+/ /go;
+$title =~ s/^\s+//o;
+$title =~ s/\s+\z//o;
+
+# Generating a simpler version of
+# the original text
+my $simpler = join(" ", map {s/<[^>]+>/<>/go; $_;} ($original =~ /(<[^>]+>|\w+)/go));
+
+open(SMP, ">:utf8", "simpler.txt") or die "$!,";
+print SMP "$simpler\n";
+close SMP;
+
+# List of named entity tag
+my %entity = (
+        'date'      => 1,
+        'geogName'  => 1,
+        'orgName'   => 1,
+        'persName'  => 1,
+        'placeName' => 1,
+        'ref'       => {'type' => 'bibl'},
+        'roleName'  => 1,
+        );
+my %position = ();
+
+open(INP, "<:raw", $entities) or die "$!,";
+while(<INP>) {
+        $text .= $_;
+        }
+close INP;
+
+# Parsing annotated file
+$parser = XML::TokeParser->new(\$text);
+while( my $token = $parser->get_token() ) {
+        $string = $token->raw;
+        $entity .= $string if $status eq 'in';
+        if ( $token->is_start_tag ) {
+                my $tag = $token->tag;
+                # Updating the path
+                $path .= "/$tag";
+
+                if ( $path =~ m|/titleStmt/|o or
+                     $path =~ m|/abstract/|o or
+                     $path =~ m|/body/|o ) {
+                        $ok = 1;
+                        }
+                else    {
+                        $ok = 0;
+                        }
+                # We get the attributes from a hashref
+                my %attributes = %{$token->attr};
+
+                if ( $entity{$tag} and not ref($entity{$tag}) ) {
+                        $element = $tag;
+                        $starttag = $string;
+                        $entity = "";
+                        $status = 'in';
+                        if ( defined $attributes{'type'} ) {
+                                $type = $attributes{'type'};
+                                }
+                        else    {
+                                $type = "";
+                                }
+                        }
+                # Problème pour faire une condition qui ne se base que sur le hachage
+                # “%entity” et qui permette de n'avoir que le cas souhaité pour ne pas
+                # géner la condition suivante.
+                # elsif ( $entity{$tag} and ref($entity{$tag}) eq 'HASH' ) {
+                elsif ( $tag eq 'ref' and defined $attributes{'type'} and $attributes{'type'} eq 'bibl' ) {
+                        $element = $tag;
+                        $starttag = $string;
+                        $entity = "";
+                        $status = 'in';
+                        $type = 'bibl';
+                        }
+                elsif ( $ok ) {
+                        # print "Balive ouvrante \"$string\"\n";
+                        my $position = $start + length($string) + 1;
+                        if ( defined $dejavu{"S:$tag"} ) {
+                                if ( defined $dejavu{"S:$tag"}{$string} ) {
+                                        my @tmp = @{$dejavu{"S:$tag"}{$string}};
+                                        my $match = 0;
+                                        foreach my $item (@tmp) {
+                                                if ( $item == $position ) {
+                                                        # print "Balise \"$string\" trouvé à offset $position\n";
+                                                        $match ++;
+                                                        }
+                                                }
+                                        # print "Pas de correspondance pour la balise \"$string\"\n" if not $match;
+                                        }
+                                else    {
+                                        my @tmp = ();
+                                        my $match = 0;
+                                        foreach my $item (keys %{$dejavu{"S:$tag"}}) {
+                                                push(@tmp, @{$dejavu{"S:$tag"}{$item}});
+                                                }
+                                        foreach my $item (@tmp) {
+                                                if ( $item == $position ) {
+                                                        # print "Balise \"$string\" trouvé à offset $position\n";
+                                                        $match ++;
+                                                        }
+                                                }
+                                        # print "Pas de correspondance pour une balise \"$tag\"\n" if not $match;
+                                        }
+
+                                }
+                        else    {
+                                $erase = $path;
+                                $string = "";
+                                }
+                        }
+                }
+        elsif ( $token->is_end_tag ) {
+                my $tag = $token->tag;
+
+                if ( $entity{$tag} and $tag eq $element ) {
+                        $entity = substr($entity, 0, length($entity) - length($string));
+                        $status = recherche();
+                        $element = "";
+                        $endtag = $string;
+                        # $status = 'leaving';
+                        }
+                elsif ( $erase eq $path ) {
+                        $erase = "";
+                        $string = "";
+                        }
+
+                $path =~ s|/\w+\z||o;
+                }
+        if ( $status eq 'out' ) {
+                $raw .= $string;
+                }
+        elsif ( $status eq 'leaving' ) {
+                $raw .= $entity;
+                $status = 'out';
+                }
+        }
+
+# If errors ...
+$input =~ s|^(?:.*/)?(.+)|$1|;
+
+if ( @errors ) {
+        print STDERR "Fichier \"$input\" : \n";
+        foreach my $error (@errors) {
+                print STDERR "\t -> $error \n";
+                }
+        print STDERR " \n";
+
+        exit 4;
+        }
+else    {
+        open(OUT, ">:utf8", $output) or die "$!,";
+        print OUT "{\n";
+        print OUT "  \"Fichier\": \"$input\",\n";
+        print OUT "  \"Titre\": \"" . valid($title). "\",\n";
+        my @tmp = ();
+        if ( $istex ) {
+                push(@tmp, "    \"Istex\": \"$istex\"");
+                }
+        if ( $doi ) {
+                push(@tmp, "    \"DOI\": \"" . valid($doi). "\"");
+                }
+        if ( $pii ) {
+                push(@tmp, "    \"PII\": \"" . valid($pii). "\"");
+                }
+        if ( $pmid ) {
+                push(@tmp, "    \"PMID\": \"" . valid($pmid). "\"");
+                }
+        if ( @tmp ) {
+                print OUT "  \"Identifiant\": \{\n";
+                while ( my $item = shift @tmp ) {
+                        print OUT $item, @tmp ? "," : "", "\n";
+                        }
+                print OUT "  \},\n";
+                }
+        print OUT "  \"Empreinte MD5\": \"$digest\",\n";
+        if ( %offset ) {
+                print OUT "  \"Entités nommées\": \[\n";
+                my @entities = sort keys %offset;
+                while ( $entity = shift @entities ) {
+                        print OUT "    \{\n";
+                        print OUT "      \"Type\": \"$entity\",\n";
+                        print OUT "      \"Liste\": \[\n";
+                        my @ne = @{$offset{$entity}};
+                        while ( my $ne = shift @ne ) {
+                                print OUT "        \{\n";
+                                print OUT "          \"Expression\": \"" . valid($ne->{'ne'}). "\",\n";
+                                if ( defined $ne->{'string'} ) {
+                                        print OUT "          \"Texte\": \"" . valid($ne->{'string'}). "\",\n";
+                                        }
+                                print OUT "          \"Position\": \{\n";
+                                print OUT "            \"Début\": \"$ne->{'debut'}\",\n";
+                                print OUT "            \"Fin\": \"$ne->{'fin'}\"\n";
+                                print OUT "          \},\n";
+                                print OUT "          \"Offsets\": \{\n";
+                                print OUT "            \"Caractères\": \"$ne->{'characters'}\",\n";
+                                print OUT "            \"Octets\": \"$ne->{'octets'}\"\n";
+                                print OUT "          \}\n";
+                                print OUT "        \}", @ne ? "," : "", "\n";
+                                }
+                        print OUT "      \]\n";
+                        print OUT "    \}", @entities ? "," : "", "\n";
+                        }
+                print OUT "  \]\n";
+                }
+        else    {
+                print OUT "  \"Entités nommées\": \[\]\n";
+                }
+        print OUT "}\n";
+        close OUT;
+        }
+
+# If necessary, create a new annotated file combining
+# the original file and the file of named entities
+if ( $newfile ) {
+        my %position = "";
+        foreach $entity (keys %offset) {
+                $starttag = "<$entity>";
+                $endtag = "</$entity>";
+                if ( $entity =~ /^(\w+) type:(\w+)\z/o ) {
+                        $starttag = "<$1 type=\"$2\">";
+                        $endtag = "</$1>";
+                        }
+                foreach my $ne (@{$offset{$entity}}) {
+                        my ($line, $char) = ($ne->{'debut'} =~ /^ligne (\d+), caractère (\d+)\z/o);
+                        $position{$line}{$char} = $starttag;
+                        ($line, $char) = $ne->{'fin'} =~ /^ligne (\d+), caractère (\d+)\z/o;
+                        $position{$line}{$char + 1} = $endtag;
+                        }
+                }
+        foreach my $line (keys %position) {
+                my @chars = split(//, $lines[$line - 1]);
+                foreach my $char (sort {$b <=> $a} keys %{$position{$line}}) {
+                        splice(@chars, $char - 1, 0, $position{$line}{$char});
+                        }
+                $lines[$line - 1] = join("", @chars);
+                }
+        open(NEW, ">:utf8", $newfile) or die "$!,";
+        foreach my $line (@lines) {
+                print NEW $line;
+                }
+        close NEW;
+        }
+
+exit 0;
+
+
+sub usage
+{
+print STDERR $usage;
+
+exit shift;
+}
+
+sub recherche
+{
+if ( $path !~ m{/(abstract|body|titleStmt)/}o ) {
+        $raw .= $starttag . $entity;
+        return 'out';
+        }
+
+my $chaine = validateUtf8($entity);
+my $utf8 = validateUtf8($raw);
+my $dejaVu = undef;
+
+$chaine = join(" ", ($chaine =~ /(\S+)/go));
+print STDERR "Entité : \"$chaine\"\n" if $debug;
+# Regular expression for the named entity
+# my $regex2 = join("\\s*", map {s/\W/\\W+/go; $_;} ($chaine =~ /(\p{IsAlnum}+|\P{IsSpace})/go));
+# $regex2 = ligature($regex2);
+# $regex2 =~ s/\\W\+\\s\*amp\\s\*\\W\+/$&(?:amp\\s*\\W)?/go;
+my $regex2 = join("\\s*", map {s/\W/\\W/go; $_;} ($chaine =~ /(\p{IsAlnum}+|\P{IsSpace})/go));
+$regex2 = ligature($regex2);
+$regex2 =~ s/\\W\\s\*amp\\s\*\\W/$&(?:amp\\s*\\W)?/go;
+my @words = $utf8 =~ /(<[^>]+>|\p{IsAlnum}+)/go;
+splice(@words, 0, -10);
+my @all = @words;
+while ( 1 ) {
+        # Taking into account entity "&amp;" mistakenly
+        # written "&amp;amp;" in the original file
+        my $regex1 = join("\\W+", @words);
+        $regex1 =~ s/\bamp\b/amp(?:\\W+amp)?/go;
+        $regex1 = ligature($regex1);
+        if ( $regex1 =~ /<[^>]+>/ ) {
+                $regex1 =~ s|(\\W\+)?<[^>]+>(\\W\+)?|\\W*<[^>]+>\\W*|go;
+                }
+                if ( $regex1 =~ /\\W\*\\W\*/ ) {
+                        $regex1 =~ s|\\W\*(\\W\*)+|\\W*|go;
+                        }
+        print STDERR "Recherche \"", join(" ", @words, ($chaine =~ /(\S+)/go)), "\"\n" if $debug;
+
+        my @results = $original =~ /$regex1\W*$regex2/g;
+        if ( $#results == 0 ) {
+                $original =~ /($regex1\W*)($regex2)/;
+                offset($2, $1, $`, $chaine);
+                last;
+                }
+        elsif ( $#results > 0 ) {
+                if ( not $dejaVu ) {
+                        $dejaVu = 1;
+                        my ($code, $intruder) = intruder($regex2, @all) if not $dejaVu;
+                        if ( $code > 0 ) {
+                                print STDERR "INTRUS [$code] \"$intruder\"\n";
+                                @words = @all;
+                                splice(@words, $code, 0, $intruder);
+                                next;
+                                }
+                        elsif ( $code < 0 ) {
+                                print STDERR "INTRUS [$code] \"$intruder\"\n";
+                                @words = @all;
+                                @words[ - $code] = $intruder;
+                                next;
+                                }
+                        else    {
+                                last if occurrence($regex2);
+                                }
+                        }
+                my $message = "Résultats multiples pour \"" . join(" ", ($chaine =~ /(\S+)/go)) . "\" [texte original]";
+                push(@errors, $message);
+                last;
+                }
+        else    {
+                $regex1 =~ s/\\W\*\<\[\^\>\]\+>\\W\*/\\W*/go;
+                (my $regex3 = $regex2) =~ s/(\\[Ws][?*+]?)+/\\W*/go;
+                @results = $simpler =~ /$regex1\W*$regex3/g;
+                if ( $#results == 0 ) {
+                        $simpler =~ /($regex1\W*)($regex3)/;
+                        my $match1 = $1;
+                        my $match2 = $2;
+                        $match1 =~ s/<>/<[^>]+>/go;
+                        $match1 =~ s/ /\\W*/go;
+                        $match2 =~ s/\P{IsAlnum}+\z//o;
+                        $match2 =~ s/<>/<[^>]+>/go;
+                        $match2 =~ s/ /\\W*/go;
+                        if ( $chaine =~ /^[^\p{IsAlnum}\p{IsSpace}]/ ) {
+                                $match2 = '\W\s*(?:<[^>]+>)?\s*' . $match2;
+                                }
+                        if ( $chaine =~ /[^\p{IsAlnum}\p{IsSpace}]\z/ ) {
+                                $match2 .= '\s*(?:<[^>]+>)?\s*\W';
+                                }
+                        if ( $original =~ /($match1)($match2)/ ) {
+                                offset($2, $1, $`, $chaine);
+                                last;
+                                }
+                        }
+                elsif ( $#results > 0 ) {
+                        if ( not $dejaVu ) {
+                                $dejaVu = 1;
+                                my ($code, $intruder) = intruder($regex2, @all);
+                                if ( $code > 0 ) {
+                                        print STDERR "INTRUS [$code] \"$intruder\"\n";
+                                        @words = @all;
+                                        splice(@words, $code, 0, $intruder);
+                                        next;
+                                        }
+                                if ( $code < 0 ) {
+                                        print STDERR "INTRUS [$code] \"$intruder\"\n";
+                                        @words = @all;
+                                        @words[ - $code] = $intruder;
+                                        next;
+                                        }
+                                }
+                        my $message = "Résultats multiples pour \"" . join(" ", ($chaine =~ /(\S+)/go)) . "\" [texte simple]";
+                        push(@errors, $message);
+                        last;
+                        }
+                elsif ( $#words < 0 ) {
+                        if ( not $dejaVu ) {
+                                $dejaVu = 1;
+                                if ( $regex2 =~ /\\W/o ) {
+                                        $regex2 =~ s/\\W/\\W?/go;
+                                        @words = @all;
+                                        next;
+                                        }
+                                }
+                        my $message = "Pas de résultat pour \"" . join(" ", ($chaine =~ /(\S+)/go)) . "\"";
+                        push(@errors, $message);
+                        }
+                }
+
+        last if not @words;
+        shift @words;
+        }
+
+return 'leaving';
+}
+
+sub offset
+{
+my ($ne, $match, $before, $expression) = @_;
+
+# Petit correctif pour les cas où on récupère un blanc final
+$ne =~ s/\s+\z//o;
+
+my $offset   = length($before) + length($match);
+my $size     = length($ne);
+my $position = $offset + 1;
+my $nb       = 0;
+my %tmp      = ();
+
+for ( ; $nb < $#offset ; $nb ++ ) {
+        if ( $position > $offset[$nb] and $position < $offset[$nb + 1] ) {
+                my $line = $nb + 1;
+                $tmp{'debut'} = sprintf "ligne %d, caractère %d", $line, $position - $offset[$nb];
+                for ( ; $nb < $#offset ; $nb ++ ) {
+                        if ( $offset + $size > $offset[$nb] and $offset + $size < $offset[$nb + 1] ) {
+                                $tmp{'fin'} = sprintf "ligne %d, caractère %d", $nb + 1, $offset + $size - $offset[$nb];
+                                $nb = $#offset;
+                                }
+                        }
+                }
+        }
+$tmp{'characters'} = sprintf "%d - %d", $offset + 1, $offset + $size;
+$offset = length(encode_utf8($before . $match));
+$size = length(encode_utf8($ne));
+$tmp{'octets'} = sprintf "%d - %d", $offset + 1, $offset + $size;
+
+if ( %tmp ) {
+        $tmp{'ne'} = $expression;
+        if ( $expression ne $ne ) {
+                $tmp{'string'} = $ne;
+                }
+        my $value = $element;
+        if ( $type ) {
+                $value .= " type:$type";
+                }
+        push(@{$offset{$value}}, \%tmp);
+        }
+else    {
+        push(@errors, "Entité nommée \"$ne\" introuvable dans le fichier original");
+        }
+}
+
+sub offset2
+{
+my ($ne, $position) = @_;
+
+my $size     = length($ne);
+my $offset   = $position - $size + 1;
+my $nb       = 0;
+my %tmp      = ();
+
+for ( ; $nb < $#offset ; $nb ++ ) {
+        if ( $offset > $offset[$nb] and $offset < $offset[$nb + 1] ) {
+                my $line = $nb + 1;
+                $tmp{'debut'} = sprintf "ligne %d, caractère %d", $line, $offset - $offset[$nb];
+                for ( ; $nb < $#offset ; $nb ++ ) {
+                        if ( $position > $offset[$nb] and $position < $offset[$nb + 1] ) {
+                                $tmp{'fin'} = sprintf "ligne %d, caractère %d", $nb + 1, $position - $offset[$nb];
+                                $nb = $#offset;
+                                }
+                        }
+                }
+        }
+$tmp{'characters'} = sprintf "%d - %d", $offset, $position;
+my $before = substr($original, 0, $offset - 1);
+$offset = length(encode_utf8($before));
+$size = length(encode_utf8($ne));
+$tmp{'octets'} = sprintf "%d - %d", $offset + 1, $offset + $size;
+
+if ( %tmp ) {
+        $tmp{'ne'} = $ne;
+        my $value = $element;
+        if ( $type ) {
+                $value .= " type:$type";
+                }
+        push(@{$offset{$value}}, \%tmp);
+        }
+else    {
+        push(@errors, "Entité nommée \"$ne\" introuvable dans le fichier original");
+        }
+}
+
+sub validateUtf8
+{
+my $chaine = shift;
+
+my $valide = "";
+if ( is_utf8($chaine, Encode::FB_QUIET) ) {
+        $valide = $chaine;
+        }
+else    {
+        $valide = decode_utf8($chaine);
+        }
+
+return $valide
+}
+
+sub valid
+{
+my $json = shift;
+
+$json =~ s/\\/\\\\/go;
+$json =~ s/"/\\"/go;
+
+return $json;
+}
+
+sub ligature
+{
+my $chaine = shift;
+
+if ( $chaine =~ /f(f|i|l|fi|fl)/o ) {
+        $chaine =~ s/ff/(?:ff|\x{FB00})/go;
+        $chaine =~ s/fi/(?:fi|\x{FB01})/go;
+        $chaine =~ s/fl/(?:fl|\x{FB02})/go;
+        $chaine =~ s/ffi/(?:ffi|\x{FB03})/go;
+        $chaine =~ s/ffl/(?:ffl|\x{FB04})/go;
+        }
+if ( $chaine =~ /[äéöü]/ ) {
+        $chaine =~ s/ä/(?:ä|a\x{0308})/go;
+        $chaine =~ s/é/(?:é|e\x{0301})/go;
+        $chaine =~ s/ö/(?:ö|o\x{0308})/go;
+        $chaine =~ s/ü/(?:ü|u\x{0308})/go;
+        }
+elsif ( $chaine =~ /[aou]\x{0308}/o ){
+        $chaine =~ s/a\x{0308}/(?:ä|a\x{0308})/go;
+        $chaine =~ s/o\x{0308}/(?:ö|o\x{0308})/go;
+        $chaine =~ s/u\x{0308}/(?:ü|u\x{0308})/go;
+        }
+elsif ( $chaine =~ /e\x{0301}/o ){
+        $chaine =~ s/e\x{0301}/(?:é|e\x{0301})/go;
+        }
+
+return $chaine;
+}
+
+sub intruder
+{
+my ($regex, @words) = @_;
+
+for ( my $nb = 10 ; $nb > 0 ; $nb -- ) {
+
+        my @tmp = @words;
+        splice(@tmp, $nb, 0, '(.+?)');
+
+        my $tmp = join("\\W+", map {$_ eq 'amp' ? 'amp(?:\W+amp)?' : $_;} @tmp);
+        $tmp = ligature($tmp);
+
+        my @results = $original =~ /$tmp\W*$regex/g;
+        if ( $#results == 0 ) {
+                my $intruder = $1;
+                my $nbWords =()= $intruder =~ /\p{IsAlnum}+/go;
+                return ($nb, $intruder) if $nbWords < 10;
+                }
+        elsif ( $#results < 0 ) {
+                @results = $simpler =~ /$tmp\W*$regex/g;
+                if ( $#results == 0 ) {
+                        my $intruder = $1;
+                        my $nbWords =()= $intruder =~ /\p{IsAlnum}+/go;
+                        return ($nb, $intruder) if $nbWords < 10;
+                        }
+                }
+        }
+
+for ( my $nb = 9 ; $nb > 0 ; $nb -- ) {
+
+        my @tmp = @words;
+        $tmp[$nb] = '(.+?)';
+
+        my $tmp = join("\\W+", map {$_ eq 'amp' ? 'amp(?:\W+amp)?' : $_;} @tmp);
+        $tmp = ligature($tmp);
+
+        my @results = $original =~ /$tmp\W*$regex/g;
+        if ( $#results == 0 ) {
+                my $intruder = $1;
+                my $nbWords =()= $intruder =~ /\p{IsAlnum}+/go;
+                return (- $nb, $intruder) if $nbWords < 10;
+                }
+        elsif ( $#results < 0 ) {
+                @results = $simpler =~ /$tmp\W*$regex/g;
+                if ( $#results == 0 ) {
+                        my $intruder = $1;
+                        my $nbWords =()= $intruder =~ /\p{IsAlnum}+/go;
+                        return (- $nb, $intruder) if $nbWords < 10;
+                        }
+                }
+        }
+
+return 0;
+}
+
+sub occurrence
+{
+my $regex = shift;
+
+my @match = ();
+my @pos   = ();
+
+my @raw = $raw =~ /$regex/g;
+my $num = $#raw + 1;
+
+while ( $original =~ /$regex/g ) {
+        push(@match, $&);
+        push(@pos, pos($original));
+        }
+
+if ( $pos[$num] ) {
+        offset2($match[$num], $pos[$num]);
+        return 1;
+        }
+
+return 0;
+}