# NOTE: this patch has been re-flowed to one diff line per row. Indentation of
# the Perl code was re-created, so the hunks that touch existing files may need
# "git apply --ignore-whitespace". The blob ids on the "index" lines of the
# newly added files no longer describe the content below.
diff --git a/Niveau-0/Extraction_Infos_Depuis_wos.pl b/Niveau-0/Extraction_Infos_Depuis_wos.pl
index 70d3cdb..c0faea3 100755
--- a/Niveau-0/Extraction_Infos_Depuis_wos.pl
+++ b/Niveau-0/Extraction_Infos_Depuis_wos.pl
@@ -80,7 +80,7 @@
         {
         foreach my $valeur (@valeurs)
             {
-            if ($valeur =~ /[,;"]/o)
+            if ($valeur =~ /[,;"]/o)# protect values for the CSV format
                 {
                 $valeur =~ s/"/""/go;
                 $valeur = '"' . $valeur . '"';
@@ -96,12 +96,11 @@
 
 exit 0;
 
-
 sub usage
 {
 my $code = shift;
 
-print STDERR "Usage : $programme -i input [ -o output ]\n";
+print "Usage : $programme -i input [ -o output ]\n";
 
 exit $code;
 }
@@ -113,3 +112,5 @@
 
 
 
+
+
diff --git "a/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq.pl" "b/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq.pl"
index 1875bf0..060c63c 100755
--- "a/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq.pl"
+++ "b/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq.pl"
@@ -74,4 +74,4 @@
 print STDERR " $programme -d ArthropodesTest -r Arthropodes -m metadata.csv -s Arthropodes_Ira.txt\n";
 
 exit $code;
-}
\ No newline at end of file
+}
diff --git "a/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq2.pl" "b/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq2.pl"
new file mode 100644
index 0000000..ea49e7c
--- /dev/null
+++ "b/Niveau-1/Iramuteq/Pr\303\251paration_Donn\303\251es_entr\303\251e_Iramuteq2.pl"
@@ -0,0 +1,67 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use utf8;
+use open qw/:std :utf8/;
+
+use Encode;
+use Getopt::Long;
+
+# Concatenate the plain files of a directory into one corpus file; each text
+# is preceded by a "**** *souscorpus_NAME" line, NAME being the run of
+# letters found before "_" in the file name.
+
+my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
+$programme = decode_utf8($programme);
+
+my $dir = "";
+my $sortie = "";
+
+# A warning raised while parsing options (e.g. unknown option) prints the
+# usage and exits with code 1; normal warning behaviour is restored after.
+eval {
+    $SIG{__WARN__} = sub {usage(1);};
+    GetOptions(
+        "dir=s" => \$dir,
+        "sortie=s" => \$sortie,
+        );
+    };
+$SIG{__WARN__} = sub {warn $_[0];};
+
+# Both options are mandatory.
+usage(2) if not $dir or not $sortie;
+
+# readdir() also returns ".", ".." and sub-directories: keep plain files only,
+# sorted so that the output order is reproducible.
+opendir(my $dh, $dir) or die "Couldn't open directory $dir, $!";
+my @files = sort grep { -f "$dir/$_" } readdir($dh);
+closedir($dh);
+
+open(my $corpus, ">:utf8", $sortie)
+or die "Couldn't open file $sortie, $!";
+foreach my $file (@files){
+    # Capture into a variable: $1 is only meaningful after a successful match.
+    my ($sous_corpus) = $file =~ /(\pL+)_\d*/;
+    if (not defined $sous_corpus){
+        warn "Skipping $file: no sub-corpus name found in file name\n";
+        next;
+    }
+    open(my $texte, "<:encoding(UTF-8)", "$dir/$file") or die "Couldn't open file $file, $!";
+    my @lignes = <$texte>;
+    close($texte);
+    print {$corpus} "**** *souscorpus_$sous_corpus\n";
+    print {$corpus} @lignes;
+}
+close($corpus) or die "Couldn't close file $sortie, $!";
+
+exit 0;
+
+
+sub usage
+{
+my $code = shift;
+
+print STDERR "Usage : $programme -d dir -s sortie\n";
+exit $code;
+}
diff --git a/Niveau-1/TXM/Extraction_Infos_Depuis_wos.pl b/Niveau-1/TXM/Extraction_Infos_Depuis_wos.pl
new file mode 100755
index 0000000..c0faea3
--- /dev/null
+++ b/Niveau-1/TXM/Extraction_Infos_Depuis_wos.pl
@@ -0,0 +1,116 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use utf8;
+use open qw/:std :utf8/;
+
+use Getopt::Long;
+
+my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
+
+my $input = "";
+my $output = "metadata.txt";
+
+eval {
+    $SIG{__WARN__} = sub {usage(1);};
+    GetOptions(
+        "input=s" => \$input,
+        "output=s" => \$output,
+        );
+    };
+$SIG{__WARN__} = sub {warn $_[0];};
+
+usage(2) if not $input;
+
+open(WOS, "<:utf8", $input) or die "Couldn't open file \"$input\", $!";
+open(META, ">:utf8", $output) or die "Couldn't open file \"$output\", $!";
+
+print META "TI;SO;LA;DT;AB;PU;J9;PY;SC;UT\n";
+
+my @valeurs = ();
+
+foreach my $ligne (<WOS>)
+    {
+    chomp($ligne);
+    $ligne =~ s/\r//go;
+    if ($ligne =~ /^TI (.*)/o)
+        {
+        $valeurs[0] = "$1";
+        # equivalent to
+        # $valeurs[0] = substr($ligne, 3);
+        }
+    elsif ($ligne =~ /^SO (.*)/o)
+        {
+        $valeurs[1] = "$1";
+        }
+    elsif ($ligne =~ /^LA (.*)/o)
+        {
+        $valeurs[2] = "$1";
+        }
+    elsif ($ligne =~ /^DT (.*)/o)
+        {
+        $valeurs[3] = "$1";
+        }
+    elsif ($ligne =~ /^AB (.*)/o)
+        {
+        $valeurs[4] = "$1";
+        }
+    elsif ($ligne =~ /^PU (.*)/o)
+        {
+        $valeurs[5] = "$1";
+        }
+    elsif ($ligne =~ /^J9 (.*)/o)
+        {
+        $valeurs[6] = "$1";
+        }
+    elsif ($ligne =~ /^PY (.*)/o)
+        {
+        $valeurs[7] = "$1";
+        }
+    elsif ($ligne =~ /^SC (.*)/o)
+        {
+        $valeurs[8] = "$1";
+        }
+    elsif ($ligne =~ /UT ISTEX:(.*)/o)
+        {
+        my $lien = $1;
+        $valeurs[9] = "".$lien."";
+        }
+    elsif ($ligne =~ /^ER/o)
+        {
+        foreach my $valeur (@valeurs)
+            {
+            if ($valeur =~ /[,;"]/o)# protect values for the CSV format
+                {
+                $valeur =~ s/"/""/go;
+                $valeur = '"' . $valeur . '"';
+                }
+            }
+        print META join(";", @valeurs), "\n";
+        @valeurs = ();
+        }
+    }
+close WOS;
+close META;
+
+
+exit 0;
+
+sub usage
+{
+my $code = shift;
+
+print "Usage : $programme -i input [ -o output ]\n";
+
+exit $code;
+}
+
+
+
+
+
+
+
+
+
+
diff --git a/Niveau-1/TXM/Metadata1.pl b/Niveau-1/TXM/Metadata1.pl
new file mode 100644
index 0000000..9d699b0
--- /dev/null
+++ b/Niveau-1/TXM/Metadata1.pl
@@ -0,0 +1,47 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use utf8;
+use open qw/:std :utf8/;
+
+use Getopt::Long;
+
+my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
+
+# open the version 1 metadata file (input)
+open(METADATA_V1, "<:encoding(UTF-8)", "Arthropodes_metadata_v1.txt")
+or die "Couldn't open file Arthropodes_metadata_v1.txt, $!";
+# open the version 2 metadata file (output)
+open(METADATA_V2, ">:encoding(UTF-8)", "Arthropodes_metadata_v2.txt")
+or die "Couldn't open file Arthropodes_metadata_v2.txt, $!";
+# header row: column names of the version 2 metadata
+print METADATA_V2 "\"id\",\"corpus\",\"journal\",\"annee\"\n";
+# walk through the version 1 metadata and copy the useful fields into version 2
+my $id = 0;
+while(my $ligne = <METADATA_V1>){
+    chomp($ligne);
+    # a line starting with "NO :" opens a record: extract the corpus name
+    if ($ligne =~ /^NO : .*\(corpus ([A-Z].*?)\)/){
+        $id++;
+        print METADATA_V2 sprintf("\"Arthropodes_%04d\",",$id);# number the ids as 0001, 0002...
+        print METADATA_V2 "\"$1\",";
+        }
+    elsif ($ligne =~ /^SO : (.*?) ; .*? ; ([0-9]{4})/){
+        print METADATA_V2 "\"$1\",";
+        print METADATA_V2 "\"$2\"\n";
+        }
+    }
+close (METADATA_V1);
+close (METADATA_V2);
+
+exit 0;
+
+sub usage
+{
+my $code = shift;
+
+print "Usage : $programme -i input [ -o output ]\n";
+
+exit $code;
+}
+
diff --git a/Niveau-1/TXM/Metadata2.pl b/Niveau-1/TXM/Metadata2.pl
new file mode 100644
index 0000000..b321fec
--- /dev/null
+++ b/Niveau-1/TXM/Metadata2.pl
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use utf8;
+use open qw/:std :utf8/;
+
+use Getopt::Long;
+
+# Concatenate the plain files of a directory into one corpus file; each text
+# is preceded by a "**** *souscorpus_NAME" line, NAME being the run of
+# letters found before "_" in the file name.
+
+my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
+
+my $dir = "";
+my $output = "ira_900.txt";
+
+# A warning raised while parsing options (e.g. unknown option) prints the
+# usage and exits with code 1; normal warning behaviour is restored after.
+eval {
+    $SIG{__WARN__} = sub {usage(1);};
+    GetOptions(
+        "dir=s" => \$dir,
+        "output=s" => \$output,
+        );
+    };
+$SIG{__WARN__} = sub {warn $_[0];};
+
+usage(2) if not $dir;
+
+# readdir() also returns ".", ".." and sub-directories: keep plain files only,
+# sorted so that the output order is reproducible.
+opendir(my $dh, $dir) or die "Couldn't open directory $dir, $!";
+my @files = sort grep { -f "$dir/$_" } readdir($dh);
+closedir($dh);
+
+open(my $corpus, ">:utf8", $output)
+or die "Couldn't open file $output, $!";
+foreach my $file (@files){
+    # Capture into a variable: $1 is only meaningful after a successful match.
+    my ($sous_corpus) = $file =~ /(\pL+)_\d*/;
+    if (not defined $sous_corpus){
+        warn "Skipping $file: no sub-corpus name found in file name\n";
+        next;
+    }
+    open(my $texte, "<:utf8", "$dir/$file") or die "Couldn't open file $file, $!";
+    my @lignes = <$texte>;
+    close($texte);
+    print {$corpus} "**** *souscorpus_$sous_corpus\n";
+    print {$corpus} @lignes;
+}
+close($corpus) or die "Couldn't close file $output, $!";
+
+exit 0;
+
+sub usage
+{
+my $code = shift;
+
+print "Usage : $programme -d dir [ -o output ]\n";
+
+exit $code;
+}