Newer
Older
explore-corpus / Niveau-3 / Iramuteq / Construction_Dictionnaires_Nom_Espèce.pl
@Pan Pan Hu Pan Pan Hu on 21 Jul 2017 1 KB third commit
#!/usr/bin/perl

use strict;
use warnings;

use Getopt::Long;

# Script name without its directory part; shown in the usage message.
my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;

# Command-line options and their defaults.
my $input   = "";             # file to read (required)
my $lexique = "lexique.txt";  # lexicon file to write
my $output  = "";             # expressions file to write (required)
my $type    = "";             # must be "animalia" or "plantae"

eval {
	# Getopt::Long signals command-line errors through warn(); turn any
	# such warning into the usage message. "local" restores the previous
	# handler automatically when the eval block is left.
	local $SIG{__WARN__} = sub { usage(1); };
	GetOptions(
		"input=s"   => \$input,
		"lexique=s" => \$lexique,
		"output=s"  => \$output,
		"type=s"    => \$type,
	);
};

# Both the input and the output file are mandatory. Each operand needs its
# own "not": "not $input or $output" parses as "(not $input) or $output"
# and would reject every invocation that supplies an output file.
if (not $input or not $output)
{
	usage(2);
}
# Only these two values are accepted for the type option.
if ($type ne "animalia" and $type ne "plantae")
{
	usage(3);
}


# Open the input for reading and both result files for writing.
# :encoding(UTF-8) validates the byte stream, which the bare :utf8 layer
# does not do.
open(my $in_fh, "<:encoding(UTF-8)", $input) or
	die "Couldn't open file $input, $!";
open(my $lexique_fh, ">:encoding(UTF-8)", $lexique) or
	die "Couldn't open file $lexique, $!";
open(my $expression_fh, ">:encoding(UTF-8)", $output) or
	die "Couldn't open file $output, $!";

# Compiled once, outside the loop: two runs of letters separated by a
# single space, a tab, then the requested type. \Q...\E keeps $type literal.
# NOTE(review): \pL* also accepts empty words - confirm whether \pL+ is wanted.
my $motif = qr/(\pL*) (\pL*)\t\Q$type\E/;

while (my $ligne = <$in_fh>) {
	# Matching is done on the lower-cased line, so the output is lower case.
	$ligne = lc($ligne);
	if ($ligne =~ $motif) {
		my ($first_word, $second_word) = ($1, $2);
		# The two words joined by an underscore form a single token.
		my $terme = "${first_word}_${second_word}";
		print {$lexique_fh} "$terme\t$terme\tnom\n";
		print {$expression_fh} "$first_word $second_word\t$terme\n";
	}
}
close($in_fh);
# Buffered write errors only show up at close time, so check them.
close($lexique_fh) or
	die "Couldn't close file $lexique, $!";
close($expression_fh) or
	die "Couldn't close file $output, $!";

exit 0;

# Print the command-line help on STDERR and terminate the program.
#   $code : exit status passed to exit().
# Never returns.
sub usage
{
	my $code = shift;

	print STDERR "Usage : $programme -i input -o output -t (\"animalia\"|\"plantae\") [ -l lexique ]\n";
	print STDERR "        -i input  : liste des espèces\n";
	print STDERR "        -o output : liste des expressions espèces\n";
	# The synopsis above mentions -t and -l, so describe them as well.
	print STDERR "        -t type   : règne à extraire (\"animalia\" ou \"plantae\")\n";
	print STDERR "        -l lexique : fichier lexique à produire (défaut : lexique.txt)\n";

	exit $code;
}