Newer
Older
explore-corpus / Polaris / Preparation_entree_Ira_polaris.pl
@Pan Pan Hu Pan Pan Hu on 11 Aug 2017 1 KB premier commit
#!/usr/bin/perl

use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;

use Encode;
use Getopt::Long;

# Name of the running script without its directory part; it is shown in
# the usage message.
my ($programme) = $0 =~ m{^(?:.*/)?(.+)};
$programme = decode_utf8($programme);

# Command-line parameters; all three are mandatory.
my $dir      = "";	# directory holding the text files
my $metadata = "";	# file holding the metadata lines
my $sortie   = "";	# output file

{
	# Getopt::Long reports invalid options through warn(): turn any such
	# warning into the usage message. "local" restores the previous
	# handler automatically when this block is left.
	local $SIG{__WARN__} = sub { usage(1); };
	GetOptions(
		"dir=s"      => \$dir,
		"metadata=s" => \$metadata,
		"sortie=s"   => \$sortie,
	) or usage(1);
}

# Test the length rather than the truth of each value, so that a name
# such as "0" is not mistaken for a missing argument.
usage(2) if grep { !length($_) } $dir, $metadata, $sortie;

# List the text files of the input directory. Hidden entries and anything
# that is not a plain file (e.g. sub-directories) are ignored. The names are
# sorted because the N-th file is paired with the N-th metadata line.
opendir(my $dir_fh, $dir) or die "Couldn't open file $dir, $!";
my @files = sort grep { !/^\./ && -f "$dir/$_" } readdir($dir_fh);
closedir($dir_fh);
my $size = @files;
print STDERR "Total : $size\n";

# Open the metadata file for reading.
open(my $meta_fh, "<:utf8", $metadata) or die "Couldn't open file $metadata, $!";

# Open the output file for writing.
open(my $out_fh, ">:utf8", $sortie) or die "Couldn't open file $sortie, $!";

# For every metadata line, write the line itself followed by the content of
# the matching text file.
my $i = 0;
while (my $entete = <$meta_fh>) {
	# Blank lines (typically at the end of the file) carry no metadata.
	next if $entete !~ /\S/;

	# Without this check the script would go past the end of @files and
	# silently produce metadata lines with no text.
	die "More metadata lines than text files ($size) in $dir\n" if $i >= $size;

	# A last metadata line without newline must not run into the text.
	$entete .= "\n" if $entete !~ /\n\z/;
	print {$out_fh} $entete;

	open(my $txt_fh, "<:utf8", "$dir/$files[$i]") or die "Couldn't open file $files[$i], $!";
	my $ends_with_newline = 1;
	while (my $ligne = <$txt_fh>) {
		# Remove every star of the line, not just the first one.
		# NOTE(review): the original comment also mentioned digits, but
		# the code never removed them; digits are left untouched here.
		$ligne =~ tr/*//d;
		print {$out_fh} $ligne;
		$ends_with_newline = ($ligne =~ /\n\z/) ? 1 : 0;
	}
	# Keep the next metadata line from being glued to an unterminated text.
	print {$out_fh} "\n" if not $ends_with_newline;
	close($txt_fh);
	$i++;
}

# Report text files that were left without a metadata line.
if ($i < $size) {
	print STDERR "Warning : ", $size - $i, " file(s) without metadata line were ignored\n";
}

close($meta_fh);
# Buffered write errors only show up when the handle is closed.
close($out_fh) or die "Couldn't close file $sortie, $!";

exit 0;


# Print the command-line synopsis on STDERR and terminate the program.
#
# Argument : the status handed to exit().
# This subroutine never returns to its caller.
sub usage
{
	my ($status) = @_;

	my $message = "Usage : $programme -d répertoire -m métadonnées -s sortie\n\n";
	print STDERR $message;

	exit $status;
}