Newer
Older
explore-corpus / Niveau-1 / Iramuteq / Préparation_Données_entrée_Iramuteq2.pl
@Pan Pan Hu Pan Pan Hu on 11 Aug 2017 1 KB 6commits
#!/usr/bin/perl

use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;

use Encode;
use Getopt::Long;

# Script name without its directory part, decoded so that it prints
# correctly in the usage message.
my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
$programme = decode_utf8($programme);

my $dir = "";
my $sortie = "";

# Getopt::Long reports invalid options through warn().  The handler is
# overridden only for the duration of the call (local) so that any such
# warning is replaced by the usage message; later warnings behave normally.
{
    local $SIG{__WARN__} = sub { usage(1); };
    GetOptions(
        "dir=s"    => \$dir,
        "sortie=s" => \$sortie,
    ) or usage(1);
}

# Both the input directory and the output file are mandatory.
usage(2) if not $dir or not $sortie;

# Collect the input files.  Hidden entries ('.', '..', dot-files) and
# anything that is not a plain file are ignored; the list is sorted so that
# the output does not depend on the order in which readdir() returns entries.
opendir(my $dh, $dir) or die "Couldn't open directory $dir, $!";
my @files = sort grep { !/^\./ and -f "$dir/$_" } readdir($dh);
closedir($dh);

open(my $out, ">:utf8", $sortie)
    or die "Couldn't open file $sortie, $!";

foreach my $file (@files) {
    # readdir() returns bytes: decode the name before matching letters and
    # before printing it through the UTF-8 output layer.  The raw bytes are
    # still used to open the file.
    my $nom = decode_utf8($file);

    # The sub-corpus label is the run of letters that precedes an underscore
    # in the file name (e.g. "presse_12.txt" gives "presse").
    my ($sous_corpus) = $nom =~ /(\pL+)_\d*/;
    if (not defined $sous_corpus) {
        warn "Skipping $nom: file name does not contain <letters>_\n";
        next;
    }

    open(my $in, "<:encoding(UTF-8)", "$dir/$file")
        or die "Couldn't open file $nom, $!";

    # Each text is introduced by an Iramuteq header line carrying the
    # sub-corpus variable, followed by the file content unchanged.
    print {$out} "**** *souscorpus_$sous_corpus\n";
    while (my $ligne = <$in>) {
        print {$out} $ligne;
    }
    close($in);
}

# Buffered write errors only surface when the handle is closed.
close($out) or die "Couldn't close file $sortie, $!";

exit 0;


# usage(CODE)
# Writes the one-line command synopsis to STDERR, then terminates the
# script with CODE as its exit status.  Never returns.
sub usage
{
    my ($code) = @_;

    print STDERR "Usage : $programme -d dir -s sortie\n";

    exit $code;
}