Newer
Older
explore-corpus / Niveau-2 / Vieillissement_V1 / Iramuteq / Préparation_Données_entrée.pl
@Pan Pan Hu Pan Pan Hu on 26 Jul 2017 1 KB fourth commit
#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;

use Encode;
use Getopt::Long;

# Build a single corpus file from the text files found in a directory.
# Every file whose name starts with letters followed by an underscore
# (e.g. "Presse_12.txt") is appended to the output, preceded by a header
# line "**** *souscorpus_<letters>" (presumably the Iramuteq text-marker
# format, judging by the header syntax -- confirm against the Iramuteq docs).
#
# Options:
#   --dir     directory containing the input text files
#   --sortie  path of the concatenated output file
# Both are mandatory; usage() is called when either is missing.

# Program name without its directory part, decoded for display in usage().
my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
$programme = decode_utf8($programme);

my $dir = "";    # e.g. "Vieillissement_TXT"
my $sortie = ""; # e.g. "Vieillissement_total.txt"

eval	{
	# Any warning raised by GetOptions (unknown option, missing value, ...)
	# is replaced by the usage message. "local" restores the previous
	# handler automatically when the block is left.
	local $SIG{__WARN__} = sub {usage(1);};
	GetOptions(
		"sortie=s"      => \$sortie,
		"dir=s"         => \$dir,
		);
	};

usage(2) if not $dir or not $sortie;

opendir(my $dh, $dir) or die "Couldn't open directory $dir, $!";
# Skip hidden entries (this also drops "." and ".."). readdir returns the
# entries in an unspecified order, so sort them to get reproducible output.
my @files = sort grep {not /^\./} readdir($dh);
closedir($dh);

open(my $out, ">:utf8", $sortie) or
	die "Couldn't open file $sortie, $!";

foreach my $file (@files){
	# readdir yields raw bytes: decode the name so that \pL recognises
	# accented letters, but keep the raw bytes for the open() call below.
	my $name = decode_utf8($file);
	next unless $name =~ /^(\pL+)_\d*/;
	my $sous_corpus = $1;

	print {$out} "**** *souscorpus_$sous_corpus\n";
	open(my $in, "<:encoding(UTF-8)", "$dir/$file") or
					die "Couldn't open file $name, $!";
	my $last;
	while (my $line = <$in>)
	{
		print {$out} $line;
		$last = $line;
	}
	close($in);

	# Make sure the next header starts on its own line even when the
	# input file does not end with a newline.
	print {$out} "\n" if defined $last and $last !~ /\n\z/;
}
# Buffered write errors only surface at close time, so check it.
close($out) or die "Couldn't close file $sortie, $!";

exit 0;

# Print a one-line usage summary on STDERR and terminate the program.
#
# Arguments:
#   first argument - exit status passed to exit().
#
# Does not return.
sub usage
{
my ($exit_status) = @_;

print {*STDERR} "Usage : $programme -d dir -s sortie \n";

exit $exit_status;
}