#!/usr/bin/perl
#
# Convert every "*.tei" file found directly inside an input directory into a
# "*.txt" file of the same base name inside an output directory.  For each
# input file, the elements matching the path text/body/div/p are visited and
# one line of text per element is written to the output file.
#
# Usage: <script> -i input_dir -o output_dir
#
# Exit codes: 0 on success, 1 on an option-parsing problem, 2 when the input
# or output directory is missing from the command line.

use strict;
use warnings;
use utf8;
use open qw/:std :utf8/;
use XML::Twig;
use Encode;
use Getopt::Long;

# Script name without its leading directory part, for the usage message.
my ($programme) = $0 =~ m|^(?:.*/)?(.+)|;
$programme = decode_utf8($programme);

my $dir_input  = "";
my $dir_output = "";

# GetOptions reports problems (unknown option, missing value, ...) through
# warn(); while it runs, turn any such warning into "print usage and exit 1".
# 'local' restores the previous warning handler when the block is left.
{
    local $SIG{__WARN__} = sub { usage(1); };
    GetOptions(
        "input=s"  => \$dir_input,
        "output=s" => \$dir_output,
    ) or usage(1);
}

# Both directories are mandatory.
usage(2) if not $dir_input or not $dir_output;

opendir(my $dh, $dir_input)
    or die "Couldn't open directory $dir_input, $!";
# Keep only regular files whose name ends in ".tei".  readdir returns bare
# names, so the directory must be prepended ("$dir/$file") for the -f test.
my @files = sort grep { /\.tei\z/ and -f "$dir_input/$_" } readdir($dh);
closedir($dh);

my $size = @files;
print STDERR "Total : $size\n";

foreach my $file (@files) {
    # Output name: same as the input name with the final ".tei" replaced.
    (my $new_nom = $file) =~ s/\.tei\z/.txt/;
    print STDERR "$file => $new_nom\n";

    open(my $txt, ">:encoding(UTF-8)", "$dir_output/$new_nom")
        or die "Couldn't open file $dir_output/$new_nom, $!";

    # Use XML::Twig: for every <p> reached through text/body/div, print the
    # text returned by first_child_text followed by a newline.
    # NOTE(review): first_child_text yields only the first child's text; if
    # paragraphs can contain inline markup, ->text may be what is wanted --
    # confirm against the corpus.
    my $parser = XML::Twig->new(
        twig_handlers => {
            'text/body/div/p' => sub {
                print {$txt} $_->first_child_text . "\n";
            },
        },
    );
    $parser->parsefile("$dir_input/$file");

    # Buffered write errors only surface at close time, so check it.
    close($txt)
        or die "Couldn't close file $dir_output/$new_nom, $!";
}

exit 0;

# usage($code): print the command-line synopsis on STDERR and terminate the
# program with exit status $code.  Never returns.
sub usage {
    my $code = shift;
    print STDERR "Usage: $programme -i input -o output\n";
    exit $code;
}