diff --git a/script/harvester/harvester.sh b/script/harvester/harvester.sh index 3acd1bb..475bb4b 100755 --- a/script/harvester/harvester.sh +++ b/script/harvester/harvester.sh @@ -111,7 +111,8 @@ "categories.scienceMetrix": "category", "categories.inist": "category", "categories.scopus": "category", - "categories.wos": "category" + "categories.wos": "category", + "enrichments.type": "enrichment" } as $mapping | with_entries(.key = ($mapping[.key]? // .key)) ' @@ -139,7 +140,7 @@ } handle_request "*" "corpusName" "" "corpus.nb_doc" -handle_request "*" "accessCondition.value" "" "istex.access_condition" +handle_request "*" "accessCondition.value" "" "istex.access_conditions" handle_request "*" "language" "" "istex.languages" handle_request "*" "enrichments.type" "" "istex.enrichments" handle_request "*" "host.genre" "" "istex.host.genres" @@ -161,4 +162,4 @@ handle_request "*" "corpusName" "genre" "corpus.genre" fetch_total "*" | type "istex.nb_doc" | stamp -fetch_total "enrichments.type.raw:*" | type "istex.nb_doc_enrichis" | stamp +fetch_total "enrichments.type.raw:*" | type "istex.nb_doc_enriched" | stamp diff --git a/script/lodex-export/lodex-export.py b/script/lodex-export/lodex-export.py index d9ba26d..7d2b8a7 100755 --- a/script/lodex-export/lodex-export.py +++ b/script/lodex-export/lodex-export.py @@ -64,6 +64,22 @@ IstexNbDocEnriched.__name__ = "istex.nb_doc_enriched" +@add_kind(Kind.Atomic) +class IstexEnrichments(BaseModel): + type: Literal["istex.enrichments"] = Field(..., description="Type de l'élément") + date: str = Field(..., description="Date de la mesure") + enrichment: str = Field(..., description="Type d'enrichissement") + nb: int = Field(..., description="Nombre de documents enrichis") + + @field_validator("date") + @classmethod + def check_date_format(cls, value: str): + return date_validator(value) + + +IstexEnrichments.__name__ = "istex.enrichments" + + @add_kind(Kind.Aggregate) class IstexCategories(BaseModel): type: Literal[ @@ -86,6 +102,28 
@@ @add_kind(Kind.Aggregate) +class CorpusCategory(BaseModel): + type: Literal[ + "corpus.category.scopus", + "corpus.category.wos", + "corpus.category.inist", + "corpus.category.scienceMetrix", + ] = Field(..., description="Type de l'élément") + date: str = Field(..., description="Date de la mesure") + category: str = Field(..., description="Nom de la catégorie") + nb: int = Field(..., description="Nombre de documents pour cette catégorie") + corpus: str = Field(..., description="Nom du corpus") + + @field_validator("date") + @classmethod + def check_date_format(cls, value: str): + return date_validator(value) + + +CorpusCategory.__name__ = "corpus.category" + + +@add_kind(Kind.Aggregate) class IstexLanguages(BaseModel): type: Literal["istex.languages"] = Field(..., description="Type de l'élément") date: str = Field(..., description="Date de la mesure") @@ -136,6 +174,21 @@ @add_kind(Kind.Atomic) +class IstexHostGenres(BaseModel): + type: Literal["istex.host.genres"] = Field(..., description="Type de l'élément") + date: str = Field(..., description="Date de la mesure") + host_genre: str = Field(..., description="Genre des documents", alias="host.genre") + nb: int = Field(..., description="Nombre de documents par genre") + + @field_validator("date") + def check_date_format(cls, value: str): + return date_validator(value) + + +IstexHostGenres.__name__ = "istex.host.genres" + + +@add_kind(Kind.Atomic) class CorpusNbDoc(BaseModel): type: Literal["corpus.nb_doc"] = Field(..., description="Type de l'élément") date: str = Field(..., description="Date de la mesure") @@ -242,32 +295,10 @@ @add_kind(Kind.Atomic) -class CorpusCategory(BaseModel): - type: Literal[ - "corpus.category.wos", - "corpus.category.scopus", - "corpus.category.inist", - "corpus.category.scienceMetrix", - ] = Field(..., description="Type de l'élément") - date: str = Field(..., description="Date de la mesure") - cateogry: str = Field(..., description="Nom de la catégorie") - nb: int = Field(..., 
description="Nombre de documents pour cette catégorie") - corpus: str = Field(..., description="Nom du corpus") - - @field_validator("date") - @classmethod - def check_date_format(cls, value: str): - return date_validator(value) - - -CorpusCategory.__name__ = "corpus.category" - - -@add_kind(Kind.Atomic) class CorpusHostGenre(BaseModel): type: Literal["corpus.host.genre"] = Field(..., description="Type de l'élément") date: str = Field(..., description="Date de la mesure") - genre: str = Field(..., description="Genre des documents") + host_genre: str = Field(..., description="Genre des documents", alias="host.genre") nb: int = Field(..., description="Nombre de documents par genre") corpus: str = Field(..., description="Nom du corpus") @@ -282,10 +313,12 @@ Annotated[ IstexNbDoc | IstexNbDocEnriched + | IstexEnrichments | IstexCategories | IstexLanguages | IstexAccessConditions | IstexGenres + | IstexHostGenres | CorpusNbDoc | CorpusEnrichment | CorpusEnrichmentLang @@ -301,10 +334,12 @@ RecordType = ( IstexNbDoc | IstexNbDocEnriched + | IstexEnrichments | IstexCategories | IstexLanguages | IstexAccessConditions | IstexGenres + | IstexHostGenres | CorpusNbDoc | CorpusEnrichment | CorpusEnrichmentLang @@ -347,6 +382,7 @@ self.statistic_units: list[type[RecordType]] = [ CorpusEnrichment, CorpusAccessCondition, + CorpusCategory, ] self.contexts_data: dict[Key, Any] = {} @@ -452,8 +488,10 @@ self.collect_contextual_data(data) self.collect_statistics(data) - except: - print(line) + except Exception as error: + print(error, file=sys.stderr) + print(line, file=sys.stderr) + raise def write( self, formatter: Callable[[list[Any]], list[str]], file: str | None = None