diff --git a/halcnrs-dumps-config.json b/halcnrs-dumps-config.json new file mode 100644 index 0000000..cf4be23 --- /dev/null +++ b/halcnrs-dumps-config.json @@ -0,0 +1,101 @@ +{ + "environnement": { + "CRON_VERBOSE": true, + "EZS_VERBOSE": true, + "NODE_OPTIONS": "--max_old_space_size=1024", + "NODE_ENV": "production" + }, + "tasks": [ + { + "CronRule": "0 18 * * *", + "FileName": "doublons-cnrs-2014-tsv", + "RunOnStartup": true + }, + { + "CronRule": "15 18 * * *", + "FileName": "doublons-cnrs-2015-tsv", + "RunOnStartup": true + }, + { + "CronRule": "30 18 * * *", + "FileName": "doublons-cnrs-2016-tsv", + "RunOnStartup": true + }, + { + "CronRule": "45 18 * * *", + "FileName": "doublons-cnrs-2017-tsv", + "RunOnStartup": true + }, + { + "CronRule": "0 19 * * *", + "FileName": "doublons-cnrs-2018-tsv", + "RunOnStartup": true + }, + { + "CronRule": "15 19 * * *", + "FileName": "doublons-cnrs-2019-tsv", + "RunOnStartup": true + }, + { + "CronRule": "30 19 * * *", + "FileName": "doublons-cnrs-2020-tsv", + "RunOnStartup": true + }, + { + "CronRule": "45 19 * * *", + "FileName": "doublons-cnrs-2021-tsv", + "RunOnStartup": true + }, + { + "CronRule": "0 20 * * *", + "FileName": "doublons-cnrs-2022-tsv", + "RunOnStartup": true + }, + { + "CronRule": "15 20 * * *", + "FileName": "candidats-nonhal-cnrs-2014-tsv", + "RunOnStartup": false + }, + { + "CronRule": "30 20 * * *", + "FileName": "candidats-nonhal-cnrs-2015-tsv", + "RunOnStartup": false + }, + { + "CronRule": "0 21 * * *", + "FileName": "candidats-nonhal-cnrs-2016-tsv", + "RunOnStartup": false + }, + { + "CronRule": "30 21 * * *", + "FileName": "candidats-nonhal-cnrs-2017-tsv", + "RunOnStartup": false + }, + { + "CronRule": "0 22 * * *", + "FileName": "candidats-nonhal-cnrs-2018-tsv", + "RunOnStartup": false + }, + { + "CronRule": "30 22 * * *", + "FileName": "candidats-nonhal-cnrs-2019-tsv", + "RunOnStartup": false + }, + { + "CronRule": "0 23 * * *", + "FileName": "candidats-nonhal-cnrs-2020-tsv", + "RunOnStartup": 
false + }, + { + "CronRule": "30 23 * * *", + "FileName": "candidats-nonhal-cnrs-2021-tsv", + "RunOnStartup": false + }, + { + "CronRule": "0 0 * * *", + "FileName": "candidats-nonhal-cnrs-2022-tsv", + "RunOnStartup": false + } + ] +} + diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2014-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2014-tsv.ini new file mode 100644 index 0000000..8cca4f6 --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2014-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2014 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2014 OR host.electronicPublicationDate.normalized:2014) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] 
+location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2015-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2015-tsv.ini new file mode 100644 index 0000000..e7a69fd --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2015-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2015 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2015 OR host.electronicPublicationDate.normalized:2015) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = 
fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2016-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2016-tsv.ini new file mode 100644 index 0000000..fbedfef --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2016-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2016 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2016 OR host.electronicPublicationDate.normalized:2016) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = 
fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2017-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2017-tsv.ini new file mode 100644 index 0000000..b78062b --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2017-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2017 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2017 OR host.electronicPublicationDate.normalized:2017) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = 
fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2018-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2018-tsv.ini new file mode 100644 index 0000000..765533b --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2018-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2018 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; NOTE: OBJStandardize must run before CSVString so that all rows share the same columns. + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2018 OR host.electronicPublicationDate.normalized:2018) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = 
fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2019-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2019-tsv.ini new file mode 100644 index 0000000..524a1db --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2019-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2019 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2019 OR host.electronicPublicationDate.normalized:2019) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = 
fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2020-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2020-tsv.ini new file mode 100644 index 0000000..bae21ff --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2020-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; +; Notices de 2020 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2020 OR host.electronicPublicationDate.normalized:2020) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = 
fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2021-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2021-tsv.ini new file mode 100644 index 0000000..5f44c3f --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2021-tsv.ini @@ -0,0 +1,63 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) + +; +; Notices de 2021 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier est généré au format TSV compatible Excel +; +; +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2021 OR host.electronicPublicationDate.normalized:2021) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) 
+identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/candidats-nonhal-cnrs-2022-tsv.ini b/halcnrs-dumps/candidats-nonhal-cnrs-2022-tsv.ini new file mode 100644 index 0000000..52d6f7b --- /dev/null +++ b/halcnrs-dumps/candidats-nonhal-cnrs-2022-tsv.ini @@ -0,0 +1,64 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; +; +; Notices de 2022 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - non présentes dans HAL (selon les critères paramétrés dans le module de dédoublonnage) +; - respectant les contraintes CCSD d'import (possédant un code RNSR et une catégorie scientifique) +; - identifiées par Unpaywall comme étant en open access +; +; le fichier généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2022 OR host.electronicPublicationDate.normalized:2022) AND -business.sourceUidChain:*hal* AND business.authorsRnsr:* AND classifications.enrichments.hal.code:* AND enrichments.openAccess.unpaywall.isOa:true') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) 
+identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2014-tsv.ini b/halcnrs-dumps/doublons-cnrs-2014-tsv.ini new file mode 100644 index 0000000..34aee32 --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2014-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; +; +; Notices de 2014 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - présentes au moins deux fois dans HAL +; +; le fichier généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2014 OR host.electronicPublicationDate.normalized:2014) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = 
true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2015-tsv.ini b/halcnrs-dumps/doublons-cnrs-2015-tsv.ini new file mode 100644 index 0000000..08f746a --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2015-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; +; +; Notices de 2015 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - présentes au moins deux fois dans HAL +; +; le fichier généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2015 OR host.electronicPublicationDate.normalized:2015) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] 
+indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2016-tsv.ini b/halcnrs-dumps/doublons-cnrs-2016-tsv.ini new file mode 100644 index 0000000..3c72d77 --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2016-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; +; +; Notices de 2016 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - présentes au moins deux fois dans HAL +; +; le fichier généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2016 OR host.electronicPublicationDate.normalized:2016) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2017-tsv.ini 
b/halcnrs-dumps/doublons-cnrs-2017-tsv.ini new file mode 100644 index 0000000..e436e87 --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2017-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; +; +; Notices de 2017 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - présentes au moins deux fois dans HAL +; +; le fichier généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2017 OR host.electronicPublicationDate.normalized:2017) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2018-tsv.ini b/halcnrs-dumps/doublons-cnrs-2018-tsv.ini new file mode 100644 index 
0000000..6a910a7 --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2018-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; +; +; Notices de 2018 : +; - ayant au moins un auteur ayant indiqué le CNRS +; - présentes au moins deux fois dans HAL +; +; le fichier généré au format TSV compatible Excel +; +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2018 OR host.electronicPublicationDate.normalized:2018) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2019-tsv.ini b/halcnrs-dumps/doublons-cnrs-2019-tsv.ini new file mode 100644 index 0000000..4b419cf --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2019-tsv.ini 
@@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; currentDate (above): French weekday name of the run day, or 'Startup' when env('Startup') is set +; +; Records from 2019: +; - with at least one author who declared the CNRS (the query matches cnrs, insu or ins2i in business.authorsAddresses) +; - present at least twice in HAL +; +; The generated file is in TSV format, compatible with Excel +; Steps: query CORHAL, fetch each merged document, expose its HAL source ids as URLs in result.doublons, save as TSV +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2019 OR host.electronicPublicationDate.normalized:2019) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2020-tsv.ini b/halcnrs-dumps/doublons-cnrs-2020-tsv.ini new file mode 100644 index 0000000..69acd7c --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2020-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + 
+[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; currentDate (above): French weekday name of the run day, or 'Startup' when env('Startup') is set +; +; Records from 2020: +; - with at least one author who declared the CNRS (the query matches cnrs, insu or ins2i in business.authorsAddresses) +; - present at least twice in HAL +; +; The generated file is in TSV format, compatible with Excel +; Steps: query CORHAL, fetch each merged document, expose its HAL source ids as URLs in result.doublons, save as TSV +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2020 OR host.electronicPublicationDate.normalized:2020) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2021-tsv.ini b/halcnrs-dumps/doublons-cnrs-2021-tsv.ini new file mode 100644 index 0000000..f5964b5 --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2021-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => 
d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; currentDate (above): French weekday name of the run day, or 'Startup' when env('Startup') is set +; +; Records from 2021: +; - with at least one author who declared the CNRS (the query matches cnrs, insu or ins2i in business.authorsAddresses) +; - present at least twice in HAL +; +; The generated file is in TSV format, compatible with Excel +; Steps: query CORHAL, fetch each merged document, expose its HAL source ids as URLs in result.doublons, save as TSV +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2021 OR host.electronicPublicationDate.normalized:2021) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true diff --git a/halcnrs-dumps/doublons-cnrs-2022-tsv.ini b/halcnrs-dumps/doublons-cnrs-2022-tsv.ini new file mode 100644 index 0000000..acf0697 --- /dev/null +++ b/halcnrs-dumps/doublons-cnrs-2022-tsv.ini @@ -0,0 +1,66 @@ +[use] +plugin = basics +plugin = conditor +plugin = lodex + +[env] +path = currentDate +value = thru(() => new Date()).thru(d => d.getDay()).thru(i => ['Dimanche', 'Lundi', 'Mardi', 'Mercredi', 'Jeudi', 
'Vendredi', 'Samedi'][i]).thru(m => env('Startup') ? 'Startup' : m) +; currentDate (above): French weekday name of the run day, or 'Startup' when env('Startup') is set +; +; Records from 2022: +; - with at least one author who declared the CNRS (the query matches cnrs, insu or ins2i in business.authorsAddresses) +; - present at least twice in HAL +; +; The generated file is in TSV format, compatible with Excel +; Steps: query CORHAL, fetch each merged document, expose its HAL source ids as URLs in result.doublons, save as TSV +; + +[replace] +path = q +value = fix('business.authorsAddresses:(cnrs OR insu OR ins2i) AND (host.publicationDate.normalized:2022 OR host.electronicPublicationDate.normalized:2022) AND (business.sourceUidChain:*\\!hal\\$*\\!hal\\$*)') + +[CORHALFetch] +url = https://corhal-api.inist.fr +retries = 3 +timeout = 60000 + +[replace] +path = url +value = get('business.sourceUidChain').prepend('https://corhal-api.inist.fr/mergedDocuments/') + +[URLFetch] +url = get('url') +json = true +target = result +timeout = 60000 +noerror = true + +[assign] +path = result.doublons +value = get('result.sourceUids').filter(i => (i.indexOf('hal') === 0)).map(i => i.replace(/^hal\$/, 'https://hal.archives-ouvertes.fr/')) + +[exchange] +value = get('result').omit(['authors', 'origins', 'business', 'files', 'keywords', 'enrichments', 'classifications', 'funders', 'sourceUids', 'abstract' ]) + +[OBJFlatten] +separator = / + +[objects2columns] + +[OBJStandardize] + +[CSVString] +format = strict +separator = fix('\t') + +[FILESave] +location = fix(`${env('PWD')}/public/${env('FileName')}`) +identifier = fix(`${env('FileName')}-${env('currentDate')}.tsv`) +compress = true + +[exchange] +value = get('filename').append(' generated.') + +[dump] +indent = true