diff --git a/conf/conf.json b/conf/conf.json index fd8d9e6..2d02c90 100644 --- a/conf/conf.json +++ b/conf/conf.json @@ -1,5 +1,4 @@ { "halApi":"http://api.archives-ouvertes.fr/search?", - "proxy":"http://proxyout.inist.fr:8080", "max_retry":2 } \ No newline at end of file diff --git a/index.js b/index.js index 6fac824..2cca386 100644 --- a/index.js +++ b/index.js @@ -3,7 +3,6 @@ const fetch = require('node-fetch'); const fs = require('graceful-fs'); const xmlFormatter = require('xml-formatter'); -const HttpProxyAgent = require('http-proxy-agent'); const utils = require('./lib/utils.js'); const config = require('./conf/conf.json'); @@ -31,15 +30,15 @@ }; business.doTheJob = function (docObject, cb) { - const CORPUSES_ROOT = process.env.CORPUSES_ROOT ? process.env.CORPUSES_ROOT : ''; const dateBegin = docObject.halHarvestModifiedDateFrom; const dateEnd = docObject.halHarvestModifiedDateTo; - harvestPath = [ - CORPUSES_ROOT, + + harvestPath = utils.getHarvestPath([ docObject.corpusName, utils.getDateYYYYMMDD(new Date(dateBegin)), 'to', - utils.getDateYYYYMMDD(new Date(dateEnd))].filter(Boolean).join('-'); + utils.getDateYYYYMMDD(new Date(dateEnd))].filter(Boolean).join('-') + ); if (fs.existsSync(harvestPath)) { const err = { @@ -74,9 +73,12 @@ 'Content-Type': 'application/json' } }; - if (config.proxy !== '') { - fetchOpts.agent = new HttpProxyAgent(config.proxy); + + const agent = utils.getProxyAgent(); + if (agent) { + fetchOpts.agent = agent; } + return fetch(target, fetchOpts).then((res) => res.json()).then(async function (res) { const nextCursorMark = res.nextCursorMark; const docs = res.response.docs; diff --git a/lib/utils.js b/lib/utils.js index b419668..3ac3ce3 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -3,6 +3,8 @@ const { URLSearchParams } = require('url'); const query = require('../conf/query.json'); const config = require('../conf/conf.json'); +const HttpProxyAgent = require('http-proxy-agent'); +const HttpsProxyAgent = require('https-proxy-agent'); const utils = {}; /** @@ -71,4 +73,20 @@ return `${urlBase}/?${sp.toString()}`; }; +utils.getHarvestPath = function getHarvestPath (path) { + return (process.env.CORPUSES_ROOT ? process.env.CORPUSES_ROOT + '/' + path : path); +}; + +utils.getProxyAgent = function getProxyAgent () { + // Get proxy env vars + const httpProxy = process.env.HTTP_PROXY ? process.env.HTTP_PROXY : process.env.http_proxy; + const httpsProxy = process.env.HTTPS_PROXY ? process.env.HTTPS_PROXY : process.env.https_proxy; + + if (httpProxy) { + return new HttpProxyAgent(httpProxy); + } else if (httpsProxy) { + return new HttpsProxyAgent(httpsProxy); + } else return false; +}; + module.exports = utils; diff --git a/package.json b/package.json index eec5aa4..8898e99 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "license": "GNU", "dependencies": { "chai": "^3.4.0", + "fetch-with-proxy": "^3.0.1", "graceful-fs": "^4.2.8", "http-proxy-agent": "^5.0.0", "https-proxy-agent": "^5.0.0", diff --git a/test/dataset/in/test.json b/test/dataset/in/test.json deleted file mode 100644 index 828494c..0000000 --- a/test/dataset/in/test.json +++ /dev/null @@ -1,34 +0,0 @@ -[ - { - "corpusName": "hal", - "cartoType": "conditor:hal", - "corpusResources": "/applis/corhal/loadistex/corpus-resources", - "halHarvestModifiedDateFrom": "2021-11-21T00:00:00.000Z", - "halHarvestModifiedDateTo": "2021-11-21T06:00:00.000Z", - "corpusOutput": "/applis/corhal/corpusOutput" - }, - { - "corpusName": "hal", - "cartoType": "conditor:hal", - "corpusResources": "/applis/corhal/loadistex/corpus-resources", - "halHarvestModifiedDateFrom": "2021-10-20T00:00:00.000Z", - "halHarvestModifiedDateTo": "2021-10-21T00:00:00.000Z", - "corpusOutput": "/applis/corhal/corpusOutput" - }, - { - "corpusName": "hal", - "cartoType": "conditor:hal", - "corpusResources": "/applis/corhal/loadistex/corpus-resources", - "halHarvestModifiedDateFrom": "1022-11-20T00:00:00.000Z", - "halHarvestModifiedDateTo": "1022-11-21T00:00:00.000Z", - "corpusOutput": "/applis/corhal/corpusOutput" - }, - { - "corpusName": "hal", - "cartoType": "conditor:hal", - "corpusResources": "/applis/corhal/loadistex/corpus-resources", - "halHarvestModifiedDateFrom": "2022-11-20T00:00:00.000Z", - "halHarvestModifiedDateTo": "2022-11-21T00:00:00.000Z", - "corpusOutput": "/applis/corhal/corpusOutput" - } -] \ No newline at end of file diff --git a/test/run.js b/test/run.js index 1a4c1e2..469a007 100644 --- a/test/run.js +++ b/test/run.js @@ -3,27 +3,38 @@ const pkg = require('../package.json'); const fs = require('graceful-fs'); const business = require('../index.js'); +const utils = require('../lib/utils.js'); const chai = require('chai'); -const testData = require('./dataset/in/test.json'); const expect = chai.expect; +function initDocObject () { + return { + corpusName: 'hal', + cartoType: 'conditor:hal', + corpusResources: '/applis/corhal/loadistex/corpus-resources', + halHarvestModifiedDateFrom: '2021-11-21T00:00:00.000Z', + halHarvestModifiedDateTo: '2021-11-21T06:00:00.000Z', + corpusOutput: '/applis/corhal/corpusOutput' + }; +} + describe(pkg.name + '/index.js', function () { before(function () { - const harvestPath = 'hal-20211121-to-20211121'; + const harvestPath = utils.getHarvestPath('hal-20211121-to-20211121'); if (fs.existsSync(harvestPath)) { - fs.rmdirSync('hal-20211121-to-20211121', { recursive: true }); + fs.rmdirSync(harvestPath, { recursive: true }); } }); describe('Test succès de moissonnage :', function () { this.timeout(600000); it('Le docObject devrait contenir le corpusRoot', function (done) { - const docObject = testData[0]; + const docObject = initDocObject(); business.doTheJob(docObject, function () { - const harvestPath = docObject.corpusRoot; - expect(harvestPath).to.equal('hal-20211121-to-20211121'); - const pathExist = fs.existsSync(harvestPath); + const harvestPath = utils.getHarvestPath('hal-20211121-to-20211121'); + expect(docObject.corpusRoot).to.equal(harvestPath); + const pathExist = fs.existsSync(docObject.corpusRoot); expect(pathExist).to.equal(true); - fs.rmdirSync('hal-20211121-to-20211121', { recursive: true }); + fs.rmdirSync(harvestPath, { recursive: true }); done(); }); }); @@ -32,18 +43,23 @@ describe('Test échec de moissonnage :', function () { this.timeout(600000); it('Si le corpusRoot exist déjà', function (done) { - const docObject = testData[1]; - fs.mkdirSync('hal-20211020-to-20211021', { recursive: true }); + const docObject = initDocObject(); + docObject.halHarvestModifiedDateFrom = '2021-10-20T00:00:00.000Z'; + docObject.halHarvestModifiedDateTo = '2021-10-21T00:00:00.000Z'; + const harvestPath = utils.getHarvestPath('hal-20211020-to-20211021'); + fs.mkdirSync(harvestPath, { recursive: true }); business.doTheJob(docObject, function (err) { // harvest folder already exist expect(err.code).to.equal(1); - fs.rmdirSync('hal-20211020-to-20211021', { recursive: true }); + fs.rmdirSync(harvestPath, { recursive: true }); done(); }); }); it('Si l\'API renvoie une erreur', function (done) { - const docObject = testData[2]; + const docObject = initDocObject(); + docObject.halHarvestModifiedDateFrom = '1020-10-20T00:00:00.000Z'; + docObject.halHarvestModifiedDateTo = '1020-10-21T00:00:00.000Z'; // message d'erreur de l'api business.doTheJob(docObject, function (err) { expect(err.code).to.equal(2); @@ -52,7 +68,9 @@ }); it('Si aucun résultat trouvé', function (done) { - const docObject = testData[3]; + const docObject = initDocObject(); + docObject.halHarvestModifiedDateFrom = '2050-10-20T00:00:00.000Z'; + docObject.halHarvestModifiedDateTo = '2050-10-21T00:00:00.000Z'; business.doTheJob(docObject, function (err) { // no result found expect(err.code).to.equal(3);