ISTEX/Corhal module for JSON source metadata extraction into JSON docObject

@niederle niederle authored on 17 Nov 2022
.eslintrc.json [init] first commit, initialize repo with package.json, license and Readme 1 year ago
.gitignore [init] first commit, initialize repo with package.json, license and Readme 1 year ago
Licence.en.txt [init] first commit, initialize repo with package.json, license and Readme 1 year ago
License.fr.txt [init] first commit, initialize repo with package.json, license and Readme 1 year ago
README.md [init] first commit, initialize repo with package.json, license and Readme 1 year ago
package.json [init] first commit, initialize repo with package.json, license and Readme 1 year ago
README.md

li-json-formatter

Corhal/Istex module for JSON source metadata file extraction to JSON docObject.

Description

This module aims to:

  • parse a JSON-formatted source metadata file
  • extract the relevant information
  • transform it to the Corhal/Istex JSON format
  • put it into a JSON docObject

Example of a source metadata file

(from the OpenAlex database: https://openalex.org/)

{
  "id": "https://openalex.org/W4296777267",
  "doi": "https://doi.org/10.1016/j.resourpol.2022.103010",
  "title": "Impact of oil price and oil production on inflation in the CEMAC",
  "display_name": "Impact of oil price and oil production on inflation in the CEMAC",
  "publication_year": 2022,
  "publication_date": "2022-12-01",
  "ids": {
    "openalex": "https://openalex.org/W4296777267",
    "doi": "https://doi.org/10.1016/j.resourpol.2022.103010"
  },
  "host_venue": {
    "id": "https://openalex.org/V194185345",
    "issn_l": "0301-4207",
    "issn": [
      "0301-4207",
      "1873-7641"
    ],
    "display_name": "Resources Policy",
    "publisher": "Elsevier",
    "type": "journal",
    "url": "https://doi.org/10.1016/j.resourpol.2022.103010",
    "is_oa": null,
    "version": null,
    "license": null
  },
  "type": "journal-article",
  "open_access": {
    "is_oa": true,
    "oa_status": "green",
    "oa_url": "https://hal.archives-ouvertes.fr/hal-03790291/file/Impact%20of%20Oil%20Price%20and%20Oil%20Production%20on%20Inflation%20in%20the%20CEMAC.pdf"
  },
  "authorships": [
    {
      "author_position": "first",
      "author": {
        "id": "https://openalex.org/A3183623253",
        "display_name": "Edouard Mien",
        "orcid": "https://orcid.org/0000-0001-8774-8107"
      },
      "institutions": [
        {
          "id": "https://openalex.org/I4210103002",
          "display_name": "University of Clermont Auvergne",
          "ror": "https://ror.org/01a8ajp46",
          "country_code": "FR",
          "type": "education"
        }
      ],
      "raw_affiliation_string": "School of Economics, University of Clermont Auvergne, CNRS, CERDI, 26 avenue Léon-Blum, 63000, Clermont‐Ferrand, FranceTel. +33 4 73 17 74 00, Fax +33 4 73 17 74 28"
    }
  ]
}

Example of output docObject

{
  "idIstex": "0123456789012345678901234567890123456789",
  "files": {
    "metadata": [
      {
        "mime": "application/json",
        "original": false,
        "path": "/data/input.json"
      }
    ]
  },
  "source": "openalex",
  "sourceId": "https://openalex.org/W4296777267",
  "openalex": "https://openalex.org/W4296777267",
  "sourceUid": "openalex$https://openalex.org/W4296777267",
  "doi": "10.1016/j.resourpol.2022.103010",
  "title": {
    "default": "Impact of oil price and oil production on inflation in the CEMAC"
  },
  "genre": "journal",
  "host": {
    "publicationDate": "2022-12-01",
    "journalId": "https://openalex.org/V194185345",
    "title": "Resources Policy",
    "issn": null, # don't know which one to take
    "eissn": null,  # don't know which one to take
    "openalex": "https://openalex.org/V194185345",
    ...
  },
  "authors": [
    {
      "fullname": "Edouard Mien",
      "orcId": ["https://orcid.org/0000-0001-8774-8107"],
      "openalex": "https://openalex.org/A3183623253",
      "affiliations": [
        {
          "address": "University of Clermont Auvergne, FR", # TODO specify exactly what to put here
          "openalex": "https://openalex.org/I4210103002",
          "ror": "https://ror.org/01a8ajp46", # TODO should we keep this info?
          ...
        }
      ]
    }
  ],
  "business": {
    "xissn": [
      "0301-4207",
      "1873-7641"
    ]
  }
  ...
}