{
    "xml": {
      "metadata": [
        {
          "name": "content-type",
          "type": "Attribute",
          "xpath": ["/article/@article-type", "/article/front/article-meta/article-categories/subj-group[@subj-group-type=\"document-type-name\"]/subject"]
        },
        {
          "name": "publicationYear",
          "regex": "^([0-9]{4})$",
          "type": "Number",
          "xpath": ["/article/front/article-meta/pub-date/year"]
        },
        {
          "name": "doi",
          "regex": "(10[.][0-9]{4,}[^\\s\"/<>]*/[^\\s\"<>]+)",
          "type": "String",
          "xpath": ["/article/front/article-meta/article-id[@pub-id-type=\"doi\"]"]
        },
        {
          "name": "pmid",
          "type": "String",
          "xpath": ["/article/front/article-meta/article-id[@pub-id-type=\"pmid\"]"]
        },
        {
          "name": "issn",
          "regex": "^(\\d{4})-?(\\d{3})([\\dX])$",
          "type": "String",
          "xpath": ["/article/front/journal-meta/issn[@pub-type=\"ppub\"]"]
        },
        {
          "name": "eissn",
          "regex": "^(\\d{4})-?(\\d{3})([\\dX])$",
          "type": "String",
          "xpath": ["/article/front/journal-meta/issn[@pub-type=\"epub\"]"]
        },
        {
          "name": "isbn",
          "regex": "((978[-– ])?[0-9][0-9-– ]{10}[-– ][0-9xX])|((978)?[0-9]{9}[0-9])",
          "type": "String",
          "xpath": ["/article/front/journal-meta/isbn[@pub-type=\"ppub\"]"]
        },
        {
          "name": "eisbn",
          "regex": "((978[-– ])?[0-9][0-9-– ]{10}[-– ][0-9xX])|((978)?[0-9]{9}[0-9])",
          "type": "String",
          "xpath": ["/article/front/journal-meta/isbn[@pub-type=\"epub\"]"]
        },
        {
          "name": "hasAbstract",
          "type": "Boolean",
          "xpath": ["/article/front/article-meta/abstract"]
        },
        {
          "name": "issue",
          "regex": "(\\d)+",
          "type": "Number",
          "xpath": ["/article/front/article-meta/issue"]
        },
        {
          "name": "volume",
          "regex": "(\\d)+",
          "type": "Number",
          "xpath": ["/article/front/article-meta/volume"]
        },
        {
          "name": "publisherId",
          "type": "String",
          "xpath": ["/article/front/journal-meta/publisher/publisher-name"]
        },
        {
          "name": "publisherLoc",
          "type": "String",
          "xpath": ["/article/front/journal-meta/publisher/publisher-loc"]
        },
        {
          "name": "hostTitle",
          "type": "String",
          "xpath": ["/article/front/journal-meta/journal-title-group/journal-title"]
        },
        {
          "name": "title",
          "type": "String",
          "xpath": ["/article/front/article-meta/title-group/article-title"]
        },
        {
          "name": "nbRefBibs",
          "type": "Count",
          "xpath": ["/article/back/ref-list/ref"]
        },
        {
          "name": "nbKeywords",
          "type": "Count",
          "xpath": ["/article/front/article-meta/kwd-group"]
        },
        {
          "name": "nbElementInBody",
          "type": "Count",
          "xpath": ["/article/body/*"]
        },
        {
          "name": "nbParagraphInBody",
          "type": "Count",
          "xpath": ["/article/body/p"]
        },
        {
          "name": "nbSectionInBody",
          "type": "Count",
          "xpath": ["/article/body/sec"]
        },
        {
          "name": "codeLanguage",
          "type": "Attribute",
          "xpath": ["/article/@xml:lang"]
        },
        {
          "name": "article-type",
          "type": "String",
          "xpath": ["//article/@article-type"]
        },
        {
          "name": "pub-id-type",
          "type": "String",
          "xpath": ["//front/article-meta/article-id/@pub-id-type"]
        },
        {
          "name": "contrib-type",
          "type": "String",
          "xpath": ["//front/article-meta/contrib-group/contrib/@contrib-type"]
        },
        {
          "name": "rid",
          "type": "String",
          "xpath": ["//front//xref/@rid"]
        },
        {
          "name": "id",
          "type": "String",
          "xpath": ["//aff/@id"]
        },
        {
          "name": "subj-group-type",
          "type": "String",
          "xpath": ["//article-categories/subj-group/@subj-group-type"]
        },
        {
          "name": "subject",
          "type": "String",
          "xpath": ["//article-categories/subj-group/subject"]
        }
      ],
      "dtd": [
        "note.dtd"
      ]
    }
}