diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c209561 --- /dev/null +++ b/.gitignore @@ -0,0 +1,187 @@ +*.xml +*.jsonl + +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# nyc test coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# TypeScript v1 declaration files +typings/ + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variables file +.env + +# next.js build output +.next + +# @ezs/istex tests get a PDF file +*.pdf + +# vim swap files +*.swp +*.swo + +# result TSV files +res*.tsv + + +packages/analytics/test/forbidden +globalConfig.json + +### WebStorm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### WebStorm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + + + + +# local +*.ark diff --git a/README.md b/README.md index e69de29..019d555 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,5 @@ +### Usage + +```bash +ezs xml2json.ini < RNSR-2021.xml > RNSR-2021.jsonl +``` diff --git a/locals.js b/locals.js new file mode 100644 index 0000000..1d6c5fc --- /dev/null +++ b/locals.js @@ -0,0 +1,39 @@ +const handle1 = {}; +const handle2= {}; + +function hierachy(data, feed) { + if (this.isLast()) { + return feed.close(); + } + if (data.token) { + if (data.token[1] === 0) { + handle2[data.value.numero_national_de_structure] = data.value.libelle + data.value.regroupe.forEach((child)=> { + if (!handle1[child]) { + handle1[child] = []; + } + handle1[child].push(data.value.numero_national_de_structure); + }) + } + if (data.token[1] === 1) { + const structure_englobante = handle1[data.value.numero_national_de_structure] && handle1[data.value.numero_national_de_structure][0] ? handle1[data.value.numero_national_de_structure][0] : 'n/a'; + data.value.structure_englobante = { id: structure_englobante, value: handle2[structure_englobante] }; + data.value.structuration_hierachique = [data.value.structure_englobante]; + let id = structure_englobante; + let i = 0; + while (id && handle1[id] && handle1[id][0] && i < 5 ) { + data.value.structuration_hierachique.unshift({ id: handle1[id][0], value: handle2[handle1[id][0]] }); + id = handle1[id][0]; + i++; + } + data.value.structures_rattachees = data.value.regroupe.map(id => ({ id, value: handle2[id] })); + delete data.value.regroupe; + } + feed.send(data); + } +} + +module.exports = { + hierachy, +}; + diff --git a/xml2json.ini b/xml2json.ini new file mode 100644 index 0000000..4d4ceb8 --- /dev/null +++ b/xml2json.ini @@ -0,0 +1,61 @@ +[use] +plugin = basics +plugin = ./locals.js + + +[XMLParse] +separator = /structures/structure + +[assign] +path = fields.regroupe +value = get('grpSt.adhesionStruct').castArray().filter(Boolean).map(v => v.referenceStruct?.numStructLien.$t) + +path = fields.numero_national_de_structure +value = get('num_nat_struct.$t') + +path = fields.libelle +value = get('intitule.$cd') + +path = fields.annee_de_fermeture +value = get('an_fermeture.$t') + +path = fields.type_de_structure +value = get('typStruct.$t') + +path = fields.annee_de_creation +value = get('annee_creation.$t') + +path = fields.sigle +value = get('sigle.$cd') + +path = fields.url +value = get('url.$cd') + +[overturn] +path = fields +[overturn/hierachy] + +[OBJFlatten] +separator = / + +[exchange] +value = self().mapKeys((v, k) => k.replace(/\/\$[tc][d]*/, '')) + +[OBJFlatten] +separator = / +reverse = true + + +[replace] +path = id +value = get('fields.numero_national_de_structure') + +path = value +value = get('fields') + + +[pack] + + + +