import { chain, map, pipe, split, toUpper, uniq } from "ramda"; import { fs } from "zx"; /** * @param {{[city:string]: string}} mapping * @returns {{[city: string]: string}} */ const getSpacedMapping = (mapping) => Object.keys(mapping) .reduce( (m, city) => ({ ...m, [city.replace(/\-/g, " ")]: mapping[city] }), {} ); /** * @param {{[city:string]: string}} spacedMapping */ const getMatchingArea3 = (spacedMapping) => /** @param {string} s */ (s) => Object.keys(spacedMapping) .flatMap((city) => s.includes(` ${city} `) || s.endsWith(` ${city}`) || s.startsWith(`${city} `) ? [{ area: spacedMapping[city], index: s.lastIndexOf(city) + city.length }] : []); /** * Return a geographic area for an address. * * Look first into dash-separated city names, then into space-separated city * names. * * Return "UNKNOWN" when no area is found. * * @param {string} address space-separated-tokens address * @param {{[city:string]: string}} mapping from city to area * @param {{[city:string]: string}} spacedMapping from city to area (city is * spaced-separated) */ const getArea = (address, mapping, spacedMapping = getSpacedMapping(mapping)) => { const normalizedAddress = address.toUpperCase().replace(/-/g, " "); const candidateAreas = getMatchingArea3(spacedMapping)(normalizedAddress); if (candidateAreas.length === 0) return "UNKNOWN"; if (candidateAreas.length === 1) return candidateAreas[0].area; const candidateAreaNames = uniq(candidateAreas.map(c => c.area)); if (candidateAreaNames.length === 1) return candidateAreaNames[0]; const chosenCandidate = candidateAreas.reduce( (best, candidate) => candidate.index > best.index ? candidate : best, { area: "", index: -1 } ); // console.error(`"${chosenCandidate.area}"\t${address}`); // console.error(candidateAreas); return chosenCandidate.area; }; const getMapping = async () => { const netscityStr = await fs.readFile("./data/netscity-ville-aire-uniq.tsv", "utf-8"); const lines = netscityStr.split("\n"); /** @type {{[city: string]: string}} */ const mapping = lines.reduce( (m, line) => ({ ...m, [line.split("\t")[0]]: line.split("\t")[3] }), {} ); return mapping; } export { getArea, getMapping, getSpacedMapping, };