Newer
Older
rnsr-geo-ml-dvc / libs / geo.mjs
import { chain, map, pipe, split, toUpper, uniq } from "ramda";
import { fs } from "zx";

/**
 * Build a copy of a city → area mapping whose city names use spaces instead
 * of dashes (e.g. "SAINT-DENIS" becomes "SAINT DENIS").
 *
 * When two cities collapse to the same spaced name, the one that comes later
 * in the key order of `mapping` wins.
 *
 * @param {{[city:string]: string}} mapping from dash-separated city to area
 * @returns {{[city: string]: string}} from space-separated city to area
 */
const getSpacedMapping = (mapping) =>
    // Built in a single pass: spreading the accumulator for every key would
    // copy the whole partial result each time (quadratic on large mappings).
    Object.fromEntries(
        Object.keys(mapping).map((city) => [city.replace(/-/g, " "), mapping[city]])
    );

/**
 * Build a matcher that finds every city of `spacedMapping` present in a
 * string as a whole, space-delimited token sequence.
 *
 * For each matching city the matcher returns its area and `index`, the
 * position in the string just after the last whole-token occurrence of the
 * city. Callers can use `index` to prefer the right-most match.
 *
 * @param {{[city:string]: string}} spacedMapping from space-separated city to area
 * @returns {(s: string) => {area: string, index: number}[]} matcher returning
 * one entry per matching city, in the key order of `spacedMapping`
 */
const getMatchingArea3 = (spacedMapping) =>
    /** @param {string} s */
    (s) => {
        // Padding with spaces lets a single search cover a city at the start,
        // in the middle, at the end, or spanning the whole string.
        const padded = ` ${s} `;
        return Object.keys(spacedMapping).flatMap((city) => {
            // An empty city name would match any stray run of whitespace.
            if (city === "") return [];
            // Search with the delimiters so the reported position is that of a
            // real token match, not of the city name embedded in a longer word.
            const start = padded.lastIndexOf(` ${city} `);
            // `start` (position of the leading space in `padded`) is also the
            // position of the city's first character in `s`.
            return start === -1
                ? []
                : [{ area: spacedMapping[city], index: start + city.length }];
        });
    };

/**
 * Resolve the geographic area of an address.
 *
 * The address is upper-cased and its dashes are replaced by spaces, then the
 * cities of `spacedMapping` are looked up in it with `getMatchingArea3`.
 *
 * - no city found: returns "UNKNOWN";
 * - all cities found belong to the same area: returns that area;
 * - otherwise: returns the area of the candidate with the greatest `index`
 *   (the first such candidate when several share it).
 *
 * @param {string} address space-separated-tokens address
 * @param {{[city:string]: string}} mapping from city to area; only used to
 * derive `spacedMapping` when the latter is not supplied
 * @param {{[city:string]: string}} spacedMapping from city to area (city is
 * spaced-separated)
 * @returns {string} the area, or "UNKNOWN"
 */
const getArea = (address, mapping, spacedMapping = getSpacedMapping(mapping)) => {
    const haystack = address.toUpperCase().replace(/-/g, " ");
    const matches = getMatchingArea3(spacedMapping)(haystack);

    if (matches.length === 0) {
        return "UNKNOWN";
    }

    // A single candidate, or several candidates agreeing on one area.
    const distinctAreas = new Set(matches.map(({ area }) => area));
    if (distinctAreas.size === 1) {
        return matches[0].area;
    }

    // Conflicting areas: keep the candidate with the highest index. The strict
    // comparison keeps the earliest candidate on ties.
    let winner = { area: "", index: -1 };
    for (const match of matches) {
        if (match.index > winner.index) {
            winner = match;
        }
    }
    return winner.area;
};

/**
 * Load the city → area mapping from "./data/netscity-ville-aire-uniq.tsv".
 *
 * Each line is split on tabs; the first column is used as the city and the
 * fourth column as its area. Lines with an empty first column (blank lines,
 * trailing newline) or without a fourth column are ignored, so the mapping
 * never contains an empty city or an `undefined` area. Both LF and CRLF line
 * endings are accepted. When a city appears several times, the last line wins.
 *
 * @returns {Promise<{[city: string]: string}>} from city to area
 */
const getMapping = async () => {
    const netscityStr = await fs.readFile("./data/netscity-ville-aire-uniq.tsv", "utf-8");
    const entries = netscityStr
        .split(/\r?\n/)
        .map((line) => line.split("\t"))
        .filter((columns) => columns[0] !== "" && columns[3] !== undefined)
        .map((columns) => [columns[0], columns[3]]);
    // Single-pass construction; avoids re-copying the mapping for every line.
    return Object.fromEntries(entries);
};

// Public API of this module; getMatchingArea3 is not exported.
export {
    getArea,
    getMapping,
    getSpacedMapping,
};