import {
keys,
filter,
includes,
intersection,
groupBy,
prop,
difference,
} from "ramda"
/**
 * Return a copy of `row` in which the `oldColName` property is stored under
 * `oldColName + suffix` instead. All other properties are copied unchanged.
 *
 * The renamed key is placed first in the result. If `row` does not have
 * `oldColName`, the new key is still created with the value `undefined`.
 *
 * @param {String} oldColName Name of the column to rename
 * @param {String} suffix Text appended to the column name
 * @param {Object} row Source row (not modified)
 * @return {Object} New row object with the column renamed
 */
const renameCol = (oldColName, suffix, row) => {
  // Split the row into the value being renamed and everything else.
  const { [oldColName]: value, ...rest } = row
  const renamedKey = oldColName + suffix
  return { [renamedKey]: value, ...rest }
}
/**
 * Append `suffix` to every column listed in `cols`, for every row of `arr`.
 *
 * The renames are applied cumulatively: each column is renamed on top of the
 * result of the previous rename, so all listed columns end up suffixed (not
 * just the last one). The input rows are not modified; when `cols` is empty
 * the original array is returned as-is.
 *
 * @param {String[]} cols Column names to rename
 * @param {Object[]} arr Rows to process
 * @param {String} suffix Text appended to each listed column name
 * @return {Object[]} Rows with the listed columns renamed
 */
const renameDuplicateColumns = (cols, arr, suffix) =>
  cols.reduce(
    // Feed the rows produced by the previous rename into the next one.
    (renamed, c) => renamed.map(r => renameCol(c, suffix, r)),
    arr
  )
/**
 * Return a copy of `row` that has a property for every name in `cols`.
 *
 * Columns that `row` does not already have are added with the value
 * `undefined`; existing properties are kept unchanged. The input row is not
 * mutated.
 *
 * @param {Object} row Row to complete
 * @param {String[]} cols Column names the result must contain
 * @return {Object} New row containing every column in `cols`
 */
const fillRow = (row, cols) => {
  // Work on a shallow copy so the caller's object is left untouched.
  const filledRow = { ...row }
  difference(cols, keys(row)).forEach(c => {
    filledRow[c] = undefined
  })
  return filledRow
}
/**
 * Join two dataframes on a column.
 *
 * Performs a left join on two dataframes.
 * The 'On' arguments set which column in each df to join on.
 * The 'Suffix' arguments determine what the suffix should be when the two
 * dataframes have overlapping column names besides the one being joined on.
 *
 * Notes on behaviour:
 * - Column names are taken from the first row of each dataframe.
 * - The result has one row per distinct `leftOn` value. When a key occurs
 *   more than once on either side, only the first row for that key is used.
 * - Rows are matched through the keys of a grouped object, so join values are
 *   compared as property names.
 * - Left rows without a match on the right are kept; their right-hand columns
 *   are set to `undefined`.
 *
 * @func
 * @memberOf Z
 * @category Manipulation
 * @param {df} dfLeft First dataframe
 * @param {df} dfRight Second dataframe
 * @param {String} leftOn Left column to join on
 * @param {String} rightOn Right column to join on
 * @param {String} leftSuffix Left suffix for overlapping column names
 * @param {String} rightSuffix Right suffix for overlapping column names
 * @return {df} Joined dataframe
 * @example
 *
 * const df1 = [{"label": "A", "value": 7}, {"label": "B", "value": 2}, {"label": "C", "value": 75}]
 * const df2 = [{"label": "A", "value": "2010-12-13"}, {"label": "B", "value": "2010-12-15"}, {"label": "C", "value": "2010-12-17"}]
 * Z.merge(df1, df2, "label", "label", "_df1", "_df2")
 * // [
 * //   { label: "A", value_df1: 7, value_df2: "2010-12-13" },
 * //   { label: "B", value_df1: 2, value_df2: "2010-12-15" },
 * //   { label: "C", value_df1: 75, value_df2: "2010-12-17" },
 * // ]
 */
const merge = (dfLeft, dfRight, leftOn, rightOn, leftSuffix, rightSuffix) => {
  const colsLeft = keys(dfLeft[0])
  const colsRight = keys(dfRight[0])
  // Columns present on both sides, excluding the join columns themselves.
  const intersected = filter(
    x => !includes(x, [leftOn, rightOn]),
    intersection(colsLeft, colsRight)
  )
  const dfLeftUpdated = renameDuplicateColumns(intersected, dfLeft, leftSuffix)
  const dfRightUpdated = renameDuplicateColumns(
    intersected,
    dfRight,
    rightSuffix
  )
  const colsLeftUpdated = keys(dfLeftUpdated[0])
  const colsRightUpdated = keys(dfRightUpdated[0])
  const colsAll = Array.from(new Set([...colsLeftUpdated, ...colsRightUpdated]))
  const dfLeftGrouped = groupBy(prop(leftOn), dfLeftUpdated)
  const dfRightGrouped = groupBy(prop(rightOn), dfRightUpdated)
  const index = keys(dfLeftGrouped)
  return index.map(i => {
    const leftRow = dfLeftGrouped[i][0]
    // Look the key up explicitly instead of relying on a thrown TypeError, so
    // genuine errors are no longer swallowed. The own-property check keeps
    // inherited names (e.g. "constructor") from counting as a match.
    const hasMatch = Object.prototype.hasOwnProperty.call(dfRightGrouped, i)
    const rightRow = hasMatch ? dfRightGrouped[i][0] : undefined
    // Spreading `undefined` adds nothing, leaving a left-only row to be filled.
    return fillRow({ ...leftRow, ...rightRow }, colsAll)
  })
}
export default merge