Skip to content

Commit

Permalink
Merge pull request #99 from icefoganalytics/issue-37/automate-dataset…
Browse files Browse the repository at this point in the history
…-field-creation-from-api-link

Automate Dataset Field Creation From API Link
  • Loading branch information
klondikemarlen authored May 16, 2024
2 parents 90b623d + 32a0981 commit 9b3e8f3
Show file tree
Hide file tree
Showing 12 changed files with 290 additions and 51 deletions.
8 changes: 4 additions & 4 deletions api/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"@types/express-fileupload": "^1.5.0",
"@types/jest": "^29.5.11",
"@types/jmespath": "^0.15.2",
"@types/lodash": "^4.14.202",
"@types/lodash": "^4.17.3",
"@types/luxon": "^3.4.2",
"@types/nodemailer": "^6.4.14",
"@types/papaparse": "^5.3.14",
Expand Down
1 change: 1 addition & 0 deletions api/src/models/dataset-integration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export type DatasetIntegrationRawJsonDataType = Record<string, unknown>
export type DatasetIntegrationParsedJsonDataType = Record<string, unknown>[]

// NOTE(review): presumably caps how many records an integration handles at once;
// usages are not visible here — confirm against callers.
export const MAX_RECORDS = 100 // TODO: consider making this configurable?
// Property name used when an array of bare strings is normalized into an array
// of objects: each string `value` becomes `{ [DEFAULT_KEY]: value }`.
export const DEFAULT_KEY = "data"

export class DatasetIntegration extends Model<
InferAttributes<DatasetIntegration>,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import { isArray, isNil } from "lodash"
import { search } from "jmespath"

import DatasetIntegration from "@/models/dataset-integration"
import isStringArray from "@/utils/is-string-array"
import DatasetIntegration, {
DEFAULT_KEY,
DatasetIntegrationParsedJsonDataType,
} from "@/models/dataset-integration"

import BaseService from "@/services/base-service"

Expand All @@ -17,19 +21,32 @@ export class ApplyJMESPathTransformService extends BaseService {
throw new Error("An integration must have data to be parsed.")
}

if (isNil(jmesPathTransform) && isArray(rawJsonData)) {
return this.datasetIntegration.set({
parsedJsonData: rawJsonData,
})
let searchedRawJsonData = rawJsonData
if (!isNil(jmesPathTransform)) {
searchedRawJsonData = search(rawJsonData, jmesPathTransform)
}

if (isNil(jmesPathTransform)) {
if (!isArray(searchedRawJsonData)) {
throw new Error("An integration must parse to an array to be valid")
}

const parsedJsonData = search(rawJsonData, jmesPathTransform)
return this.datasetIntegration.set({
parsedJsonData,
const normalizedData = this.nomalizeData(searchedRawJsonData)
await this.datasetIntegration.update({
parsedJsonData: normalizedData,
})

return this.datasetIntegration
}

/**
 * Coerces transformed integration data into the parsed-data shape (an array
 * of objects).
 *
 * An array made up solely of strings is converted so that every string is
 * wrapped in an object keyed by DEFAULT_KEY; any other input is handed back
 * untouched.
 *
 * @param rawJsonData - array produced by the JMESPath step
 * @returns the same data as an array of records
 */
private nomalizeData(
  rawJsonData: string[] | Record<string, unknown>[]
): DatasetIntegrationParsedJsonDataType {
  // Already an array of records: nothing to wrap.
  if (!isStringArray(rawJsonData)) {
    return rawJsonData
  }

  const wrapInDefaultKey = (entry: string) => ({ [DEFAULT_KEY]: entry })
  return rawJsonData.map(wrapInDefaultKey)
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { isNil, isString } from "lodash"
import { isNil } from "lodash"
import { CreationAttributes } from "sequelize"

import db, { DatasetEntry, DatasetIntegration } from "@/models"
import { type DatasetEntryJsonDataType } from "@/models/dataset-entry"

import BaseService from "@/services/base-service"

Expand All @@ -27,19 +26,11 @@ export class BulkReplaceDatasetEntriesService extends BaseService {

const datasetEntriesAttributes: CreationAttributes<DatasetEntry>[] = parsedJsonData.map(
(rawJsonData) => {
let jsonData: DatasetEntryJsonDataType
if (isString(rawJsonData)) {
jsonData = {
value: rawJsonData,
} as DatasetEntryJsonDataType
} else {
jsonData = rawJsonData as DatasetEntryJsonDataType
}

return {
datasetId,
rawJsonData,
jsonData,
// TODO: fix types, and pipeline, so this cast is not necessary
jsonData: rawJsonData as Record<string, string | number>,
}
}
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { isEmpty, isNil, isNumber, startCase } from "lodash"
import { CreationAttributes } from "sequelize"

import db, { DatasetField, DatasetIntegration } from "@/models"
import { DatasetFieldDataTypes } from "@/models/dataset-field"
import BaseService from "@/services/base-service"

/**
 * Replaces all DatasetField rows of an integration's dataset with fields
 * derived from the first record of the integration's parsed JSON data.
 *
 * Each key of that first record becomes one field: the key is used as the
 * field name, its start-cased form as the display name, and the value decides
 * the data type.
 */
export class BulkReplaceDatasetFieldsService extends BaseService {
  constructor(private datasetIntegration: DatasetIntegration) {
    super()
  }

  /**
   * Deletes the dataset's existing fields and creates the replacement set.
   *
   * @returns the dataset fields created by the bulk insert
   * @throws Error when the integration has no (or empty) parsed JSON data
   */
  async perform(): Promise<DatasetField[]> {
    const { datasetId, parsedJsonData } = this.datasetIntegration

    // Validate before touching the database: unusable input must never get as
    // far as issuing the destructive delete below.
    if (isNil(parsedJsonData) || isEmpty(parsedJsonData)) {
      throw new Error("An integration must have data to build dataset fields.")
    }

    // Only the first record is inspected; keys that appear solely in later
    // records do not produce fields.
    const firstRecord = parsedJsonData[0]
    const datasetFieldsAttributes: CreationAttributes<DatasetField>[] = Object.entries(
      firstRecord
    ).map(([key, value]) => ({
      datasetId,
      name: key,
      displayName: startCase(key),
      dataType: this.determineDataType(value),
    }))

    // Keep the transaction limited to the two writes.
    // NOTE(review): no transaction object is passed to destroy/bulkCreate, so
    // this assumes the transaction is propagated implicitly (e.g. Sequelize
    // CLS) — confirm against the db setup.
    return db.transaction(async () => {
      await DatasetField.destroy({
        where: {
          datasetId,
        },
      })

      return DatasetField.bulkCreate(datasetFieldsAttributes)
    })
  }

  /**
   * Picks the field data type for a sample value: numbers map to INTEGER and
   * everything else maps to TEXT.
   */
  private determineDataType(value: unknown): DatasetFieldDataTypes {
    // NOTE(review): isNumber is also true for non-integer numbers and NaN, so
    // such values are labelled INTEGER as well — confirm this is intended.
    if (isNumber(value)) {
      return DatasetField.DataTypes.INTEGER
    }

    return DatasetField.DataTypes.TEXT
  }
}

export default BulkReplaceDatasetFieldsService
1 change: 1 addition & 0 deletions api/src/services/dataset-integrations/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ export { UpdateService } from "./update-service"
export { ActivateService } from "./activate-service"
export { ApplyJMESPathTransformService } from "./apply-jmes-path-transform-service"
export { BulkReplaceDatasetEntriesService } from "./bulk-replace-dataset-entries-service"
export { BulkReplaceDatasetFieldsService } from "./bulk-replace-dataset-fields-service"
export { RefreshService } from "./refresh-service"
21 changes: 19 additions & 2 deletions api/src/services/dataset-integrations/update-service.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import db, { DatasetIntegration, User } from "@/models"
import db, { DatasetField, DatasetIntegration, User } from "@/models"
import {
ApplyJMESPathTransformService,
BulkReplaceDatasetEntriesService,
BulkReplaceDatasetFieldsService,
RefreshService,
} from "@/services/dataset-integrations"

Expand Down Expand Up @@ -30,14 +31,30 @@ export class UpdateService extends BaseService {

await RefreshService.perform(this.datasetIntegration)
await ApplyJMESPathTransformService.perform(this.datasetIntegration)
// TODO: create fields if none exist during dataset import

if (!(await this.hasDataSetFields())) {
await BulkReplaceDatasetFieldsService.perform(this.datasetIntegration)
}

await BulkReplaceDatasetEntriesService.perform(this.datasetIntegration)

// TODO: log user action

return this.datasetIntegration.save()
})
}

// TODO: consider if we should always add fields, but mark them as hidden by default
/**
 * Reports whether at least one DatasetField row exists for the dataset this
 * integration belongs to.
 *
 * @returns true when the field count for the dataset is greater than zero
 */
private async hasDataSetFields(): Promise<boolean> {
  const existingFieldCount = await DatasetField.count({
    where: { datasetId: this.datasetIntegration.datasetId },
  })
  return existingFieldCount > 0
}
}

export default UpdateService
22 changes: 18 additions & 4 deletions api/src/services/datasets/refresh/create-service.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import db, { DatasetIntegration, User } from "@/models"
import db, { DatasetField, DatasetIntegration, User } from "@/models"
import { DatasetIntegrations } from "@/services"

import BaseService from "@/services/base-service"
Expand All @@ -18,16 +18,30 @@ export class CreateService extends BaseService {
async perform(): Promise<DatasetIntegration> {
return db.transaction(async () => {
await DatasetIntegrations.RefreshService.perform(this.datasetIntegration)

await DatasetIntegrations.ApplyJMESPathTransformService.perform(this.datasetIntegration)
// TODO: create fields if none exist during dataset import
await DatasetIntegrations.BulkReplaceDatasetEntriesService.perform(this.datasetIntegration)

if (!(await this.hasDataSetFields())) {
await DatasetIntegrations.BulkReplaceDatasetFieldsService.perform(this.datasetIntegration)
}

await DatasetIntegrations.BulkReplaceDatasetEntriesService.perform(this.datasetIntegration)
// TODO: log user action

return this.datasetIntegration.save()
})
}

// TODO: consider if we should always add fields, but mark them as hidden by default
/**
 * Checks whether the integration's dataset already has any DatasetField rows.
 *
 * @returns true when counting fields for the dataset yields more than zero
 */
private async hasDataSetFields(): Promise<boolean> {
  const where = { datasetId: this.datasetIntegration.datasetId }
  const fieldCount = await DatasetField.count({ where })
  return fieldCount > 0
}
}

export default CreateService
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { search } from "jmespath"

import { DatasetField, DatasetIntegration, User } from "@/models"
import {
DEFAULT_KEY,
DatasetIntegrationParsedJsonDataType,
DatasetIntegrationRawJsonDataType,
} from "@/models/dataset-integration"
Expand Down Expand Up @@ -32,13 +33,16 @@ export class CreateFromIntegrationService extends BaseService {
const allRawJsonData = await DatasetIntegrations.RefreshService.perform(this.datasetIntegration)
await this.datasetIntegration.save()

const parsedJsonData = this.applyJMESPathTransform(this.datasetIntegration, allRawJsonData)
const normalizedData = this.nomalizeData(parsedJsonData, headerKeys)
const parsedAndNormalizedJsonData = this.applyJMESPathTransformAndNormalize(
this.datasetIntegration,
allRawJsonData
)
this.assertSelfConsistentDataStructure(parsedAndNormalizedJsonData, headerKeys)

let filteredData = normalizedData
let filteredData = parsedAndNormalizedJsonData
const searchToken = this.options.searchToken
if (!isNil(searchToken) && !isEmpty(searchToken)) {
filteredData = this.filterData(normalizedData, headerKeys, searchToken)
filteredData = this.filterData(parsedAndNormalizedJsonData, headerKeys, searchToken)
}

for (const entry of filteredData) {
Expand Down Expand Up @@ -76,10 +80,10 @@ export class CreateFromIntegrationService extends BaseService {
})
}

private nomalizeData(
private assertSelfConsistentDataStructure(
parsedJsonData: DatasetIntegrationParsedJsonDataType,
headerKeys: string[]
): DatasetIntegrationParsedJsonDataType {
): void {
if (headerKeys.length === 0) {
throw new Error("Header keys array is empty")
}
Expand All @@ -89,12 +93,6 @@ export class CreateFromIntegrationService extends BaseService {
throw new Error("Parsed JSON data is empty")
}

const isSimpleStringArray = isString(firstEntry) && headerKeys.length === 1
if (isSimpleStringArray) {
const firstHeaderKey = headerKeys[0]
return parsedJsonData.map((value) => ({ [firstHeaderKey]: value }))
}

if (typeof firstEntry !== "object") {
throw new Error("Parsed JSON data is not an object")
}
Expand All @@ -105,25 +103,33 @@ export class CreateFromIntegrationService extends BaseService {
throw new Error("There is a mismatch between header keys and parsed JSON data keys.")
}

return parsedJsonData
return
}

private applyJMESPathTransform(
private applyJMESPathTransformAndNormalize(
datasetIntegration: DatasetIntegration,
allRawJsonData: DatasetIntegrationRawJsonDataType
): DatasetIntegrationParsedJsonDataType {
const { jmesPathTransform } = datasetIntegration

if (isNil(jmesPathTransform) && isArray(allRawJsonData)) {
return allRawJsonData
if (isNil(allRawJsonData)) {
throw new Error("An integration must have data to be parsed.")
}

if (isNil(jmesPathTransform)) {
let searchedAllRawJsonData = allRawJsonData
if (!isNil(jmesPathTransform)) {
searchedAllRawJsonData = search(allRawJsonData, jmesPathTransform)
}

if (!isArray(searchedAllRawJsonData)) {
throw new Error("An integration must parse to an array to be valid")
}

const parsedJsonData = search(allRawJsonData, jmesPathTransform)
return parsedJsonData
if (searchedAllRawJsonData.every(isString)) {
return searchedAllRawJsonData.map((value) => ({ [DEFAULT_KEY]: value }))
}

return searchedAllRawJsonData
}
}

Expand Down
7 changes: 7 additions & 0 deletions api/src/utils/is-string-array.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { isArray, isString } from "lodash"

/**
 * Type guard that is true when `data` is an array whose elements are all
 * strings. An empty array passes, since there is no element to fail the check.
 *
 * @param data - value to test
 * @returns whether `data` can be treated as `string[]`
 */
export function isStringArray<T>(data: T | string[]): data is string[] {
  if (!isArray(data)) {
    return false
  }

  return data.every((element) => isString(element))
}

export default isStringArray
Loading

0 comments on commit 9b3e8f3

Please sign in to comment.