-
Notifications
You must be signed in to change notification settings - Fork 48
/
Copy pathetl.sql
465 lines (461 loc) · 16.9 KB
/
etl.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
-- from drug_exposure
-- mapping is 85% done from gsn coding
WITH
"pr" AS (
SELECT
'drug:['|| coalesce(drug, drug_name_poe, drug_name_generic,'') ||']'|| 'prod_strength:['||coalesce(prod_strength,'')||']'|| 'drug_type:['||coalesce(drug_type,'')||']'|| 'formulary_drug_cd:['||coalesce(formulary_drug_cd,'') || ']' || 'dose_unit_rx:[' || coalesce(dose_unit_rx,'') || ']' as concept_name
, subject_id
, hadm_id
, dose_val_rx
, prescriptions.mimic_id as drug_exposure_id
, startdate as drug_exposure_start_datetime
, enddate as drug_exposure_end_datetime
, coalesce(c2.concept_id, c3.concept_id) as drug_concept_id
, gcpt_route_to_concept.concept_id as route_concept_id
, route as route_source_value --TODO: add route as local concept
, form_unit_disp as dose_unit_source_value --TODO: add unit as local concept
, ndc as drug_source_value -- ndc was used for automatic/manual mapping
, form_val_disp
FROM prescriptions
LEFT join :OMOP_SCHEMA.concept on domain_id = 'Drug' and concept_code = ndc::text --this covers 85% of direct mapping but no standard
LEFT join :OMOP_SCHEMA.concept_relationship on concept_id = concept_id_1 and relationship_id = 'Maps to'
LEFT join :OMOP_SCHEMA.concept c2 on c2.concept_id = concept_id_2 and c2.standard_concept = 'S' --covers 71% of rxnorm standards concepts
LEFT JOIN gcpt_route_to_concept using (route)
LEFT JOIN gcpt_prescriptions_ndcisnullzero_to_concept as c3 ON coalesce(drug, drug_name_poe, drug_name_generic,'') || ' ' || coalesce(prod_strength, '') = c3.label -- this improve to 85% mapping and save most of ndc = 0
),
"patients" AS (SELECT subject_id, mimic_id as person_id from patients),
"admissions" AS (SELECT hadm_id, mimic_id as visit_occurrence_id FROM admissions),
"omop_local_drug" AS (SELECT concept_name as drug_source_value, concept_id as drug_source_concept_id FROM :OMOP_SCHEMA.concept WHERE domain_id = 'prescriptions' AND vocabulary_id = 'MIMIC prescriptions'),
"row_to_insert" AS (
SELECT
drug_exposure_id
, person_id
, coalesce(drug_concept_id, 0) as drug_concept_id
, drug_exposure_start_datetime::date as drug_exposure_start_date
, (drug_exposure_start_datetime) AS drug_exposure_start_datetime
, drug_exposure_end_datetime::date as drug_exposure_end_date
, (drug_exposure_end_datetime) AS drug_exposure_end_datetime
, null::date as verbatim_end_date
, 38000177 as drug_type_concept_id
, null::text as stop_reason
, null::integer as refills
, extract_value_period_decimal(form_val_disp) as quantity --extract quantity from pure numeric when possible
, null::integer as days_supply
, null::text as sig
, route_concept_id
, null::text as lot_number
, null::integer as provider_id
, visit_occurrence_id
, null::integer as visit_detail_id
, drug_source_value
, drug_source_concept_id
, route_source_value
, dose_unit_source_value
, form_val_disp as quantity_source_value
FROM pr
LEFT JOIN omop_local_drug USING (drug_source_value)
LEFT JOIN patients USING (subject_id)
LEFT JOIN admissions USING (hadm_id)
)
INSERT INTO :OMOP_SCHEMA.drug_exposure
(
drug_exposure_id
, person_id
, drug_concept_id
, drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_date
, drug_exposure_end_datetime
, verbatim_end_date
, drug_type_concept_id
, stop_reason
, refills
, quantity
, days_supply
, sig
, route_concept_id
, lot_number
, provider_id
, visit_occurrence_id
, visit_detail_id
, drug_source_value
, drug_source_concept_id
, route_source_value
, dose_unit_source_value
, quantity_source_value
)
SELECT
row_to_insert.drug_exposure_id
, row_to_insert.person_id
, row_to_insert.drug_concept_id
, row_to_insert.drug_exposure_start_date
, row_to_insert.drug_exposure_start_datetime
, row_to_insert.drug_exposure_end_date
, row_to_insert.drug_exposure_end_datetime
, row_to_insert.verbatim_end_date
, row_to_insert.drug_type_concept_id
, row_to_insert.stop_reason
, row_to_insert.refills
, row_to_insert.quantity
, row_to_insert.days_supply
, row_to_insert.sig
, row_to_insert.route_concept_id
, row_to_insert.lot_number
, row_to_insert.provider_id
, row_to_insert.visit_occurrence_id
, row_to_insert.visit_detail_id
, row_to_insert.drug_source_value
, row_to_insert.drug_source_concept_id
, row_to_insert.route_source_value
, row_to_insert.dose_unit_source_value
, row_to_insert.quantity_source_value
FROM row_to_insert;
-- MEASUREMENT / inputevent
-- ajouter champs unit_concept_id
-- type = 38000180 -- Inpatient administration
-- route = 4112421 -- intravenous ()
-- inputevent_mv
-- route_concept_source = ordercategorydescription (ordercategoryname)
-- -> CREER les deux concepts
-- cgid provider
-- privilegie rate
-- stop reason: statusdescription
-- quality_concept_id : when 1 then cancel else ok. --> infered from data.
-- when orderid then fact_relationship with 44818791 -- Has temporal context [SNOMED]
-- weight into observation/measurement
WITH
"imv" AS (
SELECT
mimic_id AS drug_exposure_id
, subject_id
, hadm_id
, itemid
, cgid
, starttime as drug_exposure_start_datetime
, endtime as drug_exposure_end_datetime
, CASE WHEN rate IS NOT NULL THEN rate WHEN amount IS NOT NULL THEN amount ELSE NULL END AS quantity
, CASE WHEN rate IS NOT NULL THEN rateuom WHEN amount IS NOT NULL THEN amountuom ELSE NULL END AS dose_unit_source_value
, 38000180 AS drug_type_concept_id -- Inpatient administration
--, 4112421 as route_concept_id -- intraveous
, orderid = linkorderid as is_leader -- other input are linked to it/them
, first_value(mimic_id) over(partition by orderid order by starttime ASC) = mimic_id as is_orderid_leader -- other input are linked to it/them
, linkorderid
, orderid
, ordercategorydescription || ' (' || ordercategoryname || ')' AS route_source_value
, statusdescription as stop_reason
, ordercategoryname
, cancelreason
FROM inputevents_mv
WHERE cancelreason = 0
),
--"rxnorm_map" AS (SELECT distinct on (drug_source_value) concept_id as drug_concept_id, drug_source_value FROM mimic.gcpt_gdata_drug_exposure LEFT JOIN :OMOP_SCHEMA.concept ON drug_concept_id::text = concept_code AND domain_id = 'Drug' WHERE drug_concept_id IS NOT NULL),
"rxnorm_map" AS (-- exploit the mapping based on ndc
select distinct drug_concept_id, concept_name as drug_source_value from :OMOP_SCHEMA.drug_exposure left join :OMOP_SCHEMA.concept on drug_concept_id = concept_id where drug_concept_id != 0),
"patients" AS (SELECT mimic_id AS person_id, subject_id FROM patients),
"admissions" AS (SELECT mimic_id AS visit_occurrence_id, hadm_id FROM admissions),
"gcpt_inputevents_drug_to_concept" AS (SELECT itemid, concept_id as drug_concept_id FROM gcpt_inputevents_drug_to_concept),
"gcpt_mv_input_label_to_concept" AS (SELECT DISTINCT ON (item_id) item_id as itemid, concept_id as drug_concept_id FROM gcpt_mv_input_label_to_concept),
"gcpt_map_route_to_concept" AS (SELECT concept_id as route_concept_id, ordercategoryname FROM gcpt_map_route_to_concept),
"caregivers" AS (SELECT mimic_id AS provider_id, cgid FROM caregivers),
"d_items" AS (SELECT itemid, label as drug_source_value, mimic_id as drug_source_concept_id FROM d_items),
"fact_relationship" AS (
INSERT INTO :OMOP_SCHEMA.fact_relationship
(
domain_concept_id_1
, fact_id_1
, domain_concept_id_2
, fact_id_2
, relationship_concept_id
)
SELECT
DISTINCT
13 As fact_id_1 --Drug
, mv2.drug_exposure_id AS domain_concept_id_1
, 13 As fact_id_2 --Drug
, mv1.drug_exposure_id AS domain_concept_id_2
, 44818791 AS relationship_concept_id -- Has temporal context [SNOMED]
FROM imv mv1
LEFT JOIN imv mv2 ON (mv2.orderid = mv1.linkorderid AND mv2.is_leader IS TRUE)
),
"fact_relationship_order" AS (
INSERT INTO :OMOP_SCHEMA.fact_relationship
(
domain_concept_id_1
, fact_id_1
, domain_concept_id_2
, fact_id_2
, relationship_concept_id
)
SELECT
DISTINCT
13 As fact_id_1 --Drug
, mv2.drug_exposure_id AS domain_concept_id_1
, 13 As fact_id_2 --Drug
, mv1.drug_exposure_id AS domain_concept_id_2
, 44818784 AS relationship_concept_id -- Has associated procedure [SNOMED]
FROM imv mv1
LEFT JOIN imv mv2 ON (mv2.orderid = mv1.orderid AND mv2.is_orderid_leader IS TRUE)
),
"row_to_insert" AS (
SELECT
drug_exposure_id
, person_id
, coalesce(rxnorm_map.drug_concept_id, gcpt_inputevents_drug_to_concept.drug_concept_id, gcpt_mv_input_label_to_concept.drug_concept_id, 0) AS drug_concept_id
, drug_exposure_start_datetime::date AS drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_datetime::date AS drug_exposure_end_date
, drug_exposure_end_datetime
, null::date as verbatim_end_date
, drug_type_concept_id
, stop_reason
, null::integer as refills
, quantity
, null::integer as days_supply
, null::text as sig
, coalesce(route_concept_id, 0) as route_concept_id
, null::integer as lot_number
, provider_id
, visit_occurrence_id
, null::integer AS visit_detail_id
, drug_source_value
, d_items.drug_source_concept_id
, route_source_value
, dose_unit_source_value
FROM imv
LEFT JOIN patients USING (subject_id)
LEFT JOIN admissions USING (hadm_id)
LEFT JOIN caregivers USING (cgid)
LEFT JOIN gcpt_inputevents_drug_to_concept USING (itemid)
LEFT JOIN gcpt_mv_input_label_to_concept USING (itemid)
LEFT JOIN gcpt_map_route_to_concept USING (ordercategoryname)
LEFT JOIN d_items USING (itemid)
LEFT JOIN rxnorm_map USING (drug_source_value)
)
INSERT INTO :OMOP_SCHEMA.drug_exposure
(
drug_exposure_id
, person_id
, drug_concept_id
, drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_date
, drug_exposure_end_datetime
, verbatim_end_date
, drug_type_concept_id
, stop_reason
, refills
, quantity
, days_supply
, sig
, route_concept_id
, lot_number
, provider_id
, visit_occurrence_id
, visit_detail_id
, drug_source_value
, drug_source_concept_id
, route_source_value
, dose_unit_source_value
, quantity_source_value
)
SELECT
drug_exposure_id
, person_id
, drug_concept_id
, drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_date
, drug_exposure_end_datetime
, verbatim_end_date
, drug_type_concept_id
, stop_reason
, refills
, quantity
, days_supply
, sig
, route_concept_id
, lot_number
, provider_id
, row_to_insert.visit_occurrence_id
, visit_detail_assign.visit_detail_id
, drug_source_value
, drug_source_concept_id
, route_source_value
, dose_unit_source_value
, quantity::text as quantity_source_value
FROM row_to_insert
LEFT JOIN :OMOP_SCHEMA.visit_detail_assign
ON row_to_insert.visit_occurrence_id = visit_detail_assign.visit_occurrence_id
AND
(--only one visit_detail
(is_first IS TRUE AND is_last IS TRUE)
OR -- first
(is_first IS TRUE AND is_last IS FALSE AND row_to_insert.drug_exposure_start_datetime <= visit_detail_assign.visit_end_datetime)
OR -- last
(is_last IS TRUE AND is_first IS FALSE AND row_to_insert.drug_exposure_start_datetime > visit_detail_assign.visit_start_datetime)
OR -- middle
(is_last IS FALSE AND is_first IS FALSE AND row_to_insert.drug_exposure_start_datetime > visit_detail_assign.visit_start_datetime AND row_to_insert.drug_exposure_start_datetime <= visit_detail_assign.visit_end_datetime)
);
-- inputevent_cv
-- when rate chattime -> start
-- when amount charttime -> end
-- stopped as is -> stop_reason
-- concept_id gcpt_inputevents_drug_to_concept, gcpt_mv_input_label_to_concept, gcpt_cv_input_label_to_concept
-- route = NULL (!= originalroute, original* never considered)
WITH
"icv" AS (
SELECT
mimic_id AS drug_exposure_id
, subject_id
, hadm_id
, cgid
, itemid
--when rate then start date, when amount then end date (from mimic docuemntaiton)
, CASE WHEN rate IS NOT NULL THEN charttime WHEN amount IS NULL THEN charttime END as drug_exposure_start_datetime
, CASE WHEN rate IS NULL AND amount IS NOT NULL THEN charttime ELSE NULL END as drug_exposure_end_datetime
, CASE WHEN rate IS NOT NULL THEN rate WHEN amount IS NOT NULL THEN amount ELSE NULL END as quantity
, CASE WHEN rate IS NOT NULL THEN rateuom WHEN amount IS NOT NULL THEN amountuom ELSE NULL END as dose_unit_source_value
, 38000180 AS drug_type_concept_id -- Inpatient administration
--, 4112421 as route_concept_id -- intraveous
, orderid = linkorderid as is_leader -- other input are linked to it/them
, orderid
, linkorderid
, originalroute
, stopped as stop_reason
FROM inputevents_cv
),
"patients" AS (SELECT mimic_id AS person_id, subject_id FROM patients),
"admissions" AS (SELECT mimic_id AS visit_occurrence_id, hadm_id FROM admissions),
--"rxnorm_map" AS (SELECT DISTINCT ON (drug_source_value) concept_id as drug_concept_id, drug_source_value FROM .gcpt_gdata_drug_exposure LEFT JOIN :OMOP_SCHEMA.concept ON drug_concept_id::text = concept_code AND domain_id = 'Drug' WHERE drug_concept_id IS NOT NULL),
"rxnorm_map" AS (-- exploit the mapping based on ndc
select distinct drug_concept_id, concept_name as drug_source_value from :OMOP_SCHEMA.drug_exposure left join :OMOP_SCHEMA.concept on drug_concept_id = concept_id where drug_concept_id != 0),
"gcpt_inputevents_drug_to_concept" AS (SELECT itemid, concept_id as drug_concept_id FROM gcpt_inputevents_drug_to_concept),
"gcpt_cv_input_label_to_concept" AS (SELECT DISTINCT ON (item_id) item_id as itemid, concept_id as drug_concept_id FROM gcpt_mv_input_label_to_concept),
"caregivers" AS (SELECT mimic_id AS provider_id, cgid FROM caregivers),
"gcpt_map_route_to_concept" AS (SELECT concept_id as route_concept_id, ordercategoryname as originalroute FROM gcpt_map_route_to_concept),
"d_items" AS (SELECT itemid, label as drug_source_value, mimic_id as drug_source_concept_id FROM d_items),
"gcpt_continuous_unit_carevue.csv" as (
select dose_unit_source_value, dose_unit_source_value_new
from gcpt_continuous_unit_carevue),
"fact_relationship" AS (
INSERT INTO :OMOP_SCHEMA.fact_relationship
(
domain_concept_id_1
, fact_id_1
, domain_concept_id_2
, fact_id_2
, relationship_concept_id
)
SELECT
DISTINCT
13 As fact_id_1 --Drug
, cv2.drug_exposure_id AS domain_concept_id_1
, 13 As fact_id_2 --Drug
, cv1.drug_exposure_id AS domain_concept_id_2
, 44818791 AS relationship_concept_id -- Has temporal context [SNOMED]
FROM icv cv1
LEFT JOIN icv cv2 ON (cv2.orderid = cv1.linkorderid AND cv2.is_leader IS TRUE)
WHERE cv2.drug_exposure_id IS NOT NULL
RETURNING *
),
"row_to_insert" AS (
SELECT
drug_exposure_id
, person_id
, coalesce(rxnorm_map.drug_concept_id, gcpt_inputevents_drug_to_concept.drug_concept_id, gcpt_cv_input_label_to_concept.drug_concept_id, 0) AS drug_concept_id
, drug_exposure_start_datetime::date AS drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_datetime::date AS drug_exposure_end_date
, drug_exposure_end_datetime
, null::date as verbatim_end_date
, drug_type_concept_id
, stop_reason
, null::integer as refills
, quantity
, null::integer as days_supply
, null::text as sig
, coalesce(route_concept_id,0) as route_concept_id
, null::integer as lot_number
, provider_id
, visit_occurrence_id
, null::integer AS visit_detail_id
, drug_source_value
, d_items.drug_source_concept_id
, null::text route_source_value
, coalesce(gcpt_continuous_unit_carevue.dose_unit_source_value_new, dose_unit_source_value) as dose_unit_source_value
FROM icv
LEFT JOIN patients USING (subject_id)
LEFT JOIN admissions USING (hadm_id)
LEFT JOIN caregivers USING (cgid)
LEFT JOIN gcpt_inputevents_drug_to_concept USING (itemid)
LEFT JOIN gcpt_cv_input_label_to_concept USING (itemid)
LEFT JOIN d_items USING (itemid)
LEFT JOIN rxnorm_map USING (drug_source_value)
LEFT JOIN gcpt_map_route_to_concept USING (originalroute)
LEFT JOIN gcpt_continuous_unit_carevue USING (dose_unit_source_value)
)
INSERT INTO :OMOP_SCHEMA.drug_exposure
(
drug_exposure_id
, person_id
, drug_concept_id
, drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_date
, drug_exposure_end_datetime
, verbatim_end_date
, drug_type_concept_id
, stop_reason
, refills
, quantity
, days_supply
, sig
, route_concept_id
, lot_number
, provider_id
, visit_occurrence_id
, visit_detail_id
, drug_source_value
, drug_source_concept_id
, route_source_value
, dose_unit_source_value
, quantity_source_value
)
SELECT
drug_exposure_id
, person_id
, drug_concept_id
, drug_exposure_start_date
, drug_exposure_start_datetime
, drug_exposure_end_date
, drug_exposure_end_datetime
, verbatim_end_date
, drug_type_concept_id
, stop_reason
, refills
, quantity
, days_supply
, sig
, route_concept_id
, lot_number
, provider_id
, row_to_insert.visit_occurrence_id
, visit_detail_assign.visit_detail_id
, drug_source_value
, drug_source_concept_id
, route_source_value
, dose_unit_source_value
, quantity::text as quantity_source_value
FROM row_to_insert
LEFT JOIN :OMOP_SCHEMA.visit_detail_assign
ON row_to_insert.visit_occurrence_id = visit_detail_assign.visit_occurrence_id
AND
(--only one visit_detail
(is_first IS TRUE AND is_last IS TRUE)
OR -- first
(is_first IS TRUE AND is_last IS FALSE AND row_to_insert.drug_exposure_start_datetime <= visit_detail_assign.visit_end_datetime)
OR -- last
(is_last IS TRUE AND is_first IS FALSE AND row_to_insert.drug_exposure_start_datetime > visit_detail_assign.visit_start_datetime)
OR -- middle
(is_last IS FALSE AND is_first IS FALSE AND row_to_insert.drug_exposure_start_datetime > visit_detail_assign.visit_start_datetime AND row_to_insert.drug_exposure_start_datetime <= visit_detail_assign.visit_end_datetime)
);