I have the following data set:
DROP TABLE IF EXISTS #df
CREATE TABLE #df
(
PTID VARCHAR(10),
HospitalID VARCHAR(5),
Procedure_Dt date,
Check_In_Dt DATE,
);
INSERT INTO #df (PTID, HospitalID, Procedure_Dt, Check_In_Dt)
VALUES
('X0001', 'WY', '2021-07-25', '2021-07-23'),
('X0001', 'WY', '2021-07-25', '2021-10-24'),
('X0001', 'WY', '2021-07-25', '2021-10-27'),
('X0001', 'WY', '2021-07-25', '2021-06-24'),
('X0001', 'WY', '2021-07-25', '2022-06-10'),
('X0002', 'CA', '2022-08-25', '2022-08-26'),
('X0002', 'CA', '2022-08-25', '2022-08-27'),
('X0002', 'CA', '2022-08-25', '2022-08-29'),
('X0002', 'CA', '2022-08-25', '2022-09-22'),
('X0003', 'AL', '2023-02-02', NULL)
--SELECT * FROM #df
DROP TABLE IF EXISTS #df_datediff
;WITH CTE_datediff AS --Using only most recent quarter and year
(
SELECT PTID
, HospitalID
, Procedure_Dt
, Check_In_Dt
FROM #df
)
SELECT DISTINCT a.PTID
, HospitalID
, Procedure_Dt
, Check_In_Dt
, DATEDIFF(dd, CAST(Check_In_Dt AS DATE), Procedure_Dt) AS Date_Diff
INTO #df_datediff
FROM CTE_datediff a
I would like to be able to select the Check_In_Date
closest to the procedure date. However, this becomes complicated because some check in dates are after the procedure date and some are before.
Ultimately I would like the final dataset below:
DROP TABLE IF EXISTS #df_final
CREATE TABLE #df_final
(
PTID VARCHAR(10),
HospitalID VARCHAR(5),
Procedure_Dt date,
Check_In_Dt DATE,
Date_Diff smallint
);
INSERT INTO #df_final (PTID, HospitalID, Procedure_Dt, Check_In_Dt, Date_Diff)
VALUES
('X0001', 'WY', '2021-07-25', '2021-07-23', 2),
('X0002', 'CA', '2022-08-25', '2022-08-26', -1)
('X0003', 'AL', '2023-02-02', NULL, NULL)
I attempted to do this by writing the following code:
SELECT a.PTID, HospitalID
, Procedure_Dt
, Check_In_Dt
, a.Date_Diff
FROM #df_datediff a
JOIN (SELECT PTID, MIN(Check_In_Dt) AS Check_In_Date FROM #df_datediff GROUP BY PTID) B
ON a.PTID = B.PTID
AND a.Check_In_Dt = B.Check_In_Date
UNION /*Since using MAX in the above query removes Null Facesheets, we use this union to include the null facesheet accesses*/
SELECT a.PTID, HospitalID
, Procedure_Dt
, Check_In_Dt
, a.Date_Diff
FROM #df_datediff a
WHERE Check_In_Dt IS NULL;
The problem is, this selects, for PTID X0001, the check in date of '2021-06-24' while, for PTID X0002, it selects correct min negative value of '2022-08-26'. For X0001 it should be selecting '2021-07-23'
My goal is to keep the check in dates that are 0-40 days before a procedure as the numerator. All other check in dates should no be considered in the numerator.
Any tips would be appreciated.