I would like to dynamically query data that is staged as a long string by defining how to read the string and how to split it up.
So I can define the data with the following elements
FIELD_NAME VARCHAR2(30) NOT NULL,
DATA_TYPE VARCHAR2(20) NOT NULL,
COLUMN_ID NUMBER NOT NULL,
FIELD_START_POS NUMBER,
FIELD_END_POS NUMBER,
FIELD_LEN NUMBER,
ROW_TYPE VARCHAR2(10),
DATE_MASK VARCHAR2(12)
sample data in this table
can I take that info to create a select that would look something like
SELECT CASE cd.data_type
WHEN 'DATE'
THEN
TO_DATE (SUBSTR (sd.source_text, cd.field_start_pos, cd.field_len), cd.date_mask)
WHEN 'NUMBER'
THEN
TO_NUMBER (SUBSTR (sd.source_text, cd.field_start_pos, cd.field_len))
ELSE
TRIM (SUBSTR (sd.source_text, cd.field_start_pos, cd.field_len))
END
AS cd.field_name
FROM staged_data sd, column_definitions cd
I am having difficulties trying to tie the 2 together.
I know I could pivot the column names in the definition out like so:
SELECT *
FROM column_definitions
PIVOT (max(field_name) FOR column_id IN (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20))
but this still results in many rows
My goal is to generate this statement so that is can be run via the EXECUTE IMMEDIATE so it could work for many different files just by defining how to read the string.
I also have the need to read different row types hence the row_type column which will be defined for the same file but had their own column order and columns.
So I have been able to generate a string that is the select I am looking for based on the metadata about the staged file like this:
DECLARE
select_items VARCHAR2 (4000);
BEGIN
FOR c IN ( SELECT *
FROM column_definitions
WHERE file_pk = 1 AND row_type = 1
ORDER BY column_id)
LOOP
IF c.data_type = 'NUMBER'
THEN
select_items :=
select_items
|| 'CASE WHEN is_number(SUBSTR(row_data,'
|| c.field_start_pos
|| ','
|| c.field_len
|| ')) = ''TRUE'' THEN TO_NUMBER(SUBSTR(row_data,'
|| c.field_start_pos
|| ','
|| c.field_len
|| ')) ELSE NULL END AS '
|| c.field_name
|| ',';
ELSIF c.data_type = 'DATE'
THEN
select_items :=
select_items
|| 'CASE WHEN ISDATE(SUBSTR(row_data,'
|| c.field_start_pos
|| ','
|| c.field_len
|| '))=''true'' THEN TO_DATE(SUBSTR(row_data,'
|| c.field_start_pos
|| ','
|| c.field_len
|| '),'''
|| c.date_mask
|| ''') ELSE NULL END AS '
|| c.field_name
|| ',';
ELSE
select_items :=
select_items
|| 'TRIM(SUBSTR(row_data,'
|| c.field_start_pos
|| ','
|| c.field_len
|| ')) AS '
|| c.field_name
|| ',';
END IF;
END LOOP;
select_items := SUBSTR (select_items, 1, LENGTH (select_items) - 1);
select_items :=
'SELECT '
|| select_items
|| ' FROM STAGED_FILE where row_type=1 AND rownum <= 1000;';
DBMS_OUTPUT.PUT_LINE (select_items);
END;
this spits out something like this:
SELECT CASE
WHEN is_number (SUBSTR (row_data, 1, 1)) = 'TRUE'
THEN
TO_NUMBER (SUBSTR (row_data, 1, 1))
ELSE
NULL
END
AS REC_TYPE_IND,
SUBSTR (row_data, 11, 4) AS SRVC_LOC,
CASE
WHEN ISDATE (SUBSTR (row_data, 15, 8)) = 'true'
THEN
TO_DATE (SUBSTR (row_data, 15, 8), 'YYYYMMDD')
ELSE
NULL
END
AS BEGIN_DT,
CASE
WHEN ISDATE (SUBSTR (row_data, 23, 8)) = 'true'
THEN
TO_DATE (SUBSTR (row_data, 23, 8), 'YYYYMMDD')
ELSE
NULL
END
AS END_DT,
SUBSTR (row_data, 31, 50) AS ID,
SUBSTR (row_data, 101, 2) AS COUNTY_CD,
SUBSTR (row_data, 103, 30) AS ADDR_LN_1,
SUBSTR (row_data, 133, 30) AS ADDR_LN_2,
SUBSTR (row_data, 163, 18) AS CITY,
SUBSTR (row_data, 181, 2) AS STATE_CD,
CASE
WHEN is_number (SUBSTR (row_data, 183, 5)) = 'TRUE'
THEN
TO_NUMBER (SUBSTR (row_data, 183, 5))
ELSE
NULL
END
AS ZIP_CD,
CASE
WHEN is_number (SUBSTR (row_data, 188, 4)) = 'TRUE'
THEN
TO_NUMBER (SUBSTR (row_data, 188, 4))
ELSE
NULL
END
AS ZIP_CD4,
CASE
WHEN is_number (SUBSTR (row_data, 192, 10)) = 'TRUE'
THEN
TO_NUMBER (SUBSTR (row_data, 192, 10))
ELSE
NULL
END
AS PHONE_NUM
FROM staged_FILE
WHERE row_type = 1 AND ROWNUM <= 1000;
Now off to solve how to dynamically create an associative array to stuff the data into or another way to work with the data.