2

I'm new at using UTF_FILE in Oracle. I have a CSV-file with over 300K records and 6 columns, the columns are separated by comma ",".

I'm looking to split all this data and set that to some variables and insert in some tables or columns by Pl/SQL but how can I do that?

For now the time of execution of the pl/sql is not important [Oracle 11gR2 XE]

Sudipta Mondal
  • 2,550
  • 1
  • 19
  • 20
Sr Jefers
  • 87
  • 4
  • 14
  • You can do it via`UTL_FILE`, read each line and create a insert statement dynamically, but an easier way for you is to use SQL LOADER. – Sudipta Mondal Aug 13 '18 at 03:35
  • I have the problem that I want to save the data splited into variables to analize – Sr Jefers Aug 13 '18 at 04:04
  • 3
    As Mentioned here (https://asktom.oracle.com/pls/apex/asktom.search?tag=utl-file-to-read-csv) a external table would be a better way of going, you can then use standard SQL / plsql to do the analysis of the table. If you are dead set on using UTL_FILE Tom also links to a couple of examples of that. – Shaun Peterson Aug 13 '18 at 04:32
  • For this time I don't have the option to do that, just work with CVS files and load data from the files and analize by Pl/SQL – Sr Jefers Aug 13 '18 at 07:27

2 Answers2

1

Solution to you question:

DECLARE
    v_row VARCHAR2(200) := 'a,b,c';
BEGIN
    FOR v_col IN (SELECT REGEXP_SUBSTR (v_row, '[^,]+', 1, LEVEL) cell FROM DUAL CONNECT BY REGEXP_SUBSTR (v_row, '[^,]+', 1, LEVEL) IS NOT NULL)
    LOOP
        DBMS_OUTPUT.PUT_LINE (v_col.cell);
    END LOOP;
END;

But like the comments say: You shouldn't do this. Use SQL Loader: Oracle: Import CSV file

kara
  • 3,205
  • 4
  • 20
  • 34
  • Yeap REGEXP_SUBSTR is the solution for this but I didn't try this query, I used [this](https://stackoverflow.com/a/31464415/7102575) and a SELECT INTO to set into variables and next work – Sr Jefers Aug 25 '18 at 16:53
1

Hi I've used this code years ago and it works

you can change it to use regexp instead of complicated substrs

you can use index-by tables instead of nested table type I've used, the nested can be inserted in a table but index-by is only usable on pl/sql and cannot be passed as parameter to other stored programs(which nested can).

CREATE OR REPLACE TYPE ARRAY_OF_TEXTS AS TABLE OF VARCHAR2 (4000);

CREATE OR REPLACE TYPE ARRAY_OF_ARRAYS AS TABLE OF ARRAY_OF_TEXTS;

CREATE OR REPLACE FUNCTION SPLIT (
   TEXT                  IN CLOB,
   C_SEPARATOR           IN VARCHAR2,
   IGNORE_EMBTY_BLOCKS   IN BOOLEAN DEFAULT FALSE)
   RETURN ARRAY_OF_TEXTS
IS
   S_LINE     VARCHAR2 (4000);
   S_TABLE    ARRAY_OF_TEXTS;
   I          INTEGER;
   OFFSET1    INTEGER := 1;
   OFFSET2    INTEGER;
   TEXT_LEN   INTEGER;
BEGIN
   S_TABLE := ARRAY_OF_TEXTS ();
   OFFSET2 := INSTR (TEXT, C_SEPARATOR, OFFSET1);
   TEXT_LEN := DBMS_LOB.GETLENGTH (TEXT);

   IF OFFSET2 < 1 --if there is no c_separator (if offset2 is 0) or there is not any c_separator at the end of text
   THEN
      OFFSET2 := TEXT_LEN;
   END IF;

   WHILE     (OFFSET2 = OFFSET1 + LENGTH (C_SEPARATOR) OR OFFSET2 = OFFSET1)
         AND DBMS_LOB.SUBSTR (TEXT, LENGTH (C_SEPARATOR), OFFSET1) =
                C_SEPARATOR         -- if there are 2 c_separator sequentially
   LOOP
      IF NOT IGNORE_EMBTY_BLOCKS
      THEN
         S_TABLE.EXTEND;
         S_TABLE (S_TABLE.LAST) := NULL;
      END IF;

      OFFSET1 := OFFSET2 + LENGTH (C_SEPARATOR);
      OFFSET2 := DBMS_LOB.INSTR (TEXT, C_SEPARATOR, OFFSET1);
   END LOOP;

   IF OFFSET2 > OFFSET1 + LENGTH (C_SEPARATOR)
   THEN
      S_TABLE.EXTEND;

      S_LINE := DBMS_LOB.SUBSTR (TEXT, OFFSET2 - OFFSET1, OFFSET1);
      S_TABLE (S_TABLE.LAST) := S_LINE;
      OFFSET1 := OFFSET2 + LENGTH (C_SEPARATOR);
   END IF;

   OFFSET2 := DBMS_LOB.INSTR (TEXT, C_SEPARATOR, OFFSET1);

   WHILE OFFSET2 > 1
   LOOP
      S_TABLE.EXTEND;

      S_LINE := DBMS_LOB.SUBSTR (TEXT, OFFSET2 - OFFSET1, OFFSET1);
      S_TABLE (S_TABLE.LAST) := S_LINE;
      OFFSET1 := OFFSET2 + LENGTH (C_SEPARATOR);
      OFFSET2 := DBMS_LOB.INSTR (TEXT, C_SEPARATOR, OFFSET1);

      WHILE     (   OFFSET2 = OFFSET1 + LENGTH (C_SEPARATOR)
                 OR OFFSET2 = OFFSET1)
            AND DBMS_LOB.SUBSTR (TEXT, LENGTH (C_SEPARATOR), OFFSET1) =
                   C_SEPARATOR      -- if there are 2 c_separator sequentially
      LOOP
         IF NOT IGNORE_EMBTY_BLOCKS
         THEN
            S_TABLE.EXTEND;
            S_TABLE (S_TABLE.LAST) := NULL;
         END IF;

         OFFSET1 := OFFSET2 + LENGTH (C_SEPARATOR);
         OFFSET2 := DBMS_LOB.INSTR (TEXT, C_SEPARATOR, OFFSET1);
      END LOOP;
   END LOOP;

   IF OFFSET1 < TEXT_LEN
   THEN
      S_TABLE.EXTEND;
      S_LINE :=
         DBMS_LOB.SUBSTR (
                          TEXT,
                          TEXT_LEN - OFFSET1 + LENGTH (C_SEPARATOR),
                          OFFSET1
                         );
      S_TABLE (S_TABLE.LAST) := S_LINE;
   ELSIF     OFFSET1 = TEXT_LEN
         AND DBMS_LOB.SUBSTR (TEXT, LENGTH (C_SEPARATOR), OFFSET1) <>
                C_SEPARATOR
   THEN
      S_TABLE.EXTEND;
      S_LINE.TEXT := DBMS_LOB.SUBSTR (TEXT, LENGTH (C_SEPARATOR), OFFSET1);
      S_TABLE (S_TABLE.LAST) := S_LINE;
   END IF;

   RETURN S_TABLE;
END;

CREATE OR REPLACE FUNCTION LOAD_CSV_FILE (
                                          P_DIRECTORY   IN VARCHAR2,
                                          P_FILENAME    IN VARCHAR2
                                         )
   RETURN ARRAY_OF_ARRAYS
AS
   SEPARATOR1   VARCHAR2 (2) := CHR (10);
   -- In Some Cases you should use: CHR (13) || CHR (10);
   SEPARATOR2   VARCHAR2 (1) := ',';
   --if csv separator is ; or | use it here
   V_TEXT       CLOB;
   V_BFILE      BFILE;

   V_LINES      ARRAY_OF_TEXTS;
   V_LINE       ARRAY_OF_TEXTS;
   V_ARRAY      ARRAY_OF_ARRAYS;
BEGIN
   SELECT EMPTY_CLOB () INTO V_TEXT FROM DUAL;
   --V_TEXT := EMPTY_CLOB ();
    DBMS_LOB.CREATETEMPORARY(V_TEXT, TRUE);

   V_BFILE := BFILENAME (P_DIRECTORY, P_FILENAME);

   IF DBMS_LOB.FILEEXISTS (V_BFILE) <> 1
   THEN
      RETURN NULL;
   END IF;

   DBMS_LOB.FILEOPEN (V_BFILE, DBMS_LOB.FILE_READONLY);
   DBMS_LOB.LOADFROMFILE (V_TEXT, V_BFILE, DBMS_LOB.GETLENGTH (V_BFILE));
   DBMS_LOB.FILECLOSE (V_BFILE);

   V_LINES := SPLIT2 (V_TEXT, SEPARATOR1);
   V_ARRAY := ARRAY_OF_ARRAYS ();

   FOR R_LINE IN (SELECT *
                  FROM TABLE (V_LINES))
   LOOP
      V_LINE := SPLIT(R_LINE.COLUMN_VALUE, SEPARATOR2);
      V_ARRAY.EXTEND ();
      V_ARRAY (V_ARRAY.LAST) := V_LINE;
   END LOOP;

   RETURN V_ARRAY;
END;

now you can use it like this:

DECLARE
   V_ARR    ARRAY_OF_ARRAYS;
   V_LINE   VARCHAR2 (4000);
BEGIN
   V_ARR := LOAD_CSV_FILE ('MY_DIR', 'file.csv');

   FOR LINE IN (SELECT *
                FROM TABLE (V_ARR))
   LOOP
      FOR FIELD IN (SELECT *
                    FROM TABLE (LINE.COLUMN_VALUE))
      LOOP
         DBMS_OUTPUT.PUT_LINE ('field:' || FIELD.COLUMN_VALUE);
      END LOOP;

      DBMS_OUTPUT.PUT_LINE ('end of line');
   END LOOP;
END;
hmmftg
  • 1,274
  • 1
  • 18
  • 31