3

i have a complex SQL like below. I'm looking for a python (3+) library that would extract the table names that are contained in the from clauses. I'm mapping a client's database & views & reports, and have dozens of queries to map, so the need to automate the job.

Already looked at Python-sqlparse , sqlparse, but I'm not sure they are up to the task and I'm trying to avoid the learning curve.

SELECT --Locais Disponíveis para Abertura de Ordens de corte
     COALESCE(a."Chave", b."Chave") AS "Chave",
     COALESCE(a."Unidade", b."Unidade") AS "Unidade",
     COALESCE(a."Fazenda", b."Fazenda") AS "Fazenda",
     COALESCE(a."Talhao", b."Talhao") AS "Talhao",
     COALESCE(a."Participacao", b."Participacao") AS "Participacao",
     CASE
       WHEN a."Condicao" = 'Disponível Parcial (Moagem)' AND
            b."Condicao" = 'Disponível Parcial (Mudas)' THEN
        'Disponível (Safra+Mudas)'
       ELSE
        COALESCE(a."Condicao", b."Condicao")
     END AS "Condicao",
     COALESCE(a."Estagio", b."Estagio") AS "Estagio",
     COALESCE(a."Variedade", b."Variedade") AS "Variedade",
     COALESCE(a."Ciclo Maturacao", b."Ciclo Maturacao") AS "Ciclo Maturacao",
     COALESCE(a."Propriedade", b."Propriedade") AS "Propriedade",
     COALESCE(a."Proprietario", b."Proprietario") AS "Proprietario",
     COALESCE(a."No. Corte", b."No. Corte") AS "No. Corte",
     (CASE
       WHEN a."Area" IS NULL THEN
    0
       ELSE
        a."Area"
     END + CASE
       WHEN b."Area" IS NULL THEN
    0
       ELSE
        b."Area"
     END) AS "Area",
     CASE
       WHEN a."Condicao" = 'Disponível Parcial (Moagem)' AND
            b."Condicao" = 'Disponível Parcial (Mudas)' THEN
        ((CASE
          WHEN a."Area" IS NULL THEN
    0
          ELSE
           a."Area"
        END + CASE
          WHEN b."Area" IS NULL THEN
    0
          ELSE
           b."Area"
        END) * a."TCH")
       ELSE
        a."Toneladas"
     END AS "Toneladas",
     a."TCH",
     COALESCE(a."Distancia", b."Distancia") AS "Distancia"
      FROM (SELECT --Disponibilidade (Moagem)
             A.*,
             a."Area" * b."TCH" AS "Toneladas",
             b."TCH" AS "TCH",
             c."Dist. Terra" + c."Dist. Asfalto" AS "Distancia"
              FROM ((SELECT --ÁREAS DISPONÍVEIS PARA ABERTURA DE ORDEM CORTE DE SAFRA
                      a."Fazenda" * 1000 + a."Talhao" AS "Chave",
                      CASE
                        WHEN a."Unidade" = 15 THEN
                         'USF'
                        ELSE
                         'URD'
                      END AS "Unidade",
                      a."Fazenda",
                      a."Talhao",
                      a."Participacao",
                      CASE
                        WHEN a."Ocorrencia Cadastro" = 'C' THEN
                         'Disponível Total (Moagem)'
                        ELSE
                         'Disponível Parcial (Moagem)'
                      END AS "Condicao",
                      a."Estagio",
                      a."Variedade",
                      a."Ciclo Maturacao",
                      a."Propriedade",
                      a."Proprietario",
                      a."No. Corte",
                      (a."Area" - (CASE
                        WHEN b."Area Fechada" IS NULL THEN
    0
                        ELSE
                         b."Area Fechada"
                      END)) AS "Area"
                       FROM (SELECT --ULTIMA ESTIMATIVA DO TALHAO A
                              OBJ.CD_UNID_IND AS "Unidade",
                              OBJ.CD_UPNIVEL1 AS "Fazenda",
                              OBJ.CD_UPNIVEL3 AS "Talhao",
                              OBJ.CD_UPNIVEL1 || ' - ' || F.DE_UPNIVEL1 AS "Propriedade",
                              G.DE_FORNEC AS "Proprietario",
                              CASE
                                WHEN UP3.CD_TP_PROPR IN (1, 2, 3, 11) THEN
                                 'Parceria'
                                WHEN UP3.CD_TP_PROPR IN (5, 8) THEN
                                 'Fornecedor'
                                WHEN UP3.CD_TP_PROPR = 6 THEN
                                 'Fornecedor'
                                WHEN UP3.CD_TP_PROPR = 14 THEN
                                 'Parceria'
                                ELSE
                                 'Verificar'
                              END AS "Participacao",
                              C.FG_OCORREN AS "Ocorrencia Cadastro",
                              C.DT_OCORREN AS "Data Ocorrencia",
                              B.DA_ESTAGIO AS "Estagio",
                              B.NO_CORTE AS "No. Corte",
                              D.DE_VARIED AS "Variedade",
                              E.DE_MATURAC AS "Ciclo Maturacao",
                              (OBJ.QT_AREA_PROD * 1) AS "Area",
                              (OBJ.QT_CANA_ENTR / 1000) AS "Toneladas"
                               FROM PIMSCS.HISTPREPRO   OBJ,
                                    PIMSCS.ESTAGIOS     B,
                                    PIMSCS.UPNIVEL3     UP3,
                                    PIMSCS.SAFRUPNIV3   C,
                                    PIMSCS.VARIEDADES   D,
                                    PIMSCS.TIPO_MATURAC E,
                                    PIMSCS.UPNIVEL1     F,
                                    PIMSCS.FORNECS      G
                              WHERE OBJ.CD_SAFRA =
                                    (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                                AND OBJ.CD_UNID_IND IN (15, 19)
                                AND OBJ.CD_ESTAGIO = B.CD_ESTAGIO
                                AND OBJ.CD_UPNIVEL1 = UP3.CD_UPNIVEL1
                                AND OBJ.CD_UPNIVEL3 = UP3.CD_UPNIVEL3
                                AND OBJ.CD_SAFRA = UP3.CD_SAFRA
                                AND OBJ.CD_UPNIVEL1 = C.CD_UPNIVEL1
                                AND OBJ.CD_UPNIVEL3 = C.CD_UPNIVEL3
                                AND OBJ.CD_SAFRA = C.CD_SAFRA
                                AND UP3.CD_VARIED = D.CD_VARIED
                                AND E.FG_MATURAC = D.FG_MATURAC
                                AND OBJ.CD_UPNIVEL1 = F.CD_UPNIVEL1
                                AND F.CD_FORNEC = G.CD_FORNEC
                                AND C.DT_OCORREN =
                                    (SELECT MAX(D.DT_OCORREN)
                                       FROM PIMSCS.SAFRUPNIV3 D
                                      WHERE D.CD_UPNIVEL1 = C.CD_UPNIVEL1
                                        AND D.CD_UPNIVEL3 = C.CD_UPNIVEL3
                                        AND D.CD_SAFRA = C.CD_SAFRA)
                                AND OBJ.CD_HIST =
                                    (SELECT OBJ2.CD_HIST
                                       FROM PIMSCS.HISTPREPRO OBJ2
                                      WHERE OBJ2.CD_UPNIVEL1 = OBJ.CD_UPNIVEL1
                                        AND OBJ2.CD_UPNIVEL3 = OBJ.CD_UPNIVEL3
                                        AND OBJ2.CD_SAFRA =
                                            (SELECT MAX(CD_SAFRA)
                                               FROM PIMSCS.HISTPREPRO)
                                        AND OBJ2.CD_HIST NOT IN ('E', 'S')
                                        AND OBJ2.CD_EMPRESA IN (15, 19)
                                        AND OBJ2.DT_HISTORICO =
                                            (SELECT MAX(OBJ3.DT_HISTORICO)
                                               FROM PIMSCS.HISTPREPRO OBJ3
                                              WHERE OBJ3.CD_UPNIVEL1 =
                                                    OBJ.CD_UPNIVEL1
                                                AND OBJ3.CD_UPNIVEL3 =
                                                    OBJ.CD_UPNIVEL3
                                                AND OBJ3.CD_SAFRA =
                                                    (SELECT MAX(CD_SAFRA)
                                                       FROM PIMSCS.HISTPREPRO)
                                                AND OBJ3.CD_HIST NOT IN ('E', 'S')
                                                AND OBJ3.CD_EMPRESA IN (15, 19)))) A,
                            (SELECT --ÁREA DE ORDEM DE CORTE DE SAFRA FECHADA B
                              QD.CD_UPNIVEL1 AS "Fazenda",
                              QD.CD_UPNIVEL3 AS "Talhao",
                              SUM(QD.QT_AREA) AS "Area Fechada"
                               FROM PIMSCS.QUEIMA_HE QH, PIMSCS.QUEIMA_DE QD
                              WHERE QH.NO_QUEIMA = QD.NO_QUEIMA
                                AND QD.CD_SAFRA =
                                    (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                              GROUP BY QD.CD_UPNIVEL1, QD.CD_UPNIVEL3) B
                      WHERE a."Fazenda" = b."Fazenda"(+)
                        AND a."Talhao" = b."Talhao"(+)
                        AND a."Ocorrencia Cadastro" <> 'F'
                        AND (a."Area" - (CASE
                              WHEN b."Area Fechada" IS NULL THEN
    0
                              ELSE
                               b."Area Fechada"
                            END)) > 0)) A
              LEFT JOIN (SELECT --Ultima Estimativa do Talhão
                         A.CD_HIST "Cod. Historico",
                         CASE
                           WHEN A.CD_UNID_IND = 15 THEN
                            'USF'
                           ELSE
                            'URD'
                         END AS "Unidade",
                         A.CD_UPNIVEL1 AS "Zona",
                         A.CD_UPNIVEL3 AS "Talhao",
                         A.DT_HISTORICO AS "Data",
                         A.QT_AREA_PROD AS "Area",
                         (A.QT_CANA_ENTR / 1000) AS "Toneladas",
                         A.QT_TCH AS "TCH"
                          FROM PIMSCS.HISTPREPRO A
                         WHERE A.CD_UNID_IND IN (15, 19)
                           AND A.CD_SAFRA =
                               (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                           AND A.CD_HIST NOT IN ('E', 'S')
                           AND A.QT_AREA_PROD <> 0
                           AND A.DT_HISTORICO =
                               (SELECT MAX(A2.DT_HISTORICO)
                                  FROM PIMSCS.HISTPREPRO A2
                                 WHERE A.CD_SAFRA = A2.CD_SAFRA
                                   AND A.CD_UPNIVEL2 = A2.CD_UPNIVEL1
                                   AND A.CD_UPNIVEL3 = A2.CD_UPNIVEL3
                                   AND A2.CD_HIST NOT IN ('E', 'S'))) B
                ON a."Fazenda" = b."Zona"
               AND a."Talhao" = b."Talhao"
              LEFT JOIN (SELECT --Distancia Cadastrada 
                         A.CD_UPNIVEL1 AS "Zona",
                         A.CD_UPNIVEL3 AS "Talhao",
                         MAX(A.DS_TERRA) AS "Dist. Terra",
                         MAX(A.DS_ASFALTO) AS "Dist. Asfalto"
                          FROM PIMSCS.UPNIVEL3 A
                          LEFT JOIN PIMSCS.SAFRUPNIV3 B
                            ON A.CD_SAFRA = B.CD_SAFRA
                           AND A.CD_UPNIVEL1 = B.CD_UPNIVEL1
                           AND A.CD_UPNIVEL3 = B.CD_UPNIVEL3
                         WHERE A.CD_UNID_IND IN (15, 19)
                           AND A.CD_OCUP = 1
                           AND A.CD_SAFRA =
                               (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                           AND B.CD_SAFRA =
                               (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                           AND B.FG_OCORREN <> 'I'
                           AND B.DT_OCORREN =
                               (SELECT MAX(B2.DT_OCORREN)
                                  FROM PIMSCS.SAFRUPNIV3 B2
                                 WHERE B.CD_SAFRA = B2.CD_SAFRA
                                   AND B.CD_UPNIVEL1 = B2.CD_UPNIVEL1
                                   AND B.CD_UPNIVEL3 = B2.CD_UPNIVEL3)
                         GROUP BY A.CD_UPNIVEL1, A.CD_UPNIVEL3) C
                ON a."Fazenda" = c."Zona"
               AND a."Talhao" = c."Talhao") A
      FULL JOIN (SELECT --Disponibilidade (Mudas)
                  A.*,
                  a."Area" * b."TCH" AS "Toneladas",
                  b."TCH" AS "TCH",
                  c."Dist. Terra" + c."Dist. Asfalto" AS "Distancia"
                   FROM ((SELECT --ÁREAS DISPONÍVEIS PARA ABERTURA DE ORDEM CORTE DE SAFRA
                           a."Fazenda" * 1000 + a."Talhao" AS "Chave",
                           CASE
                             WHEN a."Unidade" = 15 THEN
                              'USF'
                             ELSE
                              'URD'
                           END AS "Unidade",
                           a."Fazenda",
                           a."Talhao",
                           a."Participacao",
                           CASE
                             WHEN a."Ocorrencia Cadastro" = 'C' THEN
                              'Disponível Total (Mudas)'
                             ELSE
                              'Disponível Parcial (Mudas)'
                           END AS "Condicao",
                           a."Estagio",
                           a."Variedade",
                           a."Ciclo Maturacao",
                           a."Propriedade",
                           a."Proprietario",
                           a."No. Corte",
                           (a."Area" - (CASE
                             WHEN b."Area Fechada" IS NULL THEN
    0
                             ELSE
                              b."Area Fechada"
                           END)) AS "Area"
                            FROM (SELECT --ULTIMA ESTIMATIVA DO TALHAO A
                                   OBJ.CD_UNID_IND AS "Unidade",
                                   OBJ.CD_UPNIVEL1 AS "Fazenda",
                                   OBJ.CD_UPNIVEL3 AS "Talhao",
                                   OBJ.CD_UPNIVEL1 || ' - ' || F.DE_UPNIVEL1 AS "Propriedade",
                                   G.DE_FORNEC AS "Proprietario",
                                   CASE
                                     WHEN UP3.CD_TP_PROPR IN (1, 2, 3, 11) THEN
                                      'Parceria'
                                     WHEN UP3.CD_TP_PROPR IN (5, 8) THEN
                                      'Fornecedor'
                                     WHEN UP3.CD_TP_PROPR = 6 THEN
                                      'Fornecedor'
                                     WHEN UP3.CD_TP_PROPR = 14 THEN
                                      'Parceria'
                                     ELSE
                                      'Verificar'
                                   END AS "Participacao",
                                   C.FG_OCORREN AS "Ocorrencia Cadastro",
                                   C.DT_OCORREN AS "Data Ocorrencia",
                                   B.DA_ESTAGIO AS "Estagio",
                                   B.NO_CORTE AS "No. Corte",
                                   D.DE_VARIED AS "Variedade",
                                   E.DE_MATURAC AS "Ciclo Maturacao",
                                   (OBJ.QT_AREA_PROD * 1) AS "Area",
                                   (OBJ.QT_CANA_ENTR / 1000) AS "Toneladas"
                                    FROM PIMSCS.HISTPREPRO   OBJ,
                                         PIMSCS.ESTAGIOS     B,
                                         PIMSCS.UPNIVEL3     UP3,
                                         PIMSCS.SAFRUPNIV3   C,
                                         PIMSCS.VARIEDADES   D,
                                         PIMSCS.TIPO_MATURAC E,
                                         PIMSCS.UPNIVEL1     F,
                                         PIMSCS.FORNECS      G
                                   WHERE OBJ.CD_SAFRA =
                                         (SELECT MAX(CD_SAFRA)
                                            FROM PIMSCS.HISTPREPRO)
                                     AND OBJ.CD_UNID_IND IN (15, 19)
                                     AND OBJ.CD_ESTAGIO = B.CD_ESTAGIO
                                     AND OBJ.CD_UPNIVEL1 = UP3.CD_UPNIVEL1
                                     AND OBJ.CD_UPNIVEL3 = UP3.CD_UPNIVEL3
                                     AND OBJ.CD_SAFRA = UP3.CD_SAFRA
                                     AND OBJ.CD_UPNIVEL1 = C.CD_UPNIVEL1
                                     AND OBJ.CD_UPNIVEL3 = C.CD_UPNIVEL3
                                     AND OBJ.CD_SAFRA = C.CD_SAFRA
                                     AND UP3.CD_VARIED = D.CD_VARIED
                                     AND E.FG_MATURAC = D.FG_MATURAC
                                     AND OBJ.CD_UPNIVEL1 = F.CD_UPNIVEL1
                                     AND F.CD_FORNEC = G.CD_FORNEC
                                     AND C.DT_OCORREN =
                                         (SELECT MAX(D.DT_OCORREN)
                                            FROM PIMSCS.SAFRUPNIV3 D
                                           WHERE D.CD_UPNIVEL1 = C.CD_UPNIVEL1
                                             AND D.CD_UPNIVEL3 = C.CD_UPNIVEL3
                                             AND D.CD_SAFRA = C.CD_SAFRA)
                                     AND OBJ.CD_HIST =
                                         (SELECT OBJ2.CD_HIST
                                            FROM PIMSCS.HISTPREPRO OBJ2
                                           WHERE OBJ2.CD_UPNIVEL1 = OBJ.CD_UPNIVEL1
                                             AND OBJ2.CD_UPNIVEL3 = OBJ.CD_UPNIVEL3
                                             AND OBJ2.CD_SAFRA =
                                                 (SELECT MAX(CD_SAFRA)
                                                    FROM PIMSCS.HISTPREPRO)
                                             AND OBJ2.CD_HIST = 'S'
                                             AND OBJ2.CD_EMPRESA IN (15, 19)
                                             AND OBJ2.DT_HISTORICO =
                                                 (SELECT MAX(OBJ3.DT_HISTORICO)
                                                    FROM PIMSCS.HISTPREPRO OBJ3
                                                   WHERE OBJ3.CD_UPNIVEL1 =
                                                         OBJ.CD_UPNIVEL1
                                                     AND OBJ3.CD_UPNIVEL3 =
                                                         OBJ.CD_UPNIVEL3
                                                     AND OBJ3.CD_SAFRA =
                                                         (SELECT MAX(CD_SAFRA)
                                                            FROM PIMSCS.HISTPREPRO)
                                                     AND OBJ3.CD_HIST = 'S'
                                                     AND OBJ3.CD_EMPRESA IN (15, 19)))) A,
                                 (SELECT --ÁREA DE ORDEM DE CORTE DE MUDAS FECHADA B
                                   A.CD_UPNIVEL1 AS "Fazenda",
                                   A.CD_UPNIVEL3 AS "Talhao",
                                   SUM(A.QT_AREA) AS "Area Fechada"
                                    FROM  PIMSCS.OCORTEMD_DE A
                                    JOIN PIMSCS.OCORTEMD_HE B
                                      ON A.NO_ORDEM = B.NO_ORDEM
                                   WHERE A.CD_SAFRA =
                                         (SELECT MAX(CD_SAFRA)
                                            FROM PIMSCS.HISTPREPRO)
                                     AND B.FG_SITUACAO = 'F'
                                   GROUP BY A.CD_UPNIVEL1, A.CD_UPNIVEL3) B
                           WHERE a."Fazenda" = b."Fazenda"(+)
                             AND a."Talhao" = b."Talhao"(+)
                             AND a."Ocorrencia Cadastro" <> 'F'
                             AND (a."Area" - (CASE
                                   WHEN b."Area Fechada" IS NULL THEN
    0
                                   ELSE
                                    b."Area Fechada"
                                 END)) > 0)) A
                   LEFT JOIN (SELECT --Ultima Estimativa do Talhão
                              A.CD_HIST "Cod. Historico",
                              CASE
                                WHEN A.CD_UNID_IND = 15 THEN
                                 'USF'
                                ELSE
                                 'URD'
                              END AS "Unidade",
                              A.CD_UPNIVEL1 AS "Zona",
                              A.CD_UPNIVEL3 AS "Talhao",
                              A.DT_HISTORICO AS "Data",
                              A.QT_AREA_PROD AS "Area",
                              (A.QT_CANA_ENTR / 1000) AS "Toneladas",
                              A.QT_TCH AS "TCH"
                               FROM PIMSCS.HISTPREPRO A
                              WHERE A.CD_UNID_IND IN (15, 19)
                                AND A.CD_SAFRA =
                                    (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                                AND A.CD_HIST = 'S'
                                AND A.QT_AREA_PROD <> 0
                                AND A.DT_HISTORICO =
                                    (SELECT MAX(A2.DT_HISTORICO)
                                       FROM PIMSCS.HISTPREPRO A2
                                      WHERE A.CD_SAFRA = A2.CD_SAFRA
                                        AND A.CD_UPNIVEL2 = A2.CD_UPNIVEL1
                                        AND A.CD_UPNIVEL3 = A2.CD_UPNIVEL3
                                        AND A2.CD_HIST = 'S')) B
                     ON a."Fazenda" = b."Zona"
                    AND a."Talhao" = b."Talhao"
                   LEFT JOIN (SELECT --Distancia Cadastrada 
                              A.CD_UPNIVEL1 AS "Zona",
                              A.CD_UPNIVEL3 AS "Talhao",
                              MAX(A.DS_TERRA) AS "Dist. Terra",
                              MAX(A.DS_ASFALTO) AS "Dist. Asfalto"
                               FROM PIMSCS.UPNIVEL3 A
                               LEFT JOIN PIMSCS.SAFRUPNIV3 B
                                 ON A.CD_SAFRA = B.CD_SAFRA
                                AND A.CD_UPNIVEL1 = B.CD_UPNIVEL1
                                AND A.CD_UPNIVEL3 = B.CD_UPNIVEL3
                              WHERE A.CD_UNID_IND IN (15, 19)
                                AND A.CD_OCUP = 1
                                AND A.CD_SAFRA =
                                    (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                                AND B.CD_SAFRA =
                                    (SELECT MAX(CD_SAFRA) FROM PIMSCS.HISTPREPRO)
                                AND B.FG_OCORREN <> 'I'
                                AND B.DT_OCORREN =
                                    (SELECT MAX(B2.DT_OCORREN)
                                       FROM PIMSCS.SAFRUPNIV3 B2
                                      WHERE B.CD_SAFRA = B2.CD_SAFRA
                                        AND B.CD_UPNIVEL1 = B2.CD_UPNIVEL1
                                        AND B.CD_UPNIVEL3 = B2.CD_UPNIVEL3)
                              GROUP BY A.CD_UPNIVEL1, A.CD_UPNIVEL3) C
                     ON a."Fazenda" = c."Zona"
                    AND a."Talhao" = c."Talhao") B
        ON a."Chave" = b."Chave"

Ajax1234
  • 69,937
  • 8
  • 61
  • 102
  • 1
    Can you explain what you mean by "the name of the table in the from clauses?" There are lots of anonymous subqueries within from statements in your example. – dsillman2000 Oct 23 '21 at 00:02
  • 1
    This is a good question. If you truly want to avoid using a parsing library, you can try to capture the desired content via regex. You'll need to analyze examples in your dataset to design the optimal regex. Then you can use `re.findall` to find the capture groups corresponding to all occurrences of your regular expression. See https://docs.python.org/3/library/re.html#re.Pattern.findall. I also agree with dsillman2000's comment, what are your requirements for nested queries? – Michael Sohnen Oct 23 '21 at 00:09
  • 1
    For non-quoted identifiers the source name (in general, maybe I've missed DB specific cases) should appear after `(from|join)[^[:graph:]]+` regex. But it all depends on the database: there may be nested function calls that returns resultsets, `values` clause and it will be very hard to deal with all that stuff. Since you use quoted identifiers the things become complicated. – astentx Oct 23 '21 at 00:20
  • @dsillman2000, by the name of the tables, I'm trying to get really all of them, including the subqueries. – Marcos Scalabrin Oct 25 '21 at 12:56
  • @MichaelSohnen, @, I'm really looking for some way to get **all** tables, including those in the subqueries. Tryied some regex, the complexity is added by the subqueries and joins. this hyerarchy made me go for a series os regexes... which started to be confusing and trick. – Marcos Scalabrin Oct 25 '21 at 13:02

2 Answers2

4

I created Sqlglot which can parse your SQL.

import sqlglot
import sqlglot.expressions as exp

expression = sqlglot.parse_one(..., read=“oracle”)
list(expression.find_all(exp.Table))
Toby Mao
  • 374
  • 2
  • 6
3

You can use sqlparse with a recursive generator function:

import sqlparse, re
def get_tables(d):
    f = False
    for i in getattr(d, 'tokens', []):
       if isinstance(i, sqlparse.sql.Token) and i.value.lower() == 'from':
          f = True
       elif isinstance(i, (sqlparse.sql.Identifier, sqlparse.sql.IdentifierList)) and f:
          f = False
          if not any(isinstance(x, sqlparse.sql.Parenthesis) or 'select' in x.value.lower() for x in getattr(i, 'tokens', [])):
             fr = ''.join(str(j) for j in i if j.value not in {'as', '\n'})
             for t in re.findall('(?:\w+\.\w+|\w+)\s+\w+|(?:\w+\.\w+|\w+)', fr):
                yield {'table':(t1:=t.split())[0], 'alias':None if len(t1) < 2 else t1[-1]}
       yield from get_tables(i)

print(list(get_tables(sqlparse.parse(s)[0])))

Output:

[{'table': 'PIMSCS.HISTPREPRO', 'alias': 'OBJ'}, {'table': 'PIMSCS.ESTAGIOS', 'alias': 'B'}, {'table': 'PIMSCS.UPNIVEL3', 'alias': 'UP3'}, {'table': 'PIMSCS.SAFRUPNIV3', 'alias': 'C'}, {'table': 'PIMSCS.VARIEDADES', 'alias': 'D'}, {'table': 'PIMSCS.TIPO_MATURAC', 'alias': 'E'}, {'table': 'PIMSCS.UPNIVEL1', 'alias': 'F'}, {'table': 'PIMSCS.FORNECS', 'alias': 'G'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.SAFRUPNIV3', 'alias': 'D'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'OBJ2'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'OBJ3'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.QUEIMA_HE', 'alias': 'QH'}, {'table': 'PIMSCS.QUEIMA_DE', 'alias': 'QD'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'A'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'A2'}, {'table': 'PIMSCS.UPNIVEL3', 'alias': 'A'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.SAFRUPNIV3', 'alias': 'B2'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'OBJ'}, {'table': 'PIMSCS.ESTAGIOS', 'alias': 'B'}, {'table': 'PIMSCS.UPNIVEL3', 'alias': 'UP3'}, {'table': 'PIMSCS.SAFRUPNIV3', 'alias': 'C'}, {'table': 'PIMSCS.VARIEDADES', 'alias': 'D'}, {'table': 'PIMSCS.TIPO_MATURAC', 'alias': 'E'}, {'table': 'PIMSCS.UPNIVEL1', 'alias': 'F'}, {'table': 'PIMSCS.FORNECS', 'alias': 'G'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.SAFRUPNIV3', 'alias': 'D'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'OBJ2'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'OBJ3'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.OCORTEMD_DE', 'alias': 'A'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'A'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': 'A2'}, {'table': 'PIMSCS.UPNIVEL3', 'alias': 'A'}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.HISTPREPRO', 'alias': None}, {'table': 'PIMSCS.SAFRUPNIV3', 'alias': 'B2'}]
Ajax1234
  • 69,937
  • 8
  • 61
  • 102