One of the approach could be to use pattern matching like below
(Assumption - Each query is separated by newline and one complete sql query is in one line)
library(stringr)
library(dplyr)
library(tidyr)
#read file having sql query
txt <- readLines("test.txt")
#extract column name & table name
df <- data.frame(column_name = str_match(txt, "Select\\s+(.*?)\\s+from")[,2],
table_name = str_match(txt, "from\\s+(.*?)\\s+where")[,2])
#clean above extracted data to have the final outcome
df <- df %>%
separate_rows(column_name, sep = ",") %>%
separate_rows(table_name, sep = ",") %>%
filter(word(trimws(column_name), 1, sep = "\\.") == word(trimws(table_name), -1)) %>%
mutate(column_name = word(trimws(column_name), -1, sep = "\\."),
table_name = word(trimws(table_name), 1))
which gives
> df
column_name table_name
1 Name employee
2 Age employee
3 Dept_Name department
Sample data:
test.txt
contains
Select T1.Name , T1.Age, T2.Dept_Name from employee T1 , department T2 where T1.Dept_No= T2.Dept_No