You may use regexp_replace
with capturing group for patterns that you do not want to keep and specify only groups of interest in the replacement string.
See example below in Impala (impalad version 3.4.0):
select
addr_list,
/*Concat is used just for visualization*/
rtrim(ltrim(regexp_replace(addr_list,concat(
/*Group of 169.254.*.* that should be excluded*/
'(169\\.254\\.\\d{1,3}\\.\\d{1,3})', '|',
/*Another group for 192.168.0.1*/
'(192\.168\.0\.1)', '|',
/*And the group that we need to keep*/
'(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})'
/*So keep the third group in the output.
Other groups will be replaced with empty string*/
), '\\3'), ','), ',') as ip_whitelist
from(values
('169.254.182.175,192.168.0.1,169.254.2.12,10.199.44.111,169.254.0.2' as addr_list),
('10.58.3.142,169.254.2.12'),
('192.168.0.1,192.100.0.2,154.16.171.3')
) as t
addr_list |
ip_whitelist |
169.254.182.175,192.168.0.1,169.254.2.12,10.199.44.111,169.254.0.2 |
10.199.44.111 |
10.58.3.142,169.254.2.12 |
10.58.3.142 |
192.168.0.1,192.100.0.2,154.16.171.3 |
192.100.0.2,154.16.171.3 |
regexp_extract
works differently for unknown reason, because the same regex with 3 as return group doesn't return anything at all for case 1 and 3.
select
t.addr_list,
rtrim(ltrim(regexp_replace(addr_list, r.regex, '\\3'), ','), ',') as ip_whitelist,
regexp_extract(addr_list, r.regex, 3) as ip_wl_extract
from(values
('169.254.182.175,192.168.0.1,169.254.2.12,10.199.44.111,169.254.0.2' as addr_list),
('10.58.3.142,169.254.2.12'),
('192.168.0.1,192.100.0.2,154.16.171.3')
) as t
cross join (
select concat(
'(169\\.254\\.\\d{1,3}\\.\\d{1,3})', '|',
'(192\.168\.0\.1)', '|',
'(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})'
) as regex
) as r
addr_list |
ip_whitelist |
ip_wl_extract |
169.254.182.175,192.168.0.1,169.254.2.12,10.199.44.111,169.254.0.2 |
10.199.44.111 |
|
10.58.3.142,169.254.2.12 |
10.58.3.142 |
10.58.3.142 |
192.168.0.1,192.100.0.2,154.16.171.3 |
192.100.0.2,154.16.171.3 |
|