0

I have a JSON structure in a field that looks like this. I'm trying to extract the task viewed and completed date, there could be any number of tasks in each field.

Sample data

    "task_1a232445": {
        "completedDate": {
            "_seconds": 1670371200,
            "_nanoseconds": 516000000
        },
        "viewedDate": {
            "_seconds": 1666652400,
            "_nanoseconds": 667000000
        }
    },
    "task_1a233445": {
        "completedDate": {
            "_seconds": 1670198400,
            "_nanoseconds": 450000000
        },
        "viewedDate": {
            "_seconds": 1674000000,
            "_nanoseconds": 687000000
        }
    }
}

I have tried to adapt this previous question I asked, but where there are multiple tasks in a single row (as sample data) I can only return the first completedDate

Tom
  • 33,626
  • 31
  • 85
  • 109

1 Answers1

1

Consider below approach

create temp function extract_keys(input string) returns array<string> language js as """
  return Object.keys(JSON.parse(input));
  """;
create temp function extract_values(input string) returns array<string> language js as """
  return Object.values(JSON.parse(input));
  """;
create temp function get_leaves(input string) returns string language js as '''
  function flattenObj(obj, parent = '', res = {}){
    for(let key in obj){
        let propName = parent ? parent + '.' + key : key;
        if(typeof obj[key] == 'object'){
            flattenObj(obj[key], propName, res);
        } else {
            res[propName] = obj[key];
        }
    }
    return JSON.stringify(res);
  }
  return flattenObj(JSON.parse(input));
  ''';
select * from (
  select 
    arr[safe_offset(0)] task,
    arr[safe_offset(1)] date,
    date(timestamp_seconds(cast(val as int64))) val 
  from your_table, unnest([get_leaves(json)]) leaves,
  unnest(extract_keys(leaves)) key with offset
  join unnest(extract_values(leaves)) val with offset
  using(offset),
  unnest([struct(split(key, '.') as arr)])
  where arr[safe_offset(2)] = '_seconds'
)
pivot (any_value(val) for date in ('completedDate', 'viewedDate'))     

if applied to sample data in your question

with your_table as (
  select '''{
    "task_1a232445": {
        "completedDate": {
            "_seconds": 1670371200,
            "_nanoseconds": 516000000
        },
        "viewedDate": {
            "_seconds": 1666652400,
            "_nanoseconds": 667000000
        }
    },
    "task_1a233445": {
        "completedDate": {
            "_seconds": 1670198400,
            "_nanoseconds": 450000000
        },
        "viewedDate": {
            "_seconds": 1674000000,
            "_nanoseconds": 687000000
        }
    }
  }
  ''' as json
  )    

output is

enter image description here

Mikhail Berlyant
  • 165,386
  • 8
  • 154
  • 230