Get value of substrings after splitting

Question

I have a json file which looks something like this:

{
    "model": "Sequential",
    "layers": [
        {
            "L1": "Conv2D(filters = 64, kernel_size=(2,2), strides=(2,2), padding='same', data_format='channels_last', activation='relu', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.), input_shape=(224,224,3))",
            "L2": "MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same', data_format='channels_last')",
            "L3": "Conv2D(filters = 64, kernel_size=(2,2), strides=(2,2), padding='same', data_format='channels_last', activation='relu', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L4": "MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same', data_format='channels_last')",
            "L5": "Conv2D(filters = 64, kernel_size=(2,2), strides=(2,2), padding='same', data_format='channels_last', activation='relu', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L6": "Conv2D(filters = 64, kernel_size=(2,2), strides=(2,2), padding='same', data_format='channels_last', activation='relu', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L7": "Conv2D(filters = 64, kernel_size=(2,2), strides=(2,2), padding='same', data_format='channels_last', activation='relu', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L8": "MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same', data_format='channels_last')",
            "L9": "Flatten()",
            "L10": "Dense(4096, activation='softmax', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L11": "Dropout(0.4)",
            "L12": "Dense(2048, activation='softmax', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L13": "Dropout(0.4)",
            "L14": "Dense(1000, activation='softmax', use_bias=True, kernel_initializer='zeros', bias_initializer='zeros', kernel_regularizer=regularizers.l1(0.), bias_regularizer=regularizers.l1(0.), activity_regularizer=regularizers.l1(0.), kernel_constraint=max_norm(2.), bias_constraint=max_norm(2.))",
            "L15": "Dropout(0.4)"
        }
    ]
}

I want to get the information as to what layer is present in the json file. Example, Conv2D, MaxPooling2D, Flatten() etc.

Also, I want to know the value of the strings like filters, kernel_size, stride, activation etc.

I tried getting the layer name by doing this:

# Parse the JSON model description shown above.
with open('model.json','r') as fb:
    con = json.load(fb)
# Text before the first '(' of the "L1" entry, i.e. the layer name.
con['layers'][0]['L1'].split('(', 1)[0].rstrip()

Output is 'Conv2D'. Similarly, I got the other layer names.

What I need help with is getting the value of filters (e.g. 64 in L1).

I tried doing this:

# Everything after the first '(' of the "L1" entry: the argument text,
# still ending with the closing ')'.
c = con['layers'][0]['L1'].split('(', 1)[1].rstrip()
# A plain comma split also cuts inside tuple values such as kernel_size=(...),
# which is why the pieces below are not usable key=value pairs.
c.split(',')
['filters = 8', ' kernel_size=(3', '3)', ' strides=(1', ' 1)', " padding='valid'", " data_format='channels_last'", " activation='relu'", ' use_bias=True', " kernel_initializer='zeros'", " bias_initializer='zeros'", ' kernel_regularizer=regularizers.l1(0.)', ' bias_regularizer=regularizers.l1(0.)', ' activity_regularizer=regularizers.l2(0.)', ' kernel_constraint=max_norm(2.)', ' bias_constraint=max_norm(2.)', ' input_shape=(28', '28', '1))']

But still I am not getting the value.

Does anyone have any idea how to get this information ?

What if a string does not contain filters? — bharatk, Jun 28 '19 at 04:52
Then it should return 0. — Ashutosh Mishra, Jun 28 '19 at 04:55

Wytamma Wirth · Answer 1 · 2019-06-28T05:11:24.477

UPDATE: Using regex you can extract the keyword arguments. Then split on '=' to find the value of every keyword argument for every layer.

import json
import re

# Load the model description; con['layers'][0] maps layer keys ("L1", ...)
# to call-like strings such as "Conv2D(filters = 64, ...)".
with open('model.json', 'r') as fb:
    con = json.load(fb)

# Separator between top-level arguments: a comma (plus any whitespace after
# it) is skipped when the next parenthesis character following it is ')',
# i.e. when the comma sits inside a tuple such as (2,2) or (3, 3).
arg_separator = re.compile(r',\s*(?![^()]*\))')

for layer_key, layer in con['layers'][0].items():
    print("Layer: {}".format(layer_key))
    # Remove the layer name with its opening '(' and the final ')'.
    # Raw string: '\(' is not a valid escape in a normal string literal.
    layers_kwargs = re.sub(r'^(.*?)\(', '', layer)[:-1]
    if not layers_kwargs:
        # e.g. "Flatten()" has nothing between the parentheses.
        print('No kwargs')
        continue
    for kwarg in arg_separator.split(layers_kwargs):
        kwarg = [i.strip() for i in kwarg.split('=')]
        if len(kwarg) != 2:
            # Positional arguments such as the 0.4 in "Dropout(0.4)".
            print('No key', kwarg)
            continue
        k, v = kwarg
        print(k, v)

It's giving IndexError: list index out of range — Ashutosh Mishra, Jun 28 '19 at 04:54

bharatk · Accepted Answer · 2019-06-28T06:23:20.093

Using regular expressions - documentation for further reference

import re

# Argument names to extract. Each entry is matched as a prefix of the real
# keyword, so 'stride' also finds the 'strides=' argument.
string_lst = ['filters','kernel_size','stride','activation']
# Result: {layer key: {layer name: {argument name: value as text}}}.
my_dict = {}
for key,value in con['layers'][0].items():
    my_dict[key] = {}
    # Text before the first '(' is the layer name, e.g. "Conv2D".
    layer_names = value.split('(')[0].rstrip()
    my_dict[key][layer_names] = {}
    for i in string_lst:
        # The value is a run of characters that are neither commas nor
        # parentheses, optionally containing one-level (...) groups. This
        # keeps tuples such as (2,2) or (3, 3) whole and also matches the
        # last argument of a call, which is followed by ')' instead of ', '.
        match = re.search(
            r'\b' + re.escape(i) + r'\w*\s*=\s*((?:[^,()]|\([^()]*\))+)',
            value,
        )
        if match:
            filters = match.group(1).strip()
            my_dict[key][layer_names][i] = filters

    # Drop layers for which none of the requested arguments was found.
    if not my_dict[key][layer_names]:
        del my_dict[key]

print(my_dict)

O/P:

{
    'L1': {'Conv2D': {'filters': '64', 'kernel_size': '(2,2)', 'stride': '(2,2)', 'activation': "'relu'"}}, 
    'L2': {'MaxPooling2D': {'stride': '(2,2)'}}, 'L3': {'Conv2D': 
    {'filters': '64', 'kernel_size': '(2,2)', 'stride': '(2,2)', 'activation': "'relu'"}}, 
    'L4': {'MaxPooling2D': {'stride': '(2,2)'}}, 'L5': 
    {'Conv2D': {'filters': '64', 'kernel_size': '(2,2)', 'stride': '(2,2)', 'activation': "'relu'"}}, 
    'L6': {'Conv2D': {'filters': '64', 'kernel_size': '(2,2)', 'stride': '(2,2)', 'activation': "'relu'"}}, 
    'L7': {'Conv2D': {'filters': '64', 'kernel_size': '(2,2)', 'stride': '(2,2)', 'activation': "'relu'"}}, 
    'L8': {'MaxPooling2D': {'stride': '(2,2)'}}, 'L10': {'Dense': {'activation': "'softmax'"}}, 
    'L12': {'Dense': {'activation': "'softmax'"}}, 'L14': {'Dense': {'activation': "'softmax'"}}
}

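The JSON contains duplicate layer names. If you want unique records (one per layer name), replace every occurrence of

my_dict[key][layer_names]

with

my_dict[layer_names]

and remove the my_dict[key] = {} line. The final del my_dict[key] must then become del my_dict[layer_names], because my_dict is no longer keyed by L1, L2, ...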

When I am searching for 'activation', it is not returning the string. For eg, in L1, it should return 'relu'. — Ashutosh Mishra, Jun 28 '19 at 05:04
Yeah it worked, but now in things like kernel size and stride, the whole tuple is not being returned (eg (3,3)) — Ashutosh Mishra, Jun 28 '19 at 05:12

score 1 · Answer 3 · answered Jun 28 '19 at 05:09

I would do this in two steps. First make a regex for the outer filter name and the contents

re.compile(r"^\s*([^(]*)\s*\((.*)\)\s*$")

This has two groups: the name, and the contents surrounded by parentheses (...)

Then make a regex to split on commas that are not inside parentheses. You can see an in-depth explanation here

re.compile(r',\s*(?![^()]*\))')

Demo:

import re

# Group 1: the call name before the first '('; group 2: everything between
# that '(' and the final ')'.
main_regex = re.compile(r"^\s*([^(]*)\s*\((.*)\)\s*$")
# A comma (plus following whitespace) that is not inside a one-level (...) group.
split_regex = re.compile(r',\s*(?![^()]*\))')

# Named layer_text rather than input so the built-in input() is not shadowed.
layer_text = "Conv2D(filters = 64, kernel_size=(2,2), padding='same)"

main_match = main_regex.match(layer_text)
print(main_match.group(1))
parts = split_regex.split(main_match.group(2))
print(parts)

Prints:

Conv2D
['filters = 64', 'kernel_size=(2,2)', "padding='same"]

Get value of substrings after splitting

3 Answers