
I am importing JSON data from a URL. I would like to unnest the information and format it as a GeoDataFrame, but I am unsure how to handle the nested attributes. There are both points and lines in my data, and the points seem to be nested within the lines.

Data is coming from:

import urllib.request, json 
with urllib.request.urlopen('https://transit.land/api/v2/rest/routes.geojson?operator_onestop_id=o-9q8y-sfmta&api_key=LsyqCJs5aYI6uyxvUz1d0VQQLYoDYdh4&l&') as url:
    data = json.loads(url.read())

My ideal output is a points GeoDataFrame and a lines GeoDataFrame, each with a geometry column as well as columns for id, stop_id, stop_name, etc.
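For reference, letting geopandas parse the FeatureCollection directly would be the simplest path; a minimal sketch (the route_stops key used for the points table is an assumption; inspect the actual feature properties for the real field name):

import geopandas as gpd
import pandas as pd

url = 'https://transit.land/api/v2/rest/routes.geojson?operator_onestop_id=o-9q8y-sfmta&api_key=LsyqCJs5aYI6uyxvUz1d0VQQLYoDYdh4&l&'

# lines: read the GeoJSON FeatureCollection straight into a GeoDataFrame
lines_gdf = gpd.read_file(url)

# equivalently, from the dict already fetched with urllib above:
# lines_gdf = gpd.GeoDataFrame.from_features(data['features'], crs='EPSG:4326')

# points: if stop records sit nested in each feature's properties
# (the 'route_stops' key is a guess), json_normalize can unnest them:
# stops = pd.json_normalize(data['features'], record_path=['properties', 'route_stops'])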

SMar3552
  • Does this answer your question? [Loading JSON into a GeoDataFrame](https://stackoverflow.com/questions/45552955/loading-json-into-a-geodataframe) – Michael Delgado Aug 25 '22 at 19:57
  • I think I am seeing an error with this method because I actually have a list of strings rather than a list of dictionaries – SMar3552 Aug 25 '22 at 20:04
  • do you need help debugging this? the data in your question is actually a properly-structured feature collection dictionary so I'm not sure how to help. – Michael Delgado Aug 25 '22 at 20:18
  • Thanks for looking into it. I updated the question to indicate where the data is coming from. Perhaps I am wrong about the structure – SMar3552 Aug 25 '22 at 20:23
  • got it. can you read the file directly with `gpd.read_file(URL, engine="GeoJSON")`? if not, can you do the legwork of downloading and inspecting the file and then correcting the contents in your question? – Michael Delgado Aug 25 '22 at 20:26
  • I get a RecursionError: maximum recursion depth exceeded in comparison with this method. looking into that now – SMar3552 Aug 25 '22 at 20:31

1 Answer

import math
import re
import numpy as np
import pandas as pd


class Tuppsub(tuple):
    pass


class ProtectedTuple(tuple):
    pass


class ProtectedList(list):
    pass


class ProtectedDict(dict):
    pass


class ProtectedSet(set):
    pass


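# walk a dict recursively, yielding (value, key-path) tuples: "forbidden"
# containers are descended into, "allowed" scalar types end the recursion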
def aa_flatten_dict_tu(
    v,
    listitem,
    forbidden=(list, tuple, set, frozenset),
    allowed=(
        str,
        int,
        float,
        complex,
        bool,
        bytes,
        type(None),
        ProtectedTuple,
        ProtectedList,
        ProtectedDict,
        ProtectedSet,
    ),
):

    if isinstance(v, dict):
        for k, v2 in v.items():
            newtu = listitem + (k,)

            yield from aa_flatten_dict_tu(
                v2, listitem=newtu, forbidden=forbidden, allowed=allowed
            )
    elif isinstance(v, forbidden):

        for indi, v2 in enumerate(v):

            if isinstance(v2, allowed):
                yield v2, listitem
            else:
                yield from aa_flatten_dict_tu(
                    v2,
                    listitem=(listitem + (indi,)),
                    forbidden=forbidden,
                    allowed=allowed,
                )
    elif isinstance(v, allowed):

        yield Tuppsub((v, listitem))
    else:
        try:
            for indi2, v2 in enumerate(v):

                try:
                    if isinstance(v2, allowed):
                        yield v2, listitem

                    else:
                        yield from aa_flatten_dict_tu(  # yield from, so pairs are emitted rather than a generator
                            v2,
                            listitem=(listitem + (indi2,)),
                            forbidden=forbidden,
                            allowed=allowed,
                        )
                except Exception:
                    yield v2, listitem
        except:

            yield v, listitem


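# generic entry point: flattens any nested structure (dicts and their
# subclasses, lists/tuples/sets, even DataFrames) into (value, key-path) tuples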
def fla_tu(
    item,
    walkthrough=(),
    forbidden=(list, tuple, set, frozenset),
    allowed=(
        str,
        int,
        float,
        complex,
        bool,
        bytes,
        type(None),
        ProtectedTuple,
        ProtectedList,
        ProtectedDict,
        ProtectedSet,
    ),
    dict_variation=(
        "collections.defaultdict",
        "collections.UserDict",
        "collections.OrderedDict",
    ),
):

    if isinstance(item, allowed):
        yield item, walkthrough
    elif isinstance(item, forbidden):
        for ini, xaa in enumerate(item):
            try:
                yield from fla_tu(
                    xaa,
                    walkthrough=(walkthrough + (ini,)),
                    forbidden=forbidden,
                    allowed=allowed,
                    dict_variation=dict_variation,
                )
            except Exception:

                yield xaa, Tuppsub(walkthrough + (ini,))
    elif isinstance(item, dict):

        yield from aa_flatten_dict_tu(
            item, listitem=walkthrough, forbidden=forbidden, allowed=allowed
        )
    elif any(x in str(type(item)) for x in dict_variation):
        yield from aa_flatten_dict_tu(
            dict(item), listitem=walkthrough, forbidden=forbidden, allowed=allowed
        )

    elif "DataFrame" in str(type(item)):

        yield from aa_flatten_dict_tu(
            item.copy().to_dict(),
            listitem=walkthrough,
            forbidden=forbidden,
            allowed=allowed,
        )

    else:
        try:
            for ini2, xaa in enumerate(item):
                try:
                    if isinstance(xaa, allowed):
                        yield xaa, Tuppsub((walkthrough + (ini2,)))
                    else:
                        yield from fla_tu(
                            xaa,
                            walkthrough=Tuppsub(walkthrough + (ini2,)),
                            forbidden=forbidden,
                            allowed=allowed,
                            dict_variation=dict_variation,
                        )
                except Exception:

                    yield xaa, Tuppsub((walkthrough + (ini2,)))
        except Exception:

            yield item, Tuppsub(walkthrough + (item,))


def qq_d_sort_columns_alphabetically(df, reverse=False):
    if reverse is False:
        return df.filter(sorted(df.columns)).copy()
    return df.filter(reversed(sorted(df.columns))).copy()


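# merge a list of DataFrames/Series onto df by index, numbering the
# suffixes so repeated column names stay distinguishable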
def qq_ds_merge_multiple_dfs_and_series_on_index(
    df,
    list_with_ds,
    how="inner",
    on=None,
    sort=False,
    suffixes=("_x", "_y"),
    indicator=False,
    validate=None,
):
    df2 = df.copy()
    for ini, x in enumerate(list_with_ds):
        if isinstance(x, pd.Series):
            x = x.to_frame().copy()
        df2 = (
            pd.merge(
                df2.copy(),
                x.copy(),
                how=how,
                on=on,
                sort=sort,
                indicator=indicator,
                validate=validate,
                left_index=True,
                right_index=True,
                suffixes=(
                    f"{suffixes[0]}_{str(ini).zfill(3)}",
                    f"{suffixes[1]}_{str(ini).zfill(3)}",
                ),
            )
        ).copy()
    return df2  # return after merging every frame, outside the loop


def qq_s_isnan(wert, nan_back=False, debug=False):
    allenanvalues = [
        "<NA>",
        "<NAN>",
        "<nan>",
        "np.nan",
        "NoneType",
        "None",
        "-1.#IND",
        "1.#QNAN",
        "1.#IND",
        "-1.#QNAN",
        "#N/A N/A",
        "#N/A",
        "N/A",
        "n/a",
        "NA",
        "",
        "#NA",
        "NULL",
        "null",
        "NaN",
        "-NaN",
        "nan",
        "-nan",
    ]
    try:
        if pd.isna(wert) is True:
            if nan_back is True:
                return np.nan
            return True
    except Exception as Fehler:
        if debug is True:
            print(Fehler)

    try:
        if pd.isnull(wert) is True:
            if nan_back is True:
                return np.nan
            return True
    except Exception as Fehler:
        if debug is True:
            print(Fehler)

    try:
        if math.isnan(wert) is True:
            if nan_back is True:
                return np.nan
            return True
    except Exception as Fehler:
        if debug is True:
            print(Fehler)

    try:
        if wert is None:
            return True
    except Exception as Fehler:
        if debug is True:
            print(Fehler)

    for allaaa in allenanvalues:
        try:
            nanda = re.findall(str(fr"^\s*{wert}\s*$"), str(allaaa))
            if any(nanda):
                return True
        except Exception as Fehler:
            if debug is True:
                print(Fehler)
            return False
    return False


def _if_not_list_to_list(list_):

    if not isinstance(list_, list):
        try:
            list_ = list_.tolist()
        except Exception:
            list_ = list(list_)
    return list_


def _exs_normalize_lists_in_series(list_, maxlen, seriesback=True):

    if qq_s_isnan(list_):
        if seriesback:
            return pd.Series([pd.NA] * maxlen)
        else:
            return [pd.NA] * maxlen

    list_ = _if_not_list_to_list(list_)

    add_lists = (maxlen - len(list_)) * [pd.NA]
    if seriesback:
        return pd.Series(list_ + add_lists)
    return list_ + add_lists


def qq_s_lists_to_df(df):
    df2 = df.copy()
    maxlen = df2.dropna().map(lambda x: len(x)).max()
    return df2.apply(
        lambda x: _exs_normalize_lists_in_series(x, maxlen, seriesback=True)
    ).copy()


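# flatten a nested object into a DataFrame: one row per scalar value,
# indexed by the full key path (a pandas MultiIndex)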
def nested_something_to_df(
    nested_dict,
    w_dict_anotation=False,
    w_with_depth=False,
    w_keys_as_additional_cols=False,
    w_all_keys_as_tuple_in_col=False,
):

    flattenddict = list((fla_tu(nested_dict)))
    flattenddict = [
        list(x)[0] if "generator" in str(type(x)) else x for x in flattenddict
    ]
    df = pd.DataFrame(flattenddict)
    df.columns = ["aa_value", "aa_all_keys"]
    indexdf = qq_s_lists_to_df(df.aa_all_keys)
    indexdf.columns = [f"aa_key_{x}" for x in indexdf.columns]

    df = qq_ds_merge_multiple_dfs_and_series_on_index(df, [indexdf])
    df.index = [df[f"aa_key_{x}"].__array__() for x in range(len(df.columns) - 2)]
    df = qq_d_sort_columns_alphabetically(df)

    if w_with_depth:
        df["aa_depth"] = df.aa_all_keys.map(len)
    if w_dict_anotation:
        df["aa_dict_anotation"] = df.aa_all_keys.apply(
            lambda y: "".join(
                [f'["{x}"]' if isinstance(x, str) else f"[{x}]" for x in y]
            )
        )
    if not w_keys_as_additional_cols:
        df = df.drop(columns=[x for x in df.columns if x.startswith("aa_key_")])
    if not w_all_keys_as_tuple_in_col:
        df = df.drop(columns=["aa_all_keys"])
    return df




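# flatten the whole GeoJSON response, then slice feature 0's coordinates
# out of the resulting MultiIndex in a few equivalent ways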
gf = nested_something_to_df(data)  # "data" is the dict loaded from the URL in the question

gf1 = gf.loc[
    (slice("features"), slice(0), slice("geometry"), slice("coordinates"))
].reset_index()
gf2 = gf.loc[(slice("features"), slice(0), slice("geometry"), slice("coordinates"))]

idx = pd.IndexSlice
gf3 = gf.loc[idx["features", 0, "geometry", "coordinates"], :]
gf4 = gf.loc[idx["features", 0, "geometry", "coordinates"], :].reset_index()




gf1
Out[4]: 
      level_0 level_1   level_2  ... level_6 level_7    aa_value
0    features       0  geometry  ...     NaN     NaN -122.405122
1    features       0  geometry  ...     NaN     NaN   37.708858
2    features       0  geometry  ...     NaN     NaN -122.404737
3    features       0  geometry  ...     NaN     NaN   37.709655
4    features       0  geometry  ...     NaN     NaN -122.404239
..        ...     ...       ...  ...     ...     ...         ...
717  features       0  geometry  ...     NaN     NaN   37.710414
718  features       0  geometry  ...     NaN     NaN -122.404737
719  features       0  geometry  ...     NaN     NaN   37.709655
720  features       0  geometry  ...     NaN     NaN -122.405122
721  features       0  geometry  ...     NaN     NaN   37.708858



gf2
Out[5]: 
                                                 aa_value
features 0 geometry coordinates 0 0   NaN NaN -122.405122
                                          NaN   37.708858
                                  1   NaN NaN -122.404737
                                          NaN   37.709655
                                  2   NaN NaN -122.404239
                                                   ...
                                1 181 NaN NaN   37.710414
                                  182 NaN NaN -122.404737
                                          NaN   37.709655
                                  183 NaN NaN -122.405122
                                          NaN   37.708858

gf3
Out[6]: 
                 aa_value
0 0   NaN NaN -122.405122
          NaN   37.708858
  1   NaN NaN -122.404737
          NaN   37.709655
  2   NaN NaN -122.404239
                   ...
1 181 NaN NaN   37.710414
  182 NaN NaN -122.404737
          NaN   37.709655
  183 NaN NaN -122.405122
          NaN   37.708858


gf4
Out[7]: 
    level_0 level_1 level_2 level_3    aa_value
0         0       0     NaN     NaN -122.405122
1         0       0     NaN     NaN   37.708858
2         0       1     NaN     NaN -122.404737
3         0       1     NaN     NaN   37.709655
4         0       2     NaN     NaN -122.404239
..      ...     ...     ...     ...         ...
717       1     181     NaN     NaN   37.710414
718       1     182     NaN     NaN -122.404737
719       1     182     NaN     NaN   37.709655
720       1     183     NaN     NaN -122.405122
721       1     183     NaN     NaN   37.708858


gf
Out[8]: 
                                                                                                aa_value
features 0     geometry   coordinates 0.0 0.0 NaN NaN                                        -122.405122
                                                  NaN                                          37.708858
                                          1.0 NaN NaN                                        -122.404737
                                                  NaN                                          37.709655
                                          2.0 NaN NaN                                        -122.404239
                                                                                                  ...
         19    properties route_url   NaN NaN NaN NaN                               https://SFMTA.com/43
               type       NaN         NaN NaN NaN NaN                                            Feature
meta     after NaN        NaN         NaN NaN NaN NaN                                            9998755
         next  NaN        NaN         NaN NaN NaN NaN  https://api.transit.land/api/v2/rest/routes.ge...
type     NaN   NaN        NaN         NaN NaN NaN NaN                                  FeatureCollection

I have never used geopandas, but this might help you. Pandas' MultiIndex is, imho, the way to navigate through nested dicts, lists, etc.
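If the goal is still a pair of GeoDataFrames, here is a hedged follow-up sketch (assuming geopandas/shapely are installed), using gf4 from above, where level_0 is the sub-line index and every vertex contributes two consecutive aa_value rows, longitude then latitude:

import geopandas as gpd
from shapely.geometry import LineString

# pair consecutive aa_value rows into (lon, lat) vertices
vals = gf4["aa_value"].to_numpy(dtype=float).reshape(-1, 2)
# one sub-line id per vertex (level_0 of every other row)
parts = gf4["level_0"].to_numpy()[::2]

lines = [LineString(vals[parts == p]) for p in sorted(set(parts))]
lines_gdf = gpd.GeoDataFrame(geometry=lines, crs="EPSG:4326")

The stop attributes (id, stop_name, ...) could be pulled out of the properties branch of gf the same way and joined back on the feature index.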

Hans