0

I have a JSON file like this (one JSON object per line):

{"authors":[{"ids":["4888852"],"name":"Andrea D. Foebel"},{"ids":["3325893"],"name":"John P. Hirdes"},{"ids":["5316482"],"name":"George A. Heckman"}]}
{"authors":[{"ids":["4836831"],"name":"A. S. Gavrish"},{"ids":["4061357"],"name":"E. N. Kilimnik"}]}
{"authors":[{"ids":["4888852"],"name":"A. S. Gavrish"},{"ids":["4061357"],"name":"E. N. Kilimnik"}]}

Code

import csv
import json
import pandas as pd
from itertools import islice
from collections import Counter

# Read the file one line at a time (each line is a standalone JSON object
# holding an "authors" list) and count how often every author id occurs.
data = []
c = Counter()
with open('papers-2017-10-30-sample.json', encoding='utf-8') as f:
    for line in f:
        # A blank line is not valid JSON; skip it instead of crashing.
        if not line.strip():
            continue
        record = json.loads(line)
        data.append(record)
        # "ids" is a list (unhashable), so feed its elements to the counter
        # rather than using the list itself as a key.
        for author in record['authors']:
            c.update(author['ids'])

# Print the totals once, after the whole file has been read.
print(c)

I want to count how many times each value in `ids` occurs, and group the results by `name`.

Any ideas? Please help.

Rahul
  • 10,830
  • 4
  • 53
  • 88
keavy
  • 33
  • 6

4 Answers

0

You can try something like this:

# Parse the JSON document.
# NOTE(review): `simplejson` and `data` are not defined in this snippet;
# presumably `data` is an open file object -- confirm before running.
t = simplejson.load(data)
# 'values' and 'xxxx' look like placeholder key names; substitute your own.
t1 = t['values']
print("Total Number  :", len(t1))
# Walk the entries directly instead of indexing with a manual counter.
for entry in t1:
    print(entry['xxxx'])
Mayank Srivastava
  • 149
  • 1
  • 3
  • 18
0
import json
from collections import Counter


# Sample input: a JSON array of papers, each with an "authors" list whose
# entries carry an "ids" list and a "name".
jsonstr = (
    '[{"authors":[{"ids":["4888852"],"name":"Andrea D. Foebel"},'
    '{"ids":["3325893"],"name":"John P. Hirdes"},'
    '{"ids":["5316482"],"name":"George A. Heckman"}]} ,'
    '{"authors":[{"ids":["4836831"],"name":"A. S. Gavrish"},'
    '{"ids":["4061357"],"name":"E. N. Kilimnik"}]}, '
    '{"authors":[{"ids":["4888852"],"name":"A. S. Gavrish"},'
    '{"ids":["4061357"],"name":"E. N. Kilimnik"}]}]'
)

jobj = json.loads(jsonstr)
print('jobj=', jobj)

# Lazily walk paper -> author -> id so every id is visited in document order.
every_id = (
    author_id
    for paper in jobj
    for person in paper['authors']
    for author_id in person['ids']
)

listid = []
for author_id in every_id:
    print('id=', author_id)
    listid.append(author_id)

print('listid=',listid)

# Tally how many times each id was seen.
c = Counter(listid)

print('keys=',c.keys())
print('values=',c.values())

output is

jobj= [{'authors': [{'name': 'Andrea D. Foebel', 'ids': ['4888852']}, {'name': 'John P. Hirdes', 'ids': ['3325893']}, {'name': 'George A. Heckman', 'ids': ['5316482']}]}, {'authors': [{'name': 'A. S. Gavrish', 'ids': ['4836831']}, {'name': 'E. N. Kilimnik', 'ids': ['4061357']}]}, {'authors': [{'name': 'A. S. Gavrish', 'ids': ['4888852']}, {'name': 'E. N. Kilimnik', 'ids': ['4061357']}]}]
id= 4888852
id= 3325893
id= 5316482
id= 4836831
id= 4061357
id= 4888852
id= 4061357
listid= ['4888852', '3325893', '5316482', '4836831', '4061357', '4888852', '4061357']
keys= dict_keys(['4888852', '4836831', '5316482', '3325893', '4061357'])
values= dict_values([2, 1, 1, 1, 2])
Junhee Shin
  • 748
  • 6
  • 8
  • Thank you so much. I also have another question: when I try to extract "name", the result looks like this: {'a': 54923, ' ': 53900, 'e': 40855, 'i': 38364, 'n': 36307, 'r': 30811, 'o': 28767}. Why doesn't it recognize the complete name? – keavy Mar 19 '18 at 06:24
0

This is one way to get your required output.

Demo:

import pprint
# Sample records: each one holds an "authors" list of {"ids", "name"} dicts.
d = [
    {"authors": [
        {"ids": ["4888852"], "name": "Andrea D. Foebel"},
        {"ids": ["3325893"], "name": "John P. Hirdes"},
        {"ids": ["5316482"], "name": "George A. Heckman"},
    ]},
    {"authors": [
        {"ids": ["4836831"], "name": "A. S. Gavrish"},
        {"ids": ["4061357"], "name": "E. N. Kilimnik"},
    ]},
    {"authors": [
        {"ids": ["4888852"], "name": "A. S. Gavrish"},
        {"ids": ["4061357"], "name": "E. N. Kilimnik"},
    ]},
]

# Group the author entries by name: the first sighting of a name creates its
# list, later sightings append to it.
res = {}
for paper in d:
    for author in paper["authors"]:
        res.setdefault(author["name"], []).append(author)
pprint.pprint(res)

Output:

{'A. S. Gavrish': [{'ids': ['4836831'], 'name': 'A. S. Gavrish'},
                   {'ids': ['4888852'], 'name': 'A. S. Gavrish'}],
 'Andrea D. Foebel': [{'ids': ['4888852'], 'name': 'Andrea D. Foebel'}],
 'E. N. Kilimnik': [{'ids': ['4061357'], 'name': 'E. N. Kilimnik'},
                    {'ids': ['4061357'], 'name': 'E. N. Kilimnik'}],
 'George A. Heckman': [{'ids': ['5316482'], 'name': 'George A. Heckman'}],
 'John P. Hirdes': [{'ids': ['3325893'], 'name': 'John P. Hirdes'}]}
Rakesh
  • 81,458
  • 17
  • 76
  • 113
0
import json
import pandas as pd

# Flatten every author entry from the file (one JSON object per line) into a
# single list of {"ids", "name"} dicts.
data = []
with open('papers-2017-10-30-sample.json', encoding='utf-8') as f:
    for line in f:
        # A blank line is not valid JSON; skip it instead of crashing.
        if not line.strip():
            continue
        data.extend(json.loads(line)['authors'])

df = pd.DataFrame.from_dict(data)

# Count the rows per author name. The result is printed explicitly because a
# bare expression is only echoed in an interactive session, not in a script.
name_counts = df.groupby('name').count()
print(name_counts)

The output is :

                   ids
name                  
A. S. Gavrish        2
Andrea D. Foebel     1
E. N. Kilimnik       2
George A. Heckman    1
John P. Hirdes       1
MSS
  • 3,306
  • 1
  • 19
  • 50