Using Code from OP
- The method for creating the dataframe is not correct, which is way
awsc
is empty.
- Without seeing a file, it's not possible to know if
pd.read_json(file, lines=True)
is the correct method to use.
pd.json_normalize(awsc['userIdentity'])
will work on a column of dicts
. It's more than likely the column is strings though.
- If the
dicts
are str
type, use ast.literal_eval
to convert them to dict
type.
import pandas as pd
from ast import literal_eval
# crate a list to add dataframes to
awsc_list = list()
# iterate through the list of and append them to awsc_list
for file in file_list:
awsc_list.append(pd.read_json(file, lines=True))
# concat the files into a single dataframe
awsc = pd.concat(awsc_list).reset_index(drop=True)
# convert the userIdentity column to dict type, if it contains str type
awsc.userIdentity = awsc.userIdentity.apply(literal_eval)
# normalize userIdentity
normalized = pd.json_normalize(awsc['userIdentity'], sep='_')
# join awsc with normalized and drop the userIdentity column
awsc = awsc.join(normalized).drop('userIdentity', 1)
# rename the columns
awsc.rename(columns={'type':'userIdentity_type',
'principalId':'userIdentity_principalId',
'arn':'userIdentity_arn',
'accountId':'userIdentity_accountId',
'accessKeyId':'userIdentity_accessKeyId',
'userName':'userIdentity_userName',}, inplace=True)
New Code with Sample Data
import json
import pandas as pd
# crate a list to add dataframes to
awsc_list = list()
# list of files
files_list = ['test.json', 'test2.json']
# read the filess
for file in files_list:
with open(file, 'r', encoding='utf-8') as f:
data = json.loads(f.read())
# normalize the file and append it to the list of dataframe
awsc_list.append(pd.json_normalize(data, 'Records', sep='_'))
# concat the files into a single dataframe
awsc = pd.concat(awsc_list).reset_index(drop=True)
# display(awsc)
eventVersion eventTime eventSource eventName awsRegion sourceIPAddress userAgent userIdentity_type userIdentity_principalId userIdentity_arn userIdentity_accessKeyId userIdentity_accountId userIdentity_userName requestParameters_instancesSet_items responseElements_instancesSet_items requestParameters_force userIdentity_sessionContext_attributes_mfaAuthenticated userIdentity_sessionContext_attributes_creationDate requestParameters_keyName responseElements_keyName responseElements_keyFingerprint responseElements_keyMaterial
0 1.0 2014-03-06T21:22:54Z ec2.amazonaws.com StartInstances us-east-2 205.251.233.176 ec2-api-tools 1.6.12.2 IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice [{'instanceId': 'i-ebeaf9e2'}] [{'instanceId': 'i-ebeaf9e2', 'currentState': {'code': 0, 'name': 'pending'}, 'previousState': {'code': 80, 'name': 'stopped'}}] NaN NaN NaN NaN NaN NaN NaN
1 1.0 2014-03-06T21:01:59Z ec2.amazonaws.com StopInstances us-east-2 205.251.233.176 ec2-api-tools 1.6.12.2 IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice [{'instanceId': 'i-ebeaf9e2'}] [{'instanceId': 'i-ebeaf9e2', 'currentState': {'code': 64, 'name': 'stopping'}, 'previousState': {'code': 16, 'name': 'running'}}] False NaN NaN NaN NaN NaN NaN
2 1.0 2014-03-06T17:10:34Z ec2.amazonaws.com CreateKeyPair us-east-2 72.21.198.64 EC2ConsoleBackend, aws-sdk-java/Linux/x.xx.fleetxen Java_HotSpot(TM)_64-Bit_Server_VM/xx IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice NaN NaN NaN false 2014-03-06T15:15:06Z mykeypair mykeypair 30:1d:46:d0:5b:ad:7e:1b:b6:70:62:8b:ff:38:b5:e9:ab:5d:b8:21 <sensitiveDataRemoved>
3 1.0 2014-03-06T21:22:54Z ec2.amazonaws.com StartInstances us-east-2 205.251.233.176 ec2-api-tools 1.6.12.2 IAMUser EX_PRINCIPAL_ID arn:aws:iam::123456789012:user/Alice EXAMPLE_KEY_ID 123456789012 Alice [{'instanceId': 'i-ebeaf9e2'}] [{'instanceId': 'i-ebeaf9e2', 'currentState': {'code': 0, 'name': 'pending'}, 'previousState': {'code': 80, 'name': 'stopped'}}] NaN NaN NaN NaN NaN NaN NaN
Sample Data
test.json
[{
"Records": [{
"eventVersion": "1.0",
"userIdentity": {
"type": "IAMUser",
"principalId": "EX_PRINCIPAL_ID",
"arn": "arn:aws:iam::123456789012:user/Alice",
"accessKeyId": "EXAMPLE_KEY_ID",
"accountId": "123456789012",
"userName": "Alice"
},
"eventTime": "2014-03-06T21:22:54Z",
"eventSource": "ec2.amazonaws.com",
"eventName": "StartInstances",
"awsRegion": "us-east-2",
"sourceIPAddress": "205.251.233.176",
"userAgent": "ec2-api-tools 1.6.12.2",
"requestParameters": {
"instancesSet": {
"items": [{
"instanceId": "i-ebeaf9e2"
}
]
}
},
"responseElements": {
"instancesSet": {
"items": [{
"instanceId": "i-ebeaf9e2",
"currentState": {
"code": 0,
"name": "pending"
},
"previousState": {
"code": 80,
"name": "stopped"
}
}
]
}
}
}
]
}, {
"Records": [{
"eventVersion": "1.0",
"userIdentity": {
"type": "IAMUser",
"principalId": "EX_PRINCIPAL_ID",
"arn": "arn:aws:iam::123456789012:user/Alice",
"accountId": "123456789012",
"accessKeyId": "EXAMPLE_KEY_ID",
"userName": "Alice"
},
"eventTime": "2014-03-06T21:01:59Z",
"eventSource": "ec2.amazonaws.com",
"eventName": "StopInstances",
"awsRegion": "us-east-2",
"sourceIPAddress": "205.251.233.176",
"userAgent": "ec2-api-tools 1.6.12.2",
"requestParameters": {
"instancesSet": {
"items": [{
"instanceId": "i-ebeaf9e2"
}
]
},
"force": false
},
"responseElements": {
"instancesSet": {
"items": [{
"instanceId": "i-ebeaf9e2",
"currentState": {
"code": 64,
"name": "stopping"
},
"previousState": {
"code": 16,
"name": "running"
}
}
]
}
}
}
]
}, {
"Records": [{
"eventVersion": "1.0",
"userIdentity": {
"type": "IAMUser",
"principalId": "EX_PRINCIPAL_ID",
"arn": "arn:aws:iam::123456789012:user/Alice",
"accountId": "123456789012",
"accessKeyId": "EXAMPLE_KEY_ID",
"userName": "Alice",
"sessionContext": {
"attributes": {
"mfaAuthenticated": "false",
"creationDate": "2014-03-06T15:15:06Z"
}
}
},
"eventTime": "2014-03-06T17:10:34Z",
"eventSource": "ec2.amazonaws.com",
"eventName": "CreateKeyPair",
"awsRegion": "us-east-2",
"sourceIPAddress": "72.21.198.64",
"userAgent": "EC2ConsoleBackend, aws-sdk-java/Linux/x.xx.fleetxen Java_HotSpot(TM)_64-Bit_Server_VM/xx",
"requestParameters": {
"keyName": "mykeypair"
},
"responseElements": {
"keyName": "mykeypair",
"keyFingerprint": "30:1d:46:d0:5b:ad:7e:1b:b6:70:62:8b:ff:38:b5:e9:ab:5d:b8:21",
"keyMaterial": "\u003csensitiveDataRemoved\u003e"
}
}
]
}
]
test2.json
{
"Records": [{
"eventVersion": "1.0",
"userIdentity": {
"type": "IAMUser",
"principalId": "EX_PRINCIPAL_ID",
"arn": "arn:aws:iam::123456789012:user/Alice",
"accessKeyId": "EXAMPLE_KEY_ID",
"accountId": "123456789012",
"userName": "Alice"
},
"eventTime": "2014-03-06T21:22:54Z",
"eventSource": "ec2.amazonaws.com",
"eventName": "StartInstances",
"awsRegion": "us-east-2",
"sourceIPAddress": "205.251.233.176",
"userAgent": "ec2-api-tools 1.6.12.2",
"requestParameters": {
"instancesSet": {
"items": [{
"instanceId": "i-ebeaf9e2"
}
]
}
},
"responseElements": {
"instancesSet": {
"items": [{
"instanceId": "i-ebeaf9e2",
"currentState": {
"code": 0,
"name": "pending"
},
"previousState": {
"code": 80,
"name": "stopped"
}
}
]
}
}
}
]
}