I'm trying to read an Excel spreadsheet and create a graph based on the data from certain columns. I'm using Python 3.6.2 since some libraries used in my script, do not work well in the newer versions.
This is my code:
import os
import plotly
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
def create_graph(directoryPath):
for root, subFolders, files in os.walk(directoryPath):
for f in files:
if "screenlog" in f.lower():
print("Creating graphs from the Screen logs")
absolutePathOfFile = os.path.join(root, f)
pd.options.mode.chained_assignment = None # default="warn"
#PROCESSING THE EXCEL FILE
df_init = pd.read_excel(absolutePathOfFile,engine="openpyxl")
df_init["Screen&Action"]= df_init["SCREEN"].str.cat(df_init["Action_Name"], sep=" - ")
##### PLOTING Scatter chart######
print("Plotting the scatter graphs")
#using plotly to create a scatter graph
fig_scatter= px.scatter(df_init, x="INSTANT", y="DURATION",color="Screen&Action",height=500,title="Traditional Screen Analysis")
fig_scatter.update_layout(showlegend=False,title={
"y":0.9,
"x":0.5,
"xanchor": "center",
"yanchor": "top"})
fig_scatter.show()
The error message:
> Creating graphs from the Screen logs
> ...\AppData\Local\Programs\Python\Python36\lib\site-packages\numpy\core\fromnumeric.py:87:
> VisibleDeprecationWarning:
>
> Creating an ndarray from ragged nested sequences (which is a
> list-or-tuple of lists-or-tuples-or ndarrays with different lengths or
> shapes) is deprecated. If you meant to do this, you must specify
> 'dtype=object' when creating the ndarray
>
> Plotting the scatter graphs Traceback (most recent call last): File
> "...\Documents\log_app\log_parser.py", line 3812, in <module>
> scripts.screen.create_graph(directoryPath) File "...\Documents\log_app\scripts\screen.py", line 20, in create_graph
> fig_scatter= px.scatter(df_init, x="INSTANT", y="DURATION",color="Screen&Action",height=500,title="Traditional
> Screen Analysis") File
> "...\AppData\Local\Programs\Python\Python36\lib\site-packages\plotly\express\_chart_types.py",
> line 66, in scatter
> return make_figure(args=locals(), constructor=go.Scatter) File "...\AppData\Local\Programs\Python\Python36\lib\site-packages\plotly\express\_core.py",
> line 1988, in make_figure
> group = grouped.get_group(group_name if len(group_name) > 1 else group_name[0]) File
> "...\AppData\Local\Programs\Python\Python36\lib\site-packages\pandas\core\groupby\groupby.py",
> line 680, in get_group
> raise KeyError(name) KeyError: (nan, '', '', '', '')
I spent some time searching for a workaround, and I found several sites that mentioned adding "dtype=object" as an extra parameter should fix the problem.
However, how can I add "dtype=object" in this line?:
fig_scatter= px.scatter(df_init, x="INSTANT", y="DURATION",color="Screen&Action",height=500,title="Traditional Screen Analysis")
I always get a syntax error.
Dataframe:
print(df_init)
Tenant_Id ... Screen&Action
0 1 ... Screen_Logs - TelemetryClickEvent.SendEvent
1 1 ... Screen_Logs - FilterOrReset
2 1 ... Cyclic_Job_Logs - TelemetryClickEvent.SendEvent
3 1 ... Screen_Logs - Preparation
4 20 ... EventoDetalhe_New - Load_Atletas
... ... ... ...
31893 20 ... Login - Preparation
31894 1 ... Login - Preparation
31895 20 ... Login - Preparation
31896 1 ... Login - Preparation
31897 20 ... Login - Preparation
[31898 rows x 20 columns]
print(df_init.head(10).to_dict("records"))
[{'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:44:15.345000'),
'DURATION': 14, 'SCREEN': 'Screen_Logs', 'Session_Id': 'N30TTfz+v0G6OuDKYznMeA==',
'User_Id': 1263, 'Espace_Id': 1, 'MSISDN': nan, 'Screen_Type': 'WEB',
'Executed_By': 'E3Q3J-PR4U18', 'Session_Bytes': 8207, 'Viewstate_Bytes': 4204,
'Session_Requests': 1, 'Access_Mode': 'Ajax', 'Request_Key': 'ac581f0c-ae8a-4f1d-8eea-39c0f89171b7',
'Action_Name': 'TelemetryClickEvent.SendEvent', 'Espace_Name': 'ServiceCenter',
'Application_Name': 'Service Center', 'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11',
'Screen&Action': 'Screen_Logs - TelemetryClickEvent.SendEvent'},
{'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:44:15.202000'),
'DURATION': 31, 'SCREEN': 'Screen_Logs', 'Session_Id': 'N30TTfz+v0G6OuDKYznMeA==',
'User_Id': 1263, 'Espace_Id': 1, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U18',
'Session_Bytes': 8207, 'Viewstate_Bytes': 4184, 'Session_Requests': 1, 'Access_Mode': 'Ajax',
'Request_Key': 'd4f00718-3fbe-416d-8540-aeed02f371b3', 'Action_Name': 'FilterOrReset',
'Espace_Name': 'ServiceCenter', 'Application_Name': 'Service Center',
'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11', 'Screen&Action':
'Screen_Logs - FilterOrReset'}, {'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:44:11.733000'),
'DURATION': 15, 'SCREEN': 'Cyclic_Job_Logs', 'Session_Id': 'N30TTfz+v0G6OuDKYznMeA==',
'User_Id': 1263, 'Espace_Id': 1, 'MSISDN': nan, 'Screen_Type': 'WEB',
'Executed_By': 'E3Q3J-PR4U18', 'Session_Bytes': 8093, 'Viewstate_Bytes': 4160,
'Session_Requests': 1, 'Access_Mode': 'Ajax', 'Request_Key': 'a35aa5c7-81e9-487a-8df3-474b12df2a1a',
'Action_Name': 'TelemetryClickEvent.SendEvent', 'Espace_Name': 'ServiceCenter',
'Application_Name': 'Service Center', 'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11',
'Screen&Action': 'Cyclic_Job_Logs - TelemetryClickEvent.SendEvent'},
{'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:44:11.733000'), 'DURATION': 797,
'SCREEN': 'Screen_Logs', 'Session_Id': 'N30TTfz+v0G6OuDKYznMeA==', 'User_Id': 1263,
'Espace_Id': 1, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U18',
'Session_Bytes': 8093, 'Viewstate_Bytes': 0, 'Session_Requests': 1,
'Access_Mode': 'Screen', 'Request_Key': '4ac69574-86e2-4b85-86eb-e8cda703193c',
'Action_Name': 'Preparation', 'Espace_Name': 'ServiceCenter', 'Application_Name': 'Service Center',
'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11', 'Screen&Action': 'Screen_Logs - Preparation'},
{'Tenant_Id': 20, 'INSTANT': Timestamp('2021-11-25 21:44:05.108000'), 'DURATION': 1359,
'SCREEN': 'EventoDetalhe_New', 'Session_Id': 'jXC8xqSk1Eu0q4C+VQNZww==', 'User_Id': 151,
'Espace_Id': 80, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U18',
'Session_Bytes': 17444, 'Viewstate_Bytes': 45784, 'Session_Requests': 4, 'Access_Mode': 'Ajax',
'Request_Key': '9f62097f-fe6a-468a-a4d4-5b2c39e295ce', 'Action_Name': 'Load_Atletas',
'Espace_Name': 'TalentWeb', 'Application_Name': 'TalentWeb',
'Application_Key': '448b995f-e07e-4a21-a442-12204cfffea6',
'Screen&Action': 'EventoDetalhe_New - Load_Atletas'},
{'Tenant_Id': 20, 'INSTANT': Timestamp('2021-11-25 21:44:04.573000'), 'DURATION': 33,
'SCREEN': 'EventoDetalhe_New', 'Session_Id': 'jXC8xqSk1Eu0q4C+VQNZww==', 'User_Id': 151,
'Espace_Id': 80, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U1S',
'Session_Bytes': 12431, 'Viewstate_Bytes': 45824, 'Session_Requests': 1, 'Access_Mode': 'Ajax',
'Request_Key': '61d1d2a1-2c56-4e1d-a752-8d1719aa77a8', 'Action_Name': 'Tabs.OnChange',
'Espace_Name': 'TalentWeb', 'Application_Name': 'TalentWeb',
'Application_Key': '448b995f-e07e-4a21-a442-12204cfffea6', 'Screen&Action': 'EventoDetalhe_New - Tabs.OnChange'},
{'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:44:02.658000'), 'DURATION': 31,
'SCREEN': 'Login', 'Session_Id': 'am9Hoz4suEifYkaRsh8usQ==', 'User_Id': 0, 'Espace_Id': 1,
'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U2H',
'Session_Bytes': 0, 'Viewstate_Bytes': 0, 'Session_Requests': 1, 'Access_Mode': 'Screen',
'Request_Key': '1b4f4776-f622-46da-ac0f-88a897c5b8a7', 'Action_Name': 'Preparation',
'Espace_Name': 'ServiceCenter', 'Application_Name': 'Service Center',
'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11', 'Screen&Action': 'Login - Preparation'},
{'Tenant_Id': 20, 'INSTANT': Timestamp('2021-11-25 21:43:59.729000'), 'DURATION': 314,
'SCREEN': 'EventoDetalhe_New', 'Session_Id': 'jXC8xqSk1Eu0q4C+VQNZww==', 'User_Id': 151,
'Espace_Id': 80, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U1S',
'Session_Bytes': 14891, 'Viewstate_Bytes': 0, 'Session_Requests': 3,
'Access_Mode': 'Screen', 'Request_Key': '866f27bb-f86a-4add-b084-f433e1cadc73',
'Action_Name': 'Preparation', 'Espace_Name': 'TalentWeb', 'Application_Name': 'TalentWeb',
'Application_Key': '448b995f-e07e-4a21-a442-12204cfffea6', 'Screen&Action': 'EventoDetalhe_New - Preparation'},
{'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:43:53.594000'), 'DURATION': 1904,
'SCREEN': 'Cyclic_Job_Logs', 'Session_Id': 'N30TTfz+v0G6OuDKYznMeA==', 'User_Id': 1263,
'Espace_Id': 1, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U18',
'Session_Bytes': 8093, 'Viewstate_Bytes': 4160, 'Session_Requests': 1,
'Access_Mode': 'Screen', 'Request_Key': '26f2d266-b2fc-4a2f-8551-046ba9604477',
'Action_Name': 'ExportToExcel', 'Espace_Name': 'ServiceCenter', 'Application_Name': 'Service Center',
'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11',
'Screen&Action': 'Cyclic_Job_Logs - ExportToExcel'},
{'Tenant_Id': 1, 'INSTANT': Timestamp('2021-11-25 21:43:53.594000'), 'DURATION': 2060,
'SCREEN': 'Cyclic_Job_Logs', 'Session_Id': 'N30TTfz+v0G6OuDKYznMeA==', 'User_Id': 1263,
'Espace_Id': 1, 'MSISDN': nan, 'Screen_Type': 'WEB', 'Executed_By': 'E3Q3J-PR4U18',
'Session_Bytes': 8093, 'Viewstate_Bytes': 4160, 'Session_Requests': 1,
'Access_Mode': 'Ajax', 'Request_Key': '66d521dd-905d-4f66-ac99-a982bcccb99b',
'Action_Name': 'TelemetryClickEvent.SendEvent', 'Espace_Name': 'ServiceCenter',
'Application_Name': 'Service Center', 'Application_Key': '463836d2-9aea-42ff-9f58-a1e78e163c11',
'Screen&Action': 'Cyclic_Job_Logs - TelemetryClickEvent.SendEvent'}]
print(df_init[df_init['Screen&Action'].isnull()])
Tenant_Id INSTANT DURATION ... Application_Name Application_Key Screen&Action
25708 20 2021-11-24 13:45:47.022 93 ... Statvue Emails f256c73c-722e-4adb-b2c1-c80cc20c2745 NaN
25763 20 2021-11-24 13:39:20.798 6 ... Statvue Emails f256c73c-722e-4adb-b2c1-c80cc20c2745 NaN
25782 20 2021-11-24 13:38:06.664 171 ... Statvue Emails f256c73c-722e-4adb-b2c1-c80cc20c2745 NaN
25805 20 2021-11-24 13:36:15.512 2295 ... Statvue Emails f256c73c-722e-4adb-b2c1-c80cc20c2745 NaN
The Excel file I'm trying to read.
Any help is appreciated.