I am getting some execution plans in json format.
val df: DataFrame = ???
val jsonPlan = df.queryExecution.optimizedPlan.toJSON
jsonPlan output:
You can see that the InMemoryRelation operator has the child key: "child": [..]
.
Is it possible to add user information to the plan tree node before calling the toJSON method? So that after calling toJSON, the added information is serialized along with the tree node.
For example I want to add labels for all child nodes (label - it a cute child). Then the result will be like this:
"child":[
{
"class":"org.apache.spark.sql.execution.WholeStageCodegenExec",
"num-children":1,
"child":0,
"codegenStageId":2
// My Added Information
"label": "it a cute child"
},
...
]
Full json plan:
[
{
"class":"org.apache.spark.sql.execution.columnar.InMemoryRelation",
"num-children":0,
"output":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"criminal_name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":73,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}],
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"punishment",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":78,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}]],
"cacheBuilder":null,
"outputOrdering":[
],
"child":[
{
"class":"org.apache.spark.sql.execution.WholeStageCodegenExec",
"num-children":1,
"child":0,
"codegenStageId":2
},
{
"class":"org.apache.spark.sql.execution.ProjectExec",
"num-children":1,
"projectList":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.Alias",
"num-children":1,
"child":0,
"name":"criminal_name",
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":73,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
],
"explicitMetadata":{
}
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, staff_dossiers]"
}],
[
{
"class":"org.apache.spark.sql.catalyst.expressions.Alias",
"num-children":1,
"child":0,
"name":"punishment",
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":78,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
],
"explicitMetadata":{
}
},
{
"class":"org.apache.spark.sql.catalyst.expressions.ScalaUDF",
"num-children":1,
"function":null,
"dataType":"string",
"children":[
0],
"inputEncoders":null,
"nullable":true,
"udfDeterministic":true
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"hobby",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":64,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, indicators]"
}]],
"child":0
},
{
"class":"org.apache.spark.sql.execution.joins.BroadcastHashJoinExec",
"num-children":2,
"leftKeys":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, staff_dossiers]"
}]],
"rightKeys":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, indicators]"
}]],
"joinType":{
"object":"org.apache.spark.sql.catalyst.plans.Inner$"
},
"buildSide":{
"object":"org.apache.spark.sql.execution.joins.package$BuildLeft$"
},
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.execution.InputAdapter",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.execution.exchange.BroadcastExchangeExec",
"num-children":1,
"mode":{
"product-class":"org.apache.spark.sql.execution.joins.HashedRelationBroadcastMode",
"key":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.BoundReference",
"num-children":0,
"ordinal":0,
"dataType":"string",
"nullable":true
}]]
},
"child":0
},
{
"class":"org.apache.spark.sql.execution.WholeStageCodegenExec",
"num-children":1,
"child":0,
"codegenStageId":1
},
{
"class":"org.apache.spark.sql.execution.ProjectExec",
"num-children":1,
"projectList":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}]],
"child":0
},
{
"class":"org.apache.spark.sql.execution.FilterExec",
"num-children":1,
"condition":[
{
"class":"org.apache.spark.sql.catalyst.expressions.And",
"num-children":2,
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, staff_dossiers]"
},
{
"class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
"num-children":2,
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, staff_dossiers]"
},
{
"class":"org.apache.spark.sql.catalyst.expressions.Literal",
"num-children":0,
"value":"Nikita",
"dataType":"string"
}],
"child":0
},
{
"class":"org.apache.spark.sql.execution.ColumnarToRowExec",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.execution.InputAdapter",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.execution.FileSourceScanExec",
"num-children":0,
"relation":null,
"output":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}]],
"requiredSchema":{
"type":"struct",
"fields":[
{
"name":"name",
"type":"string",
"nullable":true,
"metadata":{
}
}]
},
"partitionFilters":[
],
"dataFilters":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, staff_dossiers]"
}],
[
{
"class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
"num-children":2,
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":56,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, staff_dossiers]"
},
{
"class":"org.apache.spark.sql.catalyst.expressions.Literal",
"num-children":0,
"value":"Nikita",
"dataType":"string"
}]],
"tableIdentifier":{
"product-class":"org.apache.spark.sql.catalyst.TableIdentifier",
"table":"staff_dossiers",
"database":"murphy"
}
},
{
"class":"org.apache.spark.sql.execution.ProjectExec",
"num-children":1,
"projectList":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}],
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"hobby",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":64,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}]],
"child":0
},
{
"class":"org.apache.spark.sql.execution.FilterExec",
"num-children":1,
"condition":[
{
"class":"org.apache.spark.sql.catalyst.expressions.And",
"num-children":2,
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
"num-children":2,
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, indicators]"
},
{
"class":"org.apache.spark.sql.catalyst.expressions.Literal",
"num-children":0,
"value":"Nikita",
"dataType":"string"
},
{
"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, indicators]"
}],
"child":0
},
{
"class":"org.apache.spark.sql.execution.ColumnarToRowExec",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.execution.InputAdapter",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.execution.FileSourceScanExec",
"num-children":0,
"relation":null,
"output":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}],
[
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"hobby",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":64,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":[
]
}]],
"requiredSchema":{
"type":"struct",
"fields":[
{
"name":"name",
"type":"string",
"nullable":true,
"metadata":{
}
},
{
"name":"hobby",
"type":"string",
"nullable":true,
"metadata":{
}
}]
},
"partitionFilters":[
],
"dataFilters":[
[
{
"class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
"num-children":2,
"left":0,
"right":1
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, indicators]"
},
{
"class":"org.apache.spark.sql.catalyst.expressions.Literal",
"num-children":0,
"value":"Nikita",
"dataType":"string"
}],
[
{
"class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
"num-children":1,
"child":0
},
{
"class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
"num-children":0,
"name":"name",
"dataType":"string",
"nullable":true,
"metadata":{
},
"exprId":{
"product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
"id":62,
"jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
},
"qualifier":"[spark_catalog, murphy, indicators]"
}]],
"tableIdentifier":{
"product-class":"org.apache.spark.sql.catalyst.TableIdentifier",
"table":"indicators",
"database":"murphy"
}
}
]
}
]