0

I am getting some execution plans in JSON format.

val df: DataFrame = ???
val jsonPlan = df.queryExecution.optimizedPlan.toJSON

jsonPlan output:

enter image description here

You can see that the InMemoryRelation operator has the child key: "child": [..].

Is it possible to attach custom user information to a plan tree node before calling the toJSON method, so that the added information is serialized along with the node when toJSON is called?

For example, I want to add a label to every child node (label: "it a cute child"). The result would then look like this:

"child":[
  {
    "class":"org.apache.spark.sql.execution.WholeStageCodegenExec",
    "num-children":1,
    "child":0,
    "codegenStageId":2,
    // My Added Information
    "label": "it a cute child"
  },
  ...
 ]

Full json plan:

    [
  {
    "class":"org.apache.spark.sql.execution.columnar.InMemoryRelation",
    "num-children":0,
    "output":[
      [
        {
          "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
          "num-children":0,
          "name":"criminal_name",
          "dataType":"string",
          "nullable":true,
          "metadata":{
          },
          "exprId":{
            "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
            "id":73,
            "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
          },
          "qualifier":[
          ]
        }],
      [
        {
          "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
          "num-children":0,
          "name":"punishment",
          "dataType":"string",
          "nullable":true,
          "metadata":{
          },
          "exprId":{
            "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
            "id":78,
            "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
          },
          "qualifier":[
          ]
        }]],
    "cacheBuilder":null,
    "outputOrdering":[
    ],
    "child":[
      {
        "class":"org.apache.spark.sql.execution.WholeStageCodegenExec",
        "num-children":1,
        "child":0,
        "codegenStageId":2
      },
      {
        "class":"org.apache.spark.sql.execution.ProjectExec",
        "num-children":1,
        "projectList":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.Alias",
              "num-children":1,
              "child":0,
              "name":"criminal_name",
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":73,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ],
              "explicitMetadata":{
              }
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":56,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, staff_dossiers]"
            }],
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.Alias",
              "num-children":1,
              "child":0,
              "name":"punishment",
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":78,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ],
              "explicitMetadata":{
              }
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.ScalaUDF",
              "num-children":1,
              "function":null,
              "dataType":"string",
              "children":[
                0],
              "inputEncoders":null,
              "nullable":true,
              "udfDeterministic":true
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"hobby",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":64,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, indicators]"
            }]],
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.joins.BroadcastHashJoinExec",
        "num-children":2,
        "leftKeys":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":56,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, staff_dossiers]"
            }]],
        "rightKeys":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":62,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, indicators]"
            }]],
        "joinType":{
          "object":"org.apache.spark.sql.catalyst.plans.Inner$"
        },
        "buildSide":{
          "object":"org.apache.spark.sql.execution.joins.package$BuildLeft$"
        },
        "left":0,
        "right":1
      },
      {
        "class":"org.apache.spark.sql.execution.InputAdapter",
        "num-children":1,
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.exchange.BroadcastExchangeExec",
        "num-children":1,
        "mode":{
          "product-class":"org.apache.spark.sql.execution.joins.HashedRelationBroadcastMode",
          "key":[
            [
              {
                "class":"org.apache.spark.sql.catalyst.expressions.BoundReference",
                "num-children":0,
                "ordinal":0,
                "dataType":"string",
                "nullable":true
              }]]
        },
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.WholeStageCodegenExec",
        "num-children":1,
        "child":0,
        "codegenStageId":1
      },
      {
        "class":"org.apache.spark.sql.execution.ProjectExec",
        "num-children":1,
        "projectList":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":56,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ]
            }]],
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.FilterExec",
        "num-children":1,
        "condition":[
          {
            "class":"org.apache.spark.sql.catalyst.expressions.And",
            "num-children":2,
            "left":0,
            "right":1
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
            "num-children":1,
            "child":0
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
            "num-children":0,
            "name":"name",
            "dataType":"string",
            "nullable":true,
            "metadata":{
            },
            "exprId":{
              "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
              "id":56,
              "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
            },
            "qualifier":"[spark_catalog, murphy, staff_dossiers]"
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
            "num-children":2,
            "left":0,
            "right":1
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
            "num-children":0,
            "name":"name",
            "dataType":"string",
            "nullable":true,
            "metadata":{
            },
            "exprId":{
              "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
              "id":56,
              "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
            },
            "qualifier":"[spark_catalog, murphy, staff_dossiers]"
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.Literal",
            "num-children":0,
            "value":"Nikita",
            "dataType":"string"
          }],
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.ColumnarToRowExec",
        "num-children":1,
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.InputAdapter",
        "num-children":1,
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.FileSourceScanExec",
        "num-children":0,
        "relation":null,
        "output":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":56,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ]
            }]],
        "requiredSchema":{
          "type":"struct",
          "fields":[
            {
              "name":"name",
              "type":"string",
              "nullable":true,
              "metadata":{
              }
            }]
        },
        "partitionFilters":[
        ],
        "dataFilters":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
              "num-children":1,
              "child":0
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":56,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, staff_dossiers]"
            }],
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
              "num-children":2,
              "left":0,
              "right":1
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":56,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, staff_dossiers]"
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.Literal",
              "num-children":0,
              "value":"Nikita",
              "dataType":"string"
            }]],
        "tableIdentifier":{
          "product-class":"org.apache.spark.sql.catalyst.TableIdentifier",
          "table":"staff_dossiers",
          "database":"murphy"
        }
      },
      {
        "class":"org.apache.spark.sql.execution.ProjectExec",
        "num-children":1,
        "projectList":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":62,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ]
            }],
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"hobby",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":64,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ]
            }]],
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.FilterExec",
        "num-children":1,
        "condition":[
          {
            "class":"org.apache.spark.sql.catalyst.expressions.And",
            "num-children":2,
            "left":0,
            "right":1
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
            "num-children":2,
            "left":0,
            "right":1
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
            "num-children":0,
            "name":"name",
            "dataType":"string",
            "nullable":true,
            "metadata":{
            },
            "exprId":{
              "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
              "id":62,
              "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
            },
            "qualifier":"[spark_catalog, murphy, indicators]"
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.Literal",
            "num-children":0,
            "value":"Nikita",
            "dataType":"string"
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
            "num-children":1,
            "child":0
          },
          {
            "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
            "num-children":0,
            "name":"name",
            "dataType":"string",
            "nullable":true,
            "metadata":{
            },
            "exprId":{
              "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
              "id":62,
              "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
            },
            "qualifier":"[spark_catalog, murphy, indicators]"
          }],
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.ColumnarToRowExec",
        "num-children":1,
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.InputAdapter",
        "num-children":1,
        "child":0
      },
      {
        "class":"org.apache.spark.sql.execution.FileSourceScanExec",
        "num-children":0,
        "relation":null,
        "output":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":62,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ]
            }],
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"hobby",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":64,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":[
              ]
            }]],
        "requiredSchema":{
          "type":"struct",
          "fields":[
            {
              "name":"name",
              "type":"string",
              "nullable":true,
              "metadata":{
              }
            },
            {
              "name":"hobby",
              "type":"string",
              "nullable":true,
              "metadata":{
              }
            }]
        },
        "partitionFilters":[
        ],
        "dataFilters":[
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.EqualTo",
              "num-children":2,
              "left":0,
              "right":1
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":62,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, indicators]"
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.Literal",
              "num-children":0,
              "value":"Nikita",
              "dataType":"string"
            }],
          [
            {
              "class":"org.apache.spark.sql.catalyst.expressions.IsNotNull",
              "num-children":1,
              "child":0
            },
            {
              "class":"org.apache.spark.sql.catalyst.expressions.AttributeReference",
              "num-children":0,
              "name":"name",
              "dataType":"string",
              "nullable":true,
              "metadata":{
              },
              "exprId":{
                "product-class":"org.apache.spark.sql.catalyst.expressions.ExprId",
                "id":62,
                "jvmId":"2996f433-9e44-40c1-9aaf-d74c0768f68b"
              },
              "qualifier":"[spark_catalog, murphy, indicators]"
            }]],
        "tableIdentifier":{
          "product-class":"org.apache.spark.sql.catalyst.TableIdentifier",
          "table":"indicators",
          "database":"murphy"
        }
      }
    ]
  }
]
OneCricketeer
  • 179,855
  • 19
  • 132
  • 245
Mardaunt
  • 82
  • 1
  • 13

1 Answer

1

Sure, you can parse and modify any JSON object in memory, but that has nothing to do with Spark. Related: What JSON library to use in Scala?

Any modifications you make wouldn't be persisted within the execution plan itself.

OneCricketeer
  • 179,855
  • 19
  • 132
  • 245
  • Yes, you're right. Apparently it is impossible to change the nodes of the plan until they are serialized. – Mardaunt Feb 24 '22 at 20:18