Let's see how we can extract a JSON object from a Spark DataFrame column.
This is an example DataFrame:
# Third-party dependencies used by this example; one import per line.
import numpy as np
import pandas as pd
# Wildcard imports make helpers such as `explode` available unqualified.
from pyspark.sql.functions import *
from pyspark.sql.types import *
# Switch off the 'spark.sql.execution.arrow.pyspark.enabled' setting before
# the pandas frame is handed to Spark.
# NOTE(review): `spark` is not created in the visible code -- assumes an
# active SparkSession is supplied by the environment (e.g. a notebook).
# NOTE(review): presumably Arrow is disabled so the nested dicts go through
# the non-Arrow conversion path -- confirm.
spark.conf.set('spark.sql.execution.arrow.pyspark.enabled', False)

# Sample payload: a single 'result' entry whose value nests A -> B -> a list
# of two records, each wrapped under 'key'.
d = {
    'result': [
        {
            "A": {
                "B": [
                    {
                        "key": {
                            "dateAtString": "1990-10-12T10:45:11.691274Z",
                            "dateLastString": "1990-10-12T10:46:45.372113Z",
                            "valueRes": {"C": "AZZ", "test": "2"},
                            "result": "True",
                        }
                    },
                    {
                        "key": {
                            "dateAtString": "1990-10-12T10:45:11.691274Z",
                            "dateLastString": "1990-10-12T10:46:45.372113Z",
                            "valueRes": {"C": "AW", "test": "2"},
                            "result": "true",
                        }
                    },
                ]
            }
        }
    ]
}

# Build the pandas frame first, then convert it into a Spark DataFrame.
df = pd.DataFrame(data=d)
sparkDF = spark.createDataFrame(data=df)
1. Let's extract the value of 'A':
# Every step below reads the `result` column of the source frame, so the
# intermediate results are stored under their own names; rebinding `sparkDF`
# after the first step would drop `result` and break the later steps.

# Step 1: explode the value stored under result['A'] into two columns.
# NOTE(review): two aliases imply explode() yields a (key, value) pair here,
# i.e. result['A'] is presumably inferred as a map -- confirm via printSchema().
a_entries_df = sparkDF.select(explode(sparkDF.result.A).alias('col1', 'col2'))

# Step 2: explode result['A']['B'] (a list in the sample data) into one row
# per element, exposed as a column named 'result'.
b_items_df = sparkDF.select(explode(sparkDF.result.A.B).alias('result'))

# Step 3: from each exploded element keep only the value under 'key'.
# `sparkDF` is rebound last so it ends up holding the final extraction.
sparkDF = b_items_df.select('result.key')