#!/usr/bin/env python3

# Solution 6.1: serialize a small document to a JSON string, read it back
# through Spark's JSON reader and print the schema Spark infers for it.
# `spark` is not defined in this snippet; it is presumably an active
# SparkSession created elsewhere -- confirm before running stand-alone.
# tag::sol6_1[]
import json
import pprint

# Build the JSON text in one step from the sample document.
exo6_1_json = json.dumps(
    dict(name="Sample name", keywords=["PySpark", "Python", "Data"])
)

pprint.pprint(exo6_1_json)
# '{"name": "Sample name", "keywords": ["PySpark", "Python", "Data"]}'

# The JSON reader is fed a one-element RDD holding the JSON string.
sol6_1 = spark.read.json(
    spark.sparkContext.parallelize([exo6_1_json])
)

sol6_1.printSchema()
# root
#  |-- keywords: array (nullable = true)
#  |    |-- element: string (containsNull = true)
#  |-- name: string (nullable = true)
# end::sol6_1[]

# Exercise 6.1 statement: the JSON document whose schema is to be worked out.
# The string below is a bare expression statement; it is never bound to a
# name and has no effect when the script runs.
# tag::exo6_1[]

"""{"name": "Sample name",
    "keywords": ["PySpark", "Python", "Data"]}"""

# end::exo6_1[]


# Solution 6.2: same round-trip as 6.1, but the `keywords` array mixes
# strings with a float. The recorded output shows the array elements being
# inferred as strings. `spark` is not defined in this snippet; it is
# presumably an active SparkSession created elsewhere -- confirm.
# tag::sol6_2[]
import json
import pprint

# Build the JSON text in one step from the sample document.
exo6_2_json = json.dumps(
    dict(name="Sample name", keywords=["PySpark", 3.2, "Data"])
)

pprint.pprint(exo6_2_json)
# '{"name": "Sample name", "keywords": ["PySpark", 3.2, "Data"]}'

# The JSON reader is fed a one-element RDD holding the JSON string.
sol6_2 = spark.read.json(
    spark.sparkContext.parallelize([exo6_2_json])
)

sol6_2.printSchema()
# root
#  |-- keywords: array (nullable = true)
#  |    |-- element: string (containsNull = true)
#  |-- name: string (nullable = true)

sol6_2.show()
# +--------------------+-----------+
# |            keywords|       name|
# +--------------------+-----------+
# |[PySpark, 3.2, Data]|Sample name|
# +--------------------+-----------+

# end::sol6_2[]

# Exercise 6.2 statement: a JSON document whose `keywords` array mixes
# strings and a number. The string below is a bare expression statement; it
# is never bound to a name and has no effect when the script runs.
# tag::exo6_2[]

"""{"name": "Sample name",
    "keywords": ["PySpark", 3.2, "Data"]}"""

# end::exo6_2[]


# Solution 6.4: pack two existing columns plus a literal into a struct named
# `info`, then show that `withColumn("info.status", ...)` adds a NEW top-level
# column literally called "info.status" instead of overwriting the struct
# field. `shows` and `F` come from outside this snippet (presumably a
# DataFrame and `pyspark.sql.functions` -- confirm).
# tag::sol6_4[]
struct_ex = shows.select(
    F.struct(
        "status",
        "weight",
        F.lit(True).alias("has_watched"),
    ).alias("info")
)

struct_ex.printSchema()
# root
#  |-- info: struct (nullable = false)
#  |    |-- status: string (nullable = true)
#  |    |-- weight: long (nullable = true)
#  |    |-- has_watched: boolean (nullable = false)

struct_ex.show()
# +-----------------+
# |             info|
# +-----------------+
# |{Ended, 96, true}|
# +-----------------+

struct_ex.select("info.status").show()
# +------+
# |status|
# +------+
# | Ended|
# +------+

struct_ex.withColumn("info.status", F.lit("Wrong")).show()
# +-----------------+-----------+
# |             info|info.status|
# +-----------------+-----------+
# |{Ended, 96, true}|      Wrong|
# +-----------------+-----------+

(
    struct_ex.withColumn("info.status", F.lit("Wrong"))
    .select("info.status")
    .show()
)
# +------+
# |status|
# +------+
# | Ended| <1>
# +------+

# end::sol6_4[]

# Solution 6.5: a two-field schema -- `one` is a long, `two` is an array of
# longs -- assembled field by field.
# tag::sol6_5[]

import pyspark.sql.types as T

sol6_5 = (
    T.StructType()
    .add("one", T.LongType())
    .add("two", T.ArrayType(T.LongType()))
)

# end::sol6_5[]

# Solution 6.6: for each show, subtract the earliest episode air date from the
# latest one and expose the difference as `tenure`. `three_shows` and `F` come
# from outside this snippet.
# tag::sol6_6[]

sol6_6 = three_shows.select(
    "name",
    (
        # Latest minus earliest air date; each array extreme is cast to a
        # date before the subtraction.
        F.array_max("_embedded.episodes.airdate").cast("date")
        - F.array_min("_embedded.episodes.airdate").cast("date")
    ).alias("tenure"),
)

sol6_6.show(truncate=50)

# end::sol6_6[]

# Solution 6.7: pull the `name` and `airdate` fields out of the nested
# `_embedded.episodes` column; per the recorded output each comes back as an
# array column. `shows` and `F` come from outside this snippet.
# tag::sol6_7[]
sol6_7 = shows.select(
    F.col("_embedded.episodes.name"),
    F.col("_embedded.episodes.airdate"),
)

sol6_7.show()
# +--------------------+--------------------+
# |                name|             airdate|
# +--------------------+--------------------+
# |[Minimum Viable P...|[2014-04-06, 2014...|
# +--------------------+--------------------+
# end::sol6_7[]

# Exercise 6.8 input: a three-row data frame with columns `one` and `square`.
# NOTE(review): the first row pairs 1 with 2 although the column is named
# `square`; the recorded solution output uses the same values -- confirm
# against the exercise statement before changing it.
# tag::exo6_8[]
exo6_8 = spark.createDataFrame(
    data=[[1, 2], [2, 4], [3, 9]],
    schema=["one", "square"],
)
# end::exo6_8[]

# Solution 6.8: collapse the whole `exo6_8` data frame into a single row whose
# only column is a map built from the `one` values (keys) and the `square`
# values (values). `F` comes from outside this snippet.
# tag::sol6_8[]

# Aggregate over the entire frame (no grouping keys), gathering each column
# into a list, then zip the two lists into a map.
sol6_8 = exo6_8.agg(
    F.collect_list("one").alias("one"),
    F.collect_list("square").alias("square"),
).select(F.map_from_arrays("one", "square"))

# sol6_8.show(truncate=50)
# +----------------------------+
# |map_from_arrays(one, square)|
# +----------------------------+
# |    {1 -> 2, 2 -> 4, 3 -> 9}|
# +----------------------------+

# end::sol6_8[]
