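Note: a single select clause may contain only one generator function (explode, posexplode, explode_outer, posexplode_outer). Using two generators in the same select raises:
Error: pyspark.sql.utils.AnalysisException: Only one generator allowed per select clause but found 2: explode(_2), explode(_3)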
Dataset 1:
# Sample rows: an integer key paired with an array of integers.
data_list = [
    (1, [5, 6, 7]),
    (2, [6, 7, 8]),
]
# Distribute the rows as an RDD and build a DataFrame from it. No column names
# are supplied, so the columns appear as _1 and _2 in the output below.
df = spark.createDataFrame(sc.parallelize(data_list))
df.show()
+---+---------+
| _1|       _2|
+---+---------+
|  1|[5, 6, 7]|
|  2|[6, 7, 8]|
+---+---------+
Dataset 2:
# Sample rows for the *_outer examples: the last row (key 3) carries an empty
# array on purpose.
data_list_2 = [
    (1, [2, 3, 4]),
    (2, [3, 4, 5]),
    (3, []),
]
# Distribute the rows as an RDD and build a DataFrame from it. No column names
# are supplied, so the columns appear as _1 and _2 in the output below.
df_2 = spark.createDataFrame(sc.parallelize(data_list_2))
df_2.show()
+---+---------+
| _1|       _2|
+---+---------+
|  1|[2, 3, 4]|
|  2|[3, 4, 5]|
|  3|       []|
+---+---------+
Syntax:
from pyspark.sql.functions import explode
explode(array_column)
from pyspark.sql.functions import explode
# Keep the key column "_1" and expand the array column "_2": each array element
# becomes its own output row. The generated column gets the default name "col",
# as the output below shows.
df_update = df.select("_1", explode("_2"))
df_update.show()
+---+---+
| _1|col|
+---+---+
|  1|  5|
|  1|  6|
|  1|  7|
|  2|  6|
|  2|  7|
|  2|  8|
+---+---+
Syntax:
from pyspark.sql.functions import posexplode
posexplode(array_column)
from pyspark.sql.functions import posexplode
# Expand the array column "_2" into one row per element, as explode does, and
# additionally emit each element's zero-based position. The two generated
# columns are named "pos" and "col" by default, as the output below shows.
df_update = df.select("_1", posexplode("_2"))
df_update.show()
+---+---+---+
| _1|pos|col|
+---+---+---+
|  1|  0|  5|
|  1|  1|  6|
|  1|  2|  7|
|  2|  0|  6|
|  2|  1|  7|
|  2|  2|  8|
+---+---+---+
Syntax:
from pyspark.sql.functions import explode_outer
explode_outer(array_column)
from pyspark.sql.functions import explode_outer
# Expand the array column "_2" of df_2 into one row per element. The row whose
# array is empty (key 3) is still present in the result, with null in the
# generated "col" column, as the output below shows.
df_update = df_2.select("_1", explode_outer("_2"))
df_update.show()
+---+----+
| _1| col|
+---+----+
|  1|   2|
|  1|   3|
|  1|   4|
|  2|   3|
|  2|   4|
|  2|   5|
|  3|null| # Null was returned for the empty array
+---+----+
Syntax:
from pyspark.sql.functions import posexplode_outer
posexplode_outer(array_column)
from pyspark.sql.functions import posexplode_outer
# Expand the array column "_2" of df_2 into one row per element together with
# the element's zero-based position. The row whose array is empty (key 3) is
# still present in the result, with null in both generated columns ("pos" and
# "col"), as the output below shows.
df_update = df_2.select("_1", posexplode_outer("_2"))
df_update.show()
+---+----+----+
| _1| pos| col|
+---+----+----+
|  1|   0|   2|
|  1|   1|   3|
|  1|   2|   4|
|  2|   0|   3|
|  2|   1|   4|
|  2|   2|   5|
|  3|null|null| # Null was returned for the empty array
+---+----+----+