PySpark: Dataframe To File (Part 1)
This tutorial explains how to write a Spark dataframe into various types of comma-separated value (CSV) files or other delimited files.
- A dataframe's "write" attribute returns a DataFrameWriter, which can be used to export data from a Spark dataframe to CSV file(s).
- The default delimiter for the csv function in Spark is a comma (,).
- By default, DataFrameWriter will create as many files as there are partitions in the dataframe.
- The coalesce() function can be used to reduce the number of partitions, and thereby the number of files created by DataFrameWriter.
- Both the option() and mode() functions can be used to alter the behavior of a write operation, but in different ways.
- Visit the write modes page to understand how the mode function can alter write behaviour when the data/table already exists.
- You can also visit the dataframe options page to understand how the option/options functions can alter other write behaviour.
- The following topics will be covered on this page:
➠ Write CSV file(without header):
By default, Spark creates comma-separated values file(s) without a header when a write operation is invoked on a dataframe.
- Example 1: This is a simple write example where data will be written to the directory "csv_without_header".
- Example 2: Overwrite the existing data (directory) with the content of the dataframe if it exists; otherwise create a new directory.
- Example 3: Write into a single file by reducing the number of partitions to 1 using the coalesce() function.
➠ Write CSV file(with header):
Spark provides a way to write header columns as the 1st row of the file(s) using either the option() or options() function. The options function is used in the example below.
➠ Write Delimited file:
Although CSV files are also delimited files, these examples are mentioned separately here to show how to write delimited files with a customized separator, i.e. a delimiter other than a comma (,).
- Tab delimited file: Many people also choose the tab character as the delimiter for delimited files.
- Pipe delimited file: Pipe (|) is another common delimiter used for delimited files.
- Control(Ctrl) A delimited file: Spark can also write control characters such as Ctrl-A (\u0001) as the delimiter.
cat -v /path_to_directory/ctrl_a_delimited_file.txt
- Multicharacter delimited file: Spark now supports multi-character delimiters for reading and writing files.
- Delimited file with new line characters: Writing data that contains newline characters is straightforward, as the CSV writer quotes such fields automatically.
|db_id| db_name| db_type|
| 12| Teradata| RDBMS|
| 15| Vertica| RDBMS|
| 19| MongoDB| NOSQL|
➠ Write CSV to HDFS:
Spark can also write data to HDFS. There is no syntax difference between writing to a local/server filesystem and HDFS; the only difference is the path.
➠ Write File with Specific Name:
Writing directly to a file with a specific name is not supported in Spark, but there are workarounds.