# Load the demo parquet file (engine / dt_field / ts_field columns) into a
# DataFrame and display its rows. Assumes an active SparkSession `spark`.
df = spark.read.format("parquet").load("file:///path_to_files/date_format_example_file.parquet")
df.show()
+---------+----------+-------------------+
| engine| dt_field| ts_field|
+---------+----------+-------------------+
| spark|2022-11-03|2022-06-06 06:06:06|
|mapreduce|2022-10-25|2022-06-21 13:04:43|
+---------+----------+-------------------+
# Show the inferred schema: engine (string), dt_field (date), ts_field (timestamp).
df.printSchema()
root
|-- engine: string (nullable = true)
|-- dt_field: date (nullable = true)
|-- ts_field: timestamp (nullable = true)
from pyspark.sql.functions import current_timestamp

# current_timestamp() is evaluated once per query, so every row in the
# output shows the same wall-clock value.
df_update = df.select(df["ts_field"], current_timestamp())
df_update.show(truncate=False)
+-------------------+-----------------------+
|ts_field |current_timestamp() |
+-------------------+-----------------------+
|2022-06-06 06:06:06|2022-06-14 19:12:20.568|
|2022-06-21 13:04:43|2022-06-14 19:12:20.568|
+-------------------+-----------------------+
from pyspark.sql.functions import current_date

# current_date() yields the query-time date; both rows share the same value.
df_update = df.select(df["ts_field"], current_date())
df_update.show(truncate=False)
+-------------------+--------------+
|ts_field |current_date()|
+-------------------+--------------+
|2022-06-06 06:06:06|2022-06-14 |
|2022-06-21 13:04:43|2022-06-14 |
+-------------------+--------------+
from pyspark.sql.functions import current_date, year

# Extract the year from a stored date column and from today's date.
# NOTE: both derived columns are deliberately aliased "year", so the result
# carries duplicate column names — selecting "year" downstream would be ambiguous.
df_update = df.select(
    df["dt_field"],
    year(df["dt_field"]).alias("year"),
    current_date(),
    year(current_date()).alias("year"),
)
df_update.show(truncate=False)
+----------+----+--------------+----+
|dt_field |year|current_date()|year|
+----------+----+--------------+----+
|2022-11-03|2022|2022-06-14 |2022|
|2022-10-25|2022|2022-06-14 |2022|
+----------+----+--------------+----+
from pyspark.sql.functions import current_date, month

# Extract the month number from a stored date column and from today's date.
# Both derived columns share the alias "month" (duplicate names in the result).
df_update = df.select(
    df["dt_field"],
    month(df["dt_field"]).alias("month"),
    current_date(),
    month(current_date()).alias("month"),
)
df_update.show(truncate=False)
+----------+-----+--------------+-----+
|dt_field |month|current_date()|month|
+----------+-----+--------------+-----+
|2022-11-03|11 |2022-06-14 |6 |
|2022-10-25|10 |2022-06-14 |6 |
+----------+-----+--------------+-----+
from pyspark.sql.functions import current_date, dayofmonth

# Extract the day-of-month (1-31) from a stored date and from today's date.
# Both derived columns share the alias "dayofmonth".
df_update = df.select(
    df["dt_field"],
    dayofmonth(df["dt_field"]).alias("dayofmonth"),
    current_date(),
    dayofmonth(current_date()).alias("dayofmonth"),
)
df_update.show(truncate=False)
+----------+----------+--------------+----------+
|dt_field |dayofmonth|current_date()|dayofmonth|
+----------+----------+--------------+----------+
|2022-11-03|3 |2022-06-14 |14 |
|2022-10-25|25 |2022-06-14 |14 |
+----------+----------+--------------+----------+
from pyspark.sql.functions import current_date, dayofyear

# Extract the ordinal day of the year from a timestamp column (the time part
# is ignored) and from today's date. Both columns share the alias "dayofyear".
df_update = df.select(
    df["ts_field"],
    dayofyear(df["ts_field"]).alias("dayofyear"),
    current_date(),
    dayofyear(current_date()).alias("dayofyear"),
)
df_update.show(truncate=False)
+-------------------+---------+--------------+---------+
|ts_field |dayofyear|current_date()|dayofyear|
+-------------------+---------+--------------+---------+
|2022-06-06 06:06:06|157 |2022-06-14 |165 |
|2022-06-21 13:04:43|172 |2022-06-14 |165 |
+-------------------+---------+--------------+---------+
from pyspark.sql.functions import current_date, dayofweek

# Extract the day of the week (Spark convention: 1 = Sunday ... 7 = Saturday,
# e.g. Monday 2022-06-06 -> 2) from a timestamp and from today's date.
df_update = df.select(
    df["ts_field"],
    dayofweek(df["ts_field"]).alias("dayofweek"),
    current_date(),
    dayofweek(current_date()).alias("dayofweek"),
)
df_update.show(truncate=False)
+-------------------+---------+--------------+---------+
|ts_field |dayofweek|current_date()|dayofweek|
+-------------------+---------+--------------+---------+
|2022-06-06 06:06:06|2 |2022-06-14 |3 |
|2022-06-21 13:04:43|3 |2022-06-14 |3 |
+-------------------+---------+--------------+---------+
from pyspark.sql.functions import current_date, quarter

# Extract the calendar quarter (1-4) from a timestamp and from today's date.
# Both derived columns share the alias "quarter".
df_update = df.select(
    df["ts_field"],
    quarter(df["ts_field"]).alias("quarter"),
    current_date(),
    quarter(current_date()).alias("quarter"),
)
df_update.show(truncate=False)
+-------------------+-------+--------------+-------+
|ts_field |quarter|current_date()|quarter|
+-------------------+-------+--------------+-------+
|2022-06-06 06:06:06|2 |2022-06-14 |2 |
|2022-06-21 13:04:43|2 |2022-06-14 |2 |
+-------------------+-------+--------------+-------+
from pyspark.sql.functions import current_timestamp, hour

# Extract the hour (0-23) from a stored timestamp and from the current
# timestamp. Both derived columns share the alias "hour".
df_update = df.select(
    df["ts_field"],
    hour(df["ts_field"]).alias("hour"),
    current_timestamp(),
    hour(current_timestamp()).alias("hour"),
)
df_update.show(truncate=False)
+-------------------+----+-----------------------+----+
|ts_field |hour|current_timestamp() |hour|
+-------------------+----+-----------------------+----+
|2022-06-06 06:06:06|6 |2022-06-14 20:35:46.406|20 |
|2022-06-21 13:04:43|13 |2022-06-14 20:35:46.406|20 |
+-------------------+----+-----------------------+----+
from pyspark.sql.functions import current_timestamp, minute

# Extract the minute (0-59) from a stored timestamp and from the current
# timestamp. Both derived columns share the alias "minute".
df_update = df.select(
    df["ts_field"],
    minute(df["ts_field"]).alias("minute"),
    current_timestamp(),
    minute(current_timestamp()).alias("minute"),
)
df_update.show(truncate=False)
+-------------------+------+-----------------------+------+
|ts_field |minute|current_timestamp() |minute|
+-------------------+------+-----------------------+------+
|2022-06-06 06:06:06|6 |2022-06-14 20:37:40.813|37 |
|2022-06-21 13:04:43|4 |2022-06-14 20:37:40.813|37 |
+-------------------+------+-----------------------+------+
from pyspark.sql.functions import current_timestamp, second

# Extract the second (0-59) from a stored timestamp and from the current
# timestamp. Both derived columns share the alias "second".
df_update = df.select(
    df["ts_field"],
    second(df["ts_field"]).alias("second"),
    current_timestamp(),
    second(current_timestamp()).alias("second"),
)
df_update.show(truncate=False)
+-------------------+------+-----------------------+------+
|ts_field |second|current_timestamp() |second|
+-------------------+------+-----------------------+------+
|2022-06-06 06:06:06|6 |2022-06-14 20:38:00.812|0 |
|2022-06-21 13:04:43|43 |2022-06-14 20:38:00.812|0 |
+-------------------+------+-----------------------+------+