Create MapType using ordered dictionary

I am trying to convert an OrderedDict into a PySpark MapType column.

from pyspark.sql.functions import create_map, lit
from pyspark.sql import SparkSession
from collections import OrderedDict

# Sample ordered dictionary
ordered_dict = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
create_map([lit(k) for k in ordered_dict.keys()], [lit(v) for v in ordered_dict.values()])

gives an error:

TypeError: Invalid argument, not a string or column: [Column<'a'>, Column<'b'>, Column<'c'>] of type <class 'list'>. For column literals, use 'lit', 'array', 'struct' or 'create_map' function.

Spark version: 3.2.
Any suggestions to resolve this will be highly appreciated. Thanks.

F.create_map expects a flat sequence of alternating key and value columns (key1, value1, key2, value2, ...), not two separate lists:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from collections import OrderedDict
from itertools import chain

spark = SparkSession.builder.getOrCreate()

ordered_dict = OrderedDict([('a', 1), ('b', 2), ('c', 3)])

# Flatten the (key, value) pairs into [k1, v1, k2, v2, ...]
kv_flat = list(chain(*ordered_dict.items()))

# create_map also accepts a single list of alternating key/value columns
map_col = F.create_map([F.lit(e) for e in kv_flat]).alias('map_col')
df = spark.range(1).select(map_col)
df.printSchema()
df.show(1, False)

# root
#  |-- map_col: map (nullable = false)
#  |    |-- key: string
#  |    |-- value: integer (valueContainsNull = false)

# +------------------------+
# |map_col                 |
# +------------------------+
# |{a -> 1, b -> 2, c -> 3}|
# +------------------------+
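A common use for such a map literal is substituting values in an existing column by indexing the map with that column; keys absent from the map come back as null. A minimal sketch, assuming a hypothetical DataFrame with a string column letter:

# Hypothetical lookup: index the map literal with another column
lookup = F.create_map([F.lit(e) for e in kv_flat])
df2 = spark.createDataFrame([('a',), ('b',), ('x',)], ['letter'])
df2.select('letter', lookup[F.col('letter')].alias('value')).show()

# +------+-----+
# |letter|value|
# +------+-----+
# |     a|    1|
# |     b|    2|
# |     x| null|
# +------+-----+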

Does the below help? It flattens the dict items with itertools.chain in a single step:

from pyspark.sql.functions import create_map, lit
from itertools import chain

simple_dict = {"a": 1, "b": 2, "c": 3}
# chain(*simple_dict.items()) yields a, 1, b, 2, c, 3
mapping_expr = create_map([lit(x) for x in chain(*simple_dict.items())])
print(type(mapping_expr))  # <class 'pyspark.sql.column.Column'>
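The expression is only a Column until it is evaluated against a DataFrame. A short check, assuming a running SparkSession named spark:

# Attach the expression to a trivial DataFrame to confirm the MapType
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.range(1).select(mapping_expr.alias('m')).printSchema()

# root
#  |-- m: map (nullable = false)
#  |    |-- key: string
#  |    |-- value: integer (valueContainsNull = false)

Note that on Python 3.7+ a plain dict preserves insertion order, so the OrderedDict from the question is not strictly required here.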
