I am trying to convert an OrderedDict into a PySpark MapType column.
from pyspark.sql.functions import create_map, lit
from collections import OrderedDict

# Sample ordered dictionary
ordered_dict = OrderedDict([('a', 1), ('b', 2), ('c', 3)])

# Attempt: pass keys and values as two separate lists
create_map([lit(k) for k in ordered_dict.keys()], [lit(v) for v in ordered_dict.values()])
This gives the following error:
TypeError: Invalid argument, not a string or column: [Column<'a'>, Column<'b'>, Column<'c'>] of type <class 'list'>. For column literals, use 'lit', 'array', 'struct' or 'create_map' function.
Spark version: 3.2. Any suggestions to resolve this would be highly appreciated. Thanks!
F.create_map expects a single flat sequence of alternating key and value columns, not two separate lists of keys and values:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from collections import OrderedDict
from itertools import chain

spark = SparkSession.builder.getOrCreate()

ordered_dict = OrderedDict([('a', 1), ('b', 2), ('c', 3)])

# Flatten the (key, value) pairs into [k1, v1, k2, v2, ...]
kv_flat = list(chain.from_iterable(ordered_dict.items()))

map_col = F.create_map([F.lit(e) for e in kv_flat]).alias('map_col')

df = spark.range(1).select(map_col)
df.printSchema()
df.show(1, False)
# root
# |-- map_col: map (nullable = false)
# | |-- key: string
# | |-- value: integer (valueContainsNull = false)
# +------------------------+
# |map_col |
# +------------------------+
# |{a -> 1, b -> 2, c -> 3}|
# +------------------------+
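A common next step is using the map as a lookup table against an existing column. Here is a minimal sketch; df2 and its key column are hypothetical names for illustration, not from the question:

# Hypothetical lookup: translate a 'key' column through the map literal
mapping = F.create_map([F.lit(e) for e in kv_flat])
df2 = spark.createDataFrame([('a',), ('b',), ('x',)], ['key'])
df2.select('key', mapping[F.col('key')].alias('value')).show()
# 'a' maps to 1 and 'b' to 2; 'x' has no entry, so its value is NULL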
Does the below help?
from pyspark.sql.functions import create_map, lit
from itertools import chain

simple_dict = {"a": 1, "b": 2, "c": 3}

# chain(*simple_dict.items()) flattens the pairs into a, 1, b, 2, c, 3
mapping_expr = create_map([lit(x) for x in chain(*simple_dict.items())])
print(type(mapping_expr))  # <class 'pyspark.sql.column.Column'>
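To confirm the result is a proper MapType, you can select the expression into a throwaway DataFrame and inspect its schema; a minimal sketch, assuming an active SparkSession named spark:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Select the map expression into a one-row DataFrame to inspect its type
df = spark.range(1).select(mapping_expr.alias('m'))
df.printSchema()
# root
#  |-- m: map (nullable = false)
#  |    |-- key: string
#  |    |-- value: integer (valueContainsNull = false)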