39

I want to serialize a HashMap with structs as keys:

use serde::{Deserialize, Serialize}; // 1.0.68
use std::collections::HashMap;

// Minimal reproduction: serializing a map whose keys are a struct fails at runtime.
fn main() {
    // Key type; derives `Eq` and `Hash` so it can be used as a `HashMap` key.
    #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash)]
    struct Foo {
        x: u64,
    }

    // Wrapper whose only field is a map keyed by `Foo`.
    #[derive(Serialize, Deserialize, Debug)]
    struct Bar {
        x: HashMap<Foo, f64>,
    }

    let mut p = Bar { x: HashMap::new() };
    p.x.insert(Foo { x: 0 }, 0.0);
    // This `unwrap` is where the reported "key must be a string" error surfaces.
    let serialized = serde_json::to_string(&p).unwrap();
}

This code compiles, but when I run it I get an error:

Error("key must be a string", line: 0, column: 0)'

I changed the code:

// Variant of `Bar` whose map is keyed by a plain `u64` rather than a struct.
#[derive(Serialize, Deserialize, Debug)]
struct Bar {
    x: HashMap<u64, f64>,
}

let mut p = Bar { x: HashMap::new() };
p.x.insert(0, 0.0);
// Same serialization call as in the struct-keyed version, now with an integer key.
let serialized = serde_json::to_string(&p).unwrap();

The key in the HashMap is now a u64 instead of a string. Why does the first code give an error?

Peter Hall
  • 53,120
  • 14
  • 139
  • 204
YjyJeff
  • 833
  • 1
  • 6
  • 14
  • 5
    @YjyJeff are you aware that JSON explicitly requires string keys ([wikipedia](https://en.wikipedia.org/wiki/JSON#Data_types,_syntax_and_example))? (Serde probably knows how to turn a `u64` into a string but not your struct...) – MB-F Jul 11 '18 at 06:32
  • 2
    @kazemakase I realized it. Therefore, I derive the Serialize trait for the Foo struct. – YjyJeff Jul 11 '18 at 06:39
  • @Boiethios Thanks! I fixed my example code. – YjyJeff Jul 11 '18 at 06:40
  • @dtolnay Actually, Foo is an enum in my code. The enum contains a u64 and a string. I expect the HashMap to store u64 or string as keys and f64 as values. For example: {"key": 0.5, "9": 0.6} – YjyJeff Jul 11 '18 at 07:18

4 Answers

19

You can use serde_as from the serde_with crate to encode the HashMap as a sequence of key-value pairs:

use serde_with::serde_as; // 1.5.1

// `Bar` with its map field handled through `serde_with`'s `serde_as` attribute.
#[serde_as]
#[derive(Serialize, Deserialize, Debug)]
struct Bar {
    // Encode the map as a sequence of key-value pairs instead of a JSON object.
    #[serde_as(as = "Vec<(_, _)>")]
    x: HashMap<Foo, f64>,
}

Which will serialize to (and deserialize from) this:

{
  "x":[
    [{"x": 0}, 0.0],
    [{"x": 1}, 0.0],
    [{"x": 2}, 0.0]
  ]
}

There is likely some overhead from converting the HashMap to Vec, but this can be very convenient.

Peter Hall
  • 53,120
  • 14
  • 139
  • 204
15

According to the JSON specification, JSON keys must be strings. serde_json uses fmt::Display here, for some non-string keys, to allow serialization of a wider range of HashMaps. That's why HashMap<u64, f64> works as well as HashMap<String, f64> would. However, not all types are covered (Foo's case here).

That's why we need to provide our own Serialize implementation:

/// Renders a `Foo` as the decimal text of its `x` field.
impl Display for Foo {
    fn fmt(&self, out: &mut Formatter) -> std::fmt::Result {
        // A fresh `format_args!` is used (rather than forwarding to the inner
        // value's `fmt`) so outer width/fill flags are not applied.
        out.write_fmt(format_args!("{}", self.x))
    }
}

/// Serializes `Bar` as a map whose keys are the `Display` text of each `Foo`.
impl Serialize for Bar {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut state = serializer.serialize_map(Some(self.x.len()))?;
        // Stringify every key so the serializer only ever sees string keys;
        // stop at the first entry that fails to serialize.
        self.x
            .iter()
            .try_for_each(|(key, value)| state.serialize_entry(&key.to_string(), &value))?;
        state.end()
    }
}

(playground)

Community
  • 1
  • 1
dotPoozer
  • 322
  • 4
  • 8
  • 7
    This doesn't really answer the question. He derived the serde implementations for his types, so that should be sufficient. – Steven Roose May 29 '19 at 10:43
  • Limitation lies on JSON serializer side. It requires keys to be `String` (or to be more specific `Display`). On the other side, what behaviour would it incorporate? Keys looking like stringified structs aren't best looking and can be misleading in this scenario. – dotPoozer Sep 15 '20 at 22:37
  • This one is definitely useful, I was having trouble serializing a BTreeMap and making some minor changes it works like a charm! – andresvsm Nov 09 '22 at 06:56
5

I've found a bulletproof solution:

  • Extra dependencies not required
  • Compatible with HashMap, BTreeMap and other iterable types
  • Works with flexbuffers

The following code converts a field (map) to the intermediate Vec representation:

pub mod vectorize {
    use serde::{Deserialize, Deserializer, Serialize, Serializer};
    use std::iter::FromIterator;

    pub fn serialize<'a, T, K, V, S>(target: T, ser: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
        T: IntoIterator<Item = (&'a K, &'a V)>,
        K: Serialize + 'a,
        V: Serialize + 'a,
    {
        let container: Vec<_> = target.into_iter().collect();
        serde::Serialize::serialize(&container, ser)
    }

    pub fn deserialize<'de, T, K, V, D>(des: D) -> Result<T, D::Error>
    where
        D: Deserializer<'de>,
        T: FromIterator<(K, V)>,
        K: Deserialize<'de>,
        V: Deserialize<'de>,
    {
        let container: Vec<_> = serde::Deserialize::deserialize(des)?;
        Ok(T::from_iter(container.into_iter()))
    }
}

To use it just add the module's name as an attribute:

// Example type whose map field is (de)serialized through the `vectorize` module.
#[derive(Debug, Serialize, Deserialize)]
struct MyComplexType {
    // Names the module that provides `serialize`/`deserialize` for this field.
    #[serde(with = "vectorize")]
    map: HashMap<MyKey, String>,
}

The remaining part, if you want to check it locally:

use anyhow::Error;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

// Multi-field struct used as the map key in the example below.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct MyKey {
    one: String,
    two: u16,
    more: Vec<u8>,
}

// Example type whose map field is (de)serialized through the `vectorize` module.
#[derive(Debug, Serialize, Deserialize)]
struct MyComplexType {
    // Names the module that provides `serialize`/`deserialize` for this field.
    #[serde(with = "vectorize")]
    map: HashMap<MyKey, String>,
}

/// Round-trips a `MyComplexType` through JSON and checks that the single
/// struct-keyed entry is still retrievable afterwards.
fn main() -> Result<(), Error> {
    let key = MyKey {
        one: String::from("1"),
        two: 2,
        more: vec![1, 2, 3],
    };

    let mut entries = HashMap::new();
    entries.insert(key.clone(), String::from("value"));
    let original = MyComplexType { map: entries };

    let json = serde_json::to_string(&original)?;
    println!("JSON: {}", json);

    let restored: MyComplexType = serde_json::from_str(&json)?;
    assert_eq!(restored.map.get(&key), Some(&String::from("value")));
    Ok(())
}

And on the Rust playground: https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=bf1773b6e501a0ea255ccdf8ce37e74d

DenisKolodin
  • 13,501
  • 3
  • 62
  • 65
  • And finally I've released the solution as the crate, because needed it many times: https://crates.io/crates/vectorize – DenisKolodin Apr 04 '21 at 10:02
0

While all the provided answers will fulfill the goal of serializing your HashMap to JSON, they are ad hoc or hard to maintain.

One correct way to allow a specific data structure to be serialized with serde as keys in a map, is the same way serde handles integer keys in HashMaps (which works): They serialize the value to String. This has a few advantages; namely

  1. Intermediate data-structure omitted,
  2. no need to clone the entire HashMap,
  3. easier maintained by applying OOP concepts, and
  4. serialization usable in more complex structures such as MultiMap.

EDIT: The crate serde_json_any_key is the most time-efficient way to implement this. Thanks to @HighCommander4 for pointing out the crate.

Alternatively, a manual implementation can be used:

This can be done by manually implementing Serialize and Deserialize for your data-type.

I use composite ids for maps.

/// Innermost id: wraps a single `u64` value.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Proj {
    pub value: u64,
}
/// Composite id: a `Proj` together with a `u32` value.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Doc {
    pub proj: Proj,
    pub value: u32,
}
/// Composite id: a `Doc` together with a `u32` value.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct Sec {
    pub doc: Doc,
    pub value: u32,
}

So now manually implementing serde serialization for them is kind of a hassle, so instead we delegate the implementation to the FromStr and From<Self> for String (Into<String> blanket) traits.

impl From<Doc> for String {
    fn from(val: Doc) -> Self {
        format!("{}{:08X}", val.proj, val.value)
    }
}
/// Parses a `Doc` from its string form by delegating to `parse_doc`.
impl FromStr for Doc {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Keep only the parsed value (the remaining input is discarded) and
        // flatten the parser error into its display text.
        parse_doc(s)
            .map(|(_rest, doc)| doc)
            .map_err(|err| err.to_string())
    }
}

In order to parse the Doc we make use of nom. The parse functionality below is explained in their examples.

/// Returns `true` when `c` is one of `0-9`, `a-f` or `A-F`.
fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}

/// Parses `input` as a hexadecimal `u32`.
fn from_hex8(input: &str) -> Result<u32, std::num::ParseIntError> {
    const HEX_RADIX: u32 = 16;
    u32::from_str_radix(input, HEX_RADIX)
}

/// Consumes exactly eight hex digits from the front of `input` and converts
/// them to a `u32` with `from_hex8`.
fn parse_hex8(input: &str) -> IResult<&str, u32> {
    let eight_hex_digits = take_while_m_n(8, 8, is_hex_digit);
    map_res(eight_hex_digits, from_hex8)(input)
}

/// Parses a `Doc`: first its `Proj` via `parse_proj`, then its value via
/// `parse_hex8`, returning whatever input is left over.
fn parse_doc(input: &str) -> IResult<&str, Doc> {
    let (after_proj, proj) = parse_proj(input)?;
    let (rest, value) = parse_hex8(after_proj)?;
    Ok((rest, Doc { proj, value }))
}

Now we need to hook up the conversion into String and str::parse(&str) to serde; we can do this using a simple macro.

/// Implements serde `Serialize` and `Deserialize` for `$type` by going
/// through its string form.
///
/// Requirements on `$type` (taken from how the expansion uses it):
/// - `Clone`, and convertible into `String` (`self.clone().into()`),
/// - parseable from `&str` (`str::parse`, i.e. `FromStr`).
///
/// The expansion also defines a helper visitor struct named `<$type>Visitor`
/// (built with `paste!`), and expects `Serialize`, `Deserialize`, `Visitor`
/// and `paste!` to be in scope at the call site.
macro_rules! serde_str {
    ($type:ty) => {
        impl Serialize for $type {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                // Always emitted as a plain string, never as a struct.
                let s: String = self.clone().into();
                serializer.serialize_str(&s)
            }
        }

        impl<'de> Deserialize<'de> for $type {
            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
            where
                D: serde::Deserializer<'de>,
            {
                paste! {deserializer.deserialize_string( [<$type Visitor>] {})}
            }
        }

        paste! {struct [<$type Visitor>] {}}

        impl<'de> Visitor<'de> for paste! {[<$type Visitor>]} {
            type Value = $type;

            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
                // Completes serde's "invalid type: ..., expected <this text>"
                // message, so it must describe the expected input.
                formatter.write_str(concat!("a string-encoded ", stringify!($type)))
            }

            // Only `visit_str` is implemented; owned strings reach it through
            // the default `visit_string`.
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: serde::de::Error,
            {
                v.parse()
                    .map_err(|_| serde::de::Error::custom("invalid format"))
            }
        }
    };
}

Here we are using paste to interpolate the names. Beware that now the struct will always serialize as defined above. Never as a struct, always as a string.

It is important to implement fn visit_str instead of fn visit_string because visit_string defers to visit_str.

Finally, we have to call the macro for our custom structs

// Generate the string-based serde impls for each of the id types.
serde_str!(Sec);
serde_str!(Doc);
serde_str!(Proj);

Now the specified types can be serialized to and from string with serde.

Prophet Lamb
  • 530
  • 3
  • 17
  • This solution has the notable downside that you have to come up with and implement your own way of encoding the key into a string. A more automatable (but admittedly less space-efficient) approach would be to encode the key as an escaped JSON string, the way this crate does: https://github.com/tzcnt/serde_json_any_key/ – HighCommander4 May 12 '23 at 07:19
  • @HighCommander4 thanks for pointing out the crate. This is arguably the most time-efficient way. :) – Prophet Lamb May 13 '23 at 09:44