-1

What's the appropriate way to construct a HashMap from a Polars DataFrame in Rust? Looping through each row works but is too slow for my use case. Using the apply/map approach from the docs example, I was hoping to take advantage of the parallelization that Polars offers, but there are now ownership issues with multiple threads writing to the HashMap.

// Attempt to populate a HashMap as a side effect of a Polars struct `apply`.
// This is the snippet that produces the compiler errors quoted below.
// The map stores borrowed `&str` keys and values rather than owned `String`s.
let mut h: HashMap<&str, &str> = HashMap::new();       
let out = df
            .lazy()
            .select([
                // Combine the four columns into one struct expression so the closure
                // receives them together as a single series `s`.
                as_struct(&[col("a"), col("b"), col("c"), col("d")])
                    .apply(
                        |s| {
                            let ca = s.struct_()?;
                            // NOTE(review): presumably the fields follow the `as_struct` order,
                            // making index 0 column "a" and index 2 column "c" (not "b", despite
                            // the `s_b` name) — confirm which column is meant to be the value.
                            let s_a = &ca.fields()[0];
                            let s_b = &ca.fields()[2];

                            let ca_a = s_a.utf8()?;
                            let ca_b = s_b.utf8()?;
                            
                            // iterate both `ChunkedArrays`
                            let out: Utf8Chunked = ca_a
                                .into_iter()
                                .zip(ca_b)
                                .map(|(opt_a, opt_b)| {
                                    // All three quoted errors point at this insert: it mutates
                                    // the captured `h` and stores `&str`s that borrow from `s`.
                                    // `unwrap()` panics if either value is `None`.
                                    h.insert(opt_a.unwrap(), opt_b.unwrap());
                                    // Placeholder result: every row of the output is "".
                                    Some("")
                                })
                            .collect();
                        Ok(Some(out.into_series()))
                        
                    },
                    GetOutput::from_type(DataType::Utf8),
                ).alias("e"),
            ]).collect();

error[E0597]: `s` does not live long enough
   --> src/lib.rs:76:38
    |
68  | ...et mut h: HashMap<&str, &str> = HashMap::new();
    |       ----- lifetime `'1` appears in the type of `h`
...
75  | ...               |s| {
    |                    - binding `s` declared here
76  | ...                   let ca = s.struct_()?;
    |                                ^^^^^^^^^^^ borrowed value does not live long enough
...
91  | ...                           h.insert(opt_a.unwrap(), opt_b.unwrap(...
    |                               ---------------------------------------- argument requires that `s` is borrowed for `'1`
...
104 | ...           },
    |               - `s` dropped here while still borrowed

error[E0596]: cannot borrow `h` as mutable, as it is a captured variable in a `Fn` closure
  --> src/lib.rs:90:38
   |
90 | ...           .map(|(opt_a, opt_b)| {
   |                    ^^^^^^^^^^^^^^^^ cannot borrow as mutable
91 | ...               h.insert(opt_a.unwrap(), opt_b.unwrap());
   |                   - mutable borrow occurs due to use of `h` in closure

error[E0373]: closure may outlive the current function, but it borrows `h`, which is owned by the current function
   --> src/lib.rs:75:25
    |
75  | ...   |s| {
    |       ^^^ may outlive borrowed value `h`
...
91  | ...               h.insert(opt_a.unwrap(), opt_b.unwrap...
    |                   - `h` is borrowed here
kliao
  • 549
  • 1
  • 6
  • 14
  • 3
    You might want to revisit [the difference between `String` and `str`](https://stackoverflow.com/questions/24158114/what-are-the-differences-between-rusts-string-and-str/63874387#63874387). Another relevant question is [this one on returning a local `String` as a `&str` slice](https://stackoverflow.com/questions/29428227/return-local-string-as-a-slice-str). You will not be able to build a `Map<&str, &str>` using strings constructed inside the dataframe iteration loop. – E_net4 Aug 29 '23 at 16:43

0 Answers0