5

I am trying to build a nom parser to examine URLs whose ID is a UUID:

rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912

I created the following:

extern crate uuid;
use uuid::Uuid;

// Parses "rooms", optionally followed by "/<UUID>", and yields the UUID if
// that optional section was parsed.
named!(room_uuid<&str, Option<Uuid>>,
    do_parse!(
        tag_s!("rooms") >>
        // `opt!` wraps the whole "/<UUID>" section, so a failure anywhere
        // inside it (including an invalid UUID) becomes `None`.
        id: opt!(complete!(preceded!(
            tag_s!("/"),
            // 36 is the length of the hyphenated UUID text used in the examples.
            map_res!(take_s!(36), FromStr::from_str)
        ))) >>

        (id)
    )
);

It handles almost all cases well:

// No ID section at all.
assert_eq!(room_uuid("rooms"), Done("", None));
// A lone slash is not consumed: it is handed back as remaining input.
assert_eq!(room_uuid("rooms/"), Done("/", None));
// A valid UUID after the slash is parsed and returned.
assert_eq!(room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"), Done("", Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())));

Except cases where ID is not a valid UUID:

assert!(room_uuid("rooms/123").is_err()); # it fails
# room_uuid("rooms/123").to_result() => Ok(None)

As far as I understand it happens because opt! converts inner Err into None.

I would like to have ID as optional section but if it is present it should be a valid UUID.
Unfortunately, I don't understand how to combine both those things: optionality and strict format.

Aleksey
  • 2,289
  • 17
  • 27

3 Answers

2

Given parsing URLs doesn't need a streaming interface you could use synom instead. It was maintained as part of the syn crate, but is sadly not maintained anymore (it was merged into syn and changed to only process rust tokens).

Sadly synom doesn't provide take_s! and eof! (the latter one is going to forbid the trailing "unparsed" 123), but it's easy enough to implement those.

Using eof! also means you can't return an unparsed "/" (although I consider that a good thing); and the nested option! needs some unwrapping at the end (you could return Option<Option<Uuid>> instead to detect the trailing "/").

Playground

#[macro_use]
extern crate synom;

extern crate uuid;
use uuid::Uuid;

/// Takes exactly `$length` characters (not bytes) off the front of the
/// string slice `$i`.
///
/// Evaluates to `synom::IResult::Done(rest, taken)` on success and to
/// `synom::IResult::Error` when `$i` holds fewer than `$length` characters.
macro_rules! take_s {
    ($i:expr, $length:expr) => {{
        let wanted: usize = $length;
        // Byte offsets at which the input may be split: the start of every
        // character, followed by the end of the input.
        let mut boundaries = $i
            .char_indices()
            .map(|(offset, _)| offset)
            .chain(::std::iter::once($i.len()));
        // The boundary at index `wanted` sits right after `wanted` characters.
        match boundaries.nth(wanted) {
            Some(offset) => {
                let (taken, rest) = $i.split_at(offset);
                synom::IResult::Done(rest, taken)
            }
            None => synom::IResult::Error,
        }
    }};
}

/// Succeeds only when the input `$i` is empty, yielding `()` and handing the
/// (empty) input back as the remainder; any leftover input is an error.
macro_rules! eof {
    ($i:expr,) => {{
        match $i {
            rest if rest.is_empty() => synom::IResult::Done(rest, ()),
            _ => synom::IResult::Error,
        }
    }};
}

// Parses "rooms", optionally followed by "/" and an optional UUID, and
// requires the whole input to be consumed.
named!(room_uuid -> Option<Uuid>,
    do_parse!(
        tag!("rooms") >>
        // Outer `option!`: is there a "/" section at all?
        id: option!(preceded!(
            tag!("/"),
            // Inner `option!`: is there a UUID after the "/"?
            option!(
                // Take 36 characters and parse them; only the `Ok` case has
                // an arm, so a parse failure presumably fails this branch.
                switch!(map!(take_s!(36), str::parse),
                    Ok(v) => value!(v)
                )
            )
        )) >>
        // Rejects leftover input such as the "123" in "rooms/123".
        eof!() >>

        // Collapse the nested `Option<Option<Uuid>>` into `Option<Uuid>`.
        (id.unwrap_or(None))
    )
);

fn main() {
    use synom::IResult::*;

    assert_eq!(room_uuid("rooms"), Done("", None));
    assert_eq!(room_uuid("rooms/"), Done("", None));
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done(
            "",
            Some(Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap())
        )
    );
    assert_eq!(room_uuid("rooms/123"), Error);
}
Stefan
  • 5,304
  • 2
  • 25
  • 44
  • Thank you! I wasn't aware of `synom`. I am a little scared to use it because the original `nom` has more built-in parsers and documentation, and I am enough of a newbie that I couldn't write something like `take_s` myself (like you did). As far as I can see, `synom::IResult::Error` doesn't contain the error itself. Is that okay? Actually, my whole task is to parse URLs like `api/v1/rooms/UUID/tracks/UUID`. Is that possible with `synom`? – Aleksey Feb 14 '18 at 05:44
  • I just realized `synom` isn't maintained anymore. Maybe [`strom`](https://docs.rs/strom/0.1.1/strom/) is an alternative, but it seems dead too. Maybe I'll write my own :) Maybe [`router`](https://docs.rs/router/0.6.0/router/macro.router.html) is a solution in your case? – Stefan Feb 14 '18 at 10:08
  • Not sure `router` can be used outside of Iron. I am building an MQTT-based application, so I need to parse topics, and that looks like very custom logic. I have just added `eof!` to my initial version and it results in `Error`. The bad thing is that it is `Error(Eof)` and I would like it to be `Error(Verify)` or similar. Still hoping it is possible to implement using `nom`. – Aleksey Feb 14 '18 at 10:45
2

I've only started working with nom myself in the last couple of weeks, but I found one way of solving this. It doesn't fit exclusively within a macro, but it does give the correct behavior with one modification: I swallow the `/` rather than leave it dangling when a UUID is not given.

#[macro_use]
extern crate nom;
extern crate uuid;

use std::str::FromStr;
use nom::IResult;
use uuid::Uuid;

/// Parses `rooms`, optionally followed by `/` and a UUID.
///
/// * `rooms` and `rooms/` yield `None`; the slash, if present, is consumed.
/// * `rooms/<UUID>` yields `Some(uuid)`; exactly 36 characters are read and
///   anything after them is handed back as the remaining input.
/// * Anything else after the slash is an error.
/// * Input that follows `rooms` without a slash is left unparsed and `None`
///   is returned.
fn room_uuid(input: &str) -> IResult<&str, Option<Uuid>> {
    // Check that it starts with "rooms"
    let res = tag_s!(input, "rooms");
    let remaining = match res {
        IResult::Incomplete(i) => return IResult::Incomplete(i),
        IResult::Error(e) => return IResult::Error(e),
        IResult::Done(i, _) => i
    };

    // If a slash is not present, return early. `opt!` reports a missing
    // slash as `Done(_, None)` rather than as an error, so that case has to
    // be matched explicitly; otherwise "rooms<UUID>" would be accepted.
    let optional_slash = opt!(remaining, tag_s!("/"));
    let remaining = match optional_slash {
        IResult::Done(i, Some(_)) => i,
        IResult::Done(_, None) |
        IResult::Error(_) |
        IResult::Incomplete(_) => return IResult::Done(remaining, None),
    };

    if remaining.is_empty() {
        // This branch allows for "rooms/"
        return IResult::Done(remaining, None);
    }

    // Something follows the slash: make sure it's a valid UUID.
    // `complete!` keeps a too-short ID from being reported as `Incomplete`.
    let res = complete!(remaining, map_res!(take_s!(36), FromStr::from_str));
    match res {
        IResult::Done(i, o) => IResult::Done(i, Some(o)),
        IResult::Error(e) => IResult::Error(e),
        IResult::Incomplete(n) => IResult::Incomplete(n),
    }
}

#[test]
fn match_room_plus_uuid() {
    use nom::IResult::Done;

    let expected_id = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();

    // Without an ID, with or without the trailing slash
    for &url in ["rooms", "rooms/"].iter() {
        assert_eq!(room_uuid(url), Done("", None));
    }

    // With a valid ID
    assert_eq!(
        room_uuid("rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", Some(expected_id))
    );

    // With something that is not a UUID
    assert!(room_uuid("rooms/123").is_err());
}
Mike Cluck
  • 31,869
  • 13
  • 80
  • 91
  • 1
    Thank you very much Mike! I also thought about using functions and not only macros but I didn't have an idea how to do it. `/` as remaining part is totally not required thing to have (I just managed to parse it only this way and that is it) so no problem here. However, with `rooms/123` it fails not because of wrong format but because of `Incomplete` (less than 36). Also with `rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912-12312` (valid UUID and some more symbols) it succeeds with just taking 36 out of remaining. But I get the idea, I will try to improve the solution, thanks once again! – Aleksey Feb 14 '18 at 19:12
  • @Aleksey Very true. I should have made this more robust lol But I'm glad I could help! Good luck! – Mike Cluck Feb 14 '18 at 19:18
  • 1
    Looks like one can use `take_while_s!(|chr| nom::is_hex_digit(chr as u8) || chr == '-')` instead of `take_s!(36)`. This way it fails with `Err(MapRes)`. Actually, my use case supposes that `room` can have nested resources, e.g. `rooms/UUID/tracks/UUID`. That is why I can't just read to the end of the line. – Aleksey Feb 14 '18 at 20:37
1

Ok, so I got it working with nom and the extended URL format api/v1/rooms/UUID/tracks/UUID.

The basics are the same as before: you want to check for eof, ignore trailing "/" and never wait for incomplete results (alt_complete! is doing a good job here).

Regarding your ErrorKind::Verify wish: I don't think the error kind is actually important, just ignore it, or map it to whatever you want manually.

Be careful with the alt_complete! branches: in case of overlaps the preferred option (usually the "longer one") should come first.

I like my with! helper, but you could also inline it.

Playground doesn't support nom, so no link this time.

#[macro_use]
extern crate nom;

extern crate uuid;
use uuid::Uuid;

// Parses one "/<UUID>" path segment: a slash followed by 36 characters
// that must parse as a `Uuid`.
named!(uuid<&str, Uuid>, preceded!(
    tag_s!("/"),
    map_res!(take_s!(36), str::parse)
));

/// A successfully parsed API URL.
#[derive(Clone, PartialEq, Eq, Debug)]
enum ApiRequest {
    /// `/api/v1/rooms`
    Rooms,
    /// `/api/v1/rooms/UUID`
    Room { room: Uuid },
    /// `/api/v1/rooms/UUID/tracks`
    Tracks { room: Uuid },
    /// `/api/v1/rooms/UUID/tracks/UUID`
    Track { room: Uuid, track: Uuid },
}

/// shortcut for: `do_parse!(name: expr >> r: otherexpr >> (r))`
///
/// `otherexpr` should use `name`, otherwise you could just use `preceded!`.
///
/// The first arm accepts a macro sub-parser (`name: parser!(...) >> ...`),
/// the second a named parser function (`name: parser >> ...`).
///
/// The expansions carry no trailing `;`: the macro is used in expression
/// position (e.g. as an `alt_complete!` branch), where a trailing semicolon
/// triggers the `semicolon_in_expressions_from_macros` lint.
macro_rules! with {
    ($i:expr, $var:ident: $submac:ident!( $($args:tt)* ) >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac!($($args)*) >> r: $($rest)* >> (r))
    };
    ($i:expr, $var:ident: $submac:ident >> $($rest:tt)*) => {
        do_parse!($i, $var: $submac >> r: $($rest)* >> (r))
    };
}

// /api/v1/rooms/UUID/tracks/UUID
//
// Nested alternatives: at every level the longer, more specific path is
// tried first, with a `value!` for the resource parsed so far as fallback.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
    alt_complete!(
        preceded!(tag_s!("/rooms"), alt_complete!(
            // "/rooms/UUID...": `room` is bound for everything nested below
            with!(room: uuid >> alt_complete!(
                preceded!(tag_s!("/tracks"), alt_complete!(
                    // "/tracks/UUID...": `track` is bound for the nested branches
                    with!(track: uuid >> alt_complete!(
                        // ... sub track requests?
                        value!(ApiRequest::Track{room, track})
                    ))
                    |
                    // "/tracks" without a track ID
                    value!(ApiRequest::Tracks{room})
                ))
                // other room requests
                |
                // "/rooms/UUID" with nothing recognised after it
                value!(ApiRequest::Room{room})
            ))
            |
            // "/rooms" without a room ID
            value!(ApiRequest::Rooms)
        ))
        // | ... other requests
    )
));

// Entry point: parses one of the API versions, then requires that nothing
// but an optional trailing "/" is left of the input.
named!(api<&str, ApiRequest>, terminated!(
    alt_complete!(
        apiv1
        // | ... other versions
        // also could wrap in new enum like:
        //     apiv1 => { ApiRequest::V1 }
        //     |
        //     apiv2 => { ApiRequest::V2 }
    ),
    tuple!(
        alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
        eof!() // make sure full URL was parsed
    )
));

fn main() {
    use nom::IResult::*;
    use nom::ErrorKind;

    let room = Uuid::parse_str("e19c94cf-53eb-4048-9c94-7ae74ff6d912").unwrap();
    let track = Uuid::parse_str("83d235e8-03cd-420d-a8c6-6e42440a5573").unwrap();

    assert_eq!(api("/api/v1/rooms"), Done("", ApiRequest::Rooms));
    assert_eq!(api("/api/v1/rooms/"), Done("", ApiRequest::Rooms));
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912"),
        Done("", ApiRequest::Room { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/"),
        Done("", ApiRequest::Room { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks"),
        Done("", ApiRequest::Tracks { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/"),
        Done("", ApiRequest::Tracks { room })
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573"),
        Done("", ApiRequest::Track{room, track})
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/"),
        Done("", ApiRequest::Track{room, track})
    );
    assert_eq!(api("/api/v1"), Error(ErrorKind::Alt));
    assert_eq!(api("/api/v1/foo"), Error(ErrorKind::Alt));
    assert_eq!(api("/api/v1/rooms/123"), Error(ErrorKind::Eof));
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/bar"),
        Error(ErrorKind::Eof)
    );
    assert_eq!(
        api("/api/v1/rooms/e19c94cf-53eb-4048-9c94-7ae74ff6d912/tracks/83d235e8-03cd-420d-a8c6-6e42440a5573/123"),
        Error(ErrorKind::Eof)
    );
    assert_eq!(api("/api/v2"), Error(ErrorKind::Alt));
}

You could also use a more strict alt_full_opt_slash! branch method, which would ensure a branch only matches if it fully parsed the input.

You could then use a more "flat" way (although nested branches should still be working) to parse the alternatives (although this means you might end up parsing some UUIDs more than once; also now all errors are of kind Alt):

/// Similar to alt_complete, but also requires the branch parses until
/// the end of the input (but ignores a trailing "/").
///
/// Implemented as a token muncher: the public arm (last) starts with an
/// accumulator holding only the input, the `__impl` arms peel one
/// `|`-separated branch at a time off the front, `__impl_push` wraps that
/// branch, `__impl_push2` appends it to the accumulator, and the `__end`
/// marker hands the accumulated branches to `alt_complete!`.
macro_rules! alt_full_opt_slash {
    // Append the first wrapped branch: the accumulator holds only the input so far.
    (__impl_push2 ($i:expr,) ($($new:tt)*), $($rest:tt)*) => {
        alt_full_opt_slash!(__impl ($i, $($new)*), $($rest)*)
    };
    // Append a further wrapped branch, separated from the previous ones by `|`.
    (__impl_push2 ($i:expr, $($result:tt)+) ($($new:tt)*), $($rest:tt)*) => {
        alt_full_opt_slash!(__impl ($i, $($result)+ | $($new)*), $($rest)*)
    };
    // Wrap one branch so that it only matches when the input is fully consumed.
    (__impl_push ($($result:tt)*) ($($new:tt)*), $($rest:tt)*) => {
        // modify branch:
        alt_full_opt_slash!(__impl_push2 ($($result)*) (
            terminated!(
                $($new)*,
                tuple!(
                    alt_complete!(tag_s!("/") | value!("")), // ignore trailing "/"
                    eof!() // make sure full URL was parsed
                )
            )
        ), $($rest)*)
    };
    // Next branch is a named parser: `parser | ...`
    (__impl ($($result:tt)*), $e:ident | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $e ), $($rest)*)
    };
    // Next branch is a macro parser: `parser!(...) | ...`
    (__impl ($($result:tt)*), $subrule:ident!( $($args:tt)*) | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) ), $($rest)*)
    };
    // Next branch is a macro parser with a mapping: `parser!(...) => { f } | ...`
    (__impl ($($result:tt)*), $subrule:ident!( $($args:tt)* ) => { $gen:expr } | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $subrule!($($args)*) => { $gen } ), $($rest)*)
    };
    // Next branch is a named parser with a mapping: `parser => { f } | ...`
    (__impl ($($result:tt)*), $e:ident => { $gen:expr } | $($rest:tt)*) => {
        alt_full_opt_slash!(__impl_push ($($result)*) ( $e => { $gen } ), $($rest)*)
    };
    // All branches consumed: emit the final `alt_complete!`.
    (__impl ($i:expr, $($result:tt)*), __end) => {
        alt_complete!($i, $($result)*)
    };
    // Public entry point: append the `__end` marker and start munching.
    ($i:expr, $($rest:tt)*) => {{
        alt_full_opt_slash!(__impl ($i, ), $($rest)* | __end)
    }};
}

// /api/v1/rooms/UUID/tracks/UUID
//
// Flat variant: one complete branch per URL shape. `alt_full_opt_slash!`
// only accepts a branch that consumes the whole input, so the shared
// "/rooms" (and room UUID) prefix may be parsed once per attempted branch.
named!(apiv1<&str, ApiRequest>, preceded!(tag_s!("/api/v1"),
    alt_full_opt_slash!(
        // /rooms
        do_parse!(
            tag_s!("/rooms") >>
            (ApiRequest::Rooms)
        )
        |
        // /rooms/UUID
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            (ApiRequest::Room{room})
        )
        |
        // /rooms/UUID/tracks
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            tag_s!("/tracks") >>
            (ApiRequest::Tracks{room})
        )
        |
        // /rooms/UUID/tracks/UUID
        do_parse!(
            tag_s!("/rooms") >>
            room: uuid >>
            tag_s!("/tracks") >>
            track: uuid >>
            (ApiRequest::Track{room, track})
        )
    )
));

// Entry point for the flat variant: the end-of-input check already lives in
// every `alt_full_opt_slash!` branch, so only the API version is selected here.
named!(api<&str, ApiRequest>, alt_complete!(
    apiv1
    // | ... other versions
    // also could wrap in new enum like:
    //     apiv1 => { ApiRequest::V1 }
    //     |
    //     apiv2 => { ApiRequest::V2 }
));
Stefan
  • 5,304
  • 2
  • 25
  • 44