1

How to validate UUIDs generated using base64UUID function (i.e. org.elasticsearch.common.UUIDs.base64UUID)

We don't want users to persist anything that doesn't conform to the above format.

Rpj
  • 5,348
  • 16
  • 62
  • 122
  • I believe that was already answered here: https://stackoverflow.com/questions/7905929/how-to-test-valid-uuid-guid – cg1 Sep 07 '21 at 13:21
  • The problem with that is not if the UUID follows the format, the problem is if really is a randomly generated UUID. You should check why do you need that user data. – josejuan Sep 07 '21 at 13:21
  • @josejuan its the primary key for an elasticsearch document – Rpj Sep 07 '21 at 13:27
  • If you trust on clients, you should not check the code, your backend must throw an exception if is not valid (anyway unless the code is isolated in some way (e.g. it is a composite key) I would never trust clients). – josejuan Sep 07 '21 at 17:01

1 Answer

0

This example can validate a random-based base64UUID.

Example.valid() method returns true if a base64 string is a valid RFC-4122 UUIDv4.

package com.example;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.Base64;

public class Example {

    /** Number of bytes in a UUID (128 bits). */
    private static final int UUID_BYTES = 16;

    /**
     * Checks whether a URL-safe Base64 string decodes to an RFC-4122 version 4
     * (random-based) UUID.
     *
     * <p>The input is accepted only when all of the following hold:
     * <ul>
     *   <li>it is non-null and is well-formed URL-safe Base64;</li>
     *   <li>it decodes to exactly 16 bytes;</li>
     *   <li>the version nibble (high 4 bits of byte 6) equals 4;</li>
     *   <li>the variant bits (high 2 bits of byte 8) equal binary 10.</li>
     * </ul>
     *
     * @param base64UUID the candidate string; may be {@code null}
     * @return {@code true} if the string is a Base64-URL encoded UUIDv4,
     *         {@code false} otherwise (never throws for bad input)
     */
    public static boolean valid(String base64UUID) {

        if (base64UUID == null) {
            return false;
        }

        // decode the string; anything that is not Base64-URL text is simply invalid
        final byte[] decoded;
        try {
            decoded = Base64.getUrlDecoder().decode(base64UUID);
        } catch (IllegalArgumentException malformed) {
            return false;
        }

        // A UUID is exactly 128 bits. Padding or truncating to 16 bytes would let
        // shorter or longer payloads slip through, so the length is checked as-is.
        // (Any 16-byte value is by construction within 0 .. 2^128 - 1, so no
        // separate numeric range check is needed.)
        if (decoded.length != UUID_BYTES) {
            return false;
        }

        // check the RFC-4122 version (byte6 = 0100xxxx)
        boolean version = (decoded[6] & 0xff) >>> 4 == 4;

        // check the RFC-4122 variant (byte8 = 10xxxxxx)
        boolean variant = (decoded[8] & 0xff) >>> 6 == 2;

        return version && variant;
    }

    /**
     * Prints each sample followed by the result of {@link #valid(String)}.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String[] samples = { //
                // VALID SAMPLES
                "PQQWfFMgSiitBjGVhxrCbQ", // 3d04167c-5320-4a28-ad06-3195871ac26d
                "bJGoRfu1Qy-6uhSNetIQCg", // 6c91a845-fbb5-432f-baba-148d7ad2100a
                "Gj8czi7tQ6i1xpIgjSh06w", // 1a3f1cce-2eed-43a8-b5c6-92208d2874eb
                "JY5omv7ZQgSdsdNBFFv-rw", // 258e689a-fed9-4204-9db1-d341145bfeaf
                "K2EqshyFRs-K_gajQl7z7g", // 2b612ab2-1c85-46cf-8afe-06a3425ef3ee
                "qfXGIFn0T_WDhTD0kMfuzw", // a9f5c620-59f4-4ff5-8385-30f490c7eecf
                "bEviI0JcTjuofyF7Zz9c8A", // 6c4be223-425c-4e3b-a87f-217b673f5cf0
                "CHi4BqAcTpO7n36mXYDpRw", // 0878b806-a01c-4e93-bb9f-7ea65d80e947
                "be32sQWcTFGH4RApDL1w_Q", // 6dedf6b1-059c-4c51-87e1-10290cbd70fd
                "ZP9Zfu6PQ2GfJCw8YnoPJQ", // 90ff597e-ee8f-4361-9f24-2c3c627a0f25
                // NOT VALID SAMPLES
                // The first four are bit-shifted variants of valid UUIDs. They still
                // decode to 16 bytes, so they are rejected by the version/variant
                // checks rather than by any size check.
                "D0EFnxTIEoorQYxlYcawm0", // shifted 3d04167c-5320-4a28-ad06-3195871ac26d
                "GyRqEX7tUMvuroUjXrSEAo", // shifted 6c91a845-fbb5-432f-baba-148d7ad2100a
                "Bo_HM4u7UOotcaSII0odOs", // shifted 1a3f1cce-2eed-43a8-b5c6-92208d2874eb
                "CWOaJr-2UIEnbHTQRRb_q8", // shifted 258e689a-fed9-4204-9db1-d341145bfeaf
                "K2EqshyF1s-K_gajQl7z7g", // 2b612ab2-1c85-d6cf-8afe-06a3425ef3ee // wrong VErsion
                "qfXGIFn07_WDhTD0kMfuzw", // a9f5c620-59f4-eff5-8385-30f490c7eecf // wrong VErsion
                "bEviI0Jc_juofyF7Zz9c8A", // 6c4be223-425c-fe3b-a87f-217b673f5cf0 // wrong VErsion
                "CHi4BqAcTpPbn36mXYDpRw", // 0878b806-a01c-4e93-db9f-7ea65d80e947 // wrong VAriant
                "be32sQWcTFHn4RApDL1w_Q", // 6dedf6b1-059c-4c51-e7e1-10290cbd70fd // wrong VAriant
                "kP9Zfu6PQ2H_JCw8YnoPJQ", // 90ff597e-ee8f-4361-ff24-2c3c627a0f25 // wrong VAriant
        };

        for (String s : samples) {
            System.out.println(s + ": " + valid(s));
        }
    }
}

This is the output:

PQQWfFMgSiitBjGVhxrCbQ: true
bJGoRfu1Qy-6uhSNetIQCg: true
Gj8czi7tQ6i1xpIgjSh06w: true
JY5omv7ZQgSdsdNBFFv-rw: true
K2EqshyFRs-K_gajQl7z7g: true
qfXGIFn0T_WDhTD0kMfuzw: true
bEviI0JcTjuofyF7Zz9c8A: true
CHi4BqAcTpO7n36mXYDpRw: true
be32sQWcTFGH4RApDL1w_Q: true
ZP9Zfu6PQ2GfJCw8YnoPJQ: true
D0EFnxTIEoorQYxlYcawm0: false
GyRqEX7tUMvuroUjXrSEAo: false
Bo_HM4u7UOotcaSII0odOs: false
CWOaJr-2UIEnbHTQRRb_q8: false
K2EqshyF1s-K_gajQl7z7g: false
qfXGIFn07_WDhTD0kMfuzw: false
bEviI0Jc_juofyF7Zz9c8A: false
CHi4BqAcTpPbn36mXYDpRw: false
be32sQWcTFHn4RApDL1w_Q: false
kP9Zfu6PQ2H_JCw8YnoPJQ: false

This python script was used to generate the test samples:

#!/bin/env python3

from uuid import uuid4

# Digit symbols used for encoding, in value order: index 0 is "A", index 63 is "_".
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" # base-64-url

def basen(number, base, alphabet):
    """Render a non-negative integer in the given base using a custom digit alphabet.

    Args:
        number: The non-negative integer to convert.
        base: The radix; each digit is looked up as ``alphabet[digit_value]``.
        alphabet: Indexable sequence of digit symbols, in value order.

    Returns:
        The most-significant-digit-first string representation. Zero is
        rendered as ``alphabet[0]`` (the alphabet's own zero digit).

    Raises:
        ValueError: If ``number`` is negative (floor division would never
            reach zero, so the conversion could not terminate).
    """
    if number < 0:
        raise ValueError("number must be non-negative")
    if number == 0:
        # The zero digit depends on the alphabet; a literal "0" would be a
        # different digit in alphabets that do not start with "0".
        return alphabet[0]

    digits = []
    while number:
        number, remainder = divmod(number, base)
        digits.append(alphabet[remainder])
    # digits were collected least-significant first
    return "".join(reversed(digits))
        
def print_uuid(uuid):
    """Print one sample line: the UUID's 22-character encoding, then the UUID itself.

    The line has the form ``"<encoded>", // <uuid>``.
    """
    out_len = 22      # characters kept in the printed token
    full_width = 24   # width the encoded text is left-padded to before cutting
    fill = alphabet[0]

    # append 16 zero bits (2 bytes) below the UUID value before encoding
    widened = uuid.int << 16
    text = basen(widened, 64, alphabet)

    token = text.rjust(full_width, fill)[:out_len]
    print('"' + token + '", // ' + str(uuid))

def print_list(size):
    """Generate ``size`` UUIDs with ``uuid4()`` and print a sample line for each."""
    for _ in range(size):
        print_uuid(uuid4())


# Script entry: print ten sample lines.
print_list(10)

If you prefer to use a specialized library, check out Base64UrlCodec and UuidValidator from uuid-creator. Base64UrlCodec is more efficient than Base64.getUrlDecoder().decode("") for base64UUID. This is a shorter example using uuid-creator:

    /**
     * Decodes the string with uuid-creator's {@code Base64UrlCodec} and returns
     * the result of {@code UuidUtil.isRandomBased} for the decoded UUID.
     *
     * <p>NOTE(review): behavior for null or malformed input is whatever
     * {@code Base64UrlCodec.decode} does with it; confirm against the library.
     *
     * @param base64UUID the Base64-URL encoded UUID to check
     * @return the value reported by {@code UuidUtil.isRandomBased}
     */
    public static boolean valid(String base64UUID) {
        return UuidUtil.isRandomBased(Base64UrlCodec.INSTANCE.decode(base64UUID));
    }
fabiolimace
  • 972
  • 11
  • 13