1

I am trying to check whether POST data is a properly encoded application/x-www-form-urlencoded body using Node.js:

const connect = require('connect');
const { request } = require('node:http');


const app = connect();

/**
 * Placeholder validator for application/x-www-form-urlencoded bodies.
 * No real validation is implemented yet: every body is reported as valid.
 * @param {String} body raw request body (currently ignored)
 * @returns {Boolean} always true
 */
const isValidFormUrlEncoded = (body) => true;

/**
 * Middleware that buffers the whole request body and answers 400 when a
 * request declared as application/x-www-form-urlencoded carries a body that
 * isValidFormUrlEncoded() rejects. Requests with an empty body or with any
 * other content type are passed on to the next handler.
 */
app.use(function(req,res,next){
    req.id = 1;

    const chunks = [];

    req.on('data',(data) =>
    {
        console.log('Getting Body');
        chunks.push(data);
    });

    req.on('end',() => {
        const body = Buffer.concat(chunks).toString();

        // Headers live on the incoming message (`req.headers`); the `request`
        // function imported from node:http has no `header` property, so reading
        // `request.header[...]` throws as soon as the body is non-empty.
        const isForm = req.headers['content-type'] === 'application/x-www-form-urlencoded';

        // Only a non-empty, form-encoded body that fails validation is rejected.
        if (!body || !isForm || isValidFormUrlEncoded(body)) {
            next();
        } else {
            res.setHeader("Content-Type",'text/plain');
            res.writeHead(400,{'X-val':3});
            res.end("Body not form-url endoded.");
        }
    });
});

// Final handler: logs the id attached to the request by the earlier
// middleware and answers every request with a plain-text 200 response.
app.use((req, res) => {
    console.log("id:",req.id);
    res.setHeader("Content-Type",'text/plain');
    res.writeHead(200,{'X-val':3});
    res.end("jhello");
});

// Start listening on port 8090.
app.listen(8090);

But I do not know how to validate an application/x-www-form-urlencoded body.

I thought of using the node:querystring module, but it parses anything, even if the body is invalid.

For example I tried:

const querystring = require("node:querystring");

// A bare token without "=" is still parsed: it becomes a key with an empty value.
let parsedData = querystring.parse("Hello");
console.log(parsedData);

// Free text containing spaces and "*" is parsed into a key/value pair as well.
parsedData = querystring.parse("Hello Big Brother = d2232332r3*cdw How are you");
console.log(parsedData);

And I get:

node stackoverflow/connect_expirement.js
[Object: null prototype] { Hello: '' }
[Object: null prototype] { 'Hello Big Brother': 'd2232332r3*cdw How are you' }

I consider both of them invalid, but querystring does its best to forcefully match/parse even if the string is invalid. Any idea how I can check that a body is a valid URL-encoded string?

Edit 1

I tried to do via regex:

// NOTE(review): inside a quoted string, `\[` and `\]` collapse to plain `[` and `]`
// before RegExp sees them, so they delimit a character class instead of matching
// literal brackets. Also, `(1-9)` is a group matching the literal text "1-9" (not a
// digit range), and the `1-9` ranges inside the classes leave out the digit 0.
const regex2 = new RegExp('^(([a-zA-Z1-9])+(\[(1-9)*\])?=[a-zA-Z1-9%]+&?)*([a-zA-Z1-9])+(\[?(1-9)*\]?)=[a-zA-Z1-9%]+$');

These tests seem to work:

console.log(regex2.test('panties')); // false - works fine

console.log(regex2.test('hello=value&am=i')); // true - works fine

But these fail: I expect the regex to match, but it does not:

// Array-style keys: expected to match.
console.log(regex2.test('pleas=help&me=plz&var[]=true&var[]=false'));
// The outer literal uses double quotes so the quoted key 'blahblah' does not
// terminate the string early (which was a syntax error).
console.log(regex2.test("pleas=help&me=plz&var[]=true&var[2]=false&var['blahblah']=ipsum"));

I expect `true` from the tests above, but I do not get it, so I am still stuck.

I still tried this one as well: https://regex101.com/r/CiJafp/1

And I did not get the expected result at all.

Dimitrios Desyllas
  • 9,082
  • 15
  • 74
  • 164
  • I'm not entirely sure that validating query string with regex is a good idea. But if it is, I think you'll have better luck starting with this: [`^(?:(?:\w+)(?:\[(?:\d*|'[^']*')\])?=[\w%]*(?:&|$))*$`](https://regex101.com/r/CiJafp/4) – markalex Apr 24 '23 at 21:14
  • According to the link, `pleas=&me=&var[]=` is considered valid. But should it be, according to the RFC? – Dimitrios Desyllas Apr 25 '23 at 10:37
  • You don't know exact RFC's stance on query strings, but I don't see the reason why this string should be invalid. On the other hand I've proposed this regex only as a starting point: I'll be surprised if it doesn't miss something. – markalex Apr 25 '23 at 10:44
  • Sorry, I meant "I don't know exact ...". – markalex Apr 25 '23 at 14:23
  • Also, see [this question](https://stackoverflow.com/questions/4557387/is-a-url-query-parameter-valid-if-it-has-no-value) – markalex Apr 25 '23 at 14:44

1 Answer

0

The best working code is:

const connect = require('connect');

const app = connect();

// One or more `key=value` pairs, each followed by "&" or the end of input.
// A key is word characters optionally followed by bracket segments holding
// digits or single-quoted text; a value may be empty and may only contain
// word characters, "-", "+", "." and "%".
const FORM_URLENCODED_PATTERN = /^(?:(?:\w+)(?:\[(?:\d*|'[^']*')\])*=[\w\-\+\.%]*(?:&|$))+$/;

/**
 * Check whether a request body looks like a URL-encoded form.
 * An empty string is reported as invalid; a trailing "&" is tolerated.
 * @param {String} body raw request body
 * @returns {Boolean} true when the whole body matches the pattern
 */
const isValidFormUrlEncoded = (body) => FORM_URLENCODED_PATTERN.test(body);

/**
 * Middleware that buffers the whole request body and answers 400 when a
 * request declared as application/x-www-form-urlencoded carries a body that
 * isValidFormUrlEncoded() rejects. Requests with an empty body or with any
 * other media type are passed on to the next handler.
 */
app.use(function(req,res,next){
    req.id = 1;

    const chunks = [];

    req.on('data',(data) =>
    {
        console.log('Getting Body');
        chunks.push(data);
    });

    req.on('end',() => {

        console.log(req.headers);
        const body = Buffer.concat(chunks).toString();
        console.log(body);

        // Compare only the media type: the header may carry parameters such as
        // "; charset=UTF-8" and its casing is not significant, so an exact
        // string comparison would let those form posts through unvalidated.
        const mediaType = String(req.headers['content-type'] || '')
            .split(';')[0]
            .trim()
            .toLowerCase();
        const isForm = mediaType === 'application/x-www-form-urlencoded';

        // Only a non-empty, form-encoded body that fails validation is rejected.
        if (!body || !isForm || isValidFormUrlEncoded(body)) {
            next();
        } else {
            res.setHeader("Content-Type",'text/plain');
            res.writeHead(400,{'X-val':3});
            res.end("Body not form-url endoded.");
        }
    });
});

// Final handler: logs the id attached to the request by the earlier
// middleware and answers every request with a plain-text 200 response.
app.use((req, res) => {
    console.log("id:",req.id);
    res.setHeader("Content-Type",'text/plain');
    res.writeHead(200,{'X-val':3});
    res.end("jhello");
});

// Start listening on port 8090.
app.listen(8090);

As you can see the test uses the regex: https://regex101.com/r/CiJafp/4

And can be done using:

/^(?:(?:\w+)(?:\[(?:\d*|'[^']*')\])*=[\w\-\+\.%]*(?:&|$))+$/.test(body);

The regex is this one.

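Keep in mind that the regex is written as a literal (`/.../`) instead of with `new RegExp()`, so the backslash escapes such as `\[` reach the regex engine unchanged rather than being consumed by the string literal.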

Dimitrios Desyllas
  • 9,082
  • 15
  • 74
  • 164