3

I'm having great difficulty creating a program that counts occurrences in a document based on rules that I define. With the help of regex, I check some fields, and if a particular field exists, I either count its occurrences or scan one level deeper. It's a little confusing, and I do not know exactly how to explain it.

I'm checking text files, but to reduce the complexity, I will use arrays here.

I have the following array:

// Sample input: one record per entry, each carrying a COMPANY name and an ID.
// Three entries are identical so that the expected count for NAME / 12 is 3.
let strings = [
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME2 ID: 10'
];

And this is the desired output:

{
  'NAME' :  { '12': 3 },
  'NAME2':  { '10': 1 }
}

To achieve this, I need to do some checks, so I came up with the following 'MAP':

// Ordered extraction rules; each entry is meant to supply the key for one
// nesting level of the output. `pattern` is regex source text (backslashes
// are doubled because it lives in a string literal) and `modifier` is the
// flags string; the attempt further down passes both to `new RegExp(...)`.
let patterns = [
  {
    // Captures the run of word characters after "COMPANY:" (the company name).
    'pattern': 'COMPANY:\\s*?([\\w]+)\\s',
    'modifier': ''
  },
  {
    // Captures exactly two digits after "ID:".
    'pattern'  : 'ID:\\s*?(\\d{2})\\s*',
    'modifier' : ''
  }
];

I'm having a hard time creating the pseudo-code. I know it's something that can be done recursively, but I'm stuck. The biggest problem is the nesting: I can have several levels of nesting, not necessarily two.

In the last hours I created the following code:

'use strict';

// Work-in-progress attempt: builds a nested object keyed by the first capture
// group of each pattern, in pattern order. It does not count yet (see the
// comments inside the loop).

// Extraction rules; `pattern` is the regex source and `modifier` the flags,
// both handed to `new RegExp(...)` below.
let patterns = [
  {
    'pattern': 'COMPANY:\\s*?([\\w]+)\\s',
    'modifier': ''
  },
  {
    'pattern'  : 'ID:\\s*?(\\d{2})\\s*',
    'modifier' : ''
  }
];

// Sample input lines.
let strings = [
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME2 ID: 10'
];

// Accumulator for the nested result.
var _data = {};
for (let string of strings) {

  // Every line starts its descent from the top of the accumulator.
  var root = _data;

  for (let i = 0, length = patterns.length; i < length; i++) {

    let item   = patterns[i];

    // A fresh RegExp is built for every line/pattern pair.
    let regex  = new RegExp(item.pattern, item.modifier);
    // `exec` returns null when the pattern does not match, in which case the
    // `result[1]` accesses below throw a TypeError.
    let result = regex.exec(string);

    if (i < patterns.length -1) {
      // Intermediate level: always assigns a brand-new object, replacing
      // whatever an earlier line stored under the same key.
      root = root[result[1]] = {};
    } else {
      // Deepest level: always stores the constant 1, so repeated lines are
      // not accumulated into a count.
      root = root[result[1]] = 1;
    }
  }
}

// Renders the result wrapped in an outer object, i.e. {"_data": {...}}.
document.body.innerHTML = JSON.stringify({_data});

Now I'm trying to get the last part working, counting the number of occurrences, which is being a pain in the ass. Maybe recursion or a generator could resolve this.

UPDATE -

It's important to understand that this should work with 3, 4, 5, or more pattern objects. Example:

// Three-level variant of the rules: company name, two-digit ID, then the
// digits that follow "SOMETHING:". Each pattern has a single capture group.
let patterns = [
  {
    'pattern': 'COMPANY:\\s*?([\\w]+)\\s',
    'modifier': ''
  },
  {
    'pattern'  : 'ID:\\s*?(\\d{2})\\s*',
    'modifier' : ''
  },
  {
    'pattern'  : 'SOMETHING:\\s*?(\\d+)\\s*',
    'modifier' : ''
  }
];

// Matching sample input: every line carries all three labelled fields.
let strings = [
  'COMPANY: NAME  ID: 12 SOMETHING: 1010',
  'COMPANY: NAME  ID: 12 SOMETHING: 1010',
  'COMPANY: NAME  ID: 12 SOMETHING: 1010',
  'COMPANY: NAME2 ID: 10 SOMETHING: 1010'
];

Output should be:

{
  'NAME': {
    '12': {
      '1010': 3
    }
  },
  'NAME2': {
    '10': {
      '1010': 1
    }
  }
}

3 Answers3

0
'use strict';

    // NOTE: `patterns` is not consulted by the counting code below, which reads
    // the fields by token position instead. The third pattern's "EFD:" label
    // also does not occur in `strings` (they use "SOMETHING:").
    let patterns = [
      {
        'pattern': 'COMPANY:\\s*?([\\w]+)\\s',
        'modifier': ''
      },
      {
        'pattern'  : 'ID:\\s*?(\\d{2})\\s*',
        'modifier' : ''
      },
      {
        'pattern'  : 'EFD:\\s*?(\\d{2})\\s*',
        'modifier' : ''
      }
    ];

    let strings = [
      'COMPANY: NAME  ID: 12 SOMETHING: 1010',
      'COMPANY: NAME  ID: 12 SOMETHING: 1010',
      'COMPANY: NAME  ID: 12 SOMETHING: 1010',
      'COMPANY: NAME2 ID: 10 SOMETHING: 1010'
    ];

    /**
     * Returns the object stored under `key` in `parent`, creating an empty one
     * first when `parent` has no own property of that name.
     */
    function childObject(parent, key) {
      if (!Object.prototype.hasOwnProperty.call(parent, key)) {
        parent[key] = {};
      }
      return parent[key];
    }

    /**
     * Counts identical (name, id, third-field) triples.
     *
     * Each line is expected to look like "LABEL: name LABEL: id LABEL: value";
     * the values are taken from token positions 1, 3 and 5 after runs of spaces
     * have been collapsed.
     *
     * @param {string[]} lines - Input lines.
     * @returns {Object} Nested counts shaped as { name: { id: { value: n } } }.
     */
    function countOccurrences(lines) {
      var counts = {};
      lines.forEach(function(line) {
        // Collapse repeated spaces so a single-space split yields fixed positions.
        var tokens = line.replace(/ +(?= )/g, '').split(' ');
        var name = tokens[1];
        var correspondingValue = tokens[3];
        var efd = tokens[5];

        // Create each level on demand: a known name may still arrive with a
        // new id, and a known id with a new third field.
        var byValue = childObject(counts, name);
        var byEfd = childObject(byValue, correspondingValue);
        var previous = Object.prototype.hasOwnProperty.call(byEfd, efd) ? byEfd[efd] : 0;
        byEfd[efd] = previous + 1;
      });
      return counts;
    }

    var result = countOccurrences(strings);

    document.body.innerHTML = JSON.stringify(result);
samir benzenine
  • 478
  • 4
  • 11
0

You can do it like this. Array.prototype.reduce() is very handy for these jobs.

var strings = [
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME  ID: 12',
  'COMPANY: NAME2 ID: 10'
];

/**
 * Counts how often each (company, id) pair occurs.
 *
 * The company is the word immediately before "ID" and the id is the run of
 * digits at the end of the line. A line lacking either part makes `match`
 * return null and therefore throws a TypeError.
 *
 * @param {string[]} lines - Lines shaped like "COMPANY: <name> ID: <digits>".
 * @returns {Object} Nested counts shaped as { company: { id: n } }.
 */
function countByCompanyAndId(lines) {
  return lines.reduce((counts, line) => {
    const company = line.match(/\w+(?=\s*ID)/)[0];
    const id = line.match(/\d+$/)[0];

    if (!Object.prototype.hasOwnProperty.call(counts, company)) {
      counts[company] = {};
    }
    // Start from 0 when this id is new for an already-known company;
    // incrementing the missing entry directly would produce NaN.
    const ids = counts[company];
    ids[id] = (ids[id] || 0) + 1;
    return counts;
  }, {});
}

var reduced = countByCompanyAndId(strings);
document.write("<pre>" +JSON.stringify(reduced,null,2) + "</pre>");

So now I have modified the code to work with unlimited nested properties. I had to use two Object methods of my own invention, Object.prototype.getNestedValue() and Object.prototype.setNestedValue(), which are used to access and set/modify nested object properties and their values through dynamically provided arguments. For setNestedValue() the last argument is the value to set and the preceding arguments are the nested property names; for getNestedValue() every argument is a property name. They are very handy methods for these use cases. So here it goes:

// Both helpers are installed as NON-enumerable properties so that they do not
// show up in `for...in` loops over unrelated objects. They stay writable and
// configurable, as a plainly assigned property would be.

/**
 * Reads a nested property.
 *
 * @param {...(string|number)} path - Property names, outermost first.
 * @returns {*} The value at the end of the path, or undefined when any step
 *   along the way is missing (undefined or null).
 */
Object.defineProperty(Object.prototype, 'getNestedValue', {
  value: function getNestedValue(...path) {
    if (path.length === 0) {
      return undefined;
    }
    let node = this;
    for (const key of path) {
      if (node === undefined || node === null) {
        return undefined;
      }
      node = node[key];
    }
    return node;
  },
  writable: true,
  configurable: true,
  enumerable: false
});

/**
 * Writes a nested property, creating missing containers on the way down.
 *
 * @param {...*} args - Property names, outermost first, followed by the value
 *   to store as the final argument.
 * @returns {Object} The receiver, to allow chaining.
 */
Object.defineProperty(Object.prototype, 'setNestedValue', {
  value: function setNestedValue(...args) {
    if (args.length <= 2) {
      this[args[0]] = args[1];
      return this;
    }
    const key = args[0];
    const nextKey = args[1];
    const child = this[key];
    if (typeof child !== 'object' || child === null) {
      // A string key on the next level gets an object container; any other
      // key (e.g. a numeric index) gets an array.
      this[key] = typeof nextKey === 'string' ? {} : [];
    }
    this[key].setNestedValue(...args.slice(1));
    return this;
  },
  writable: true,
  configurable: true,
  enumerable: false
});


var strings = [
  'COMPANY: NAME  ID: 12 SOMETHING: 1010 MORE: 857',
  'COMPANY: NAME  ID: 12 SOMETHING: 1010 MORE: 857',
  'COMPANY: NAME  ID: 12 SOMETHING: 1010 MORE: 857',
  'COMPANY: NAME2 ID: 10 SOMETHING: 1010 MORE: 333'
];

// Every "LABEL: value" pair contributes one nesting level; the deepest level
// holds the number of times that exact sequence of values was seen.
var reduced = strings.reduce((p, c) => {
  var fields = c.match(/(?::\s*)[^\s]+/g);
  if (fields === null) {
    // No "LABEL: value" pairs on this line: nothing to count.
    return p;
  }
  var props = fields.map(e => e.split(":")[1].trim());
  var value = p.getNestedValue(...props);
  p.setNestedValue(...props, typeof value === "number" ? value + 1 : 1);
  return p;
}, {});

document.write("<pre>" + JSON.stringify(reduced,null,2) + "</pre>");
Community
  • 1
  • 1
Redu
  • 25,060
  • 6
  • 56
  • 76
  • @D_REIS Just saw... No big deal can be done in the same fashion. I will modify my answer accordingly. – Redu May 19 '16 at 19:00
  • Very difficult right? I'm trying for at least 2 days. –  May 19 '16 at 19:26
  • @D_REIS Since you say it's unlimited nested objects it's "slightly" complicated but i am on it. I will come up with an answer. – Redu May 19 '16 at 19:44
  • Yes . I think the tricky part is check each nested path, and if already exists, go deeper.. –  May 19 '16 at 19:53
  • 1
    @D_REIS yes that's exactly what i am working on right now – Redu May 19 '16 at 19:54
  • @D_REIS i hope this is fine with you – Redu May 19 '16 at 21:04
  • too hardcoded, I think OP wants to process *any* regexps – vp_arth May 19 '16 at 21:33
  • 1
    @vp_arth: Getting the properties to be used in the nested structure that we want to construct is not OP's main problem at all. The problem was constructing the nested structure dynamically once you have those properties "regardless" how many of them there are. OP can easily use any regex to split the source text into his desired property set and this code will do the rest. – Redu May 19 '16 at 21:43
  • Great Redu, u are really fantastic man! Thank u VERY MUCH, u deserve a kiss. –  May 19 '16 at 21:55
0

It's not an ES6 solution but it's relatively simple to understand:

var strings = [
  'COMPANY: NAME  ID: 12 SOMETHING: 1010',
  'COMPANY: NAME  ID: 12 SOMETHING: 1010',
  'COMPANY: NAME  ID: 12 SOMETHING: 1010',
  'COMPANY: NAME2 ID: 10 SOMETHING: 1010',
  'COMPANY: NAME2 ID: 11 SOMETHING: 1010'
];

var output = {};

strings.forEach(function (entry) {
    // Pull out just the values of the entry, e.g. (NAME, 12, 1010).
    var fields = entry.match(/[^\s:]+(?=\s+[^:]+:|$)/g);
    var deepest = fields.length - 1;
    var node = output;

    fields.forEach(function (field, depth) {
        var existing = node[field];

        if (depth < deepest) {
            // Intermediate level: reuse the container from an earlier entry
            // or start a new one.
            node[field] = existing || {};
        } else if (existing) {
            // Deepest level, seen before: bump the counter.
            node[field] = existing + 1;
        } else {
            // Deepest level, first sighting.
            node[field] = 1;
        }

        // Descend for the next field.
        node = node[field];
    });
});

console.log(output);

Output:

{
   'NAME':{
      '12':{
         '1010':3
      }
   },
   'NAME2':{
      '10':{
         '1010':1
      },
      '11':{
         '1010':1
      }
   }
}

Demo here.
Regex demo.

James Buck
  • 1,640
  • 9
  • 13