decimal position are not adding while converting mainframe copy book to csv

Question

I am trying to convert mainframe copybook to csv (only length of all data types) but the decimal positions are not adding. For example, suppose col1 has S9(13)v9(7). It's converting into total size 13 but the expected output should be 13+7 = 20

import re, string, sys
import csv
import os 

class PictureString:

    REPEATS_RE = re.compile(r'(.)\((\d+)\)')
    #print REPEATS_RE
    FLOAT_RE = re.compile(r'S?[9Z]*[.V][9Z]+')
    INTEGER_RE = re.compile(r'S?[9Z]+')
    INTEGER_RE1 = re.compile(r'[9Z]+')
    COMP_TYPES = ['Integer', 'Float', 'Double', 'BCD']

    def expand_repeat_chars(self, pic_str):
        while True:
            match = self.REPEATS_RE.search(pic_str)
            #print match
            if not match:
                break
            expanded_str = match.group(1) * int(match.group(2))
            #print expanded_str
            #pic_str = self.REPEATS_RE.sub(expanded_str, pic_str, 1)
            pic_str=expanded_str
            print pic_str

        return pic_str

    def parse(self, pic_str, comp=0):
        pic_str = self.expand_repeat_chars(pic_str)
        if comp:
            data_type = self.COMP_TYPES[int(comp)]
        elif self.FLOAT_RE.match(pic_str):
            data_type = 'Float'
        elif self.INTEGER_RE.match(pic_str):
            data_type = 'Integer'
        elif self.INTEGER_RE1.match(pic_str):
            data_type = 'Integer'
        else:
            data_type = 'Char'
        decimal_pos = 0
        if 'V' in pic_str:
            decimal_pos = pic_str.index('V') + 1
            #print decimal_pos
            pic_str = pic_str.replace('V', '')
        result = (data_type, len(pic_str), decimal_pos)
        writer=csv.writer(csv_file)
        writer.writerow(result)
        #print result
        return result


class Field:

    FIELD_PTRN = r'^(?P<level>\d{2})\s+(?P<name>\S+)'
    PIC_PTRN = r'\s+PIC\s+(?P<pic>\S+)'
    DEPENDING_ON_PTRN = r'\s+OCCURS.*DEPENDING ON (?P<occurs>\S+)'
    OCCURS_PTRN = r'\s+OCCURS (?P<occurs>\d+) TIMES'
    COMP_PTRN = r'\s+COMP-(?P<comp>[1-3])'
    FIELD_RE = [ re.compile(i + '.') for i in [
        FIELD_PTRN + PIC_PTRN + COMP_PTRN,
        FIELD_PTRN + DEPENDING_ON_PTRN,
        FIELD_PTRN + OCCURS_PTRN + PIC_PTRN + COMP_PTRN,
        FIELD_PTRN + OCCURS_PTRN + PIC_PTRN,
        FIELD_PTRN + OCCURS_PTRN,
        FIELD_PTRN + PIC_PTRN,
        FIELD_PTRN
    ] ]
    FIELDS = ['occurs', 'level', 'name', 'type', 'length', 'decimal_pos', 'pic', 'comp']
    pic = PictureString()

    def parse(self, line_num, line):
        fields = { 'name': '', 'level': '0', 'occurs': '1', 'comp': '0' }
        pattern_num, num_patterns = 0, len(self.FIELD_RE)
        while pattern_num < num_patterns:
            match = self.FIELD_RE[pattern_num].match(line)
            if match:
                for key, value in match.groupdict().items():
                    fields[key] = value
                    #print fields[key]
                break
            pattern_num += 1
            #pattern_num -= 1
        result = [ fields[i] for i in self.FIELDS[:3] ]
        #print result
        if 'pic' in fields:
             result += self.pic.parse(fields['pic'], int(fields['comp']))
             #print result
        return result


class Copybook:

    LEGAL_DB_NAME_RE = re.compile(r'[^\w*+]')
    OCCURS, LEVEL, NAME = range(3)
    #csv_file = open('csv_file.csv','w+')

    def legalize_db_name(self, name, camel_case=False):
        name = self.LEGAL_DB_NAME_RE.sub('_', name)
        if camel_case:
            return ''.join([ i.capitalize() for i in name.split('_') ])
        return name.upper()

    def set2legal_db_names(self):
        result = []
        for field in self.fields:
            field = list(field)
            if len(field) <= 3:
                field[self.NAME] = self.legalize_db_name(field[self.NAME], True)
                if not field[self.OCCURS].isdigit():
                    field[self.OCCURS] = self.legalize_db_name(field[self.OCCURS])
            else:
                field[self.NAME] = self.legalize_db_name(field[self.NAME])
            result.append(field)
        self.fields = result

    def occurs_n_times(self):
        levels = [0]
        for field in self.fields:
            line = ''
           #print field
            level = int(field[self.LEVEL])
            if level == 1:
                line = field[self.NAME]
            if level <= (levels[0]-1):
                levels.pop()
            tabs = '\t' * (len(levels) - 1)
            if len(field) > 3:
                line = ', '.join([ str(i) for i in field[self.NAME:] ])
                #print len(field)
            elif field[self.OCCURS] != '1':
                line = '{0[2]} OCCURS {0[0]!r} TIMES:'.format(field)
                levels.append(level)
            if line:
                writer=csv.writer(csv_file1,delimiter=',')
                writer.writerow(line)
                #sys.stdout.write(tabs + line + '\n')

    def parse(self, lines):
        lines = [ i.strip() for i in lines ]
        lines = [ i for i in lines if i ]
        lines = [ i for i in lines if i[0] != '*' ]
        field = Field()
        self.fields = [ field.parse(i, j) for i, j in enumerate(lines) ]
        self.set2legal_db_names()
        self.occurs_n_times()


    def camel_case(self, name):
        return ''.join([ i.title() for i in name.split('_') ])


def main(args):
    Copybook().parse(args.copybook.readlines())

if __name__ == '__main__':
    from cmd_line_args import Args


    args = Args(USAGE, __version__)
    args.allow_stdin()
    args.add_files('copybook')
    main(args.parse())

Please provide an [MCVE](http://stackoverflow.com/help/mcve): the code you have listed is just class definitions and a little too long for somebody to debug it by going line by line. Also some more example of the input and expected output would help. — Cyb3rFly3r, Apr 30 '16 at 13:09
It would probably be a good idea to give a quick explanation of "picture string" formatting; I suspect that few Python programmers are familiar with this notation. — PM 2Ring, Apr 30 '16 at 13:25
@PM 2Ring : do you have any idea how to get complete size of the field including precision — Prasanna Kumar, Apr 30 '16 at 14:11
@PM2Ring I'm not sure a "quick explanation" of [picture strings](http://www.ibm.com/support/knowledgecenter/#!/SS6SG3_6.1.0/com.ibm.cobol61.ent.doc/PGandLR/ref/rlddepic.html) is possible. This is a non-trivial feature of COBOL and the PICTURE clause is affected by the [USAGE](http://www.ibm.com/support/knowledgecenter/#!/SS6SG3_6.1.0/com.ibm.cobol61.ent.doc/PGandLR/ref/rlddeusa.html) clause which the OP did not specify. This is why I requested the OP include the COBOL copybook in their question. — cschneid, Apr 30 '16 at 22:17
Also helpful would be sample input and expected output. And whether the Python code will be running on the mainframe (which OS?). — cschneid, Apr 30 '16 at 22:25

score 1 · Answer 1 · edited May 23 '17 at 12:23

If you can use of languages other than python, A couple of other options include

JRecord has a Cobol2Csv program
Look at the Stingray Project. It is a python project for reading Mainframe cobol data
If it is Mainframe Cobol, Use Cb2xml to convert the Cobol-Copybook to Xml. Cb2xml will parse the Cobol for you, it will calculate field positions/lengths/scales etc. It easy to load the Cb2xml-Xml into python (or Ruby or JavScript etc). You will still need to check if it is an assumed decimal (V picture) or an actual decimal point (.).
If you are prepared to use Jython, this allows direct access to the Java programs.

e.g. to load the xml produced by cb2xml in python

tree = ET.parse('cbl2xml_Test110.cbl.xml')

The following program will print the Cobol copybok attributes becomes:

## Get an attribute if it exist otherwise return ''
def getAttr(d, key):
    r=''
    if key in d.attrib:
        r = d.attrib[key]
    return r

# Get attribute if it exists
def getAttrId(d, key):
    r=''
    if key in d.attrib:
        r = key + '=' + d.attrib[key]
    return r

##########################################################################
# Purpose: Print one item
##########################################################################
def printItem(indent, item):
    n = indent + item.attrib['level'] + " " + item.attrib['name'] + "                                                                                    "
    n = n[:50]

    print n, '\t', item.attrib['position'], '\t', item.attrib['storage-length'], '\t', getAttr(item, 'display-length'), getAttr(item, 'picture'), getAttrId(item, 'usage'), getAttrId(item, 'numeric'), getAttrId(item, 'signed')
    for child in item.findall('item'):
    printItem(indent + "    ", child)

#########################################################################

tree = ET.parse('cbl2xml_Test110.cbl.xml')
root = tree.getroot()

print ">> ", root.tag, root.attrib['filename']

for child in root.findall('item'):
    printItem("  ", child)

Some examples of people who have used cb2xml from other lanuages

CopybookUtils is a ruby wrapper around cb2xml. It reads cobol text files in ruby
Hostbridge have been using cb2xml-xml from Java-Script to convert Cobol-Cics payloads to/from JSON.
Dynamically Reading COBOL Redefines with C# used cb2xml to generate cobol programs

Note: I wrote JRecord and maintain cb2xml

decimal position are not adding while converting mainframe copy book to csv

1 Answers1