I have tried a number of solutions and read many websites, but I cannot seem to solve this. I have a file that contains message objects. Each message has a 4-byte value that is the message type, a 4-byte value that is the length, and then the message data, which is ASCII in Unicode. When I print to the screen, it looks like ASCII. When I redirect the output to a file, I get Unicode, so something is not right with the way I am trying to decode all this. Here is the Python script:
import sys
import codecs
import encodings.idna
import unicodedata
def getHeader(fileObj):
    """Read one message header from ``fileObj``.

    The stream is first moved back one byte relative to its current
    position, then two consecutive 4-byte fields are read and converted
    to unsigned integers.

    Args:
        fileObj: A seekable binary file object.

    Returns:
        A ``(mstype, mslen)`` tuple: the message type followed by the
        message length, both as ``int``.
    """
    # Step back one byte: the main loop has already consumed a single byte
    # (the 0x1D it matched on) before calling this function, and that byte
    # is read again here as the first byte of the type field.
    fileObj.seek(-1, 1)

    # NOTE(review): sys.byteorder is the byte order of the machine running
    # this script, not necessarily that of whatever wrote the file — confirm
    # the file's byte order and consider hard-coding 'little' or 'big'.
    mstype = int.from_bytes(fileObj.read(4), byteorder=sys.byteorder)
    mslen = int.from_bytes(fileObj.read(4), byteorder=sys.byteorder)
    return mstype, mslen
def getMessage(fileObj, count):
    """Read the raw message payload from ``fileObj``.

    Args:
        fileObj: A binary file object positioned at the start of the payload.
        count: Number of bytes to read.

    Returns:
        The bytes read, undecoded. May be shorter than ``count`` if the
        stream ends first.
    """
    # Returned directly rather than via a local named ``str``, which would
    # shadow the builtin type inside this function.
    return fileObj.read(count)
def getFields(msg):
    """Decode ``msg`` as UTF-8 and split it into fields on ``';'``.

    Args:
        msg: The encoded message payload (a bytes-like object).

    Returns:
        A list of the ``str`` fields, in order of appearance.
    """
    return codecs.decode(msg, 'utf-8').split(';')
# Scan the log one byte at a time. A 0x1D byte marks the start of a message;
# every other byte is echoed for inspection.
mstype = mslen = 0
with open('../putty.log', 'rb') as f:
    while True:
        byte = f.read(1)
        if not byte:
            # End of file.
            break

        if byte != b'\x1D':
            # Not a message start: show the raw byte and its hex code.
            print (f"Char read: {byte} {hex(ord(byte))}")
            continue

        # Message start: parse the header, then read the payload it describes.
        mstype, mslen = getHeader(f)
        print (f"Msg Type: {mstype} Msg Len: {mslen}")
        msg = getMessage(f, mslen)
        print(f"Message: {codecs.decode(msg, 'utf-8')}")

        # Print each ';'-separated field on its own line.
        fields = getFields(msg)
        print("Fields:")
        print("\n".join(fields))
You can use this link to get the file to decode.