2

i'm trying to adapt this Use binary COPY table FROM with psycopg2 example from @Mike T to my data but with i'm having some problems.

import psycopg2
import numpy as np
from struct import pack
from io import BytesIO
from datetime import datetime


conn = psycopg2.connect(host = 'x', database = 'x', user = 'x')
curs = conn.cursor()

DROP TABLE IF EXISTS test_test;
CREATE TABLE test_test(
    id_from_database INT PRIMARY KEY, 
    version VARCHAR, 
    information TEXT
    );

data = [(3,1,'hello hello!!'), (2,'123','test test???!'),(3,9, 'bye bye :)')]
dtype = [('id_from_database', 'object'),('version', 'object'),('information', 'object')]
data = np.array(data,dtype=dtype)

def prepare_text(dat):
   cpy = BytesIO()
   for row in dat:
       cpy.write('\t'.join([repr(x) for x in row]) + '\n')
   return(cpy)

def prepare_binary(dat):
    pgcopy_dtype = [('num_fields','>i2')]
    for field, dtype in dat.dtype.descr:
        pgcopy_dtype += [(field + '_length', '>i4'),
                         (field, dtype.replace('<', '>'))]
    pgcopy = np.empty(dat.shape, pgcopy_dtype)
    pgcopy['num_fields'] = len(dat.dtype)
    for i in range(len(dat.dtype)):
        field = dat.dtype.names[i]
        pgcopy[field + '_length'] = dat.dtype[i].alignment
        pgcopy[field] = dat[field]
    cpy = BytesIO()
    cpy.write(pack('!11sii', b'PGCOPY\n\377\r\n\0', 0, 0))
    cpy.write(pgcopy.tostring())  # all rows
    cpy.write(pack('!h', -1))  # file trailer
    #print("cpy")
    #print(cpy)
    return(cpy)
###
def time_pgcopy(dat, table, binary):
    print('Processing copy object for ' + table)
    tstart = datetime.now()
    cpy = prepare_binary(dat)
    tendw = datetime.now()
    print('Copy object prepared in ' + str(tendw - tstart) + '; ' +
        str(cpy.tell()) + ' bytes; transfering to database')
    cpy.seek(0)
    curs.copy_expert('COPY ' + table + ' FROM STDIN WITH BINARY', cpy)
    conn.commit()
    tend = datetime.now()
    print('Database copy time: ' + str(tend - tendw))
    print('        Total time: ' + str(tend - tstart))
    return
print(time_pgcopy(data, 'test_test', binary=True))

I'm getting this error:

curs.copy_expert('COPY ' + table + ' FROM STDIN WITH BINARY', cpy) psycopg2.DataError: incorrect binary data format CONTEXT: COPY test_test, line 1, column id_from_database

What am i doing wrong?

Thank you :)

(I can not comment on the original question because i don't have enough reputation)

Community
  • 1
  • 1
1pa
  • 715
  • 2
  • 9
  • 23
  • Solution to datetime and string columns: https://stackoverflow.com/questions/54678121/manually-convert-postgresql-binary-format-of-timestamp-to-integer https://github.com/enomado/python-postgresql-load/blob/master/fill_db.py hh = pack('!i{}s'.format(size), size, value) for string size and value binary encoded with encode('utf8') and hh = pack('!iQ'.format(size), size, from_epoch) for epoch difference between 2020-01-01 and the given date after it. – 42n4 May 13 '21 at 10:56
  • For struct pack: https://docs.python.org/3/library/struct.html – 42n4 May 13 '21 at 12:10
  • Some solutions: https://github.com/pbrumm/pg_data_encoder/blob/master/lib/pg_data_encoder/decoder.rb – 42n4 May 13 '21 at 12:14
  • https://dba.stackexchange.com/questions/190780/parsing-copys-binary-format-to-access-a-tsrange – 42n4 May 13 '21 at 12:16

1 Answers1

0

cpgcopy might be relevant here.

HoverHell
  • 4,739
  • 3
  • 21
  • 23