I have encountered a problem creating new tarfiles with the latest Python 3.5 release and its tarfile module. The problem is similar to those discussed here and here. In the former case, the suggested solution raises the error "ReadError: empty header"; the latter is a closed issue from eight years ago, whose patch should already be included in the latest version of the language. The docs for the tarfile module explicitly state that mode "a" can be used to create new tarfiles.
The issue arises specifically when attempting to create a new tarfile. The code that triggers it is reproduced in full below; it is a simple script I was using to benchmark the task with multiprocessing.
#just a little script to test copy times for a control
import os
import os.path
import time
import shutil
import multiprocessing
import tarfile
# Benchmark configuration shared by the functions and classes below.
# These are plain module-level assignments: a `global` statement has no effect
# at module scope, so the original `global ...;` prefixes were dropped.

# Directory whose files are copied into the archive.
source = "/home/patches/Documents/Scripting Projects/Resources/Test Payload"

# Archive that the copy jobs append to.
# NOTE(review): the original literal was broken across two lines (a syntax
# error); it is rejoined here with a single space -- confirm the real path.
# NOTE(review): the jobs open this file in plain "a" mode, which tarfile
# documents as uncompressed, so the ".bz2" suffix is presumably misleading.
dest = "/home/patches/Desktop/Copy Benchmark Output.tar.bz2"

# `start` and `end` are created by startTimer() / endTimer(); nothing needs to
# be declared for them here.

# Flag cleared when the user answers "n" to the "Run again?" prompt.
testing = True

# One worker process (and one shutdown sentinel) per CPU.
numConsumers = multiprocessing.cpu_count()
#Classes!
class copyProc(multiprocessing.Process):
    """Worker process that runs callables pulled from a joinable queue.

    Tasks are fetched from ``qTask`` one at a time and called with no
    arguments.  A ``None`` item is the shutdown sentinel ("poison pill"):
    the worker acknowledges it and leaves its loop.
    """

    def __init__(self, qTask):
        """Store the task queue and change directory to ``source``.

        Args:
            qTask: queue object providing ``get()`` and ``task_done()``.
        """
        super().__init__()
        self.qTask = qTask
        # Executed wherever the worker object is constructed.
        os.chdir(source)

    def run(self):
        """Execute queued tasks until the ``None`` sentinel is received.

        Every item taken from the queue is acknowledged with ``task_done()``,
        including tasks that raise.
        """
        proc_name = self.name
        while True:
            next_task = self.qTask.get()
            if next_task is None:
                # Poison pill means shutdown
                print('%s: Exiting' % proc_name)
                self.qTask.task_done()
                break
            try:
                next_task()
            except Exception as exc:
                # A failing task must not kill the worker: a dead worker
                # never acknowledges its item or consumes its poison pill,
                # and anything waiting on the queue's join() blocks forever.
                print('%s: task failed: %r' % (proc_name, exc))
            finally:
                self.qTask.task_done()
class copyJob(object):
    """Callable task that appends one path to the archive at ``dest``."""

    def __init__(self, a):
        """Remember the path to archive.

        Args:
            a: path handed to ``TarFile.add`` when the job is called.
        """
        self.tgt = a

    def __call__(self):
        """Open ``dest`` in append mode, add ``self.tgt`` and close it."""
        # NOTE(review): mode "a" creates a missing archive, but an existing
        # zero-length file is rejected with ReadError("empty header").
        # Nothing here synchronises concurrent callers working on the same
        # archive -- presumably the source of that error when several worker
        # processes run jobs at once; consider a single writer or a lock.
        # The context manager closes the archive even if add() raises.
        with tarfile.open(dest, "a") as tar:
            tar.add(self.tgt)
#Function
def announce():
    """Print the benchmark banner and wait for the user to press Enter."""
    print("Starting copy benchmark - multiprocessing.")
    # input() only returns once Enter is pressed, so the prompt says so.
    # The reply itself is not used.
    input("Press Enter to begin")
def startTimer():
    """Record the current ``time.time()`` value in the global ``start``."""
    global start
    start = time.time()
def setup():
os.chdir(source)
for a, b, files in os.walk(source):
for file in files:
tasks.put(copyJob(file))
for i in range(numConsumers):
tasks.put(None)
def endTimer():
    """Record the current ``time.time()`` value in the global ``end``."""
    global end
    end = time.time()
def prompt():
    """Report the elapsed time and ask whether to run again.

    Reads the globals ``start`` and ``end``, prints their difference, and
    clears the global ``testing`` flag when the user answers "n".
    """
    # Without this declaration the assignment below would only bind a local
    # name and the module-level flag would never change.
    global testing
    diff = end - start
    # os.remove(dest)
    print("The test took %s seconds" % str(diff))
    bar = input("Run again? Y/n")
    # Accept "N" and surrounding whitespace as well as a bare "n".
    if bar.strip().lower() == "n":
        testing = False
#runtime
if __name__ == '__main__':
    # Driver: queue the copy jobs, run them on one worker per CPU and time
    # the whole run.
    multiprocessing.set_start_method("spawn")
    tasks = multiprocessing.JoinableQueue()
    announce()
    startTimer()
    setup()
    consumers = [copyProc(tasks) for _ in range(numConsumers)]
    for worker in consumers:
        worker.start()
    # Wait until every queued item, sentinels included, has been
    # acknowledged with task_done().
    tasks.join()
    endTimer()
    # Wait for the worker processes themselves to exit before prompting;
    # done after endTimer() so the measured interval is unchanged.
    for worker in consumers:
        worker.join()
    prompt()
Edit to add: the problem specifically is that, instead of the expected behaviour, the script throws a "ReadError: empty header" exception when attempting to open the tarfile.