Hi, I have data downloaded in chunks by n threads. The file is 102 KB in total, so I lock the shared resource (i.e. the file), write the first chunk, and then release the lock. But when the next chunk arrives from the second thread, instead of continuing from where the first one left off, it is written on top of it...
So with two threads, each holding a 51 KB chunk, the 102 KB file ends up being only 51 KB.
Here is the relevant piece of code:
# Wait for every worker thread to finish before touching the file
# (presumably th.data is filled in by the thread's run(); verify).
for th in threads:
    th.join()
# Write each thread's chunk, in list order.
for th in threads:
    # NOTE(review): all workers were joined above, so this loop appears to
    # run in a single thread and the lock likely has nothing to guard; confirm.
    lock.acquire()
    # NOTE: mode 'w+' truncates the file every time it is opened, so each
    # iteration throws away what the previous iteration wrote and only the
    # last chunk survives. An append mode ('a' / 'ab') keeps earlier chunks.
    with open(fileName, 'w+') as fh:
        fh.write(th.data)
    lock.release()
I am even using mode 'w+',
but instead of appending, it still overwrites.
update
def main(url=None, splitBy=2):
    """Download ``url`` in ``splitBy`` byte ranges and save it to disk.

    The output file is named after the last ``/``-separated segment of
    ``url``. One ``SplitBufferThread`` is started per range returned by
    ``buildRange``; once started, the chunks are written to the file in
    range order. An existing file with the same name is overwritten.

    Args:
        url: Address of the file to download. Nothing is downloaded when it
            is empty or ``None``.
        splitBy: Number of byte ranges (and worker threads) to use.

    Returns:
        None. Progress and problems are reported on standard output.
    """
    # Single-argument, parenthesised print behaves the same on Python 2 and
    # also parses on Python 3.
    start_time = time.time()
    if not url:
        print("Please Enter some url to begin download.")
        return

    fileName = url.split('/')[-1]
    sizeInBytes = requests.head(
        url, headers={'Accept-Encoding': 'identity'}
    ).headers.get('content-length', None)
    # Validate before reporting, so a missing header is not announced as
    # "None bytes to download."
    if not sizeInBytes:
        print("Size cannot be determined.")
        return
    print("%s bytes to download." % sizeInBytes)

    byteRanges = buildRange(int(sizeInBytes), splitBy)
    threads = []
    for idx in range(splitBy):
        bufTh = SplitBufferThread(url, byteRanges[idx])
        bufTh.daemon = True
        bufTh.start()
        threads.append(bufTh)

    # The file is opened exactly once:
    #   * 'wb' truncates a leftover file, so a re-run replaces it instead of
    #     appending a second copy to it;
    #   * binary mode writes the bytes untouched (text mode may translate
    #     newlines on some platforms).
    # No lock is taken: only this thread writes to the file, and a chunk is
    # read only after its thread has been joined. The former seek(0, 0) was
    # dropped because in append mode it did not affect where data was written.
    # Assumes th.data holds the raw bytes of the chunk; confirm against
    # SplitBufferThread.
    with open(fileName, 'wb') as fh:
        for th in threads:
            # Every thread is already running, so the downloads overlap;
            # join() only waits for the chunk that has to be written next.
            th.join()
            fh.write(th.data)

    # Reported after the file is complete, so it covers the download itself
    # rather than just the time needed to start the threads.
    print("--- %s seconds ---" % str(time.time() - start_time))
Update 2
I have completely removed the extra threads list; just calling the join()
method does the magic. But how does each thread wait for the previous chunk to finish writing? Is it the `with` block that
waits for one thread.data to be written before the next one starts appending?
def main(url=None, splitBy=6):
    """Download ``url`` in ``splitBy`` byte ranges and save it to disk.

    The output file is named after the last ``/``-separated segment of
    ``url``. If a file of that name already has the size reported by the
    server, the user is asked whether to download it again.

    Args:
        url: Address of the file to download. Nothing is downloaded when it
            is empty or ``None``.
        splitBy: Number of byte ranges to request.

    Returns:
        None. Progress and problems are reported on standard output.

    Raises:
        SystemExit: If a same-sized file already exists and the user
            declines to replace it.
    """
    # Single-argument, parenthesised print behaves the same on Python 2 and
    # also parses on Python 3.
    if not url:
        print("Please Enter some url to begin download.")
        return

    fileName = url.split('/')[-1]
    sizeInBytes = requests.head(
        url, headers={'Accept-Encoding': 'identity'}
    ).headers.get('content-length', None)
    # Validate the size before int() touches it: with no content-length
    # header and an existing file, int(None) used to raise TypeError.
    if not sizeInBytes:
        print("Size cannot be determined.")
        return
    totalBytes = int(sizeInBytes)

    if os.path.exists(fileName) and totalBytes == os.path.getsize(fileName):
        ask = raw_input('[YES]')
        # An empty answer, 'y' or 'yes' means "download again".
        if ask and ask.lower() not in ['y', 'yes']:
            raise SystemExit("File already exists.")

    start_time = time.time()
    print("%s bytes to download." % sizeInBytes)
    byteRanges = buildRange(totalBytes, splitBy)

    # 'wb' truncates whatever is already on disk, so neither a complete old
    # copy nor a partial one of a different size gets new chunks appended to
    # it (which also makes an explicit os.remove unnecessary). Binary mode
    # writes the bytes untouched.
    # Assumes bufTh.data holds the raw bytes of the chunk; confirm against
    # SplitBufferThread.
    with open(fileName, 'wb') as fh:
        for idx in range(splitBy):
            bufTh = SplitBufferThread(url, byteRanges[idx])
            bufTh.daemon = True
            bufTh.start()
            # join() blocks this loop until the thread has finished, and that
            # (not the `with` block) is why the chunks are written strictly
            # one after another.
            # NOTE(review): because the next thread is only started after
            # this join() returns, just one download is in flight at a time.
            # To overlap the downloads, start every thread first and join
            # them afterwards.
            bufTh.join()
            fh.write(bufTh.data)

    print("--- %s seconds ---" % str(time.time() - start_time))
    print("Finished Writing file %s" % fileName)