0

I have 10000 files which I'm trying to split into 7 chunks, creating symlinks to the files of each chunk in a different directory. For some reason, the last chunk ends up with exactly the same number of files as the other chunks, even though 10000 is not divisible by 7 (10000 = 7 × 1428 + 4), so it should have four extra. Why is this happening and how can I fix it?

import os

# Script from the question (Python 2: uses the print statement and xrange).
# Goal: split the files in StartDir into seven consecutive chunks and create
# symlinks to each chunk's files in the matching *EndDir directory.
StartDir = "OriginalDir"
FirstEndDir   = "FirstSeventh"
SecondEndDir  = "SecondSeventh"
ThirdEndDir   = "ThirdSeventh"
FourthEndDir  = "FourthSeventh"
FifthEndDir   = "FifthSeventh"
SixthEndDir   = "SixthSeventh"
SeventhEndDir = "SeventhSeventh"

AllFileList = os.listdir(StartDir)

# Order the names by the integer found between the last "_" and the next ".";
# int() raises ValueError for any name that does not follow that pattern.
SortedList = sorted(AllFileList, key=lambda x: int(x.split("_")[-1].split(".")[0]))
print SortedList[0]

# Python 2 "/" on two ints floors, so n is the chunk size without the remainder.
n = len(SortedList)/7

# NOTE(review): each comprehension below builds a list of *windows*: one
# n-long slice SortedList[i:i + n] for every start index i in the xrange.
# Only element [0] of each list is used further down, i.e. the window that
# starts at the first index of the range.
FirstList = [SortedList[i:i + n] for i in xrange(0, n, 1)]
SecondList = [SortedList[i:i + n] for i in xrange(n, 2*n, 1)]
ThirdList = [SortedList[i:i + n] for i in xrange(2*n, 3*n, 1)]
FourthList = [SortedList[i:i + n] for i in xrange(3*n, 4*n, 1)]
FifthList = [SortedList[i:i + n] for i in xrange(4*n, 5*n, 1)]
SixthList = [SortedList[i:i + n] for i in xrange(5*n, 6*n, 1)]
# NOTE(review): extending the xrange to len(SortedList) only adds more windows;
# SeventhList[0] is still SortedList[6*n:7*n], so it has n entries and the
# len(SortedList) % 7 trailing files are never linked. SortedList[6*n:] would
# be the slice that includes them.
SeventhList = [SortedList[i:i + n] for i in xrange(6*n, len(SortedList), 1)]

# NOTE(review): with fewer than 7 files n is 0, FirstList is empty and
# FirstList[0] raises IndexError.
# NOTE(review): os.symlink stores the source path as given; if StartDir is a
# relative path the link is resolved relative to the link's own directory --
# confirm the real paths are absolute.
for ii in FirstList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(FirstEndDir,ii))

for ii in SecondList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(SecondEndDir,ii))

for ii in ThirdList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(ThirdEndDir,ii))

for ii in FourthList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(FourthEndDir,ii))

for ii in FifthList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(FifthEndDir,ii))

for ii in SixthList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(SixthEndDir,ii))

# Links only the n files of SeventhList[0]; see the note on SeventhList above.
for ii in SeventhList[0]:
    print ii
    os.symlink(os.path.join(StartDir,ii),os.path.join(SeventhEndDir,ii))
Beth Long
  • 375
  • 3
  • 24
  • 1
    This code doesn't come remotely close to splitting `AllFileList` into 7 chunks. For example: `FirstList` is a list of lists of file names, where the inner lists are a sort of sliding window of length `n`. There's more work to do here than the question asks. Take a look at `chunker()` in this [answer](https://stackoverflow.com/a/76158173/4583620) and see if it works for you. – Michael Ruth May 11 '23 at 22:38
  • Oh, just noticed `xrange()`. Are you looking for a Python 2 solution? If so, please tag this question appropriately. – Michael Ruth May 11 '23 at 22:39
  • I mean, the first 6 sublists contain exactly the chunks of `AllFileList` that I want them to, so it does come remotely close to doing what I want it to. As for the rest: yes, Python 2 is preferable. – Beth Long May 12 '23 at 06:38

1 Answer

0

Solved using this answer with numpy:

#Creates Symlink to RawMC Files in StartDir, splitting them into 7 chunks and creating symlinks to all the files in each chunk in FirstEndDir, SecondEndDir, ..., SeventhEndDir

import os
import shutil
import sys

import numpy as np

#Define directories
StartDir = "OriginalDir"
FirstEndDir   = "FirstSeventh"
SecondEndDir  = "SecondSeventh"
ThirdEndDir   = "ThirdSeventh"
FourthEndDir  = "FourthSeventh"
FifthEndDir   = "FifthSeventh"
SixthEndDir   = "SixthSeventh"
SeventhEndDir = "SeventhSeventh"

#Target directories, in chunk order
AllEndDirs = [FirstEndDir,SecondEndDir,ThirdEndDir,FourthEndDir,FifthEndDir,SixthEndDir,SeventhEndDir]

# raw_input only exists on Python 2; fall back to input so the script also
# runs unchanged on Python 3.
try:
    _ask = raw_input
except NameError:
    _ask = input


def _numeric_suffix(filename):
    """Return the integer between the last "_" and the following "." of filename.

    Raises ValueError if that part of the name is not an integer.
    """
    return int(filename.split("_")[-1].split(".")[0])


def _clear_directory(directory):
    """Delete every non-hidden entry inside directory, keeping directory itself.

    Mirrors what ``rm -rf directory/*`` did (the ``*`` glob skips dot-files)
    without building a shell command, so directory names containing spaces or
    shell metacharacters are handled safely. A missing directory is ignored.
    """
    if not os.path.isdir(directory):
        return
    for entry in os.listdir(directory):
        if entry.startswith("."):
            continue
        path = os.path.join(directory, entry)
        # A symlink pointing at a directory must be unlinked, not descended into.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)


def _link_chunk(filenames, source_dir, target_dir):
    """Create one symlink in target_dir for each name in filenames.

    The links point at the absolute path of the file in source_dir: a relative
    link target would be resolved relative to target_dir and dangle.
    target_dir is created if it does not exist yet.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    for name in filenames:
        print(name)
        os.symlink(os.path.abspath(os.path.join(source_dir, name)),
                   os.path.join(target_dir, name))


def main(start_dir=StartDir, end_dirs=None, confirm=None):
    """Split the files of start_dir into len(end_dirs) chunks and symlink them.

    start_dir -- directory holding the original files.
    end_dirs  -- target directories, one per chunk (defaults to AllEndDirs).
    confirm   -- callable taking the prompt text and returning the user's
                 answer (defaults to raw_input/input).

    The files are sorted numerically and split with numpy.array_split, so the
    chunk sizes differ by at most one and no file is left out. Before linking,
    the user is asked once per target directory whether its contents may be
    removed; any answer other than "y" exits via sys.exit() at that point.
    """
    if end_dirs is None:
        end_dirs = AllEndDirs
    if confirm is None:
        confirm = _ask

    #Find all RawMC files and sort them in numerical order
    sorted_names = sorted(os.listdir(start_dir), key=_numeric_suffix)

    #split list into (nearly) equal chunks
    chunks = np.array_split(sorted_names, len(end_dirs))

    #remove existing symlinks
    for enddir in end_dirs:
        answer = confirm("about to rm everything in "+enddir+". Proceed? Type y for yes, n for no:")
        if answer != "y":
            sys.exit()
        _clear_directory(enddir)

    #Create symlinks in the target directories
    for chunk, enddir in zip(chunks, end_dirs):
        _link_chunk(chunk, start_dir, enddir)


if __name__ == "__main__":
    main()

Beth Long
  • 375
  • 3
  • 24