Not without reading the first line, as you mentioned.
However, it might be easier to do:
import numpy as np

do_not_read_cols = [3, 4, 9]
data = np.loadtxt('filename')
data = np.delete(data, do_not_read_cols, axis=1)
This won't be terribly memory-efficient, but loadtxt doesn't try to be very memory-efficient to begin with. Unless you're deleting the majority of the columns, the loadtxt call will use more memory than the subsequent temporary copy that delete makes.
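If you're willing to peek at the first line to count the columns (as you mentioned), you can also hand loadtxt a usecols list and skip the delete entirely. A minimal sketch, assuming whitespace-delimited data and the same placeholder 'filename' as above:

import numpy as np

skipcols = [3, 4, 9]
with open('filename') as f:
    ncols = len(f.readline().split())  # peek at the first line to count columns
usecols = [i for i in range(ncols) if i not in skipcols]
data = np.loadtxt('filename', usecols=usecols)

This still parses the skipped columns' text, but it never stores them, so no full-width array or temporary copy is made.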
To expand on my comment below, if you want to be memory-efficient and don't want to use pandas, another option is something like the following (note: written a bit sloppily):
import numpy as np

def generate_text_file(length=int(1e6), ncols=20):
    data = np.random.random((length, ncols))
    np.savetxt('large_text_file.csv', data, delimiter=',')

def iter_loadtxt(filename, delimiter=',', skiprows=0, skipcols=None, dtype=float):
    if skipcols is None:
        skipcols = []
    skipcols = set(skipcols)  # O(1) membership test per field

    def iter_func():
        with open(filename, 'r') as infile:
            for _ in range(skiprows):
                next(infile)
            for line in infile:
                line = line.rstrip().split(delimiter)
                for i, item in enumerate(line):
                    if i in skipcols:
                        continue
                    yield dtype(item)
        # Stash the number of kept columns for the reshape below.
        iter_loadtxt.rowlength = len(line) - len(skipcols)

    # Build a flat 1-D array straight from the generator, then fold it into rows.
    data = np.fromiter(iter_func(), dtype=dtype)
    data = data.reshape((-1, iter_loadtxt.rowlength))
    return data
# generate_text_file()  # run this once first to create the sample file
data = iter_loadtxt('large_text_file.csv')
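The key point is that np.fromiter builds a flat 1-D array directly from the generator, so the only large allocation is the final array itself; the rowlength attribute stashed during iteration tells reshape how many kept columns each row has. To skip the same columns as in the first example, the call would be:

data = iter_loadtxt('large_text_file.csv', skipcols=[3, 4, 9])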