Presuming you want to take sections of four lines, each coming after two lines to skip, just skip two lines and take a slice of four rows from a csv reader object:
from itertools import islice, chain
import pandas as pd
import csv
def parts(r):
    """Yield the data sections of *r*, skipping the two rows before each.

    The input is read as repeating groups: two rows that are discarded,
    followed by up to four rows that are kept.

    Args:
        r: An iterator of rows, e.g. a ``csv.reader``. It is consumed.

    Yields:
        A list holding the (up to four) rows of each section, in order.
    """
    missing = object()  # unique marker; no real row can be this object
    while True:
        # Skip the two leading rows. ``next`` is given a default so that
        # running out of input ends the generator cleanly; a bare ``next``
        # would raise StopIteration, which becomes RuntimeError inside a
        # generator (PEP 479).
        next(r, missing)
        if next(r, missing) is missing:
            return
        # Materialise the section immediately so the rows stay correct even
        # if the caller asks for the next section before reading this one.
        section = list(islice(r, 4))
        if not section:
            return
        yield section
# Open with newline="" as the csv module recommends, so the reader does its
# own newline handling (this matters for line breaks inside quoted fields).
with open("test.txt", newline="") as f:
    r = csv.reader(f)
    # Flatten the sections into one list of rows and build the frame from it.
    print(pd.DataFrame(list(chain.from_iterable(parts(r)))))
Output:
0 1 2
0 1 2 3
1 4 5 6
2 7 8 9
3 10 11 12
Or pass the chain object to pd.DataFrame.from_records:
# Open with newline="" as the csv module recommends, so the reader does its
# own newline handling (this matters for line breaks inside quoted fields).
with open("test.txt", newline="") as f:
    r = csv.reader(f)
    # from_records takes the flattened row iterator directly; no list needed.
    print(pd.DataFrame.from_records(chain.from_iterable(parts(r))))
0 1 2
0 1 2 3
1 4 5 6
2 7 8 9
3 10 11 12
Or, as a more general approach, use a function that relies on the consume recipe from the itertools documentation to skip lines:
from itertools import islice, chain
from collections import deque
import pandas as pd
import csv
def consume(iterator, n=None):
    """Advance *iterator* by *n* items, discarding them.

    Args:
        iterator: The iterator to advance in place.
        n: Number of items to skip. ``None`` (the default) consumes the
            iterator entirely.
    """
    # Use functions that consume iterators at C speed.
    if n is None:
        # Feed the entire iterator into a zero-length deque, which keeps
        # nothing.
        deque(iterator, maxlen=0)
    else:
        # The slice [n:n] is empty, but reaching its start pulls n items
        # off the iterator first.
        next(islice(iterator, n, n), None)
def parts(r, sec_len, skip):
    """Yield successive sections of *r* as lists of rows.

    Before every section, ``skip`` rows are discarded via ``consume``; then
    up to ``sec_len`` rows are collected. Iteration stops at the first
    section that comes back empty.
    """
    consume(r, skip)
    while True:
        section = list(islice(r, sec_len))
        if not section:
            # Nothing left to read: same stop condition as an [] sentinel.
            return
        yield section
        consume(r, skip)
# Open with newline="" as the csv module recommends, so the reader does its
# own newline handling (this matters for line breaks inside quoted fields).
with open("test.txt", newline="") as f:
    r = csv.reader(f)
    # Sections of 4 rows, each preceded by 2 skipped rows, flattened to rows.
    print(pd.DataFrame.from_records(chain.from_iterable(parts(r, 4, 2))))
The last option is to write the sections to a StringIO object and pass that to pd.read_csv:
from io import StringIO
def parts(r, sec_len, skip):
    """Yield successive sections of *r*, each joined into a single string.

    Before every section, ``skip`` items are discarded via ``consume``; then
    up to ``sec_len`` items are collected and concatenated. Iteration stops
    at the first section that comes back empty.
    """
    consume(r, skip)
    while True:
        chunk = list(islice(r, sec_len))
        if not chunk:
            # Nothing left to read: same stop condition as an [] sentinel.
            break
        yield "".join(chunk)
        consume(r, skip)
with open("test.txt") as f:
    # Collect the kept sections in an in-memory text buffer.
    buffer = StringIO()
    for section_text in parts(f, 4, 2):
        buffer.write(section_text)
    # Rewind so read_csv starts from the beginning of the buffer.
    buffer.seek(0)
    frame = pd.read_csv(buffer, header=None)
    print(frame)