I have some confusion to differentiate the two options: sys.argv[1] and sys.stdin in my implementation. The python function is to read [fastq]files:
#!/usr/bin/python
def readfq(fp): # this is a generator function
last = None # this is a buffer keeping the last unprocessed line
while True: # mimic closure; is it a bad idea?
if not last: # the first record or a record following a fastq
for l in fp: # search for the start of the next record
if l[0] in '>@': # fasta/q header line
last = l[:-1] # save this line
break
if not last: break
name, seqs, last = last[1:].partition(" ")[0], [], None
for l in fp: # read the sequence
if l[0] in '@+>': # This has a trick to skip the + row.
last = l[:-1] # remember the sentenial variable last
break
seqs.append(l[:-1]) # Append the sequence to seqs which is a list.
if not last or last[0] != '+': # this is a fasta record
yield name, ''.join(seqs), None # yield a fasta record
if not last: break
else: # this is a fastq record
seq, leng, seqs = ''.join(seqs), 0, []
for l in fp: # read the quality
seqs.append(l[:-1])
leng += len(l) - 1
if leng >= len(seq): # have read enough quality
last = None
yield name, seq, ''.join(seqs); # yield a fastq record
break
if last: # reach EOF before reading enough quality
yield name, seq, None # yield a fasta record instead
break
# Following is the tester
if __name__ == "__main__":
import sys
n, slen, qlen = 0, 0, 0
for name, seq, qual in readfq(sys.stdin): #Original line
# **for name, seq, qual in readfq(sys.argv[1]):** #This is what I want, but did not work.
n += 1
slen += len(seq)
qlen += qual and len(qual) or 0
print(seq)
print (n, '\t', slen, '\t', qlen)
I must have missed important parts to understand sys.stdin as stream and sys.argv[1] as file object, because the code does not work.
Closely related post discussed about the using of stdin or argv[1] from command line as argument parsing, but my question is more about the difference between the contents of the two. More specifically, my this question is more about file stream(?), I think.