# This pre-prunes the count file to get it to a size that's more easily
# managed by R.
#
# Rows are copied from the gzipped input table to the gzipped output table
# when they are the header row (first field "GeneID") or when the integer
# values from the third column onwards sum to at least the threshold.
import gzip

IN_PATH = "genic_counts.tsv.gz"
OUT_PATH = "genic_subcounts.tsv.gz"
MIN_TOTAL = 10


def _keep_line(line, min_total=MIN_TOTAL):
    """Return True when a raw (bytes) table line should be kept.

    The line is stripped of surrounding whitespace and split on tabs.
    Fields from the third column onwards are parsed as integers and summed;
    a non-integer field raises ValueError.
    """
    segments = line.strip().split(b"\t")
    first = segments[0]
    # NOTE(review): a two-character slice can never equal the
    # three-character b"ENS", so the second clause never matches as written.
    # It is kept unchanged because the intended slice is not clear from this
    # file -- confirm whether [0:3] (or [1:4]) was meant before changing it.
    if first == b"GeneID" or first[1:3] == b"ENS":
        return True
    return sum(int(field) for field in segments[2:]) >= min_total


def prune_counts(in_path=IN_PATH, out_path=OUT_PATH, min_total=MIN_TOTAL):
    """Copy the rows of *in_path* that pass the filter into *out_path*.

    Both files are gzip-compressed, tab-separated tables. They are handled
    in binary mode so kept lines are written back byte-for-byte (no newline
    translation or re-encoding), and both handles are closed even if a row
    fails to parse.
    """
    with gzip.open(in_path, "rb") as src, gzip.open(out_path, "wb") as dst:
        for line in src:
            if _keep_line(line, min_total):
                dst.write(line)


if __name__ == "__main__":
    prune_counts()