#Copyright (C) 2016, Author: Tobias Ahsendorf, E-Mail: tobias.ahsendorf@gmail.com
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program. If not, see http://www.gnu.org/licenses/.

# Extract every "transcript" record of a GTF annotation file into a CSV file.
#
# Usage: python <this script> GTF_PATH CSV_PATH
#
# One CSV row is written per transcript, with the columns
#   GENE_ID, TRANSCRIPT_ID, TRANSCRIPT_TYPE, CHROMOSOME, START, END, STRAND
# START and END are taken unchanged from HTSeq's GenomicInterval (which HTSeq
# documents as 0-based start / exclusive end), i.e. they are NOT the raw
# 1-based GTF coordinates.

import csv
import itertools
import sys

import HTSeq

# Fail with a readable message instead of a bare IndexError when an argument
# is missing. Extra arguments are still ignored, as before.
if len(sys.argv) < 3:
    sys.exit("usage: {} GTF_PATH CSV_PATH".format(sys.argv[0]))

# Path where the GTF file of interest can be found,
# e.g. "/home/user/gencode.v18.annotation.gtf"
gtf_path = sys.argv[1]

# Path where the CSV file with the position and strand information of every
# transcript should be stored, e.g. "/home/user/Transcripts-v.18.csv"
csv_path = sys.argv[2]

gtffile = HTSeq.GFF_Reader(gtf_path)

# The `with` block guarantees the output file is flushed and closed even if
# parsing the GTF file raises. newline="" is what the csv module requires for
# files handed to csv.writer; without it Windows output gets "\r\r\n" endings.
with open(csv_path, "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    # Stream the rows straight into the writer instead of first collecting
    # all of them in a list. Only features of type "transcript" are exported.
    writer.writerows(
        [
            feature.attr["gene_id"],
            feature.attr["transcript_id"],
            feature.attr["transcript_type"],
            feature.iv.chrom,
            feature.iv.start,
            feature.iv.end,
            feature.iv.strand,
        ]
        for feature in gtffile
        if feature.type == "transcript"
    )