#Copyright (C) 2016, Author: Tobias Ahsendorf, E-Mail: tobias.ahsendorf@gmail.com
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program.  If not, see <http://www.gnu.org/licenses/>


import sys
import csv
import HTSeq
import itertools

# Command-line arguments:
#   1. path of the GTF file of interest,
#      e.g. "/home/user/gencode.v18.annotation.gtf"
#   2. path of the CSV file in which the position and strand information of
#      every transcript should be stored,
#      e.g. "/home/user/Transcripts-v.18.csv"
if len(sys.argv) < 3:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: {0} <input.gtf> <output.csv>".format(sys.argv[0]))

gtf_path = sys.argv[1]
csv_path = sys.argv[2]

gtffile = HTSeq.GFF_Reader(gtf_path)

# Loop through the GTF file and keep one row per "transcript" feature:
# [GENE_ID, TRANSCRIPT_ID, TRANSCRIPT_TYPE, CHROMOSOME,
#  START POS OF TRANSCRIPT, END POS OF TRANSCRIPT, STRAND]
# A transcript record lacking one of the three attributes raises KeyError.
# NOTE(review): start/end are written exactly as HTSeq reports them; per the
# HTSeq documentation intervals are 0-based and half-open, so they presumably
# differ from the 1-based coordinates in the GTF text -- confirm that
# downstream consumers expect this.
tssfeature = [
    [
        feature.attr["gene_id"],
        feature.attr["transcript_id"],
        feature.attr["transcript_type"],
        feature.iv.chrom,
        feature.iv.start,
        feature.iv.end,
        feature.iv.strand,
    ]
    for feature in gtffile
    if feature.type == "transcript"
]

# The output file is opened only after the GTF has been parsed completely, so
# an unreadable or malformed input does not truncate an existing CSV file.
# The csv module handles line endings itself: on Python 3 the file must be
# opened with newline='' and on Python 2 in binary mode, otherwise rows end
# in "\r\r\n" on platforms that translate newlines.
if sys.version_info[0] >= 3:
    out_file = open(csv_path, "w", newline="")
else:
    out_file = open(csv_path, "wb")

# The context manager closes the file even if writing fails.
with out_file:
    csv.writer(out_file).writerows(tssfeature)