BedInverseExons: Difference between revisions

From genomewiki
Jump to navigationJump to search
No edit summary
 
No edit summary
 
(3 intermediate revisions by 2 users not shown)
Line 8: Line 8:


from sys import *
from sys import *
   
 
f = open(argv[1], "r")
f = open(argv[1], "r")
maxExon = {}
maxExon = {}
for l in f:
for l in f:
     fs = l.split()
     fs = l.split()
     (gene,dummy,exon) = fs[3].split("_")
     fs2 = fs[3].split("_")
    gene,exon = fs2[0],fs2[2] # might change this to fs2[0], fs[2]
 
     maxExon[gene] = int(exon)
     maxExon[gene] = int(exon)
   
 
f = open(argv[1], "r")
f = open(argv[1], "r")
   
 
for l in f:
for l in f:
     fs = l.split()
     fs = l.split()
    fs2 = fs[3].split("_")
    gene = fs2[0]
    exon = fs2[2]
     if fs[5]=="-":
     if fs[5]=="-":
        (gene,dummy,exon) = fs[3].split("_")
         name = gene + "_exon_" + str(maxExon[gene] - int(exon))
         name = gene + "_exon_" + str(maxExon[gene] - int(exon))
        print "\t".join([fs[0], fs[1], fs[2],name,fs[4],fs[5]])
     else:
     else:
         print l,
         name = gene + "_exon_" + exon
    print "\t".join([fs[0], fs[1], fs[2],name,fs[4],fs[5]])


</pre>
</pre>
[[Category:User Developed Scripts]]

Latest revision as of 12:10, 8 October 2007

When you export exons as BED with the table browser, they are numbered 0, 1, 2, 3, ... by default, but always 5' to 3'. This is unusual for biologists, who usually call the most upstream exon "exon 1". This script reverses the exon numbers for entries on the minus strand.

#!/usr/bin/python 

# input: bed file with exons as exported from ucsc
# output: bed file with exons numbers reversed if strand="-"

from sys import *

def parse_exon_name(name):
    """Split a BED name field into its gene and exon-number parts.

    The name is split on "_"; element 0 is taken as the gene and element 2
    as the exon number (element 1 is ignored).  Gene identifiers that
    themselves contain "_" are therefore not handled.

    Returns a ``(gene, exon)`` tuple of strings.
    Raises IndexError if the name has fewer than three "_"-separated parts.
    """
    parts = name.split("_")
    return parts[0], parts[2]


def invert_exon_numbers(lines):
    """Return BED lines with exon numbers reversed for minus-strand entries.

    ``lines`` is any iterable of BED lines with at least six
    whitespace-separated columns.  For entries whose strand column is "-",
    the exon number N is replaced by ``max_exon_of_gene - N``; other entries
    keep their number.  In both cases the name column is rewritten as
    ``<gene>_exon_<number>`` and only the first six columns are emitted,
    joined by tabs, without a trailing newline.  Blank lines are skipped.

    Raises IndexError / ValueError on lines that have too few columns or a
    non-integer exon number.
    """
    records = []
    max_exon = {}
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        gene, exon = parse_exon_name(fields[3])
        number = int(exon)
        # Track the true maximum rather than the last value seen, so the
        # result does not depend on the order of exons in the input.
        max_exon[gene] = max(max_exon.get(gene, number), number)
        records.append((fields, gene, exon))

    output = []
    for fields, gene, exon in records:
        if fields[5] == "-":
            name = gene + "_exon_" + str(max_exon[gene] - int(exon))
        else:
            name = gene + "_exon_" + exon
        output.append(
            "\t".join([fields[0], fields[1], fields[2], name, fields[4], fields[5]])
        )
    return output


def main(args):
    """Read the BED file named by ``args[1]`` and print the converted lines."""
    if len(args) < 2:
        prog = args[0] if args else "bedInverseExons"
        raise SystemExit("usage: %s <exons.bed>" % prog)
    # Read once and close the file deterministically.
    with open(args[1], "r") as bed:
        converted = invert_exon_numbers(bed)
    for out_line in converted:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(out_line)


if __name__ == "__main__":
    main(argv)