results=[]
for i in range (0, L-window+1, step):
GC=0
for j in range (i, i+window) :
if seq[j]=="C" or seq[j] == "G" :
GC+=1
results.append((i+1, float(GC)/window))
return results
# 2/ version optimisee, on ne recalcule pas les parties chevauchantes
# de deux fenetres successives = gain de temps
def computeSlidingGCOPtim(seq, window, step):
    """Return the GC fraction of every sliding window over ``seq``.

    The count for the first window is computed directly.  Each following
    window reuses the previous count: the positions that slid out on the
    left are subtracted and the positions that slid in on the right are
    added, so the overlapping part is never rescanned.

    Only the uppercase characters "C" and "G" are counted.

    Parameters:
        seq: indexable sequence of characters (for example a str).
        window: number of positions in each window.
        step: offset between the starts of two successive windows.

    Returns:
        A list of ``(start, fraction)`` tuples, where ``start`` is the
        1-based position of the first element of the window and
        ``fraction`` is the GC count divided by ``window`` (a float).
        The list is empty when ``seq`` is shorter than ``window``.

    Raises:
        ZeroDivisionError: if ``window`` is 0.
        ValueError: if ``step`` is 0 (raised by ``range``).
    """
    results = []
    L = len(seq)
    # A sequence shorter than the window contains no complete window.
    # Return an empty list instead of indexing past the end of seq.
    if window > L:
        return results

    def count_gc(start, stop):
        # Number of positions in [start, stop) holding a "C" or a "G".
        return sum(
            1 for j in range(start, stop)
            if seq[j] == "C" or seq[j] == "G"
        )

    # First window: full scan.
    GC = count_gc(0, window)
    results.append((1, float(GC) / window))

    # Remaining windows: update the running count incrementally.
    for i in range(step, L - window + 1, step):
        GC -= count_gc(i - step, i)                    # left the window
        GC += count_gc(i + window - step, i + window)  # entered the window
        results.append((i + 1, float(GC) / window))
    return results
Les fonctions de composition.py
#!/Library/Frameworks/Python.framework/Versions/Current/bin/python
from sequence import *
# Driver script: print one "position GC-fraction" line per sliding window.
# readFasta and the computeSlidingGC* functions are not defined in this
# script; presumably they come from the star import of `sequence` -- confirm.
tex = "mgen.fst"            # input file handed to readFasta
nom, seq = readFasta(tex)   # presumably (name, sequence) -- verify against readFasta
pas = 1000                  # passed as the `step` argument
lf = 50000                  # passed as the `window` argument
# Swap the commented and active calls to time the non-optimised version.
#pos_gc=computeSlidingGC(seq,lf,pas)
pos_gc = computeSlidingGCOPtim(seq, lf, pas)
for entry in pos_gc:
    # A single parenthesised argument is valid under both Python 2 and
    # Python 3 and writes the same "start fraction" line as the former
    # `print a, b` statement.
    print("%s %s" % (entry[0], entry[1]))
Le test dans le shell
chmod +x composition.py
time ./composition.py > toto
1.174u 0.076s 0:01.32 93.9% 0+0k 0+0io 0pf+0w
#sans l'optimisation
time ./composition.py > toto
28.037u 0.591s 0:30.33 94.3% 0+0k 0+2io 0pf+0w
#avec l'optimisation