所以我正在用python做一些生物信息学的工作,利用Biopython和Clustalw2来比对蛋白质序列。我对此相当陌生(只有几个月的经验),我在使用stdout和迭代整个目录时遇到了一个问题。任何帮助都将不胜感激。
所以我写了这个,它一次处理一个文件,并产生想要的结果…
#!/usr/bin/python
import Bio
import os
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord
# Path to the ClustalW2 executable; its existence is checked with os.path.isfile() below.
clustal_loc=r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"
# Single-file workflow (Python 2: raw_input and the print statement are used).
# Prompts for one FASTA path and one output path, prints the id and length of
# each record, then runs ClustalW on the input file with PIR output requested.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below cannot be read from the text -- confirm against the original.
try:
f_in=raw_input("Enter the filepath of the FASTA to align: ")
f_out= raw_input("Enter the output filename: ")
fh= open(f_in)
# NOTE(review): the output path is opened for writing here, and the same path
# is also handed to ClustalW as outfile below; nothing in this listing writes
# through fo before it is closed.
fo=open(f_out,'w')
# Report every record parsed from the open input handle.
for record in SeqIO.parse(fh,"fasta"):
# NOTE: the local name `id` shadows the built-in id().
id = record.id
seq = record.seq
print("Name: %s, size: %s"%(id,len(seq)))
try:
# The command line is built from the path strings f_in / f_out (not from the
# open handles fh / fo).
cl = ClustalwCommandline(clustal_loc,infile=f_in, outfile=f_out, align=True, outorder="ALIGNED", convert=True, output="pir")
assert os.path.isfile(clustal_loc), "Clustal W not found"
# Calling the command-line object runs it; the two values unpacked here are the
# captured stdout and stderr of the run.
stdout, stderr = cl()
print cl
# Bare except: every failure in the inner try (the assert included) produces
# the same message.
except:
print("There was a problem aligning. Check ClustalW path and .fasta input.")
fh.close()
fo.close()
# Bare except: any failure in the outer try is reported as a parse problem.
except:
print("Could not parse. Check to make sure filepath is correct and that file is in FASTA format")...And这似乎工作得很好。当我试图迭代整个目录(比如需要对齐的蛋白质序列的1000+文件)时,问题就来了。我知道问题出在stdout上,但在这一点上,我还有点外行,不知道如何修复它。下面是不完整的代码:
#!/usr/bin/python
import Bio
import os
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord
import subprocess
from subprocess import Popen
# Path to the ClustalW2 executable; its existence is checked with os.path.isfile() below.
clustal_loc=r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"
# Folder workflow (Python 2): prompts for a folder, then for each directory
# entry prints its records and tries to run ClustalW on it.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# statements below cannot be read from the text -- confirm against the original.
try:
folder= raw_input("Enter the folder of .fasta files to iterate over and align: ")
# os.listdir returns every entry in the folder; no extension filter is applied.
listing = os.listdir(folder)
for infile in listing:
print folder+'/'+infile
f_in = open(folder+'/'+infile,'r')
# The output file is created next to the input, named <input name>.pir.
f_out=open(folder+'/'+infile+".pir",'w')
for record in SeqIO.parse(f_in,"fasta"):
# NOTE: the local name `id` shadows the built-in id().
id = record.id
seq = record.seq
print("Name: %s, size: %s"%(id,len(seq)))
# NOTE(review): f_in and f_out are open file objects at this point (see the two
# open() calls above), and those objects -- not path strings -- are passed as
# infile / outfile.
clustalw_cline= ClustalwCommandline(clustal_loc,infile=f_in, outfile=f_out, align=True, outorder="ALIGNED", convert=True, output="pir")
assert os.path.isfile(clustal_loc), "Clustal W not found"
# NOTE(review): `sys` is used on the next three lines, but no `import sys`
# appears among the import lines shown for this listing.
saveout = sys.stdout
# sys.stdout is rebound to whatever the call returns, then restored on the
# following line.
sys.stdout = clustalw_cline()
sys.stdout = saveout
f_in.close()
f_out.close()
# Bare except: every failure above, whatever its cause, produces this one message.
except:
print("There was a problem aligning. Check ClustalW path and .fasta folder format/location")正如你所看到的,我已经把这个搞得很糟糕了。感谢您能提供的任何帮助。
发布于 2012-08-28 06:29:29
您看到的错误具体是什么?您不应该把 sys.stderr 和 sys.stdout 设置为字符串值(clustalw_cline() 调用会把 ClustalW 的 stdout 和 stderr 以字符串形式返回),因为这样一来您就无法再从 Python 向 stdout 写入任何内容了。
我试着清理并更正你下面的代码。
#!/usr/bin/env python
import Bio
import os
from glob import glob
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord
import subprocess
from subprocess import Popen
# Path to the ClustalW2 executable used for every alignment.
clustal_loc = r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"


def pir_path_for(fasta_path):
    """Return the alignment output path for *fasta_path*.

    The directory and base name are kept; the extension is replaced by
    ``.pir`` (or ``.pir`` is appended when the input has no extension).
    """
    # os.path.splitext returns a (root, ext) tuple -- only the root is wanted.
    return os.path.splitext(fasta_path)[0] + '.pir'


def align_fasta(fasta_path):
    """Print the records of one FASTA file, then align the file with ClustalW.

    Returns the ``(stdout, stderr)`` strings captured from the ClustalW run.
    Parsing errors and ClustalW failures propagate to the caller.
    """
    for record in SeqIO.parse(fasta_path, "fasta"):
        print("Name: %s, size: %s" % (record.id, len(record.seq)))
    # ClustalW opens its input and output itself, so it must be given path
    # strings; the script does not need to open either file.
    clustalw_cline = ClustalwCommandline(clustal_loc, infile=fasta_path,
                                         outfile=pir_path_for(fasta_path),
                                         align=True, outorder="ALIGNED",
                                         convert=True, output="pir")
    return clustalw_cline()


def main():
    """Prompt for a folder and align every ``*.fasta`` file inside it.

    A failure on one file is reported together with its cause and the
    remaining files are still processed.
    """
    folder = raw_input("Enter the folder of .fasta files to iterate over and align: ")
    # Validate the executable once, up front, with a real check instead of an
    # assert (asserts are stripped when Python runs with -O).
    if not os.path.isfile(clustal_loc):
        print("Clustal W not found: %s" % clustal_loc)
        return
    for infile in glob(os.path.join(folder, '*.fasta')):
        print(infile)
        try:
            align_fasta(infile)
        except Exception as err:
            # Per-file boundary: show the actual error rather than hiding it,
            # then continue with the next file.
            print("There was a problem aligning. Check ClustalW path and .fasta folder format/location")
            print("%s: %s" % (infile, err))


if __name__ == "__main__":
    main()
# https://stackoverflow.com/questions/12149479
复制相似问题