使用 ESMFold 预测 3D 结构并保存为 PDB 文件
预测单个序列
from transformers import AutoTokenizer, EsmForProteinFolding

# Load the pretrained ESMFold v1 checkpoint.
model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1")

# Protein sequence to fold, written as one-letter amino-acid codes.
seq = "MKTVRQERLKSIVRILERSKEPVSGAQLAEELSVSRQVIVQDIAYLRSLGYNIVATPRGYVLAGG"

# Run the prediction; the result is written verbatim to a .pdb file below.
output = model.infer_pdb(seq)

with open("my_peptide.pdb", "w") as pdb_file:
    pdb_file.write(output)
1
2
3
4
5
6
7
2
3
4
5
6
7
通过 .fasta 文件预测
from transformers import EsmForProteinFolding
import os
import torch
import argparse
from Bio import SeqIO
def load_model(use_gpu=True):
    """Load the pretrained ESMFold model and move it to the selected device.

    Args:
        use_gpu: When true, CUDA is used if ``torch.cuda.is_available()``
            reports a device; otherwise the model stays on the CPU.

    Returns:
        A ``(model, device)`` tuple, where ``device`` is the string
        ``"cuda"`` or ``"cpu"``.
    """
    print("Loading ESMFold model...")

    # Pick the device and its log line together so the two cannot disagree.
    if use_gpu and torch.cuda.is_available():
        device, banner = "cuda", f"Using GPU: {torch.cuda.get_device_name(0)}"
    else:
        device, banner = "cpu", "Using CPU"
    print(banner)

    model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1").to(device)
    print("Model loaded successfully!")
    return model, device
def predict_and_save(model, sequence_id, sequence, output_dir):
    """Predict the structure of one sequence and save it as a PDB file.

    The file is written to ``<output_dir>/<sanitized id>.pdb``. Failures are
    reported on stdout and signalled through the return value instead of
    being raised, so one bad record does not abort a batch run.

    Args:
        model: Object exposing ``infer_pdb(sequence)``; its return value is
            written to the output file as text.
        sequence_id: Record identifier, used (after sanitizing) as the file
            name.
        sequence: Amino-acid sequence in one-letter codes.
        output_dir: Directory that receives the PDB file. It is created if
            it does not exist yet.

    Returns:
        True if the PDB file was written, False if the sequence was empty or
        any step raised an exception.
    """
    try:
        # FASTA records are often soft-masked (lowercase) or end with a '*'
        # stop marker; normalize so such records are still folded.
        # NOTE(review): assumes ESMFold only accepts uppercase one-letter
        # codes and rejects '*' — confirm against the transformers version
        # in use.
        residues = "".join(sequence.split()).upper().rstrip("*")
        if not residues:
            print(f"Error predicting {sequence_id}: empty sequence")
            return False

        # Replace characters that would act as path separators or are
        # otherwise awkward in file names.
        clean_id = sequence_id.translate(str.maketrans("|/\\:", "____"))
        pdb_path = os.path.join(output_dir, f"{clean_id}.pdb")

        # Make sure the target directory exists before the (expensive)
        # inference, so the function also works when called on its own.
        os.makedirs(output_dir, exist_ok=True)

        print(f"Predicting structure for {sequence_id}...")
        output = model.infer_pdb(residues)
        with open(pdb_path, "w") as f:
            f.write(output)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with the next record.
        print(f"Error predicting {sequence_id}: {str(e)}")
        return False
    else:
        print(f"Saved: {pdb_path}")
        return True
def main(argv=None):
    """Fold every record of a FASTA file and write one PDB file per record.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            lets argparse read the process arguments. When no options are
            given, the original hard-coded input and output paths are used.

    Raises:
        FileNotFoundError: If the FASTA file does not exist. This is checked
            before the model is loaded so that a wrong path fails at once.
    """
    parser = argparse.ArgumentParser(
        description="Predict 3D structures for all sequences in a FASTA file with ESMFold."
    )
    parser.add_argument(
        "--fasta",
        default="/raid1/xz/Protein/esmfold_test/PLD_Streptomyces_twoHKD_D_only.fasta",
        help="input FASTA file (default: %(default)s)",
    )
    parser.add_argument(
        "--output-dir",
        default="/raid1/xz/Protein/esmfold_test/output",
        help="directory for the generated PDB files (default: %(default)s)",
    )
    parser.add_argument(
        "--cpu",
        action="store_true",
        help="run on the CPU even if a GPU is available",
    )
    args = parser.parse_args(argv)
    fasta_file = args.fasta
    output_dir = args.output_dir

    # Fail fast: loading the model is slow, so validate the input first.
    if not os.path.isfile(fasta_file):
        raise FileNotFoundError(f"FASTA file not found: {fasta_file}")

    # Create the output directory.
    os.makedirs(output_dir, exist_ok=True)

    # Load the model; the device string is not needed here.
    model, _ = load_model(use_gpu=not args.cpu)

    # Running statistics for the summary.
    total_sequences = 0
    successful_predictions = 0

    # Read the FASTA file and predict the records one by one.
    print(f"Reading FASTA file: {fasta_file}")
    for total_sequences, record in enumerate(SeqIO.parse(fasta_file, "fasta"), start=1):
        sequence_id = record.id
        sequence = str(record.seq)
        print(f"\nProcessing sequence {total_sequences}: {sequence_id}")
        print(f"Sequence length: {len(sequence)}")
        if predict_and_save(model, sequence_id, sequence, output_dir):
            successful_predictions += 1

    print("\n=== Prediction Summary ===")
    print(f"Total sequences: {total_sequences}")
    print(f"Successful predictions: {successful_predictions}")
    print(f"Failed predictions: {total_sequences - successful_predictions}")
    print(f"Output directory: {output_dir}")


if __name__ == "__main__":
    main()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
上次更新: 2025/09/18, 20:06:50