File size: 2,453 Bytes
b7e5b63
ea61d54
4466c5e
 
 
68a01ab
4466c5e
 
 
 
 
 
b7e5b63
4466c5e
 
 
 
 
 
 
 
 
 
 
 
b7e5b63
4466c5e
 
 
 
 
68a01ab
4466c5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68a01ab
4466c5e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Heuristic mutation search used by the Optimize page.
# Each round scores single-site mutants with predict_amp; accepts the best gain above threshold.
import random
from utils.predict import predict_amp

# One-letter amino-acid codes grouped by side-chain chemistry; mutate_residue
# uses these groups to pick which pool a substitution is drawn from.
# Note: "H" is listed in both HYDROPHILIC and POSITIVE.
HYDROPHOBIC = {"A", "I", "L", "M", "F", "W", "V", "P", "G"}
HYDROPHILIC = {"S", "T", "N", "Q", "Y", "C", "H"}
POSITIVE = {"K", "R", "H"}
NEGATIVE = {"D", "E"}

def mutate_residue(residue):
    # Propose a replacement for a single residue and explain why.
    #
    # residue: one-letter residue code, matched against the module-level
    #          residue groups (membership is case-sensitive).
    # Returns a (new_residue, reason) tuple. Residues in POSITIVE are returned
    # unchanged, so callers can detect "no mutation" via new_residue == residue.
    # Every other residue gets a uniformly random pick from a different group.
    if residue in POSITIVE:
        # Checked first, so "H" (also in HYDROPHILIC) is treated as positive.
        return residue, "Retained strong positive residue"

    if residue in NEGATIVE:
        pool, reason = POSITIVE, "Increased positive charge"
    elif residue in HYDROPHILIC:
        pool, reason = HYDROPHOBIC, "Improved hydrophobicity balance"
    elif residue in HYDROPHOBIC:
        pool, reason = POSITIVE | HYDROPHILIC, "Enhanced amphipathicity"
    else:
        # Anything outside the four groups (unknown or lowercase codes).
        pool, reason = HYDROPHOBIC, "Adjusted physicochemical profile"

    # Draw from a sorted list rather than raw set order: iteration order of a
    # set of strings changes between interpreter runs (hash randomization),
    # which would make proposals irreproducible even with random.seed().
    return random.choice(sorted(pool)), reason

def optimize_sequence(seq, model, max_rounds=20, confidence_threshold=0.001):
    # Greedy single-site mutation search driven by predict_amp confidence.
    #
    # seq: starting sequence (indexed and sliced as a string).
    # model: passed through unchanged to predict_amp.
    # max_rounds: upper bound on the number of accepted mutations.
    # confidence_threshold: minimum confidence gain a round's best mutant must
    #                       achieve to be accepted.
    # Returns (final_sequence, final_confidence, history), where history is a
    # list of (sequence, confidence, change, old_residue, new_residue, reason)
    # tuples starting with an entry for the unmodified input.
    current_seq = seq
    _, best_conf = predict_amp(current_seq, model)
    history = [(current_seq, best_conf, "-", "-", "-", "Original sequence")]

    for _ in range(max_rounds):
        # Track the highest-scoring mutant seen this round; it must strictly
        # beat the current confidence to be recorded at all.
        winner = None
        winner_conf = best_conf

        for idx, residue in enumerate(current_seq):
            replacement, rationale = mutate_residue(residue)
            if replacement == residue:
                # mutate_residue chose to keep this residue; nothing to score.
                continue

            candidate = current_seq[:idx] + replacement + current_seq[idx + 1:]
            _, candidate_conf = predict_amp(candidate, model)
            if candidate_conf > winner_conf:
                winner_conf = candidate_conf
                winner = (candidate, idx, residue, replacement, rationale)

        accepted = (
            winner is not None
            and winner_conf - best_conf >= confidence_threshold
        )
        if not accepted:
            # No mutant cleared the minimum improvement threshold: stop early.
            break

        current_seq, idx, residue, replacement, rationale = winner
        best_conf = winner_conf
        # Positions are reported 1-based.
        change = f"Pos {idx+1}: {residue}{replacement}"
        history.append(
            (current_seq, best_conf, change, residue, replacement, rationale)
        )

    return current_seq, best_conf, history