Skip to content

Commit

Permalink
add mutated token
Browse files Browse the repository at this point in the history
  • Loading branch information
VADIM RATNER [email protected] committed Apr 14, 2024
1 parent d3348e4 commit 407a9d1
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3007,6 +3016,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"#": 527,
"%": 528,
"(": 529,
Expand Down Expand Up @@ -8948,4 +8958,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"[CL:0000499]": 3522,
"[CL:2000060]": 3523,
"[CL:0000235]": 3524,
Expand Down Expand Up @@ -3805,4 +3815,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"[100130093]": 5000,
"[100133445]": 5001,
"[100286793]": 5002,
Expand Down Expand Up @@ -97920,4 +97930,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"A": 501,
"B": 502,
"C": 503,
Expand Down Expand Up @@ -3042,4 +3052,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3007,6 +3016,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"#": 527,
"%": 528,
"(": 529,
Expand Down Expand Up @@ -8948,4 +8958,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"[CL:0000499]": 3522,
"[CL:2000060]": 3523,
"[CL:0000235]": 3524,
Expand Down Expand Up @@ -3805,4 +3815,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"A": 501,
"B": 502,
"C": 503,
Expand Down Expand Up @@ -3042,4 +3052,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3007,6 +3016,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"#": 527,
"%": 528,
"(": 529,
Expand Down Expand Up @@ -8948,4 +8958,4 @@
"c2n c3n("
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"[CL:0000499]": 3522,
"[CL:2000060]": 3523,
"[CL:0000235]": 3524,
Expand Down Expand Up @@ -3805,4 +3815,4 @@
},
"unk_token": "<UNK>"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2693,6 +2693,15 @@
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 299,
"content": "<MUTATED>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
Expand Down Expand Up @@ -3013,6 +3022,7 @@
"<CDR3_REGION>": 296,
"<GENERAL_CHAIN>": 297,
"<SUBMOLECULAR_ENTITY>": 298,
"<MUTATED>": 299,
"A": 501,
"B": 502,
"C": 503,
Expand Down Expand Up @@ -3042,4 +3052,4 @@
},
"unk_token": "<UNK>"
}
}
}
1 change: 1 addition & 0 deletions fusedrug/data/tokenizer/modulartokenizer/special_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@
"ALTERNATIVE",
"GENERAL_CHAIN",
"CDR3_REGION",
"MUTATED"
]

AA_tokens = [
Expand Down

0 comments on commit 407a9d1

Please sign in to comment.