-
Notifications
You must be signed in to change notification settings - Fork 100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make use of isotope label information in InChi values in transition lists and .msp files #3230
base: master
Are you sure you want to change the base?
Changes from 10 commits
9d306d7
57c3dab
55b08da
0253b52
be9efac
221acf4
0148fdc
3e1e8f3
afc3757
8fb8e4c
d8a2665
b0ea350
e2a3e14
10d2169
b6d239e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,6 +102,16 @@ public static Dictionary<string, string> FormatAccessionNumbers(string keysTSV, | |
return keys; | ||
} | ||
|
||
/// <summary>
/// Factory for accession-number sets: returns the shared <c>EMPTY</c> instance when
/// <paramref name="accessions"/> is null or has no entries, otherwise wraps the
/// dictionary in a new <c>MoleculeAccessionNumbers</c>.
/// </summary>
/// <param name="accessions">Accession entries to wrap; may be null or empty.</param>
/// <returns><c>EMPTY</c> for null/empty input, else a new instance built from <paramref name="accessions"/>.</returns>
public static MoleculeAccessionNumbers Create(Dictionary<string, string> accessions)
{
    // Avoid allocating a new object when there is nothing to hold.
    var hasEntries = accessions != null && accessions.Count > 0;
    return hasEntries ? new MoleculeAccessionNumbers(accessions) : EMPTY;
}
|
||
public MoleculeAccessionNumbers(string keysTSV, string inChiKey = null) | ||
{ | ||
var keys = FormatAccessionNumbers(keysTSV, inChiKey); | ||
|
@@ -115,6 +125,57 @@ public MoleculeAccessionNumbers(string keysTSV, string inChiKey = null) | |
AccessionNumbers = ImmutableSortedList<string, string>.FromValues(keys, ACCESSION_TYPE_SORTER); | ||
} | ||
|
||
private static readonly Regex REGEX_INCHI_ISOTOPES = new Regex(@"\d+([A-Za-z]+\d+)", // Position, isotope, weight (or isotope and count, for D and T) | ||
RegexOptions.CultureInvariant | RegexOptions.Compiled); // N.B. we ignore position, as we don't carry that much structure detail | ||
|
||
// Look for labels buried in descriptions, e.g. InChi's /i section | ||
public Dictionary<string, int> FindLabels() | ||
{ | ||
Dictionary<string, int> result = null; | ||
var inchi = GetInChI(); | ||
if (!string.IsNullOrEmpty(inchi)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd recommend doing something like:
`if (string.IsNullOrEmpty(inchi)) { return null; }`
so that the main body of the function is less indented. |
||
{ | ||
// e.g. InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1D4 (replace 4 H with H') | ||
// e.g. InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1C13,2C13,3C13,4C13 (replace C with C' at positions 1,2,3, and 4) | ||
var parts = inchi.Split('/'); | ||
if (parts.Length > 4) | ||
{ | ||
var matches = REGEX_INCHI_ISOTOPES.Matches(parts[4]); | ||
|
||
foreach (Match match in matches) | ||
{ | ||
var isotope = match.Groups[1].Value; | ||
var count = 1; | ||
if (isotope.StartsWith(BioMassCalc.D) || isotope.StartsWith(BioMassCalc.T)) // e.g. "D4" in ".../i1D4" (replace 4 H with H') | ||
{ | ||
if (!int.TryParse(isotope.Substring(1), out count)) // Get the count, if any e.g. 3 in "/i1T3" | ||
{ | ||
count = 1; | ||
} | ||
isotope = isotope.Substring(0, 1); | ||
} | ||
if (Adduct.DICT_ADDUCT_ISOTOPE_NICKNAMES.TryGetValue(isotope, out var skylineIsotope)) // e.g. "C13" => "C'" | ||
{ | ||
if (result == null) | ||
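There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd recommend replacing this "if" and the two subsequent "else" clauses with something like the following (the original snippet was lost in extraction; this is a reconstruction):
`result = result ?? new Dictionary<string, int>();`
`result.TryGetValue(skylineIsotope, out var existing);`
`result[skylineIsotope] = existing + count;`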
|
||
{ | ||
result = new Dictionary<string, int>() { { skylineIsotope, count } }; | ||
} | ||
else if (!result.TryGetValue(skylineIsotope, out var existing)) | ||
{ | ||
result.Add(skylineIsotope, count); | ||
} | ||
else | ||
{ | ||
result[skylineIsotope] = existing + count; | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return result; | ||
} | ||
|
||
public bool IsEmpty { | ||
get | ||
{ | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -344,4 +344,7 @@ | |
<data name="NistLibraryBase_GetMod_Unknown_modification__0__at_line__1_" xml:space="preserve"> | ||
<value>Unknown modification {0} at line {1}</value> | ||
</data> | ||
<data name="NistLibraryBase_CreateCache_Missing_details_for__0__at_line__1___ignored" xml:space="preserve"> | ||
<value>Missing details for {0} at line {1}, ignored</value> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this warning message be rephrased so that it is clearer how big of a thing is being ignored? Is just the line which has the missing details being ignored, or is the entire spectrum for the molecule being excluded from the spectral library?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, updated to say that the entry is being ignored. |
||
</data> | ||
</root> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The spacing around the "==" is incorrect.