RDKit
Open-source cheminformatics and machine learning.
Abbreviations.h
Go to the documentation of this file.
1//
2// Copyright (C) 2020 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_ABBREVIATIONS_H
12#define RD_ABBREVIATIONS_H
13#include <utility>
14#include <vector>
15#include <string>
16#include <memory>
17
18namespace RDKit {
19class ROMol;
20class RWMol;
21
22namespace Abbreviations {
24 std::string label;
25 std::string displayLabel;
26 std::string displayLabelW;
27 std::string smarts;
28 std::shared_ptr<ROMol> mol; //!< optional
29 std::vector<unsigned int> extraAttachAtoms; //!< optional
30 bool operator==(const AbbreviationDefinition& other) const {
31 return label == other.label && displayLabel == other.displayLabel &&
32 displayLabelW == other.displayLabelW && smarts == other.smarts;
33 }
34 bool operator!=(const AbbreviationDefinition& other) const {
35 return !(*this == other);
36 }
37};
39 std::vector<std::pair<int, int>> match;
41 AbbreviationMatch(std::vector<std::pair<int, int>> matchArg,
42 AbbreviationDefinition abbrevArg)
43 : match(std::move(matchArg)), abbrev(std::move(abbrevArg)) {}
44 AbbreviationMatch() : match(), abbrev() {}
45 bool operator==(const AbbreviationMatch& other) const {
46 return abbrev == other.abbrev && match == other.match;
47 }
48 bool operator!=(const AbbreviationMatch& other) const {
49 return !(*this == other);
50 }
51};
52namespace common_properties {
53RDKIT_ABBREVIATIONS_EXPORT extern const std::string numDummies;
54}
55namespace Utils {
56//! returns the default set of abbreviation definitions
57RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
59//! returns the default set of linker definitions
60RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
62
63//! parses a string describing abbreviation matches and returns the result
64/*!
65
66\param text the data to be parsed, see below for the format
67\param removeExtraDummies controls whether or not dummy atoms beyond atom 0 are
68 removed. Set this to true to create abbreviations for linkers
69\param allowConnectionToDummies allows abbreviations to directly connect to
70 abbreviations. set this to true for linkers
71
72Format of the text data:
73 A series of lines, each of which contains:
74
75 label SMARTS displayLabel displayLabelW
76
77 The "displayLabel" and "displayLabelW" fields are optional.
78 Here, label is the label used for the abbreviation,
79 SMARTS is the SMARTS definition of the abbreviation,
80 displayLabel is used in drawings to render the abbreviations, and
81 displayLabelW is the display label if a bond comes in from the right.
82
83 Use dummies to indicate attachment points. The assumption is that the first
84 atom is a dummy (one will be added if this is not true) and that the second
85 atom is the surrogate for the rest of the group.
86
87*/
88RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationDefinition>
89parseAbbreviations(const std::string& text, bool removeExtraDummies = false,
90 bool allowConnectionToDummies = false);
91//! \brief equivalent to calling \c parseAbbreviations(text,true,true)
92inline std::vector<AbbreviationDefinition> parseLinkers(
93 const std::string& text) {
94 return parseAbbreviations(text, true, true);
95};
96} // namespace Utils
97
98//! returns all matches for the abbreviations across the molecule
99/*!
100
101 \param abbrevs the abbreviations to look for. This list is used in order.
102 \param maxCoverage any abbreviation that covers more than this fraction
103 of the molecule's atoms (not counting dummies) will not be returned.
104*/
105RDKIT_ABBREVIATIONS_EXPORT std::vector<AbbreviationMatch>
107 const ROMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
108 double maxCoverage = 0.4);
109//! applies the abbreviation matches to a molecule, modifying it in place.
110//! the modified molecule is not sanitized
112 RWMol& mol, const std::vector<AbbreviationMatch>& matches);
113//! creates "SUP" SubstanceGroups on the molecule describing the abbreviation
115 RWMol& mol, const std::vector<AbbreviationMatch>& matches);
116//! convenience function for finding and applying abbreviations
117//! the modified molecule is not sanitized
119 RWMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
120 double maxCoverage = 0.4, bool sanitize = true);
121//! convenience function for finding and labeling abbreviations as SUP
122//! SubstanceGroups
124 RWMol& mol, const std::vector<AbbreviationDefinition>& abbrevs,
125 double maxCoverage = 0.4);
126//! collapses abbreviation (i.e. "SUP") substance groups
127//! the modified molecule is not sanitized
129
130} // namespace Abbreviations
131} // namespace RDKit
132#endif
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_ABBREVIATIONS_EXPORT
Definition: export.h:9
std::vector< AbbreviationDefinition > parseLinkers(const std::string &text)
equivalent to calling parseAbbreviations(text,true,true)
Definition: Abbreviations.h:92
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultLinkers()
returns the default set of linker definitions
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > parseAbbreviations(const std::string &text, bool removeExtraDummies=false, bool allowConnectionToDummies=false)
parses a string describing abbreviation matches and returns the result
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationDefinition > getDefaultAbbreviations()
returns the default set of abbreviation definitions
RDKIT_ABBREVIATIONS_EXPORT const std::string numDummies
RDKIT_ABBREVIATIONS_EXPORT void applyMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
RDKIT_ABBREVIATIONS_EXPORT void condenseAbbreviationSubstanceGroups(RWMol &mol)
RDKIT_ABBREVIATIONS_EXPORT void labelMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
RDKIT_ABBREVIATIONS_EXPORT void condenseMolAbbreviations(RWMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4, bool sanitize=true)
RDKIT_ABBREVIATIONS_EXPORT void labelMatches(RWMol &mol, const std::vector< AbbreviationMatch > &matches)
creates "SUP" SubstanceGroups on the molecule describing the abbreviation
RDKIT_ABBREVIATIONS_EXPORT std::vector< AbbreviationMatch > findApplicableAbbreviationMatches(const ROMol &mol, const std::vector< AbbreviationDefinition > &abbrevs, double maxCoverage=0.4)
returns all matches for the abbreviations across the molecule
Std stuff.
Definition: Abbreviations.h:18
std::vector< unsigned int > extraAttachAtoms
optional
Definition: Abbreviations.h:29
bool operator==(const AbbreviationDefinition &other) const
optional
Definition: Abbreviations.h:30
bool operator!=(const AbbreviationDefinition &other) const
Definition: Abbreviations.h:34
AbbreviationMatch(std::vector< std::pair< int, int > > matchArg, AbbreviationDefinition abbrevArg)
Definition: Abbreviations.h:41
bool operator!=(const AbbreviationMatch &other) const
Definition: Abbreviations.h:48
std::vector< std::pair< int, int > > match
Definition: Abbreviations.h:39
bool operator==(const AbbreviationMatch &other) const
Definition: Abbreviations.h:45