RDKit
Open-source cheminformatics and machine learning.
Reaction.h
Go to the documentation of this file.
1//
2// Copyright (c) 2007-2021, Novartis Institutes for BioMedical Research Inc.
3// and other RDKit contributors
4//
5// All rights reserved.
6//
7// Redistribution and use in source and binary forms, with or without
8// modification, are permitted provided that the following conditions are
9// met:
10//
11// * Redistributions of source code must retain the above copyright
12// notice, this list of conditions and the following disclaimer.
13// * Redistributions in binary form must reproduce the above
14// copyright notice, this list of conditions and the following
15// disclaimer in the documentation and/or other materials provided
16// with the distribution.
17// * Neither the name of Novartis Institutes for BioMedical Research Inc.
18// nor the names of its contributors may be used to endorse or promote
19// products derived from this software without specific prior written
20// permission.
21//
22// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33//
34
35#include <RDGeneral/export.h>
36#ifndef RD_REACTION_H_17Aug2006
37#define RD_REACTION_H_17Aug2006
38
39#include <GraphMol/RDKitBase.h>
40#include <RDGeneral/RDProps.h>
42#include <vector>
43
44namespace RDKit {
45class ReactionPickler;
46
47//! used to indicate an error in the chemical reaction engine
49 : public std::exception {
50 public:
51 //! construct with an error message
52 explicit ChemicalReactionException(const char *msg) : _msg(msg) {}
53 //! construct with an error message
54 explicit ChemicalReactionException(const std::string msg) : _msg(msg) {}
55 //! get the error message
56 const char *what() const noexcept override { return _msg.c_str(); }
57 ~ChemicalReactionException() noexcept override = default;
58
59 private:
60 std::string _msg;
61};
62
63//! This is a class for storing and applying general chemical reactions.
64/*!
65 basic usage will be something like:
66
67 \verbatim
68 ChemicalReaction rxn;
69 rxn.addReactantTemplate(r1);
70 rxn.addReactantTemplate(r2);
71 rxn.addProductTemplate(p1);
72 rxn.initReactantMatchers();
73
74 MOL_SPTR_VECT prods;
75 for(MOL_SPTR_VECT::const_iterator r1It=reactantSet1.begin();
76 r1It!=reactantSet1.end();++r1It){
77 for(MOL_SPTR_VECT::const_iterator r2It=reactantSet2.begin();
78 r2It!=reactantSet2.end();++r2It){
79 MOL_SPTR_VECT rVect(2);
80 rVect[0] = *r1It;
81 rVect[1] = *r2It;
82
83 std::vector<MOL_SPTR_VECT> lprods;
84 lprods = rxn.runReactants(rVect);
85 for(std::vector<MOL_SPTR_VECT>::const_iterator lpIt=lprods.begin();
86 lpIt!=lprods.end();++lpIt){
87 // we know this is a single-product reaction:
88 prods.push_back((*lpIt)[0]);
89 }
90 }
91 }
92 \endverbatim
93
94 NOTES:
95 - to allow more control over the reaction, it is possible to flag reactant
96 atoms as being protected by setting the common_properties::_protected
97 property on those
98 atoms. Here's an example:
99 \verbatim
100 std::string smi="[O:1]>>[N:1]";
101 ChemicalReaction *rxn = RxnSmartsToChemicalReaction(smi);
102 rxn->initReactantMatchers();
103
104 MOL_SPTR_VECT reacts;
105 reacts.clear();
106 smi = "OCO";
107 ROMol *mol = SmilesToMol(smi);
108 reacts.push_back(ROMOL_SPTR(mol));
109 std::vector<MOL_SPTR_VECT> prods;
110 prods = rxn->runReactants(reacts);
111 // here prods has two entries, because there are two Os in the
112 // reactant.
113
114 reacts[0]->getAtomWithIdx(0)->setProp(common_properties::_protected,1);
115 prods = rxn->runReactants(reacts);
116 // here prods only has one entry, the reaction at atom 0
117 // has been blocked by the _protected property
118 \endverbatim
119
120*/
122 friend class ReactionPickler;
123
124 public:
127 df_needsInit = other.df_needsInit;
128 df_implicitProperties = other.df_implicitProperties;
129 for (MOL_SPTR_VECT::const_iterator iter = other.beginReactantTemplates();
130 iter != other.endReactantTemplates(); ++iter) {
131 RWMol *reactant = new RWMol(**iter);
132 m_reactantTemplates.push_back(ROMOL_SPTR(reactant));
133 }
134 for (MOL_SPTR_VECT::const_iterator iter = other.beginProductTemplates();
135 iter != other.endProductTemplates(); ++iter) {
136 RWMol *product = new RWMol(**iter);
137 m_productTemplates.push_back(ROMOL_SPTR(product));
138 }
139 for (MOL_SPTR_VECT::const_iterator iter = other.beginAgentTemplates();
140 iter != other.endAgentTemplates(); ++iter) {
141 RWMol *agent = new RWMol(**iter);
142 m_agentTemplates.push_back(ROMOL_SPTR(agent));
143 }
144 d_props = other.d_props;
145 }
146 //! construct a reaction from a pickle string
147 ChemicalReaction(const std::string &binStr);
148
149 //! Adds a new reactant template
150 /*!
151 \return the number of reactants
152
153 */
154 unsigned int addReactantTemplate(ROMOL_SPTR mol) {
155 this->df_needsInit = true;
156 this->m_reactantTemplates.push_back(mol);
157 return rdcast<unsigned int>(this->m_reactantTemplates.size());
158 }
159
160 //! Adds a new agent template
161 /*!
162 \return the number of agents
163
164 */
165 unsigned int addAgentTemplate(ROMOL_SPTR mol) {
166 this->m_agentTemplates.push_back(mol);
167 return rdcast<unsigned int>(this->m_agentTemplates.size());
168 }
169
170 //! Adds a new product template
171 /*!
172 \return the number of products
173
174 */
175 unsigned int addProductTemplate(ROMOL_SPTR mol) {
176 this->m_productTemplates.push_back(mol);
177 return rdcast<unsigned int>(this->m_productTemplates.size());
178 }
179
180 //! Removes the reactant templates from a reaction if atom mapping ratio is
181 /// below a given threshold
182 /*! By default the removed reactant templates are attached to the agent
183 templates.
184 An alternative is to provide a pointer to a molecule vector where
185 these reactants should be saved.
186 */
187 void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms = 0.2,
188 bool moveToAgentTemplates = true,
189 MOL_SPTR_VECT *targetVector = nullptr);
190
191 //! Removes the product templates from a reaction if its atom mapping ratio is
192 /// below a given threshold
193 /*! By default the removed product templates are attached to the agent
194 templates.
195 An alternative is to provide a pointer to a molecule vector where
196 these products should be saved.
197 */
198 void removeUnmappedProductTemplates(double thresholdUnmappedAtoms = 0.2,
199 bool moveToAgentTemplates = true,
200 MOL_SPTR_VECT *targetVector = nullptr);
201
202 /*! Removes the agent templates from a reaction. If a pointer to a
203 molecule vector is provided, the agents are stored therein.*/
204 void removeAgentTemplates(MOL_SPTR_VECT *targetVector = nullptr);
205
206 //! Runs the reaction on a set of reactants
207 /*!
208
209 \param reactants the reactants to be used. The length of this must be equal
210 to this->getNumReactantTemplates()
211 \param numProducts if non zero, the maximum number of products to generate
212 before stopping. If hit, a warning will be generated.
213
214 \return a vector of vectors of products. Each subvector will be
215 this->getNumProductTemplates() long.
216
217 We return a vector of vectors of products because each individual template
218 may map multiple times onto its reactant. This leads to multiple possible
219 result sets.
220 */
221 std::vector<MOL_SPTR_VECT> runReactants(
222 const MOL_SPTR_VECT reactants, unsigned int numProducts = 1000) const;
223
224 //! Runs a single reactant against a single reactant template
225 /*!
226 \param reactant The single reactant to use
227
228 \param reactantTemplateIdx the reactant template to target in the reaction
229 */
230 std::vector<MOL_SPTR_VECT> runReactant(
231 ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const;
232
233 //! Runs a single reactant in place (the reactant is modified)
234 /*!
235 This is only useable with reactions which have a single reactant and product
236 and where no atoms are added in the product.
237
238 \param reactant The single reactant to use
239
240 \return whether or not the reactant was actually modified
241 */
242 bool runReactant(RWMol &reactant) const;
243
245 return this->m_reactantTemplates;
246 }
247 const MOL_SPTR_VECT &getAgents() const { return this->m_agentTemplates; }
248 const MOL_SPTR_VECT &getProducts() const { return this->m_productTemplates; }
249
250 MOL_SPTR_VECT::const_iterator beginReactantTemplates() const {
251 return this->m_reactantTemplates.begin();
252 }
253 MOL_SPTR_VECT::const_iterator endReactantTemplates() const {
254 return this->m_reactantTemplates.end();
255 }
256
257 MOL_SPTR_VECT::const_iterator beginProductTemplates() const {
258 return this->m_productTemplates.begin();
259 }
260 MOL_SPTR_VECT::const_iterator endProductTemplates() const {
261 return this->m_productTemplates.end();
262 }
263
264 MOL_SPTR_VECT::const_iterator beginAgentTemplates() const {
265 return this->m_agentTemplates.begin();
266 }
267 MOL_SPTR_VECT::const_iterator endAgentTemplates() const {
268 return this->m_agentTemplates.end();
269 }
270
271 MOL_SPTR_VECT::iterator beginReactantTemplates() {
272 return this->m_reactantTemplates.begin();
273 }
274 MOL_SPTR_VECT::iterator endReactantTemplates() {
275 return this->m_reactantTemplates.end();
276 }
277
278 MOL_SPTR_VECT::iterator beginProductTemplates() {
279 return this->m_productTemplates.begin();
280 }
281 MOL_SPTR_VECT::iterator endProductTemplates() {
282 return this->m_productTemplates.end();
283 }
284
285 MOL_SPTR_VECT::iterator beginAgentTemplates() {
286 return this->m_agentTemplates.begin();
287 }
288 MOL_SPTR_VECT::iterator endAgentTemplates() {
289 return this->m_agentTemplates.end();
290 }
291 unsigned int getNumReactantTemplates() const {
292 return rdcast<unsigned int>(this->m_reactantTemplates.size());
293 }
294 unsigned int getNumProductTemplates() const {
295 return rdcast<unsigned int>(this->m_productTemplates.size());
296 }
297 unsigned int getNumAgentTemplates() const {
298 return rdcast<unsigned int>(this->m_agentTemplates.size());
299 }
300
301 //! initializes our internal reactant-matching datastructures.
302 /*!
303 This must be called after adding reactants and before calling
304 runReactants.
305
306 \param silent: If this bool is true, no messages will be logged during the
307 validation. By default, validation problems are reported to the warning
308 and error logs depending on their severity.
309 */
310 void initReactantMatchers(bool silent = false);
311
312 bool isInitialized() const { return !df_needsInit; }
313
314 //! validates the reactants and products to make sure the reaction seems
315 /// "reasonable"
316 /*!
317 \return true if the reaction validates without errors (warnings do not
318 stop validation)
319
320 \param numWarnings used to return the number of validation warnings
321 \param numErrors used to return the number of validation errors
322
323 \param silent: If this bool is true, no messages will be logged during the
324 validation. By default, validation problems are reported to the warning
325 and error logs depending on their severity.
326
327 */
328 bool validate(unsigned int &numWarnings, unsigned int &numErrors,
329 bool silent = false) const;
330
331 //! returns whether or not the reaction uses implicit
332 //! properties on the product atoms
333 /*!
334
335 This toggles whether or not unspecified atomic properties in the
336 products are considered to be implicit and should be copied from
337 the actual reactants. This is necessary due to a semantic difference
338 between the "reaction SMARTS" approach and the MDL RXN
339 approach:
340 In "reaction SMARTS", this reaction:
341 [C:1]-[Br:2].[O-:3]>>[C:1]-[O:3].[Br-:2]
342 applied to [CH4+]Br should yield [CH4+]O
343 Something similar drawn in an rxn file, and applied to
344 [CH4+]Br should yield [CH3]O.
345 In rxn there is no charge on the product C because nothing is
346 specified in the rxn file; in "SMARTS" the charge from the
347 actual reactants is not *removed* because no charge is
348 specified in the reaction.
349
350 */
351 bool getImplicitPropertiesFlag() const { return df_implicitProperties; }
352 //! sets the implicit properties flag. See the documentation for
353 //! getImplicitPropertiesFlag() for a discussion of what this means.
354 void setImplicitPropertiesFlag(bool val) { df_implicitProperties = val; }
355
356 private:
357 bool df_needsInit{true};
358 bool df_implicitProperties{false};
359 MOL_SPTR_VECT m_reactantTemplates, m_productTemplates, m_agentTemplates;
360 ChemicalReaction &operator=(const ChemicalReaction &); // disable assignment
361};
362
363//! tests whether or not the molecule has a substructure match
364//! to any of the reaction's reactants
365//! the \c which argument is used to return which of the reactants
366//! the molecule matches. If there's no match, it is equal to the number
367//! of reactants on return
369 const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which);
370//! \overload
372 const ChemicalReaction &rxn, const ROMol &mol);
373
374//! tests whether or not the molecule has a substructure match
375//! to any of the reaction's products
376//! the \c which argument is used to return which of the products
377//! the molecule matches. If there's no match, it is equal to the number
378//! of products on return
380 const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which);
381//! \overload
383 const ChemicalReaction &rxn, const ROMol &mol);
384
385//! tests whether or not the molecule has a substructure match
386//! to any of the reaction's agents
387//! the \c which argument is used to return which of the agents
388//! the molecule matches. If there's no match, it is equal to the number
389//! of agents on return
391 const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which);
392//! \overload
394 const ChemicalReaction &rxn, const ROMol &mol);
395
396//! returns indices of the atoms in each reactant that are changed
397//! in the reaction
398/*!
399 \param rxn the reaction we are interested in
400
401 \param mappedAtomsOnly if set, atoms that are not mapped will not be included
402 in the list of changed atoms (otherwise they are automatically included)
403
404 How are changed atoms recognized?
405 1) Atoms whose degree changes
406 2) Atoms whose bonding pattern changes
407 3) unmapped atoms (unless the mappedAtomsOnly flag is set)
408 4) Atoms connected to unmapped atoms
409 5) Atoms whose atomic number changes (unless the
410 corresponding product atom is a dummy)
411 6) Atoms with more than one atomic number query (unless the
412 corresponding product atom is a dummy)
413
414 Note that the atomic number of a query atom depends on how it's constructed.
415 When coming from SMARTS: if the first query is an atomic label/number that
416 sets the atomic number, otherwise it's zero.
417 For example [O;$(OC)] is atomic number 8 while [$(OC);O] is atomic
418 number 0.
419 When coming from RXN: the atomic number of the atom in the rxn file sets
420 the value.
421 */
423getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly = false);
424
425//! add the recursive queries to the reactants of a reaction
426/*!
427 This does its work using RDKit::addRecursiveQueries()
428
429 \param rxn the reaction we are interested in
430 \param queries - the dictionary of named queries to add
431 \param propName - the atom property to use to get query names
432 optional:
433 \param reactantLabels - to store pairs of (atom index, query string)
434 per reactant
435
436 NOTES:
437 - existing query information, if present, will be supplemented (AND logic)
438 - non-query atoms will be replaced with query atoms using only the query
439 logic
440 - query names can be present as comma separated lists, they will then
441 be combined using OR logic.
442 - throws a KeyErrorException if a particular query name is not present
443 in \c queries
444
445 */
447 ChemicalReaction &rxn, const std::map<std::string, ROMOL_SPTR> &queries,
448 const std::string &propName,
449 std::vector<std::vector<std::pair<unsigned int, std::string>>>
450 *reactantLabels = nullptr);
451
452} // namespace RDKit
453
454namespace RDDepict {
455//! \brief Generate 2D coordinates (a depiction) for a reaction
456/*!
457
458 \param rxn the reaction we are interested in
459
460 \param spacing the spacing between components of the reaction
461
462 \param updateProps if set, properties such as conjugation and
463 hybridization will be calculated for the reactant and product
464 templates before generating coordinates. This should result in
465 better depictions, but can lead to errors in some cases.
466
467 \param canonOrient canonicalize the orientation so that the long
468 axes align with the x-axis etc.
469
470 \param nFlipsPerSample - the number of rotatable bonds that are
471 flipped at random for each sample
472
473 \param nSamples - the number of samples
474
475 \param sampleSeed - seed for the random sampling process
476
477 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
478 atoms with four neighbors in order to improve the depiction
479
480 for the other parameters see the documentation for compute2DCoords()
481
482*/
484 RDKit::ChemicalReaction &rxn, double spacing = 2.0, bool updateProps = true,
485 bool canonOrient = false, unsigned int nFlipsPerSample = 0,
486 unsigned int nSamples = 0, int sampleSeed = 0,
487 bool permuteDeg4Nodes = false);
488
489} // namespace RDDepict
490
491#endif
pulls in the core RDKit functionality
used to indicate an error in the chemical reaction engine
Definition: Reaction.h:49
const char * what() const noexcept override
get the error message
Definition: Reaction.h:56
ChemicalReactionException(const char *msg)
construct with an error message
Definition: Reaction.h:52
ChemicalReactionException(const std::string msg)
construct with an error message
Definition: Reaction.h:54
~ChemicalReactionException() noexcept override=default
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:121
unsigned int addProductTemplate(ROMOL_SPTR mol)
Adds a new product template.
Definition: Reaction.h:175
unsigned int addAgentTemplate(ROMOL_SPTR mol)
Adds a new agent template.
Definition: Reaction.h:165
unsigned int addReactantTemplate(ROMOL_SPTR mol)
Adds a new reactant template.
Definition: Reaction.h:154
unsigned int getNumAgentTemplates() const
Definition: Reaction.h:297
bool getImplicitPropertiesFlag() const
Definition: Reaction.h:351
unsigned int getNumReactantTemplates() const
Definition: Reaction.h:291
ChemicalReaction(const std::string &binStr)
construct a reaction from a pickle string
MOL_SPTR_VECT::iterator beginProductTemplates()
Definition: Reaction.h:278
void removeUnmappedReactantTemplates(double thresholdUnmappedAtoms=0.2, bool moveToAgentTemplates=true, MOL_SPTR_VECT *targetVector=nullptr)
MOL_SPTR_VECT::const_iterator beginProductTemplates() const
Definition: Reaction.h:257
void initReactantMatchers(bool silent=false)
initializes our internal reactant-matching datastructures.
std::vector< MOL_SPTR_VECT > runReactants(const MOL_SPTR_VECT reactants, unsigned int numProducts=1000) const
Runs the reaction on a set of reactants.
const MOL_SPTR_VECT & getReactants() const
Definition: Reaction.h:244
MOL_SPTR_VECT::const_iterator endReactantTemplates() const
Definition: Reaction.h:253
void setImplicitPropertiesFlag(bool val)
Definition: Reaction.h:354
const MOL_SPTR_VECT & getAgents() const
Definition: Reaction.h:247
MOL_SPTR_VECT::iterator endProductTemplates()
Definition: Reaction.h:281
bool runReactant(RWMol &reactant) const
Runs a single reactant in place (the reactant is modified)
unsigned int getNumProductTemplates() const
Definition: Reaction.h:294
MOL_SPTR_VECT::const_iterator endProductTemplates() const
Definition: Reaction.h:260
bool isInitialized() const
Definition: Reaction.h:312
ChemicalReaction(const ChemicalReaction &other)
Definition: Reaction.h:126
MOL_SPTR_VECT::const_iterator beginAgentTemplates() const
Definition: Reaction.h:264
void removeAgentTemplates(MOL_SPTR_VECT *targetVector=nullptr)
std::vector< MOL_SPTR_VECT > runReactant(ROMOL_SPTR reactant, unsigned int reactantTemplateIdx) const
Runs a single reactant against a single reactant template.
MOL_SPTR_VECT::const_iterator beginReactantTemplates() const
Definition: Reaction.h:250
const MOL_SPTR_VECT & getProducts() const
Definition: Reaction.h:248
MOL_SPTR_VECT::iterator beginReactantTemplates()
Definition: Reaction.h:271
MOL_SPTR_VECT::iterator endAgentTemplates()
Definition: Reaction.h:288
MOL_SPTR_VECT::iterator beginAgentTemplates()
Definition: Reaction.h:285
MOL_SPTR_VECT::iterator endReactantTemplates()
Definition: Reaction.h:274
void removeUnmappedProductTemplates(double thresholdUnmappedAtoms=0.2, bool moveToAgentTemplates=true, MOL_SPTR_VECT *targetVector=nullptr)
MOL_SPTR_VECT::const_iterator endAgentTemplates() const
Definition: Reaction.h:267
bool validate(unsigned int &numWarnings, unsigned int &numErrors, bool silent=false) const
Dict d_props
Definition: RDProps.h:16
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
handles pickling (serializing) reactions
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:49
RDKIT_CHEMREACTIONS_EXPORT void compute2DCoordsForReaction(RDKit::ChemicalReaction &rxn, double spacing=2.0, bool updateProps=true, bool canonOrient=false, unsigned int nFlipsPerSample=0, unsigned int nSamples=0, int sampleSeed=0, bool permuteDeg4Nodes=false)
Generate 2D coordinates (a depiction) for a reaction.
Std stuff.
Definition: Abbreviations.h:18
RDKIT_CHEMREACTIONS_EXPORT bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:291
RDKIT_CHEMREACTIONS_EXPORT bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
RDKIT_CHEMREACTIONS_EXPORT VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly=false)
boost::shared_ptr< ROMol > ROMOL_SPTR
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:21
RDKIT_CHEMREACTIONS_EXPORT bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
RDKIT_CHEMREACTIONS_EXPORT void addRecursiveQueriesToReaction(ChemicalReaction &rxn, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::vector< std::pair< unsigned int, std::string > > > *reactantLabels=nullptr)
add the recursive queries to the reactants of a reaction