RDKit
Open-source cheminformatics and machine learning.
RDDepictor.h
Go to the documentation of this file.
1//
2// Copyright (C) 2003-2022 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RDDEPICTOR_H
13#define RDDEPICTOR_H
14
16#include <RDGeneral/types.h>
17#include <Geometry/point.h>
18#include <boost/smart_ptr.hpp>
19
20namespace RDKit {
21class ROMol;
22}
23
24namespace RDDepict {
25
26RDKIT_DEPICTOR_EXPORT extern bool
27 preferCoordGen; // Ignored if coordgen support isn't active
28
29typedef boost::shared_array<double> DOUBLE_SMART_PTR;
30
//! \brief Exception type used by the depiction code.
/*!
  Carries a human-readable message that is returned by what().
  generateDepictionMatching2DStructure() documents throwing this when
  acceptFailure is false and no match to the reference is found.
*/
class RDKIT_DEPICTOR_EXPORT DepictException : public std::exception {
 public:
  //! Construct from a C string.
  /*!
    \param msg the message text; a null pointer is treated as an empty
               message instead of being handed to std::string (which would
               be undefined behavior)
  */
  DepictException(const char *msg) : _msg(msg ? msg : "") {}

  //! Construct from a std::string.
  /*!
    \param msg the message text; taken by const reference so that it is
               copied exactly once (into the exception itself)
  */
  DepictException(const std::string &msg) : _msg(msg) {}

  //! \return the stored message; the pointer is owned by this object
  const char *what() const noexcept override { return _msg.c_str(); }

  ~DepictException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the message text
};
41
42//! \brief Generate 2D coordinates (a depiction) for a molecule
43/*!
44
45 \param mol the molecule we are interested in
46
47 \param coordMap a map of int to Point2D, between atom IDs and
48 their locations. This is the container the user needs to fill if
49 they want to specify coordinates for a portion of the molecule;
50 defaults to nullptr
51
52 \param canonOrient canonicalize the orientation so that the long
53 axes align with the x-axis etc.
54
55 \param clearConfs clear all existing conformations on the molecule
56 before adding the 2D coordinates instead of simply adding to the
57 list
58
59 \param nFlipsPerSample - the number of rotatable bonds that are
60 flipped at random for each sample
61
62 \param nSamples - the number of samples
63
64 \param sampleSeed - seed for the random sampling process
65
66 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
67 atoms with four neighbors in order to improve the depiction
68
69 \param forceRDKit - use RDKit to generate coordinates even if
70 preferCoordGen is set to true
71
72 \return ID of the conformation added to the molecule containing the
73 2D coordinates
74
75*/
77 RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap = nullptr,
78 bool canonOrient = false, bool clearConfs = true,
79 unsigned int nFlipsPerSample = 0, unsigned int nSamples = 0,
80 int sampleSeed = 0, bool permuteDeg4Nodes = false, bool forceRDKit = false);
81
82//! \brief Compute the 2D coordinates such that the interatom distances
83/// mimic those in a distance matrix
84/*!
85
86 This function generates 2D coordinates such that the inter-atom
87 distances mimic those specified via dmat. This is done by randomly
88 sampling(flipping) the rotatable bonds in the molecule and
89 evaluating a cost function which contains two components. The
90 first component is the sum of inverse of the squared inter-atom
91 distances, this helps in spreading the atoms far from each
92 other. The second component is the sum of squares of the
93 difference in distance between those in dmat and the generated
94 structure. The user can adjust the relative importance of the two
95 components via an adjustable parameter (see below)
96
97 ARGUMENTS:
98
99 \param mol - molecule to generate coordinates for
100
101 \param dmat - the distance matrix we want to mimic, this is a
102 symmetric N by N matrix where N is the number of atoms in mol. All
103 negative entries in dmat are ignored.
104
105 \param canonOrient - canonicalize the orientation after the 2D
106 embedding is done
107
108 \param clearConfs - clear any previously existing conformations on
109 mol before adding a conformation
110
111 \param weightDistMat - A value between 0.0 and 1.0, this
112 determines the importance of mimicking the inter-atom
113 distances in dmat. (1.0 - weightDistMat) is the weight associated
114 to spreading out the structure (density) in the cost function
115
116 \param nFlipsPerSample - the number of rotatable bonds that are
117 flipped at random for each sample
118
119 \param nSamples - the number of samples
120
121 \param sampleSeed - seed for the random sampling process
122
123 \param permuteDeg4Nodes - try permuting the drawing order of bonds around
124 atoms with four neighbors in order to improve the depiction
125
126 \param forceRDKit - use RDKit to generate coordinates even if
127 preferCoordGen is set to true
128
129 \return ID of the conformation added to the molecule containing the
130 2D coordinates
131
132
133*/
135 RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat = nullptr,
136 bool canonOrient = true, bool clearConfs = true, double weightDistMat = 0.5,
137 unsigned int nFlipsPerSample = 3, unsigned int nSamples = 100,
138 int sampleSeed = 25, bool permuteDeg4Nodes = true, bool forceRDKit = false);
139
140//! \brief Compute 2D coordinates where a piece of the molecule is
141/// constrained to have the same coordinates as a reference.
142/*!
143 This function generates a depiction for a molecule where a piece of the
144 molecule is constrained to have the same coordinates as a reference.
145
146 This is useful for, for example, generating depictions of SAR data
147 sets so that the cores of the molecules are all oriented the same way.
148
149 ARGUMENTS:
150
151 \param mol - the molecule to be aligned, this will come back
152 with a single conformer.
153 \param reference - a molecule with the reference atoms to align to;
154 this should have a depiction.
155 \param confId - (optional) the id of the reference conformation to use
156 \param referencePattern - (optional) a query molecule to be used to
157 generate the atom mapping between the molecule
158 and the reference.
159 \param acceptFailure - (optional) if true, standard depictions will be
160 generated for molecules that don't have a substructure
161 match to the reference; if false, throws a
162 DepictException.
163 \param forceRDKit - (optional) use RDKit to generate coordinates even if
164 preferCoordGen is set to true
165 \param allowOptionalAttachments - (optional) if true, terminal dummy atoms in
166 the reference are ignored if they match an implicit
167 hydrogen in the molecule, and a constrained
168 depiction is still attempted
169 RETURNS:
170
171 \return MatchVectType with (queryAtomidx, molAtomIdx) pairs used for
172 the constrained depiction
173*/
175 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
176 const RDKit::ROMol *referencePattern =
177 static_cast<const RDKit::ROMol *>(nullptr),
178 bool acceptFailure = false, bool forceRDKit = false,
179 bool allowOptionalAttachments = false);
180
181//! \brief Compute 2D coordinates where a piece of the molecule is
182/// constrained to have the same coordinates as a reference.
183/*!
184 This function generates a depiction for a molecule where a piece of the
185 molecule is constrained to have the same coordinates as a reference.
186
187 This is useful for, for example, generating depictions of SAR data
188 sets so that the cores of the molecules are all oriented the same way.
189 This overload allows specifying the (referenceAtom, molAtom) index pairs
190 which should be matched as MatchVectType. Please note that the
191 vector can be shorter than the number of atoms in the reference.
192
193 ARGUMENTS:
194
195 \param mol - the molecule to be aligned, this will come back
196 with a single conformer.
197 \param reference - a molecule with the reference atoms to align to;
198 this should have a depiction.
199 \param refMatchVect - a MatchVectType that will be used to
200 generate the atom mapping between the molecule
201 and the reference.
202 \param confId - (optional) the id of the reference conformation to use
203 \param forceRDKit - (optional) use RDKit to generate coordinates even if
204 preferCoordGen is set to true
205*/
207 RDKit::ROMol &mol, const RDKit::ROMol &reference,
208 const RDKit::MatchVectType &refMatchVect, int confId = -1,
209 bool forceRDKit = false);
210
211//! \brief Generate a 2D depiction for a molecule where all or part of
212/// it mimics the coordinates of a 3D reference structure.
213/*!
214 Generates a depiction for a molecule where a piece of the molecule
215 is constrained to have coordinates similar to those of a 3D reference
216 structure.
217
218 ARGUMENTS:
219 \param mol - the molecule to be aligned, this will come back
220 with a single conformer containing 2D coordinates
221 \param reference - a molecule with the reference atoms to align to.
222 By default this should be the same as mol, but with
223 3D coordinates
224 \param confId - (optional) the id of the reference conformation to use
225 \param referencePattern - (optional) a query molecule to map a subset of
226 the reference onto the mol, so that only some of the
227 atoms are aligned.
228 \param acceptFailure - (optional) if true, standard depictions will be
229 generated
230 for molecules that don't match the reference or the
231 referencePattern; if false, throws a DepictException.
232 \param forceRDKit - (optional) use RDKit to generate coordinates even if
233 preferCoordGen is set to true
234*/
236 RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId = -1,
237 RDKit::ROMol *referencePattern = nullptr, bool acceptFailure = false,
238 bool forceRDKit = false);
239
240//! \brief Rotate the 2D depiction such that the majority of bonds have a
241//! 30-degree angle with the X axis.
242/*!
243
244 ARGUMENTS:
245 \param mol - the molecule to be rotated
246 \param confId - (optional) the id of the reference conformation to use
247*/
248
250 int confId = -1);
251
252//! \brief Normalizes the 2D depiction.
253/*!
254 If canonicalize is != 0, the depiction is subjected to a canonical
255 transformation such that its main axis is aligned along the X axis
256 (canonicalize >0, the default) or the Y axis (canonicalize <0).
257 If canonicalize is 0, no canonicalization takes place.
258 If scaleFactor is <0.0 (the default) the depiction is scaled such
259 that bond lengths conform to RDKit standards. The applied scaling
260 factor is returned.
261
262 ARGUMENTS:
263 \param mol - the molecule to be normalized
264 \param confId - (optional) the id of the reference conformation to use
265 \param canonicalize - (optional) if != 0, a canonical transformation is
266 applied: if >0 (the default), the main molecule axis is
267 aligned to the X axis, if <0 to the Y axis.
268 If 0, no canonical transformation is applied.
269 \param scaleFactor - (optional) if >0.0, the scaling factor to apply. The
270 default (-1.0) means that the depiction is automatically
271 scaled such that bond lengths are the standard RDKit
272 ones.
273 RETURNS:
274
275 \return the applied scaling factor.
276*/
277
279 int confId = -1,
280 int canonicalize = 1,
281 double scaleFactor = -1.0);
282}; // namespace RDDepict
283
284#endif
DepictException(const char *msg)
Definition: RDDepictor.h:33
~DepictException() noexcept override=default
DepictException(const std::string msg)
Definition: RDDepictor.h:34
const char * what() const noexcept override
Definition: RDDepictor.h:35
#define RDKIT_DEPICTOR_EXPORT
Definition: export.h:89
boost::shared_array< double > DOUBLE_SMART_PTR
Definition: EmbeddedFrag.h:26
RDKIT_DEPICTOR_EXPORT void generateDepictionMatching3DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, RDKit::ROMol *referencePattern=nullptr, bool acceptFailure=false, bool forceRDKit=false)
Generate a 2D depiction for a molecule where all or part of it mimics the coordinates of a 3D referen...
RDKIT_DEPICTOR_EXPORT double normalizeDepiction(RDKit::ROMol &mol, int confId=-1, int canonicalize=1, double scaleFactor=-1.0)
Normalizes the 2D depiction.
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoords(RDKit::ROMol &mol, const RDGeom::INT_POINT2D_MAP *coordMap=nullptr, bool canonOrient=false, bool clearConfs=true, unsigned int nFlipsPerSample=0, unsigned int nSamples=0, int sampleSeed=0, bool permuteDeg4Nodes=false, bool forceRDKit=false)
Generate 2D coordinates (a depiction) for a molecule.
RDKIT_DEPICTOR_EXPORT void straightenDepiction(RDKit::ROMol &mol, int confId=-1)
Rotate the 2D depiction such that the majority of bonds have a 30-degree angle with the X axis.
RDKIT_DEPICTOR_EXPORT RDKit::MatchVectType generateDepictionMatching2DStructure(RDKit::ROMol &mol, const RDKit::ROMol &reference, int confId=-1, const RDKit::ROMol *referencePattern=static_cast< const RDKit::ROMol * >(nullptr), bool acceptFailure=false, bool forceRDKit=false, bool allowOptionalAttachments=false)
Compute 2D coordinates where a piece of the molecule is constrained to have the same coordinates as a...
RDKIT_DEPICTOR_EXPORT unsigned int compute2DCoordsMimicDistMat(RDKit::ROMol &mol, const DOUBLE_SMART_PTR *dmat=nullptr, bool canonOrient=true, bool clearConfs=true, double weightDistMat=0.5, unsigned int nFlipsPerSample=3, unsigned int nSamples=100, int sampleSeed=25, bool permuteDeg4Nodes=true, bool forceRDKit=false)
Compute the 2D coordinates such the interatom distances mimic those in a distance matrix.
RDKIT_DEPICTOR_EXPORT bool preferCoordGen
std::map< int, Point2D > INT_POINT2D_MAP
Definition: point.h:550
Std stuff.
Definition: Abbreviations.h:18
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)