RDKit
Open-source cheminformatics and machine learning.
MolStandardize.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018-2021 Susan H. Leung and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10/*! \file MolStandardize.h
11
12 \brief Defines the CleanupParameters and some convenience functions.
13
14*/
15#include <RDGeneral/export.h>
16#ifndef RD_MOLSTANDARDIZE_H
17#define RD_MOLSTANDARDIZE_H
18
19#include <string>
20#include <GraphMol/RDKitBase.h>
21
22namespace RDKit {
23class RWMol;
24class ROMol;
25
26namespace MolStandardize {
27
28//! The CleanupParameters structure defines the default parameters for the
29/// cleanup process and also allows the user to customize the process by
30/// changing the parameters.
31/*!
32
33 <b>Notes:</b>
34 - To customize the parameters, the structure must be initialized first.
35 (Another on the TODO list)
36 - For this project, not all the parameters have been revealed.
37 (TODO)
38
39*/
41 // TODO reveal all parameters
42 private:
43 const char *rdbase_cstr = std::getenv("RDBASE");
44
45 public:
46 std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
47 std::string normalizations;
48 std::string acidbaseFile;
49 std::string fragmentFile;
50 std::string tautomerTransforms;
51 int maxRestarts{200}; //! The maximum number of times to attempt to apply the
52 //! series of normalizations (default 200).
53 bool preferOrganic{false}; //! Whether to prioritize organic fragments when
54 //! choosing fragment parent (default False).
55 bool doCanonical{true}; //! Whether to apply normalizations in a
56 //! canonical order
57 int maxTautomers{1000}; //! The maximum number of tautomers to enumerate
58 //! (default 1000).
59 int maxTransforms{1000}; //! The maximum number of tautomer transformations
60 //! to apply (default 1000).
61 bool tautomerRemoveSp3Stereo{
62 true}; //! Whether to remove stereochemistry from sp3
63 //! centers involved in tautomerism (defaults to true)
64 bool tautomerRemoveBondStereo{
65 true}; //! Whether to remove stereochemistry from double
66 //! bonds involved in tautomerism (defaults to true)
67 bool tautomerRemoveIsotopicHs{
68 true}; //! Whether to remove isotopic Hs from centers
69 //! involved in tautomerism (defaults to true)
70 bool tautomerReassignStereo{
71 true}; //! Whether enumerate() should call assignStereochemistry
72 //! on all generated tautomers (defaults to true)
73 bool largestFragmentChooserUseAtomCount{
74 true}; //! Whether LargestFragmentChooser should use atom
75 //! count as main criterion before MW (defaults to true)
76 bool largestFragmentChooserCountHeavyAtomsOnly{
77 false}; //! Whether LargestFragmentChooser should only count
78 //! heavy atoms (defaults to false)
79 std::vector<std::pair<std::string, std::string>> normalizationData;
80 std::vector<std::pair<std::string, std::string>> fragmentData;
81 std::vector<std::tuple<std::string, std::string, std::string>> acidbaseData;
82 std::vector<std::tuple<std::string, std::string, std::string, std::string>>
85};
86
87RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
89
91 CleanupParameters &params, const std::string &json);
92
93//! The cleanup function is equivalent to the
94/// molvs.Standardizer().standardize(mol) function. It calls the same steps,
95/// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
96/// Reionizer, RDKit AssignStereochemistry.
98 const RWMol *mol,
100//! \overload
101inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
103 return cleanup(&mol, params);
104};
105
106//! Works the same as Normalizer().normalize(mol)
108 const RWMol *mol,
110
111//! Works the same as Reionizer().reionize(mol)
113 const RWMol *mol,
115
116//! Works the same as FragmentRemover().remove(mol)
118 const RWMol *mol,
120
121//! Works the same as TautomerEnumerator().canonicalize(mol)
123 const RWMol *mol,
125
126//! Returns the tautomer parent of a given molecule. The tautomer parent is the
127/// standardized canonical tautomer of the molecule.
129 const RWMol &mol,
131 bool skipStandardize = false);
132
133//! Returns the fragment parent of a given molecule. The fragment parent is the
134/// largest organic covalent unit in the molecule.
136 const RWMol &mol,
138 bool skip_standardize = false);
139
140//! calls removeStereochemistry() on the given molecule
142 const RWMol &mol,
144 bool skip_standardize = false);
145
146//! removes all isotope specifications from the given molecule
148 const RWMol &mol,
150 bool skip_standardize = false);
151
152//! Returns the charge parent of a given molecule. The charge parent is the
153//! uncharged version of the fragment parent.
155 const RWMol &mol,
157 bool skip_standardize = false);
158
159//! Returns the super parent. The super parent is the fragment, charge, isotope,
160//! stereo, and tautomer parent of the molecule.
162 const RWMol &mol,
164 bool skip_standardize = false);
165
166//! Convenience function for quickly standardizing a single SMILES string.
167/// Returns a standardized canonical SMILES string given a SMILES string.
168/// This is the equivalent of calling cleanup() on the corresponding molecule.
170 const std::string &smiles);
171
172//! TODO
174 const std::string &smiles,
176}; // namespace MolStandardize
177} // namespace RDKit
178#endif
pulls in the core RDKit functionality
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition: export.h:313
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * reionize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT std::vector< std::string > enumerateTautomerSmiles(const std::string &smiles, const CleanupParameters &params=defaultCleanupParameters)
TODO.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * isotopeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
removes all isotope specifications from the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * tautomerParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skipStandardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * fragmentParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * normalize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * removeFragments(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as FragmentRemover().remove(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * canonicalTautomer(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as TautomerEnumerator().canonicalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * stereoParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
calls removeStereochemistry() on the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT const CleanupParameters defaultCleanupParameters
Definition: Fragment.h:25
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * chargeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(const std::string &smiles)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * cleanup(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON(CleanupParameters &params, const std::string &json)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * superParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
Std stuff.
Definition: Abbreviations.h:18
std::vector< std::tuple< std::string, std::string, std::string, std::string > > tautomerTransformData
std::vector< std::tuple< std::string, std::string, std::string > > acidbaseData
std::vector< std::pair< std::string, std::string > > fragmentData
std::vector< std::pair< std::string, std::string > > normalizationData