RDKit
Open-source cheminformatics and machine learning.
SubstructureCache.h
Go to the documentation of this file.
1//
2// Copyright (C) 2014 Novartis Institutes for BioMedical Research
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#pragma once
12#include <list>
13#include <vector>
14#include <string>
15#include <stdexcept>
16#include "../RDKitBase.h"
17//#include "../Fingerprints/MorganFingerprints.h"
18#include "Graph.h"
19#include "Seed.h"
20#include "DebugTrace.h" // algorithm filter definitions
21
22namespace RDKit {
23namespace FMCS {
25 public:
26#pragma pack(push, 1)
28 typedef unsigned long long TValue;
29 TValue Value{0};
30
31 public:
33 };
34#pragma pack(pop)
35
36 struct HashKey {
38
39 public:
40 void computeKey(const Seed& seed,
41 const std::vector<unsigned>& queryAtomLabels,
42 const std::vector<unsigned>& queryBondLabels) {
43 computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
44 }
45
46 private:
47 void computeMorganCodeHash(const Seed& seed,
48 const std::vector<unsigned>& queryAtomLabels,
49 const std::vector<unsigned>& queryBondLabels) {
50 size_t nv = seed.getNumAtoms();
51 size_t ne = seed.getNumBonds();
52 std::vector<unsigned long> currCodes(nv);
53 std::vector<unsigned long> prevCodes(nv);
54 size_t nIterations = seed.getNumBonds();
55 if (nIterations > 5) {
56 nIterations = 5;
57 }
58
59 for (unsigned seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
60 seedAtomIdx++) {
61 currCodes[seedAtomIdx] =
62 queryAtomLabels[seed.MoleculeFragment.AtomsIdx[seedAtomIdx]];
63 }
64
65 for (size_t iter = 0; iter < nIterations; iter++) {
66 for (size_t i = 0; i < nv; i++) {
67 prevCodes[i] = currCodes[i];
68 }
69
70 for (size_t seedBondIdx = 0; seedBondIdx < ne; seedBondIdx++) {
71 const Bond* bond = seed.MoleculeFragment.Bonds[seedBondIdx];
72 unsigned order =
73 queryBondLabels[seed.MoleculeFragment.BondsIdx[seedBondIdx]];
74 unsigned atom1 = seed.MoleculeFragment.SeedAtomIdxMap
75 .find(bond->getBeginAtomIdx())
76 ->second;
77 unsigned atom2 =
78 seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getEndAtomIdx())
79 ->second;
80 unsigned v1 = prevCodes[atom1];
81 unsigned v2 = prevCodes[atom2];
82
83 currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
84 currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
85 }
86 }
87
88 KeyNumericMetrics::TValue result = 0;
89 for (unsigned seedAtomIdx = 0; seedAtomIdx < nv; seedAtomIdx++) {
90 unsigned long code = currCodes[seedAtomIdx];
91 result += code * (code + 6849) + 29;
92 }
93
94 NumericMetrics.Value = result;
95 }
96
97 // not implemented yet:
98 /*
99 void computeFingerprint(const Seed& seed)
100 {
101 unsigned int radius = seed.getNumBonds();
102 if (radius > 5)
103 radius = 5;
104 ExplicitBitVect *mf =
105 RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
106 radius); //SLOW !!!
107 // ...
108 delete mf;
109 NumericMetrics.Field.hasFingerprint = 1;
110 }
111 */
112 };
113
114 typedef HashKey TKey;
115 typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
116 private:
117 std::vector<TIndexEntry> ValueStorage;
118 std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
119 public:
120 // returns computed key, and pointer to index entry with a set of subgraphs
121 // corresponding to the key if found.
122 // then caller must find exactly matched subgraph in the result set with own
123 // search algorithm,
124 // including a resolving of collisions of hash key
126 const std::vector<unsigned>& queryAtomLabels,
127 const std::vector<unsigned>& queryBondLabels,
128 TKey& key) { // compute key and find entry
129 key.computeKey(seed, queryAtomLabels, queryBondLabels);
130 std::map<KeyNumericMetrics::TValue, size_t>::const_iterator entryit =
131 NumericIndex.find(key.NumericMetrics.Value);
132 if (NumericIndex.end() != entryit) {
133 return &ValueStorage[entryit->second];
134 }
135 return nullptr; // not found
136 }
137
138 // if find() did not found any entry for this key of seed a new entry will be
139 // created
140 void add(const Seed& seed, TKey& key,
141 TIndexEntry* entry) { // "compute" value and store it in NEW entry
142 // if not found
143 if (!entry) {
144 try {
145 ValueStorage.emplace_back();
146 } catch (...) {
147 return; // not enough memory room to add the item, but it's just a
148 // cache
149 }
150 entry = &ValueStorage.back();
151 }
152 entry->push_back(seed.Topology);
153
154 if (!NumericIndex
155 .insert(std::pair<KeyNumericMetrics::TValue, size_t>(
156 key.NumericMetrics.Value, ValueStorage.size() - 1))
157 .second) {
158 return; // not enough memory room to add the item, but it is just cache
159 }
160 }
161
162 size_t keyssize() const { // for statistics only
163 return ValueStorage.size();
164 }
165
166 size_t fullsize() const { // for statistics only
167 size_t n = 0;
168 for (std::vector<TIndexEntry>::const_iterator e = ValueStorage.begin();
169 e != ValueStorage.end(); e++) {
170 n += e->size();
171 }
172 return n;
173 }
174};
175} // namespace FMCS
176} // namespace RDKit
std::list< FMCS::Graph > TIndexEntry
TIndexEntry * find(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels, TKey &key)
void add(const Seed &seed, TKey &key, TIndexEntry *entry)
#define RDKIT_FMCS_EXPORT
Definition: export.h:145
const uint32_t seed
Definition: MHFP.h:29
Std stuff.
Definition: Abbreviations.h:18
void computeKey(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels)