RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
ScaffoldNetwork.h
Go to the documentation of this file.
1//
2// Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_SCAFFOLDNETWORK_H
12#define RD_SCAFFOLDNETWORK_H
13
14#include <vector>
15#include <map>
16#include <string>
17#include <sstream>
18#include <memory>
19#include <iostream>
20
21#ifdef RDK_USE_BOOST_SERIALIZATION
22#include <RDGeneral/Invariant.h>
24#include <boost/archive/text_oarchive.hpp>
25#include <boost/archive/text_iarchive.hpp>
26#include <boost/serialization/vector.hpp>
27#include <boost/serialization/shared_ptr.hpp>
28#include <boost/serialization/version.hpp>
30#endif
31
32namespace RDKit {
33class ROMol;
35
36namespace ScaffoldNetwork {
37
40 true; ///< include scaffolds with all atoms replaced by dummies
42 false; ///< include scaffolds with all bonds replaced by single bonds
44 true; ///< remove attachment points from scaffolds and include the result
46 true; ///< Include the version of the scaffold with attachment points
48 false; ///< Include molecules names of the input molecules
50 true; ///< keep only the first fragment from the bond breaking rule
52 true; ///< Do a pruning/flattening step before starting fragmenting
53 bool flattenIsotopes = true; ///< remove isotopes when flattening
55 true; ///< remove chirality and bond stereo when flattening
57 true; ///< keep only the largest fragment when doing flattening
58 bool collectMolCounts = true; ///< keep track of the number of molecules each
59 ///< scaffold was reached from
60
61 std::vector<std::shared_ptr<ChemicalReaction>>
62 bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
63 ///< single reactant and produce two products
65 : ScaffoldNetworkParams{{"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {}
66 ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
67};
68
//! Labels the relationship that a NetworkEdge records between two nodes.
//! The numeric values are assigned explicitly and are what gets archived
//! when an edge is serialized.
enum class EdgeType {
  //! molecule -> fragment
  Fragment = 1,
  //! molecule -> generic molecule (all atoms are dummies)
  Generic = 2,
  //! molecule -> generic bond molecule (all bonds single)
  GenericBond = 3,
  //! molecule -> molecule with no attachment points
  RemoveAttachment = 4,
  //! molecule -> flattened molecule
  Initialize = 5
};
76
78 size_t beginIdx;
79 size_t endIdx;
82 NetworkEdge(size_t bi, size_t ei, EdgeType typ)
83 : beginIdx(bi), endIdx(ei), type(typ) {}
85 return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
86 }
88 return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
89 }
90#ifdef RDK_USE_BOOST_SERIALIZATION
91 private:
92 friend class boost::serialization::access;
93 template <class Archive>
94 void serialize(Archive &ar, const unsigned int version) {
95 RDUNUSED_PARAM(version);
96 ar & beginIdx;
97 ar & endIdx;
98 ar & type;
99 }
100#endif
101};
102
104 std::vector<std::string> nodes; ///< SMILES for the scaffolds
105 std::vector<unsigned>
106 counts; ///< number of times each scaffold was encountered
107 std::vector<unsigned>
108 molCounts; ///< number of molecules each scaffold was found in
109 std::vector<NetworkEdge> edges; ///< edges in the network
111#ifdef RDK_USE_BOOST_SERIALIZATION
112 ScaffoldNetwork(const std::string &pkl) {
113 std::stringstream iss(pkl);
114 boost::archive::text_iarchive ia(iss);
115 ia >> *this;
116 }
117
118 private:
119 friend class boost::serialization::access;
120 template <class Archive>
121 void serialize(Archive &ar, const unsigned int version) {
122 RDUNUSED_PARAM(version);
123 ar & nodes;
124 ar & counts;
125 if (version > 0) {
126 ar & molCounts;
127 }
128 ar & edges;
129 }
130#endif
131};
132
133//! update an existing ScaffoldNetwork using a set of molecules
//!
//! Only the declaration is visible in this header; the template is defined
//! elsewhere.
//!   - mols: the molecules to process; the container type is the template
//!     parameter T
//!   - network: the network to update, taken by non-const reference
//!   - params: the ScaffoldNetworkParams to use
134template <typename T>
135void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
136 const ScaffoldNetworkParams &params);
137
138//! create a new ScaffoldNetwork for a set of molecules
139template <typename T>
141 const ScaffoldNetworkParams &params) {
142 ScaffoldNetwork res;
143 updateScaffoldNetwork(mols, res, params);
144 return res;
145}
146//! allows edge types to output nicely as strings
147inline std::ostream &operator<<(std::ostream &ostr,
149 switch (e) {
151 ostr << "Fragment";
152 break;
154 ostr << "Generic";
155 break;
157 ostr << "GenericBond";
158 break;
160 ostr << "RemoveAttachment";
161 break;
163 ostr << "Initialize";
164 break;
165 default:
166 ostr << "UNKNOWN";
167 break;
168 }
169 return ostr;
170}
171//! allows edges to output nicely as strings
172inline std::ostream &operator<<(std::ostream &ostr,
174 ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
175 << ", type:" << e.type << " )";
176 return ostr;
177}
178
179//! returns parameters for constructing scaffold networks using BRICS
180//! fragmentation
182
183} // namespace ScaffoldNetwork
184} // namespace RDKit
185
186#ifdef RDK_USE_BOOST_SERIALIZATION
187namespace boost {
188namespace serialization {
// Sets the Boost.Serialization class version of ScaffoldNetwork to 1.
// ScaffoldNetwork::serialize() only archives molCounts when the version it
// is handed is greater than 0.
189template <>
190struct version<RDKit::ScaffoldNetwork::ScaffoldNetwork> {
191 BOOST_STATIC_CONSTANT(int, value = 1);
192};
193} // namespace serialization
194} // namespace boost
195#endif
196
197#endif
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::Atom &at)
allows Atom objects to be dumped to streams
#define RDUNUSED_PARAM(x)
Definition Invariant.h:197
This is a class for storing and applying general chemical reactions.
Definition Reaction.h:121
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition export.h:513
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
@ Initialize
molecule -> flattened molecule
@ Generic
molecule -> generic molecule (all atoms are dummies)
@ RemoveAttachment
molecule -> molecule with no attachment points
@ GenericBond
molecule -> generic bond molecule (all bonds single)
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
Std stuff.
Definition RDLog.h:25
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
bool includeGenericBondScaffolds
include scaffolds with all bonds replaced by single bonds
bool includeNames
Include the names of the input molecules.
bool pruneBeforeFragmenting
Do a pruning/flattening step before starting fragmenting.
bool includeGenericScaffolds
include scaffolds with all atoms replaced by dummies
bool flattenKeepLargest
keep only the largest fragment when doing flattening
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
bool includeScaffoldsWithAttachments
Include the version of the scaffold with attachment points.
bool flattenIsotopes
remove isotopes when flattening
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
bool flattenChirality
remove chirality and bond stereo when flattening
bool keepOnlyFirstFragment
keep only the first fragment from the bond breaking rule
bool includeScaffoldsWithoutAttachments
remove attachment points from scaffolds and include the result
std::vector< NetworkEdge > edges
edges in the network
std::vector< unsigned > molCounts
number of molecules each scaffold was found in
std::vector< std::string > nodes
SMILES for the scaffolds.
std::vector< unsigned > counts
number of times each scaffold was encountered