Sonja Topf
committed on
Commit
·
e517ec0
1
Parent(s):
5567bdb
big refactoring
Browse files- .example.env +1 -0
- .gitignore +5 -2
- LICENSE +407 -0
- README.md +26 -5
- checkpoints/best.pt +3 -0
- assets/best1.pt → checkpoints/best_chemprop_model.pt +0 -0
- config/config.toml +48 -0
- predict.py +7 -11
- requirements.txt +7 -6
- src/{preprocess.py → data.py} +76 -3
- src/evaluation.py +43 -0
- src/preds.csv +0 -0
- src/smiles.csv +0 -646
- src/train_model.py +18 -0
- train.py +22 -0
.example.env
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
TOKEN=example_token
|
.gitignore
CHANGED
|
@@ -3,5 +3,8 @@ results.csv
|
|
| 3 |
predict copy.py
|
| 4 |
debug.py
|
| 5 |
__pycache__
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
predict copy.py
|
| 4 |
debug.py
|
| 5 |
__pycache__
|
| 6 |
+
predictions.json
|
| 7 |
+
notes.txt
|
| 8 |
+
logs/*
|
| 9 |
+
data/*
|
| 10 |
+
.env
|
LICENSE
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Attribution-NonCommercial 4.0 International
|
| 2 |
+
|
| 3 |
+
=======================================================================
|
| 4 |
+
|
| 5 |
+
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
| 6 |
+
does not provide legal services or legal advice. Distribution of
|
| 7 |
+
Creative Commons public licenses does not create a lawyer-client or
|
| 8 |
+
other relationship. Creative Commons makes its licenses and related
|
| 9 |
+
information available on an "as-is" basis. Creative Commons gives no
|
| 10 |
+
warranties regarding its licenses, any material licensed under their
|
| 11 |
+
terms and conditions, or any related information. Creative Commons
|
| 12 |
+
disclaims all liability for damages resulting from their use to the
|
| 13 |
+
fullest extent possible.
|
| 14 |
+
|
| 15 |
+
Using Creative Commons Public Licenses
|
| 16 |
+
|
| 17 |
+
Creative Commons public licenses provide a standard set of terms and
|
| 18 |
+
conditions that creators and other rights holders may use to share
|
| 19 |
+
original works of authorship and other material subject to copyright
|
| 20 |
+
and certain other rights specified in the public license below. The
|
| 21 |
+
following considerations are for informational purposes only, are not
|
| 22 |
+
exhaustive, and do not form part of our licenses.
|
| 23 |
+
|
| 24 |
+
Considerations for licensors: Our public licenses are
|
| 25 |
+
intended for use by those authorized to give the public
|
| 26 |
+
permission to use material in ways otherwise restricted by
|
| 27 |
+
copyright and certain other rights. Our licenses are
|
| 28 |
+
irrevocable. Licensors should read and understand the terms
|
| 29 |
+
and conditions of the license they choose before applying it.
|
| 30 |
+
Licensors should also secure all rights necessary before
|
| 31 |
+
applying our licenses so that the public can reuse the
|
| 32 |
+
material as expected. Licensors should clearly mark any
|
| 33 |
+
material not subject to the license. This includes other CC-
|
| 34 |
+
licensed material, or material used under an exception or
|
| 35 |
+
limitation to copyright. More considerations for licensors:
|
| 36 |
+
wiki.creativecommons.org/Considerations_for_licensors
|
| 37 |
+
|
| 38 |
+
Considerations for the public: By using one of our public
|
| 39 |
+
licenses, a licensor grants the public permission to use the
|
| 40 |
+
licensed material under specified terms and conditions. If
|
| 41 |
+
the licensor's permission is not necessary for any reason--for
|
| 42 |
+
example, because of any applicable exception or limitation to
|
| 43 |
+
copyright--then that use is not regulated by the license. Our
|
| 44 |
+
licenses grant only permissions under copyright and certain
|
| 45 |
+
other rights that a licensor has authority to grant. Use of
|
| 46 |
+
the licensed material may still be restricted for other
|
| 47 |
+
reasons, including because others have copyright or other
|
| 48 |
+
rights in the material. A licensor may make special requests,
|
| 49 |
+
such as asking that all changes be marked or described.
|
| 50 |
+
Although not required by our licenses, you are encouraged to
|
| 51 |
+
respect those requests where reasonable. More considerations
|
| 52 |
+
for the public:
|
| 53 |
+
wiki.creativecommons.org/Considerations_for_licensees
|
| 54 |
+
|
| 55 |
+
=======================================================================
|
| 56 |
+
|
| 57 |
+
Creative Commons Attribution-NonCommercial 4.0 International Public
|
| 58 |
+
License
|
| 59 |
+
|
| 60 |
+
By exercising the Licensed Rights (defined below), You accept and agree
|
| 61 |
+
to be bound by the terms and conditions of this Creative Commons
|
| 62 |
+
Attribution-NonCommercial 4.0 International Public License ("Public
|
| 63 |
+
License"). To the extent this Public License may be interpreted as a
|
| 64 |
+
contract, You are granted the Licensed Rights in consideration of Your
|
| 65 |
+
acceptance of these terms and conditions, and the Licensor grants You
|
| 66 |
+
such rights in consideration of benefits the Licensor receives from
|
| 67 |
+
making the Licensed Material available under these terms and
|
| 68 |
+
conditions.
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
Section 1 -- Definitions.
|
| 72 |
+
|
| 73 |
+
a. Adapted Material means material subject to Copyright and Similar
|
| 74 |
+
Rights that is derived from or based upon the Licensed Material
|
| 75 |
+
and in which the Licensed Material is translated, altered,
|
| 76 |
+
arranged, transformed, or otherwise modified in a manner requiring
|
| 77 |
+
permission under the Copyright and Similar Rights held by the
|
| 78 |
+
Licensor. For purposes of this Public License, where the Licensed
|
| 79 |
+
Material is a musical work, performance, or sound recording,
|
| 80 |
+
Adapted Material is always produced where the Licensed Material is
|
| 81 |
+
synched in timed relation with a moving image.
|
| 82 |
+
|
| 83 |
+
b. Adapter's License means the license You apply to Your Copyright
|
| 84 |
+
and Similar Rights in Your contributions to Adapted Material in
|
| 85 |
+
accordance with the terms and conditions of this Public License.
|
| 86 |
+
|
| 87 |
+
c. Copyright and Similar Rights means copyright and/or similar rights
|
| 88 |
+
closely related to copyright including, without limitation,
|
| 89 |
+
performance, broadcast, sound recording, and Sui Generis Database
|
| 90 |
+
Rights, without regard to how the rights are labeled or
|
| 91 |
+
categorized. For purposes of this Public License, the rights
|
| 92 |
+
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
| 93 |
+
Rights.
|
| 94 |
+
d. Effective Technological Measures means those measures that, in the
|
| 95 |
+
absence of proper authority, may not be circumvented under laws
|
| 96 |
+
fulfilling obligations under Article 11 of the WIPO Copyright
|
| 97 |
+
Treaty adopted on December 20, 1996, and/or similar international
|
| 98 |
+
agreements.
|
| 99 |
+
|
| 100 |
+
e. Exceptions and Limitations means fair use, fair dealing, and/or
|
| 101 |
+
any other exception or limitation to Copyright and Similar Rights
|
| 102 |
+
that applies to Your use of the Licensed Material.
|
| 103 |
+
|
| 104 |
+
f. Licensed Material means the artistic or literary work, database,
|
| 105 |
+
or other material to which the Licensor applied this Public
|
| 106 |
+
License.
|
| 107 |
+
|
| 108 |
+
g. Licensed Rights means the rights granted to You subject to the
|
| 109 |
+
terms and conditions of this Public License, which are limited to
|
| 110 |
+
all Copyright and Similar Rights that apply to Your use of the
|
| 111 |
+
Licensed Material and that the Licensor has authority to license.
|
| 112 |
+
|
| 113 |
+
h. Licensor means the individual(s) or entity(ies) granting rights
|
| 114 |
+
under this Public License.
|
| 115 |
+
|
| 116 |
+
i. NonCommercial means not primarily intended for or directed towards
|
| 117 |
+
commercial advantage or monetary compensation. For purposes of
|
| 118 |
+
this Public License, the exchange of the Licensed Material for
|
| 119 |
+
other material subject to Copyright and Similar Rights by digital
|
| 120 |
+
file-sharing or similar means is NonCommercial provided there is
|
| 121 |
+
no payment of monetary compensation in connection with the
|
| 122 |
+
exchange.
|
| 123 |
+
|
| 124 |
+
j. Share means to provide material to the public by any means or
|
| 125 |
+
process that requires permission under the Licensed Rights, such
|
| 126 |
+
as reproduction, public display, public performance, distribution,
|
| 127 |
+
dissemination, communication, or importation, and to make material
|
| 128 |
+
available to the public including in ways that members of the
|
| 129 |
+
public may access the material from a place and at a time
|
| 130 |
+
individually chosen by them.
|
| 131 |
+
|
| 132 |
+
k. Sui Generis Database Rights means rights other than copyright
|
| 133 |
+
resulting from Directive 96/9/EC of the European Parliament and of
|
| 134 |
+
the Council of 11 March 1996 on the legal protection of databases,
|
| 135 |
+
as amended and/or succeeded, as well as other essentially
|
| 136 |
+
equivalent rights anywhere in the world.
|
| 137 |
+
|
| 138 |
+
l. You means the individual or entity exercising the Licensed Rights
|
| 139 |
+
under this Public License. Your has a corresponding meaning.
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
Section 2 -- Scope.
|
| 143 |
+
|
| 144 |
+
a. License grant.
|
| 145 |
+
|
| 146 |
+
1. Subject to the terms and conditions of this Public License,
|
| 147 |
+
the Licensor hereby grants You a worldwide, royalty-free,
|
| 148 |
+
non-sublicensable, non-exclusive, irrevocable license to
|
| 149 |
+
exercise the Licensed Rights in the Licensed Material to:
|
| 150 |
+
|
| 151 |
+
a. reproduce and Share the Licensed Material, in whole or
|
| 152 |
+
in part, for NonCommercial purposes only; and
|
| 153 |
+
|
| 154 |
+
b. produce, reproduce, and Share Adapted Material for
|
| 155 |
+
NonCommercial purposes only.
|
| 156 |
+
|
| 157 |
+
2. Exceptions and Limitations. For the avoidance of doubt, where
|
| 158 |
+
Exceptions and Limitations apply to Your use, this Public
|
| 159 |
+
License does not apply, and You do not need to comply with
|
| 160 |
+
its terms and conditions.
|
| 161 |
+
|
| 162 |
+
3. Term. The term of this Public License is specified in Section
|
| 163 |
+
6(a).
|
| 164 |
+
|
| 165 |
+
4. Media and formats; technical modifications allowed. The
|
| 166 |
+
Licensor authorizes You to exercise the Licensed Rights in
|
| 167 |
+
all media and formats whether now known or hereafter created,
|
| 168 |
+
and to make technical modifications necessary to do so. The
|
| 169 |
+
Licensor waives and/or agrees not to assert any right or
|
| 170 |
+
authority to forbid You from making technical modifications
|
| 171 |
+
necessary to exercise the Licensed Rights, including
|
| 172 |
+
technical modifications necessary to circumvent Effective
|
| 173 |
+
Technological Measures. For purposes of this Public License,
|
| 174 |
+
simply making modifications authorized by this Section 2(a)
|
| 175 |
+
(4) never produces Adapted Material.
|
| 176 |
+
|
| 177 |
+
5. Downstream recipients.
|
| 178 |
+
|
| 179 |
+
a. Offer from the Licensor -- Licensed Material. Every
|
| 180 |
+
recipient of the Licensed Material automatically
|
| 181 |
+
receives an offer from the Licensor to exercise the
|
| 182 |
+
Licensed Rights under the terms and conditions of this
|
| 183 |
+
Public License.
|
| 184 |
+
|
| 185 |
+
b. No downstream restrictions. You may not offer or impose
|
| 186 |
+
any additional or different terms or conditions on, or
|
| 187 |
+
apply any Effective Technological Measures to, the
|
| 188 |
+
Licensed Material if doing so restricts exercise of the
|
| 189 |
+
Licensed Rights by any recipient of the Licensed
|
| 190 |
+
Material.
|
| 191 |
+
|
| 192 |
+
6. No endorsement. Nothing in this Public License constitutes or
|
| 193 |
+
may be construed as permission to assert or imply that You
|
| 194 |
+
are, or that Your use of the Licensed Material is, connected
|
| 195 |
+
with, or sponsored, endorsed, or granted official status by,
|
| 196 |
+
the Licensor or others designated to receive attribution as
|
| 197 |
+
provided in Section 3(a)(1)(A)(i).
|
| 198 |
+
|
| 199 |
+
b. Other rights.
|
| 200 |
+
|
| 201 |
+
1. Moral rights, such as the right of integrity, are not
|
| 202 |
+
licensed under this Public License, nor are publicity,
|
| 203 |
+
privacy, and/or other similar personality rights; however, to
|
| 204 |
+
the extent possible, the Licensor waives and/or agrees not to
|
| 205 |
+
assert any such rights held by the Licensor to the limited
|
| 206 |
+
extent necessary to allow You to exercise the Licensed
|
| 207 |
+
Rights, but not otherwise.
|
| 208 |
+
|
| 209 |
+
2. Patent and trademark rights are not licensed under this
|
| 210 |
+
Public License.
|
| 211 |
+
|
| 212 |
+
3. To the extent possible, the Licensor waives any right to
|
| 213 |
+
collect royalties from You for the exercise of the Licensed
|
| 214 |
+
Rights, whether directly or through a collecting society
|
| 215 |
+
under any voluntary or waivable statutory or compulsory
|
| 216 |
+
licensing scheme. In all other cases the Licensor expressly
|
| 217 |
+
reserves any right to collect such royalties, including when
|
| 218 |
+
the Licensed Material is used other than for NonCommercial
|
| 219 |
+
purposes.
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
Section 3 -- License Conditions.
|
| 223 |
+
|
| 224 |
+
Your exercise of the Licensed Rights is expressly made subject to the
|
| 225 |
+
following conditions.
|
| 226 |
+
|
| 227 |
+
a. Attribution.
|
| 228 |
+
|
| 229 |
+
1. If You Share the Licensed Material (including in modified
|
| 230 |
+
form), You must:
|
| 231 |
+
|
| 232 |
+
a. retain the following if it is supplied by the Licensor
|
| 233 |
+
with the Licensed Material:
|
| 234 |
+
|
| 235 |
+
i. identification of the creator(s) of the Licensed
|
| 236 |
+
Material and any others designated to receive
|
| 237 |
+
attribution, in any reasonable manner requested by
|
| 238 |
+
the Licensor (including by pseudonym if
|
| 239 |
+
designated);
|
| 240 |
+
|
| 241 |
+
ii. a copyright notice;
|
| 242 |
+
|
| 243 |
+
iii. a notice that refers to this Public License;
|
| 244 |
+
|
| 245 |
+
iv. a notice that refers to the disclaimer of
|
| 246 |
+
warranties;
|
| 247 |
+
|
| 248 |
+
v. a URI or hyperlink to the Licensed Material to the
|
| 249 |
+
extent reasonably practicable;
|
| 250 |
+
|
| 251 |
+
b. indicate if You modified the Licensed Material and
|
| 252 |
+
retain an indication of any previous modifications; and
|
| 253 |
+
|
| 254 |
+
c. indicate the Licensed Material is licensed under this
|
| 255 |
+
Public License, and include the text of, or the URI or
|
| 256 |
+
hyperlink to, this Public License.
|
| 257 |
+
|
| 258 |
+
2. You may satisfy the conditions in Section 3(a)(1) in any
|
| 259 |
+
reasonable manner based on the medium, means, and context in
|
| 260 |
+
which You Share the Licensed Material. For example, it may be
|
| 261 |
+
reasonable to satisfy the conditions by providing a URI or
|
| 262 |
+
hyperlink to a resource that includes the required
|
| 263 |
+
information.
|
| 264 |
+
|
| 265 |
+
3. If requested by the Licensor, You must remove any of the
|
| 266 |
+
information required by Section 3(a)(1)(A) to the extent
|
| 267 |
+
reasonably practicable.
|
| 268 |
+
|
| 269 |
+
4. If You Share Adapted Material You produce, the Adapter's
|
| 270 |
+
License You apply must not prevent recipients of the Adapted
|
| 271 |
+
Material from complying with this Public License.
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
Section 4 -- Sui Generis Database Rights.
|
| 275 |
+
|
| 276 |
+
Where the Licensed Rights include Sui Generis Database Rights that
|
| 277 |
+
apply to Your use of the Licensed Material:
|
| 278 |
+
|
| 279 |
+
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
| 280 |
+
to extract, reuse, reproduce, and Share all or a substantial
|
| 281 |
+
portion of the contents of the database for NonCommercial purposes
|
| 282 |
+
only;
|
| 283 |
+
|
| 284 |
+
b. if You include all or a substantial portion of the database
|
| 285 |
+
contents in a database in which You have Sui Generis Database
|
| 286 |
+
Rights, then the database in which You have Sui Generis Database
|
| 287 |
+
Rights (but not its individual contents) is Adapted Material; and
|
| 288 |
+
|
| 289 |
+
c. You must comply with the conditions in Section 3(a) if You Share
|
| 290 |
+
all or a substantial portion of the contents of the database.
|
| 291 |
+
|
| 292 |
+
For the avoidance of doubt, this Section 4 supplements and does not
|
| 293 |
+
replace Your obligations under this Public License where the Licensed
|
| 294 |
+
Rights include other Copyright and Similar Rights.
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
| 298 |
+
|
| 299 |
+
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
| 300 |
+
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
| 301 |
+
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
| 302 |
+
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
| 303 |
+
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
| 304 |
+
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
| 305 |
+
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
| 306 |
+
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
| 307 |
+
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
| 308 |
+
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
| 309 |
+
|
| 310 |
+
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
| 311 |
+
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
| 312 |
+
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
| 313 |
+
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
| 314 |
+
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
| 315 |
+
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
| 316 |
+
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
| 317 |
+
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
| 318 |
+
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
| 319 |
+
|
| 320 |
+
c. The disclaimer of warranties and limitation of liability provided
|
| 321 |
+
above shall be interpreted in a manner that, to the extent
|
| 322 |
+
possible, most closely approximates an absolute disclaimer and
|
| 323 |
+
waiver of all liability.
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
Section 6 -- Term and Termination.
|
| 327 |
+
|
| 328 |
+
a. This Public License applies for the term of the Copyright and
|
| 329 |
+
Similar Rights licensed here. However, if You fail to comply with
|
| 330 |
+
this Public License, then Your rights under this Public License
|
| 331 |
+
terminate automatically.
|
| 332 |
+
|
| 333 |
+
b. Where Your right to use the Licensed Material has terminated under
|
| 334 |
+
Section 6(a), it reinstates:
|
| 335 |
+
|
| 336 |
+
1. automatically as of the date the violation is cured, provided
|
| 337 |
+
it is cured within 30 days of Your discovery of the
|
| 338 |
+
violation; or
|
| 339 |
+
|
| 340 |
+
2. upon express reinstatement by the Licensor.
|
| 341 |
+
|
| 342 |
+
For the avoidance of doubt, this Section 6(b) does not affect any
|
| 343 |
+
right the Licensor may have to seek remedies for Your violations
|
| 344 |
+
of this Public License.
|
| 345 |
+
|
| 346 |
+
c. For the avoidance of doubt, the Licensor may also offer the
|
| 347 |
+
Licensed Material under separate terms or conditions or stop
|
| 348 |
+
distributing the Licensed Material at any time; however, doing so
|
| 349 |
+
will not terminate this Public License.
|
| 350 |
+
|
| 351 |
+
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
| 352 |
+
License.
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
Section 7 -- Other Terms and Conditions.
|
| 356 |
+
|
| 357 |
+
a. The Licensor shall not be bound by any additional or different
|
| 358 |
+
terms or conditions communicated by You unless expressly agreed.
|
| 359 |
+
|
| 360 |
+
b. Any arrangements, understandings, or agreements regarding the
|
| 361 |
+
Licensed Material not stated herein are separate from and
|
| 362 |
+
independent of the terms and conditions of this Public License.
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
Section 8 -- Interpretation.
|
| 366 |
+
|
| 367 |
+
a. For the avoidance of doubt, this Public License does not, and
|
| 368 |
+
shall not be interpreted to, reduce, limit, restrict, or impose
|
| 369 |
+
conditions on any use of the Licensed Material that could lawfully
|
| 370 |
+
be made without permission under this Public License.
|
| 371 |
+
|
| 372 |
+
b. To the extent possible, if any provision of this Public License is
|
| 373 |
+
deemed unenforceable, it shall be automatically reformed to the
|
| 374 |
+
minimum extent necessary to make it enforceable. If the provision
|
| 375 |
+
cannot be reformed, it shall be severed from this Public License
|
| 376 |
+
without affecting the enforceability of the remaining terms and
|
| 377 |
+
conditions.
|
| 378 |
+
|
| 379 |
+
c. No term or condition of this Public License will be waived and no
|
| 380 |
+
failure to comply consented to unless expressly agreed to by the
|
| 381 |
+
Licensor.
|
| 382 |
+
|
| 383 |
+
d. Nothing in this Public License constitutes or may be interpreted
|
| 384 |
+
as a limitation upon, or waiver of, any privileges and immunities
|
| 385 |
+
that apply to the Licensor or You, including from the legal
|
| 386 |
+
processes of any jurisdiction or authority.
|
| 387 |
+
|
| 388 |
+
=======================================================================
|
| 389 |
+
|
| 390 |
+
Creative Commons is not a party to its public
|
| 391 |
+
licenses. Notwithstanding, Creative Commons may elect to apply one of
|
| 392 |
+
its public licenses to material it publishes and in those instances
|
| 393 |
+
will be considered the “Licensor.” The text of the Creative Commons
|
| 394 |
+
public licenses is dedicated to the public domain under the CC0 Public
|
| 395 |
+
Domain Dedication. Except for the limited purpose of indicating that
|
| 396 |
+
material is shared under a Creative Commons public license or as
|
| 397 |
+
otherwise permitted by the Creative Commons policies published at
|
| 398 |
+
creativecommons.org/policies, Creative Commons does not authorize the
|
| 399 |
+
use of the trademark "Creative Commons" or any other trademark or logo
|
| 400 |
+
of Creative Commons without its prior written consent including,
|
| 401 |
+
without limitation, in connection with any unauthorized modifications
|
| 402 |
+
to any of its public licenses or any other arrangements,
|
| 403 |
+
understandings, or agreements concerning use of licensed material. For
|
| 404 |
+
the avoidance of doubt, this paragraph does not form part of the
|
| 405 |
+
public licenses.
|
| 406 |
+
|
| 407 |
+
Creative Commons may be contacted at creativecommons.org.
|
README.md
CHANGED
|
@@ -13,16 +13,31 @@ short_description: Chemprop Baseline Classifier for Tox21
|
|
| 13 |
|
| 14 |
This repository hosts a Hugging Face Space that provides an example API for submitting models to the [Tox21 Leaderboard](https://huggingface.co/spaces/tschouis/tox21_leaderboard).
|
| 15 |
|
| 16 |
-
In this example, we trained a Chemprop classifier on the Tox21 targets and saved the trained model in the `
|
| 17 |
|
| 18 |
-
**Important:** For leaderboard submission, your Space
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Repository Structure
|
| 21 |
- `predict.py` - Defines the `predict()` function required by the leaderboard (entry point for inference).
|
| 22 |
- `app.py` - FastAPI application wrapper (can be used as-is).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
- `src/` - Core model & preprocessing logic:
|
| 25 |
-
- `
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# Quickstart with Spaces
|
| 28 |
|
|
@@ -32,10 +47,16 @@ You can easily adapt this project in your own Hugging Face account:
|
|
| 32 |
|
| 33 |
- Click "Duplicate this Space" (top-right corner).
|
| 34 |
|
|
|
|
|
|
|
| 35 |
- Modify `src/` for your preprocessing pipeline and model class
|
| 36 |
|
| 37 |
- Modify `predict()` inside `predict.py` to perform model inference while keeping the function skeleton unchanged to remain compatible with the leaderboard.
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
That’s it, your model will be available as an API endpoint for the Tox21 Leaderboard.
|
| 40 |
|
| 41 |
# Installation
|
|
@@ -75,6 +96,6 @@ The output will be a nested dictionary in the format:
|
|
| 75 |
|
| 76 |
# Notes
|
| 77 |
|
| 78 |
-
-
|
| 79 |
|
| 80 |
-
- Preprocessing (here inside `src/
|
|
|
|
| 13 |
|
| 14 |
This repository hosts a Hugging Face Space that provides an example API for submitting models to the [Tox21 Leaderboard](https://huggingface.co/spaces/tschouis/tox21_leaderboard).
|
| 15 |
|
| 16 |
+
In this example, we trained a Chemprop classifier on the Tox21 targets and saved the trained model in the `checkpoints/` folder.
|
| 17 |
|
| 18 |
+
**Important:** For leaderboard submission, your Space needs to include training code. The file `train.py` should train the model using the config specified inside the `config/` folder and save the final model parameters into a file inside the `checkpoints/` folder. The model should be trained using the [Tox21_dataset](https://huggingface.co/datasets/tschouis/tox21) provided on Hugging Face. The datasets can be loaded like this:
|
| 19 |
+
```python
|
| 20 |
+
from datasets import load_dataset
|
| 21 |
+
ds = load_dataset("tschouis/tox21", token=token)
|
| 22 |
+
train_df = ds["train"].to_pandas()
|
| 23 |
+
val_df = ds["validation"].to_pandas()
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
Additionally, the Space needs to implement inference in the `predict()` function inside `predict.py`. The `predict()` function must keep the provided skeleton: it should take a list of SMILES strings as input and return a nested prediction dictionary as output, with SMILES strings as keys and dictionaries of target-name/prediction pairs as values. Therefore, any preprocessing of SMILES strings must be executed on the fly during inference.
|
| 27 |
|
| 28 |
# Repository Structure
|
| 29 |
- `predict.py` - Defines the `predict()` function required by the leaderboard (entry point for inference).
|
| 30 |
- `app.py` - FastAPI application wrapper (can be used as-is).
|
| 31 |
+
- `train.py` - trains and saves a model using the config in the `config/` folder.
|
| 32 |
+
- `config/` - the config file used by `train.py`.
|
| 33 |
+
- `logs/` - all the logs of `train.py`, the saved model, and predictions on the validation set.
|
| 34 |
+
- `data/` - Chemprop uses data in CSV form. During preprocessing in `train.py`, two CSV files are created and saved here.
|
| 35 |
+
- `checkpoints/` - the saved model that is used in `predict.py` is here.
|
| 36 |
|
| 37 |
- `src/` - Core model & preprocessing logic:
|
| 38 |
+
- `data.py` - SMILES preprocessing pipeline
|
| 39 |
+
- `evaluation.py` - computes the ROC AUC metric from a CSV file
|
| 40 |
+
- `train_model.py` - trains a single model
|
| 41 |
|
| 42 |
# Quickstart with Spaces
|
| 43 |
|
|
|
|
| 47 |
|
| 48 |
- Click "Duplicate this Space" (top-right corner).
|
| 49 |
|
| 50 |
+
- Create a `.env` according to `.example.env`.
|
| 51 |
+
|
| 52 |
- Modify `src/` for your preprocessing pipeline and model class
|
| 53 |
|
| 54 |
- Modify `predict()` inside `predict.py` to perform model inference while keeping the function skeleton unchanged to remain compatible with the leaderboard.
|
| 55 |
|
| 56 |
+
- Modify `train.py` according to your model and preprocessing pipeline.
|
| 57 |
+
|
| 58 |
+
- Modify the file inside `config/` to contain all hyperparameters that are set in `train.py`.
|
| 59 |
+
|
| 60 |
That’s it, your model will be available as an API endpoint for the Tox21 Leaderboard.
|
| 61 |
|
| 62 |
# Installation
|
|
|
|
| 96 |
|
| 97 |
# Notes
|
| 98 |
|
| 99 |
+
- Adapting `predict.py`, `train.py`, `config/`, and `checkpoints/` is required for leaderboard submission.
|
| 100 |
|
| 101 |
+
- Preprocessing (here inside `src/data.py`) must be done inside `predict.py`, not just `train.py`.
|
checkpoints/best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:409e15603b089e53efb7358cc3383a1b590c599299754daf2d59a700046e9c0b
|
| 3 |
+
size 2844412
|
assets/best1.pt → checkpoints/best_chemprop_model.pt
RENAMED
|
File without changes
|
config/config.toml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config-path = train\config.toml
|
| 2 |
+
data-path = tox21_data\combined_with_testset.csv
|
| 3 |
+
output-dir = train
|
| 4 |
+
target-columns = [NR-AhR, NR-AR, NR-AR-LBD, NR-Aromatase, NR-ER, NR-ER-LBD, NR-PPAR-gamma, SR-ARE, SR-ATAD5, SR-HSE, SR-MMP, SR-p53]
|
| 5 |
+
splits-column = split
|
| 6 |
+
task-type = classification
|
| 7 |
+
patience = 8
|
| 8 |
+
depth = 3
|
| 9 |
+
dropout = 0.3
|
| 10 |
+
ffn-hidden-dim = 400
|
| 11 |
+
ffn-num-layers = 2
|
| 12 |
+
message-hidden-dim = 400
|
| 13 |
+
batch-size = 32
|
| 14 |
+
epochs = 50
|
| 15 |
+
init-lr = 0.0007
|
| 16 |
+
max-lr = 0.0015
|
| 17 |
+
final-lr = 0.01
|
| 18 |
+
aggregation = mean
|
| 19 |
+
num-workers = 0
|
| 20 |
+
accelerator = auto
|
| 21 |
+
devices = auto
|
| 22 |
+
rxn-mode = REAC_DIFF
|
| 23 |
+
multi-hot-atom-featurizer-mode = V2
|
| 24 |
+
frzn-ffn-layers = 0
|
| 25 |
+
ensemble-size = 1
|
| 26 |
+
aggregation-norm = 100
|
| 27 |
+
activation = RELU
|
| 28 |
+
multiclass-num-classes = 3
|
| 29 |
+
atom-ffn-hidden-dim = 300
|
| 30 |
+
atom-ffn-num-layers = 1
|
| 31 |
+
atom-multiclass-num-classes = 3
|
| 32 |
+
bond-ffn-hidden-dim = 300
|
| 33 |
+
bond-ffn-num-layers = 1
|
| 34 |
+
bond-multiclass-num-classes = 3
|
| 35 |
+
atom-constrainer-ffn-hidden-dim = 300
|
| 36 |
+
atom-constrainer-ffn-num-layers = 1
|
| 37 |
+
bond-constrainer-ffn-hidden-dim = 300
|
| 38 |
+
bond-constrainer-ffn-num-layers = 1
|
| 39 |
+
v-kl = 0.0
|
| 40 |
+
eps = 1e-08
|
| 41 |
+
alpha = 0.1
|
| 42 |
+
tracking-metric = val_loss
|
| 43 |
+
warmup-epochs = 2
|
| 44 |
+
split = RANDOM
|
| 45 |
+
split-sizes = [0.8, 0.1, 0.1]
|
| 46 |
+
split-key-molecule = 0
|
| 47 |
+
num-replicates = 1
|
| 48 |
+
data-seed = 0
|
predict.py
CHANGED
|
@@ -2,7 +2,7 @@ import torch
|
|
| 2 |
import csv
|
| 3 |
import subprocess
|
| 4 |
|
| 5 |
-
from
|
| 6 |
|
| 7 |
def predict(smiles_list):
|
| 8 |
"""
|
|
@@ -37,7 +37,7 @@ def predict(smiles_list):
|
|
| 37 |
print(f"Received {len(smiles_list)} SMILES strings")
|
| 38 |
|
| 39 |
# put smiles into csv
|
| 40 |
-
with open("./
|
| 41 |
writer = csv.writer(f)
|
| 42 |
writer.writerow(["smiles"]) # header
|
| 43 |
for smi in clean_smiles:
|
|
@@ -45,10 +45,9 @@ def predict(smiles_list):
|
|
| 45 |
# predict
|
| 46 |
command = [
|
| 47 |
"chemprop", "predict",
|
| 48 |
-
"--test-path", "
|
| 49 |
-
"--model-path", "
|
| 50 |
-
"--smiles-columns", "smiles"
|
| 51 |
-
"--preds-path", "src/preds.csv"
|
| 52 |
]
|
| 53 |
|
| 54 |
# Run the command
|
|
@@ -56,10 +55,10 @@ def predict(smiles_list):
|
|
| 56 |
|
| 57 |
# create results dictionary from predictions
|
| 58 |
|
| 59 |
-
csv_path = "./
|
| 60 |
|
| 61 |
predictions = {}
|
| 62 |
-
with open(
|
| 63 |
reader = csv.DictReader(f)
|
| 64 |
rows = list(reader)
|
| 65 |
target_names = [col for col in reader.fieldnames if col != "smiles"]
|
|
@@ -78,6 +77,3 @@ def predict(smiles_list):
|
|
| 78 |
|
| 79 |
return predictions
|
| 80 |
|
| 81 |
-
# smiles = pd.read_csv("tox21_test.csv")["smiles"].tolist()
|
| 82 |
-
# predict(smiles)
|
| 83 |
-
|
|
|
|
| 2 |
import csv
|
| 3 |
import subprocess
|
| 4 |
|
| 5 |
+
from data import create_clean_smiles
|
| 6 |
|
| 7 |
def predict(smiles_list):
|
| 8 |
"""
|
|
|
|
| 37 |
print(f"Received {len(smiles_list)} SMILES strings")
|
| 38 |
|
| 39 |
# put smiles into csv
|
| 40 |
+
with open("./data/smiles.csv", "w", newline="") as f:
|
| 41 |
writer = csv.writer(f)
|
| 42 |
writer.writerow(["smiles"]) # header
|
| 43 |
for smi in clean_smiles:
|
|
|
|
| 45 |
# predict
|
| 46 |
command = [
|
| 47 |
"chemprop", "predict",
|
| 48 |
+
"--test-path", "data/smiles.csv",
|
| 49 |
+
"--model-path", "checkpoints/best.pt",
|
| 50 |
+
"--smiles-columns", "smiles"
|
|
|
|
| 51 |
]
|
| 52 |
|
| 53 |
# Run the command
|
|
|
|
| 55 |
|
| 56 |
# create results dictionary from predictions
|
| 57 |
|
| 58 |
+
csv_path = "./assets/preds.csv"
|
| 59 |
|
| 60 |
predictions = {}
|
| 61 |
+
with open(csv_path, "r", newline="") as f:
|
| 62 |
reader = csv.DictReader(f)
|
| 63 |
rows = list(reader)
|
| 64 |
target_names = [col for col in reader.fieldnames if col != "smiles"]
|
|
|
|
| 77 |
|
| 78 |
return predictions
|
| 79 |
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
|
|
|
| 3 |
torch==2.3.0
|
| 4 |
-
torch-geometric==2.6.1
|
| 5 |
-
numpy==1.26.2
|
| 6 |
-
pandas==2.2.2
|
| 7 |
-
rdkit==2024.3.6
|
| 8 |
pydantic
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
fastapi
|
| 2 |
uvicorn[standard]
|
| 3 |
+
python-dotenv
|
| 4 |
torch==2.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
pydantic
|
| 6 |
+
pandas==2.2.2
|
| 7 |
+
numpy==1.26.2
|
| 8 |
+
scikit-learn==1.7.1
|
| 9 |
+
rdkit-pypi
|
| 10 |
+
datasets
|
| 11 |
+
|
src/{preprocess.py → data.py}
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
|
| 3 |
from rdkit import Chem
|
| 4 |
from rdkit.Chem.MolStandardize import rdMolStandardize
|
| 5 |
from rdkit import Chem
|
| 6 |
import numpy as np
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def create_clean_smiles(smiles_list: list[str]) -> tuple[list[str], np.ndarray]:
|
| 9 |
"""
|
|
@@ -36,4 +36,77 @@ def create_clean_smiles(smiles_list: list[str]) -> tuple[list[str], np.ndarray]:
|
|
| 36 |
print(f"Failed to clean {smi}: {e}")
|
| 37 |
valid_mask.append(False)
|
| 38 |
|
| 39 |
-
return clean_smis, np.array(valid_mask, dtype=bool)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from rdkit import Chem
|
| 2 |
from rdkit.Chem.MolStandardize import rdMolStandardize
|
| 3 |
from rdkit import Chem
|
| 4 |
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
|
| 8 |
def create_clean_smiles(smiles_list: list[str]) -> tuple[list[str], np.ndarray]:
|
| 9 |
"""
|
|
|
|
| 36 |
print(f"Failed to clean {smi}: {e}")
|
| 37 |
valid_mask.append(False)
|
| 38 |
|
| 39 |
+
return clean_smis, np.array(valid_mask, dtype=bool)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def clean_smiles_in_csv(input_csv: str, output_csv: str, smiles_col: str = "smiles", target_cols: list[str] | None = None):
|
| 43 |
+
"""
|
| 44 |
+
Reads a CSV, cleans SMILES, and saves only valid cleaned rows with all target columns to a new CSV.
|
| 45 |
+
"""
|
| 46 |
+
# Load dataset
|
| 47 |
+
df = pd.read_csv(input_csv)
|
| 48 |
+
if smiles_col not in df.columns:
|
| 49 |
+
raise ValueError(f"'{smiles_col}' column not found in CSV.")
|
| 50 |
+
|
| 51 |
+
# Infer target columns if not specified
|
| 52 |
+
if target_cols is None:
|
| 53 |
+
target_cols = [c for c in df.columns if c != smiles_col]
|
| 54 |
+
keep_cols = target_cols + ["split"]
|
| 55 |
+
# Validate target columns
|
| 56 |
+
missing_targets = [c for c in target_cols if c not in df.columns]
|
| 57 |
+
if missing_targets:
|
| 58 |
+
raise ValueError(f"Missing target columns in CSV: {missing_targets}")
|
| 59 |
+
|
| 60 |
+
# Clean SMILES
|
| 61 |
+
clean_smis, valid_mask = create_clean_smiles(df[smiles_col].tolist())
|
| 62 |
+
|
| 63 |
+
# Keep only valid rows
|
| 64 |
+
df_clean = df.loc[valid_mask, keep_cols].copy()
|
| 65 |
+
df_clean.insert(0, smiles_col, clean_smis) # smiles first column
|
| 66 |
+
|
| 67 |
+
# Save cleaned dataset
|
| 68 |
+
df_clean.to_csv(output_csv, index=False)
|
| 69 |
+
print(f"✅ Cleaned dataset saved to '{output_csv}' ({len(df_clean)} valid molecules).")
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_tox21_split(token, cvfold=None):
    """
    Load the "tschouis/tox21" dataset and return its train/validation frames.

    With ``cvfold=None`` the dataset's own "train" and "validation" splits are
    returned as pandas DataFrames. Otherwise both splits are pooled, rows whose
    ``CVfold`` equals ``cvfold`` become the validation set, and all other rows
    become the training set, minus any molecule whose ``inchikey`` also
    appears in the validation set.

    Args:
        token: Access token forwarded to ``load_dataset``.
        cvfold: Fold id to hold out for validation (converted with ``float``),
            or None to keep the original splits.

    Returns:
        dict with the keys "train" and "validation", each a DataFrame.
    """
    ds = load_dataset("tschouis/tox21", token=token)
    frames = {name: ds[name].to_pandas() for name in ("train", "validation")}

    if cvfold is None:
        return frames

    pooled = pd.concat([frames["train"], frames["validation"]], ignore_index=True)
    fold = float(cvfold)

    # create new splits
    remaining = pooled[pooled.CVfold != fold]
    held_out = pooled[pooled.CVfold == fold]

    # exclude train mols that occur in the validation split
    held_out_keys = set(held_out["inchikey"])
    remaining = remaining[~remaining["inchikey"].isin(held_out_keys)]

    return {
        "train": remaining.reset_index(drop=True),
        "validation": held_out.reset_index(drop=True),
    }
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def get_combined_dataset_csv(token, save_path, cvfold=4):
    """
    Build one CSV containing train, validation and test rows tagged by "split".

    The train/validation frames come from ``get_tox21_split``. The test rows
    are a copy of the validation rows, so every validation molecule appears
    twice in the output: once with split "val" and once with split "test".

    Args:
        token: Access token forwarded to ``get_tox21_split``.
        save_path: Path the combined CSV is written to.
        cvfold: Fold held out for validation/test. Defaults to 4, the value
            that was previously hard-coded.
    """
    datasets = get_tox21_split(token, cvfold=cvfold)

    # Add split column; assign() returns new frames, so the frames handed back
    # by get_tox21_split are left untouched.
    train_df = datasets["train"].assign(split="train")
    val_df = datasets["validation"].assign(split="val")
    test_df = datasets["validation"].assign(split="test")

    # Combine all into one DataFrame
    combined_df = pd.concat([train_df, val_df, test_df], ignore_index=True)

    # Save to a new CSV
    combined_df.to_csv(save_path, index=False)
|
src/evaluation.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sklearn.metrics import roc_auc_score
|
| 4 |
+
|
| 5 |
+
def compute_roc_auc_from_csv(preds_csv: str, labels_csv: str):
    """
    Compute the ROC AUC per shared column and the mean over those columns.

    Columns whose name contains "smiles" (case-insensitive, looked up in the
    predictions file) are dropped from both files. Only columns present in
    both files are scored, in the order they appear in the predictions file.
    Missing labels (NaN) are masked out per column, like a y_mask.

    Args:
        preds_csv: Path of the CSV holding predicted scores.
        labels_csv: Path of the CSV holding ground-truth labels. Rows are
            matched to the predictions by position.

    Returns:
        (auc_array, mean_auc): a float32 array with one AUC per shared column
        (NaN where no label is available or ``roc_auc_score`` raises
        ValueError) and the NaN-ignoring mean of that array (NaN when no
        column could be scored).

    Raises:
        ValueError: If the two files do not have the same number of rows.
    """
    preds = pd.read_csv(preds_csv)
    labels = pd.read_csv(labels_csv)

    # Rows are paired by position, so differing lengths cannot be scored;
    # fail early with a clear message instead of an indexing error later on.
    if len(preds) != len(labels):
        raise ValueError(
            f"Row count mismatch: {len(preds)} prediction rows vs {len(labels)} label rows."
        )

    smiles_cols = [c for c in preds.columns if "smiles" in c.lower()]
    if smiles_cols:
        print(f"🧪 Dropping SMILES columns: {smiles_cols}")
        preds = preds.drop(columns=smiles_cols, errors="ignore")
        labels = labels.drop(columns=smiles_cols, errors="ignore")

    shared_cols = [c for c in preds.columns if c in labels.columns]
    preds = preds[shared_cols].apply(pd.to_numeric, errors="coerce")
    labels = labels[shared_cols].apply(pd.to_numeric, errors="coerce")

    y_pred = preds.to_numpy(dtype=float)
    y_true = labels.to_numpy(dtype=float)

    y_mask = ~np.isnan(y_true)

    auc_list = []
    for i in range(y_true.shape[1]):
        mask_i = y_mask[:, i]
        auc = np.nan
        if mask_i.any():
            try:
                auc = roc_auc_score(y_true[mask_i, i], y_pred[mask_i, i])
            except ValueError:
                # Column cannot be scored; keep it as NaN.
                auc = np.nan
        auc_list.append(auc)

    auc_array = np.array(auc_list, dtype=np.float32)
    # np.nanmean warns on an all-NaN (or empty) input; return NaN directly.
    if np.isnan(auc_array).all():
        mean_auc = np.float32(np.nan)
    else:
        mean_auc = np.nanmean(auc_array)

    return auc_array, mean_auc
|
src/preds.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/smiles.csv
DELETED
|
@@ -1,646 +0,0 @@
|
|
| 1 |
-
smiles
|
| 2 |
-
C[C@]12C=CC(=O)C=C1CC[C@@H]1C2=CC[C@]2(C)C(C(=O)CN3CCN(c4cc(N5CCCC5)nc(N5CCCC5)n4)CC3)CC[C@@H]12.O=C(O)/C=C\C(=O)O
|
| 3 |
-
Nc1ncnc2c1nc(Br)n2C1OC2CO[P@@](=O)([O-])O[C@@H]2C1O.[Na+]
|
| 4 |
-
O=c1c2ccccc2nc2n1CCc1c-2[nH]c2ccccc12
|
| 5 |
-
Cl.Fc1ccc(C(OCCCc2c[nH]cn2)c2ccc(F)cc2)cc1
|
| 6 |
-
Cc1ccc(S(=O)(=O)NC(Cc2ccccc2)C(=O)CCl)cc1
|
| 7 |
-
Cl.Nc1ccc(-c2ccc3ccccc3n2)cc1
|
| 8 |
-
O=C1NC(=O)/C(=C/c2ccc3c(c2)OC(F)(F)O3)S1
|
| 9 |
-
CCc1ccc(/C=C2/SC(=S)NC2=O)cc1
|
| 10 |
-
C=CCc1cccc(C=NNC(=O)CN2CCN(Cc3ccccc3)CC2)c1O
|
| 11 |
-
CCN1/C(=C/C(C)=O)Sc2ccc(OC)cc21
|
| 12 |
-
O=C1NC(=O)c2cc(Nc3ccccc3)c(Nc3ccccc3)cc21
|
| 13 |
-
CCn1c(=O)[nH]c2cc(Cl)c(Cl)cc21
|
| 14 |
-
Clc1ccc(C(c2ccc(Cl)cc2)[n+]2ccn(CC(OCc3ccc(Cl)cc3Cl)c3ccc(Cl)cc3Cl)c2)cc1.[Cl-]
|
| 15 |
-
COc1cc2ncnc(Nc3cccc(Cl)c3)c2cc1OC
|
| 16 |
-
O=Nc1c(-c2c(O)[nH]c3ccccc23)[nH]c2ccccc12
|
| 17 |
-
CCCCCCCCCCC(C)(C)C(=O)Nc1c(OC)cc(OC)cc1OC
|
| 18 |
-
Cn1cc(C2=C(c3ccc(Cl)cc3Cl)C(=O)NC2=O)c2ccccc21
|
| 19 |
-
CCCCCCCCCCCCCCCC(=O)OC(CC(=O)O)C[N+](C)(C)C.[Cl-]
|
| 20 |
-
Cl.O=c1cc(N2CCOCC2)oc2c(-c3ccccc3)cccc12
|
| 21 |
-
CCCCCCCCCCCCCC(=O)O[C@@H]1[C@@H](C)[C@]2(O)C3=CC(C)C(=O)[C@]3(O)CC(CO)=C[C@H]2[C@@H]2C(C)(C)[C@]12OC(C)=O
|
| 22 |
-
C/C=C(/C)C(=O)O[C@H]1C(C)=C2[C@H]([C@@H]1OC(=O)CCCCCCC)[C@@](C)(OC(C)=O)C[C@H](OC(=O)CCC)[C@@]1(O)[C@H]2OC(=O)[C@@]1(C)O
|
| 23 |
-
COc1cc2c(cc1O)CC[C@@H]1[C@@H]2CC[C@]2(C)[C@@H](O)CC[C@@H]12
|
| 24 |
-
C/C(=C\c1ccc(C(=O)O)cc1)c1ccc2c(c1)C(C)(C)CCC2(C)C
|
| 25 |
-
CCCCCCCCc1ccc(-c2ccc(C(=O)O)cc2)cc1
|
| 26 |
-
[Cl-].c1ccc2c(c1)[I+]c1ccccc1-2
|
| 27 |
-
CS(=O)(=O)O.N=C(NC(=O)c1nc(Cl)c(N)nc1N)Nc1ccccc1
|
| 28 |
-
CC[C@@]1(O)C(=O)OCc2c1cc1n(c2=O)Cc2cc3ccccc3nc2-1
|
| 29 |
-
CC(C)(C)NCC(O)COc1cccc2[nH]c(=O)[nH]c12.Cl
|
| 30 |
-
COC1(C2=NCCN2)COc2ccccc2O1.Cl
|
| 31 |
-
CC(=N)NCCCCC(N)C(=O)O.Cl
|
| 32 |
-
N=C(NCCCC(N)C(=O)O)N[N+](=O)[O-]
|
| 33 |
-
NC(CCCCCP(=O)(O)O)C(=O)O
|
| 34 |
-
NC(CCCP(=O)(O)O)C(=O)O
|
| 35 |
-
Cl.NCCCCC(=O)O
|
| 36 |
-
N[C@@]1(C(=O)O)CCC(C(=O)O)C1
|
| 37 |
-
Cl.NC1(C(=O)O)CCCCC1
|
| 38 |
-
C#CC(N)CCC(=O)O
|
| 39 |
-
CCCCCC(O)CCCC(=O)[O-].[Na+]
|
| 40 |
-
Cl.NC1(C(=O)O)CC1
|
| 41 |
-
C=CCC(N)C(=O)O
|
| 42 |
-
N=C(N)NOCCC(N)C(=O)O.O=S(=O)(O)O
|
| 43 |
-
C[C@H](Cc1ccc2c(c1)OC(C(=O)[O-])(C(=O)[O-])O2)NC[C@H](O)c1cccc(Cl)c1.[Na+].[Na+]
|
| 44 |
-
CCc1ccccc1OC[C@@H](O)CN[C@H]1CCc2ccccc2C1.O=C(O)C(=O)O
|
| 45 |
-
CN(CC[C@@H](Oc1ccc(C(F)(F)F)cc1)c1ccccc1)CC(=O)[O-].[Li+]
|
| 46 |
-
CCCCOc1cc(CC2CNC(=O)N2)ccc1OC
|
| 47 |
-
CN1C2CCC1CC(OC(c1ccccc1)c1ccccc1N)C2
|
| 48 |
-
CC(C)CC(NC(=O)C1OC1C(=O)O)C(=O)NCCCCN=C(N)N
|
| 49 |
-
CC(C)[C@H]1CNC(C(=O)O)[C@H]1CC(=O)O
|
| 50 |
-
CC(=O)NC(C(=O)O)C(C)(C)SN=O
|
| 51 |
-
O=C(O)C1CCCNC1C(=O)O
|
| 52 |
-
COc1ccc2c(c1O)-c1cccc3c1[C@@H](C2)N(C)CC3.Cl
|
| 53 |
-
Br.CN1CCc2cc(O)cc3c2[C@H]1Cc1ccc(O)c(O)c1-3
|
| 54 |
-
O=[N+]([O-])c1cccc2c(Br)[nH]nc12
|
| 55 |
-
O=c1[nH]c2cc([N+](=O)[O-])c([N+](=O)[O-])cc2[nH]c1=O
|
| 56 |
-
O=[N+]([O-])c1cc(O)c(O)c([N+](=O)[O-])c1
|
| 57 |
-
Oc1cc2c(cc1O)CN(C(=S)NCCc1ccc(Cl)cc1)CCC2
|
| 58 |
-
Cl.c1ccc(CN2CCC3(CCCc4ccccc43)CC2)cc1
|
| 59 |
-
Cc1cccc(C#Cc2ccccc2)n1.Cl
|
| 60 |
-
Cc1cccc(/C=C/c2ccccc2)n1
|
| 61 |
-
COc1ccc2c(c1)CCC1C2CC[C@@]2(C)C1CC[C@@H]2NCCCCCCn1c(O)ccc1O
|
| 62 |
-
N=S(=O)([O-])Cc1noc2ccccc12.[Na+]
|
| 63 |
-
N#Cc1cc2nc([O-])c([O-])nc2cc1[N+](=O)[O-].[Na+].[Na+]
|
| 64 |
-
COc1ccc(CCn2nnnc2C2(c3ccc(C)cc3)CC2)cc1
|
| 65 |
-
C[C@@H](Cc1ccc(C=O)cc1)NC[C@@H](O)c1ccc(O)c(NCO)c1.C[C@@H](Cc1ccc(C=O)cc1)NC[C@@H](O)c1ccc(O)c(NCO)c1.O=C(O)/C=C/C(=O)O
|
| 66 |
-
CCCCCCCCCC(=O)NC(CN1CCOCC1)[C@H](O)c1ccccc1.Cl
|
| 67 |
-
CCCCCCCCCC[Si](C)(C)CCC(=O)NC(Cc1ccc(C)cc1)c1ccccc1
|
| 68 |
-
CC(C)N(CCC(=O)c1ccc2ccccc2c1)Cc1ccccc1.Cl
|
| 69 |
-
c1ccc(C(NCc2cccnc2)(c2ccccc2)c2ccccc2)cc1
|
| 70 |
-
Cl.O=C(O)C1=CCCN(CCON=C(c2ccccc2)c2ccccc2)C1
|
| 71 |
-
O=c1cc(-c2ccccc2)[nH]c2nc(Cl)ccc12
|
| 72 |
-
O=c1[nH]ccc2c(O)cccc12
|
| 73 |
-
O=C(c1ccc2c(c1)OCCO2)N1CCCCC1
|
| 74 |
-
CN(C(=O)Cc1ccc(Cl)c(Cl)c1)C(CN1CCCC1)c1cccc(OCC(=O)O)c1.Cl
|
| 75 |
-
CC1CCN(CC[C@H]2CCCN2S(=O)(=O)c2cccc(O)c2)CC1.Cl
|
| 76 |
-
Cn1ncc(S(=O)(=O)Cc2ccccc2)c(Br)c1=O
|
| 77 |
-
CC(C)(Sc1ccc(CCN(CCCCC2CCCCC2)C(=O)NC2CCCCC2)cc1)C(=O)O
|
| 78 |
-
Cl.O=C(Cc1ccc(Cl)c(Cl)c1)N1CCCCC1CN1CCCC1
|
| 79 |
-
Brc1ccc([C@H]2CN3CCSC3=N2)cc1.O=C(O)C(=O)O
|
| 80 |
-
O=C(O)c1cc(=O)c2c(Cl)cc(Cl)cc2[nH]1
|
| 81 |
-
CC1CNCCN1S(=O)(=O)c1cccc2cnccc12.Cl.Cl
|
| 82 |
-
CNCCNS(=O)(=O)c1cccc2cnccc12.Cl.Cl
|
| 83 |
-
CCc1cc(Cl)c(OC)c(C(=O)NC[C@@H]2CCN(CC)C2)c1O.Cl
|
| 84 |
-
CNc1cc(NS(=O)(=O)c2ccc(N)cc2)nc(NC)n1.Cl.Cl
|
| 85 |
-
Cl.O=C(N[C@H]1CN2CCC1CC2)c1ccc(Cl)cc1
|
| 86 |
-
O=C(O)CCC(C(=O)O)N1C(=O)c2ccccc2C1=O
|
| 87 |
-
N#CC(=Cc1ccc(O)c(O)c1)C(=O)NCCc1ccccc1
|
| 88 |
-
Cl.c1ccc2sc(C3(N4CCCCC4)CCCCC3)cc2c1
|
| 89 |
-
C=CCc1ccc([N+](C)(C)CCC(=O)CC[N+](C)(C)c2ccc(CC=C)cc2)cc1.[Br-].[Br-]
|
| 90 |
-
CN1CCc2ccccc2Cc2[nH]c3ccccc3c2CC1
|
| 91 |
-
CC[C@H](NC(=O)c1c(C)c(-c2ccccc2)nc2ccccc12)c1ccccc1
|
| 92 |
-
[NH-]S(=O)(=O)c1cccc2c1c([N+](=O)[O-])cc1nc([O-])c(=O)[nH]c12.[Na+].[Na+]
|
| 93 |
-
CCC(=O)N1CCN(C(=O)c2ccccc2)CC1
|
| 94 |
-
N=C(N)NC(=O)c1nc(Cl)c(N2CCCCCC2)nc1N
|
| 95 |
-
Nc1nc(OCc2ccccc2)c2[nH]cnc2n1
|
| 96 |
-
CCCCCC[C@@H]([C@@H](C)O)n1cnc2c(N)ncnc21.Cl
|
| 97 |
-
Oc1ccc(Cn2cc[nH]c2=S)cc1
|
| 98 |
-
CSc1ccccc1N1CCN(CCCCCC(=O)NC2CCCc3ccccc32)CC1.Cl
|
| 99 |
-
CC[n+]1c(/C=C/C=C2/N(C)c3ccccc3C2(C)C)sc2ccccc21.[I-]
|
| 100 |
-
C[N+]1(C)CCN(c2ccccc2)CC1.[I-]
|
| 101 |
-
Nc1ccc(CCN2CCN(c3cccc(C(F)(F)F)c3)CC2)cc1
|
| 102 |
-
Cl.c1ccc2c(N3CCNCC3)cccc2c1
|
| 103 |
-
Cl.c1ccc2nc(C3=NCCN3)ccc2c1
|
| 104 |
-
Cc1nccc2c1[nH]c1ccccc12
|
| 105 |
-
O=C(c1ccc(Cl)cc1)N1CCN(c2ccc([N+](=O)[O-])c3ncccc23)CC1
|
| 106 |
-
C=CCNc1ncnc2ccc(Br)cc12
|
| 107 |
-
COc1ccccc1N1CCN(CCCCNC(=O)c2ccc3ccccc3c2)CC1
|
| 108 |
-
COc1cccc(C(=O)NCCN2CCN(c3ccc(Cl)cc3)CC2)c1
|
| 109 |
-
CC[N+](CC)(CC)CC(=O)Nc1c(C)cccc1C.[Br-]
|
| 110 |
-
Cc1cc(NC2CCCCC2)nc(-n2nc(C)cc2C)n1
|
| 111 |
-
O=c1onc2cnc3ccccc3n12
|
| 112 |
-
Cc1c(=O)n(C)c(O)c2[nH]cnc12
|
| 113 |
-
CC(O)C(O)c1cnc2[nH]c(N)nc(=O)c2n1
|
| 114 |
-
Cl.c1ccc(CN(Cc2ncc[nH]2)c2ccccc2)cc1
|
| 115 |
-
Cc1cc(NC(=O)Nc2ccc3c(ccn3C)c2)sn1
|
| 116 |
-
Cc1nn(-c2ccccc2)c(C)c1C=NN1CCN(Cc2ccccc2)CC1
|
| 117 |
-
Cc1cc2c(cc1Cl)N(C(=O)Nc1ccnc3ccccc13)CC2
|
| 118 |
-
Clc1ccc2c(c1)N=C(N1CCNCC1)c1ccccc1N2
|
| 119 |
-
Cc1cccc(-c2[nH]c(C(C)(C)C)nc2-c2ccc3nccnc3c2)n1
|
| 120 |
-
O=C(O)c1ccccc1Nc1ccccc1
|
| 121 |
-
CC(C)(C)c1ccc(/C=C/C(=O)Nc2ccc3c(c2)OCCO3)cc1
|
| 122 |
-
CCCN(CC1CC1)c1nc(C)nc(Nc2c(Cl)cc(Cl)cc2Cl)c1Cl
|
| 123 |
-
Cn1cnc2c(NCc3ccccc3)nc(NCCO)nc21
|
| 124 |
-
CCCn1c(=O)c2[nH]c(-c3ccc(S(=O)(=O)O)cc3)nc2n(CCC)c1=O
|
| 125 |
-
C#CCn1c(=O)c2c(ncn2C)n(C)c1=O
|
| 126 |
-
CC(C)Cn1c(=O)n(C)c(=O)c2[nH]cnc21
|
| 127 |
-
Cn1c(=O)c2[nH]c(-c3ccccc3)nc2n(C)c1=O
|
| 128 |
-
Oc1ccc(-c2nc(-c3ccc(F)cc3)c(-c3ccncc3)[nH]2)cc1
|
| 129 |
-
OC[C@H]1O[C@@H](c2nc3cc(Cl)c(Cl)cc3[nH]2)[C@H](O)[C@@H]1O
|
| 130 |
-
Cn1c(=O)c2[nH]c(C3CCCC3)nc2n(C)c1=O
|
| 131 |
-
CCCn1c(=O)c2[nH]c(C3CCCC3)nc2n(CCCOC(=O)c2ccc(S(=O)(=O)F)cc2)c1=O
|
| 132 |
-
O=C(Nc1cccc(C(=O)Nc2ccc(S(=O)(=O)[O-])c3cc(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c23)c1)Nc1cccc(C(=O)Nc2ccc(S(=O)(=O)[O-])c3cc(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c23)c1.[Na+].[Na+].[Na+].[Na+].[Na+].[Na+]
|
| 133 |
-
O=c1[nH]c2cc(C(F)(F)F)ccc2n1-c1cc(C(F)(F)F)ccc1O
|
| 134 |
-
COc1ccccc1N1CCN(CCN(C(=O)c2ccc(F)cc2)c2ccccn2)CC1.Cl.Cl
|
| 135 |
-
O=c1[nH]c2ccccc2c2ccccc12
|
| 136 |
-
Nc1ncnc2[nH]nc(Nc3ccc(F)cc3)c12
|
| 137 |
-
CC1(C)C(=O)N(CCN2CCC(C(=O)c3ccc(F)cc3)CC2)c2ccccc21.Cl
|
| 138 |
-
O=c1onc2n1-c1cc(Br)ccc1OC2
|
| 139 |
-
CC(=O)NNC(=O)N1Cc2ccccc2Oc2ccc(Cl)cc21
|
| 140 |
-
COc1ccccc1N1CCN(CCN(C(=O)C2CCCCC2)c2ccccn2)CC1.O=C(O)/C=C\C(=O)O
|
| 141 |
-
COc1ccc(NC(=O)c2ccc(-c3ccc(-c4noc(C)n4)cc3C)cc2)cc1N1CCN(C)CC1.Cl
|
| 142 |
-
O=C(O)c1ccccc1Nc1cccc(OCc2ccc3ccccc3n2)c1
|
| 143 |
-
N#C/C(=C\c1ccc(-c2cc(Cl)ccc2Cl)o1)C(=O)Nc1cccc2ncccc12
|
| 144 |
-
COc1cc2nc(N3CCCN(C)CC3)nc(NC3CCN(Cc4ccccc4)CC3)c2cc1OC.Cl.Cl.Cl
|
| 145 |
-
COc1ccc2c(c1)c(CC(=O)O)c(C)n2C(=O)c1ccccc1
|
| 146 |
-
O=c1[nH]c2cc(Cl)ccc2c(O)c1-c1ccc(Oc2ccccc2)cc1
|
| 147 |
-
Nc1nc2ccc(Cl)cc2c2nc(-c3ccco3)nn12
|
| 148 |
-
Nc1nc2c(cnn2CCc2ccccc2)c2nc(-c3ccco3)nn12
|
| 149 |
-
O=C1NC(=O)C(c2ccccc2[N+](=O)[O-])C1=Nc1ccc(O)c(Cl)c1
|
| 150 |
-
O=C1Cc2c([nH]c3ccc(Br)cc23)-c2ccccc2N1
|
| 151 |
-
O=S1(=O)N(CCN2CC=C(c3c[nH]c4cc(F)ccc34)CC2)c2cccc3c2N1CCC3
|
| 152 |
-
CCCc1c(-c2ccc(O)cc2)nn(-c2ccc(O)cc2)c1-c1ccc(O)cc1
|
| 153 |
-
OCc1ccc(-c2nn(Cc3ccccc3)c3ccccc23)o1
|
| 154 |
-
CN1CC[C@@]2(C)c3cc(O)ccc3N(C)C12.O=C(O)/C=C\C(=O)O
|
| 155 |
-
CCOC(=O)C(C)Oc1cccc2c(=O)n(CC(=O)Nc3ccc4c(c3)OCCO4)ccc12
|
| 156 |
-
CS(=O)(=O)O.Cn1cc(C(F)(F)F)nc1-c1ccc(OCC(O)CNCCOc2ccc(O)c(C(N)=O)c2)cc1
|
| 157 |
-
COc1cccc(OC)c1OCCNCC1CSc2ccccc2O1.Cl
|
| 158 |
-
N#CSc1ccc2oc(C3=NCCN3)cc2c1
|
| 159 |
-
O=C(O)C1CCN1
|
| 160 |
-
O=C(O)C1CCNCC1
|
| 161 |
-
NC(CS(=O)O)C(=O)O
|
| 162 |
-
CCCCCCCCCCCCCCSCC(=O)O
|
| 163 |
-
C[N+](C)(C)CCOC(=O)CBr.[Br-]
|
| 164 |
-
Br.CO[C@H]1CC=C2CCN3CCC4=C(CC(=O)OC4)[C@]23C1
|
| 165 |
-
CCCCCCC(=O)OC1CC(C)(C)NC(C)(C)C1.Cl
|
| 166 |
-
CN1CC(O)C(O)C(O)C1CO
|
| 167 |
-
Cl.OCC1NCC(O)C(O)C1O
|
| 168 |
-
CCCC(=O)NCCc1c2n(c3ccc(OC)cc13)CCCc1ccccc1-2
|
| 169 |
-
O=c1cc(-c2ccccc2F)[nH]c2cc3c(cc12)OCO3
|
| 170 |
-
Nc1nc(Cl)nc2c1ncn2[C@@H]1O[C@H](CO)C(O)[C@H]1O
|
| 171 |
-
OC[C@H]1O[C@@H](n2cnc3c(NC4CCCC4)ncnc32)[C@H](O)C1O
|
| 172 |
-
OC[C@H]1O[C@@H](n2cnc3cncnc32)[C@H](O)[C@@H]1O
|
| 173 |
-
CNc1ncnc2c1ncn2[C@H]1C[C@H](OP(=O)([O-])O)[C@@H](COP(=O)([O-])O)O1.[NH4+].[NH4+]
|
| 174 |
-
Nc1ncnc2c1ncn2C1CCCO1
|
| 175 |
-
Nc1nc(=O)c2nc(Br)n(C3OC4CO[P@@](=O)([O-])O[C@@H]4C3O)c2[nH]1.[Na+]
|
| 176 |
-
O=C(O)c1cc([N+](=O)[O-])ccc1NCCCc1ccccc1
|
| 177 |
-
C[N+]1(C)C2CCC1CC(OC(=O)c1c[nH]c3ccccc13)C2.[I-]
|
| 178 |
-
CC(=O)O[C@H]1Cc2ccccc2N(C(N)=O)c2ccccc21
|
| 179 |
-
O=c1[nH]c(=S)[nH]c(-c2ccccc2)c1Cc1c(O)ccc2ccccc12
|
| 180 |
-
CC#CCOC(=O)c1c(C)nc2sc3c(c2c1N)CCC(O)C3
|
| 181 |
-
N#Cc1cc([N+](=O)[O-])c2sc(C(N)=O)[n+]([O-])c2c1
|
| 182 |
-
Cc1cnc(NC(=O)C2=C([O-])c3ccccc3S(=O)(=O)N2C)s1.[Na+]
|
| 183 |
-
Nc1c(S(=O)(=O)[O-])cc(Nc2ccc(Nc3nc(Cl)nc(Nc4ccc(S(=O)(=O)[O-])cc4)n3)c(S(=O)(=O)[O-])c2)c2c1C(=O)c1ccccc1C2=O.[Na+].[Na+].[Na+]
|
| 184 |
-
COc1cc(/C=C(\C#N)C(=O)Nc2nnc(C(F)(F)F)s2)ccc1OCc1ccc(C(F)(F)F)cc1C(F)(F)F
|
| 185 |
-
CCCCOC(=O)NS(=O)(=O)c1sc(CC(C)C)cc1-c1ccc(Cn2c(CC)nc3c(C)cc(C)nc32)cc1
|
| 186 |
-
Cc1nc(N=Nc2ccc(S(=O)(=O)[O-])cc2S(=O)(=O)[O-])c(COP(=O)([O-])[O-])c(C=O)c1O.[Na+].[Na+].[Na+].[Na+]
|
| 187 |
-
CN1CCC(c2c[nH]c3ccc(O)cc23)CC1.O=C(O)/C=C\C(=O)O
|
| 188 |
-
COc1cc2c(CCNC(C)=O)c[nH]c2cc1O
|
| 189 |
-
O=C(O)Cc1c[nH]c2ccc(O)cc12
|
| 190 |
-
CC(N)Cc1c[nH]c2ccc(OCc3cccs3)cc12.Cl
|
| 191 |
-
COc1ccc2[nH]cc(CCN)c2c1.Cl
|
| 192 |
-
CCN(CC)c1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])C(Br)(Br)P(=O)([O-])O)[C@@H](O)[C@H]1O.[Na+].[Na+].[Na+]
|
| 193 |
-
CSc1nc(N)c2ncn([C@@H]3O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])C(O)[C@H]3O)c2n1.[Na+].[Na+].[Na+].[Na+]
|
| 194 |
-
Nc1ncnc2c1nc(Sc1ccc(Cl)cc1)n2[C@H]1OC2CO[P@@](=O)([O-])O[C@@H]2C1O.[Na+]
|
| 195 |
-
Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])OP(=O)(O)OC[C@@H]2O[C@H](n3cnc4c(N)ncnc43)[C@@H](O)C2O)C(O)[C@H]1O.[NH4+].[NH4+].[NH4+]
|
| 196 |
-
NC(=O)C1=CN(C2OC(COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])C3O)C(O)C2O)C=CC1.[Na+].[Na+].[Na+].[Na+]
|
| 197 |
-
NC(=S)c1ccc[n+]([C@@H]2O[C@@H](COP(=O)([O-])OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])O)C3O)C(O)C2O)c1.[Na+]
|
| 198 |
-
Nc1ccc(CCNc2ncnc3c2ncn3C2OC(CO)C(O)C2O)cc1
|
| 199 |
-
CNC(=O)C1O[C@@H](n2cnc3c(NCc4ccc(N)cc4)ncnc32)[C@H](O)C1O
|
| 200 |
-
CCNC(=O)C1O[C@@H](n2cnc3c(N)nc(NCCc4ccc(CCC(=O)O)cc4)nc32)[C@H](O)C1O.Cl
|
| 201 |
-
CCNC(=O)C1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)C1O
|
| 202 |
-
C[C@H](Cc1ccccc1)Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO)C(O)[C@H]1O
|
| 203 |
-
NCCc1c[nH]c2ccc(C(N)=O)cc12.O=C(O)/C=C\C(=O)O
|
| 204 |
-
O=C([O-])Cc1ccccc1Nc1c(Cl)cccc1Cl.[Na+]
|
| 205 |
-
COC(=O)C1=C(C)N=C(C)C([N+](=O)[O-])C1c1ccccc1C(F)(F)F
|
| 206 |
-
CC(=O)NC(Cc1c[nH]c2ccccc12)C(=O)OCc1cc(C(F)(F)F)cc(C(F)(F)F)c1
|
| 207 |
-
CC(C)C1C(=O)C(C(N)=O)C(=O)[C@@]2(O)C(=O)C3C(=O)c4c(O)ccc(N(C)C)c4C[C@H]3C[C@@H]12.Cl
|
| 208 |
-
Cl.Oc1ccc2c3c1O[C@H]1c4[nH]c5ccccc5c4C[C@@]4(O)C(C2)N(CC2CC2)CCC314
|
| 209 |
-
COc1cc(N)c(Cl)cc1C(=O)OCCN1CCCCC1
|
| 210 |
-
CCN(CC)CCOC(=O)c1cc(Cl)c(N)cc1OC.Cl
|
| 211 |
-
CCCCCCCC/C=C\CCCCCCCC(=O)NCCO
|
| 212 |
-
C[N+](C)(C)CC#CCN1CCCC1=O.[I-]
|
| 213 |
-
CCN1C(=O)C=CC1=O
|
| 214 |
-
Cl.NOCC(=O)O.NOCC(=O)O
|
| 215 |
-
CC(=O)C(C)=NO
|
| 216 |
-
CC(=O)NBr
|
| 217 |
-
NC(=O)C(=O)[O-].[Na+]
|
| 218 |
-
CCN(CC)CCCCCCCCOC(=O)c1cc(OC)c(OC)c(OC)c1.Cl
|
| 219 |
-
O=Nc1ccc2oc(=O)ccc2c1
|
| 220 |
-
COc1cccc(-c2cc(=O)c3ccccc3o2)c1N
|
| 221 |
-
CC(=O)c1c(O)c(C)c(O)c(Cc2c(O)c3c(c(C(=O)/C=C/c4ccccc4)c2O)OC(C)(C)C=C3)c1O
|
| 222 |
-
O=C1C(O)c2c(O)cc(O)cc2OC1c1ccc(O)c(O)c1
|
| 223 |
-
COC[C@H]1OC(=O)c2coc3c2[C@@]1(C)C1=C(C3=O)C2CCC(=O)[C@@]2(C)C[C@H]1OC(C)=O
|
| 224 |
-
COC(=O)C1=CO[C@@H](O)[C@@H]2C(CO)=CC[C@H]12
|
| 225 |
-
C=C1C[C@@]23CC[C@H]4[C@@](C)(CCC[C@@]4(C)C(=O)OC4OC(CO)C(O)C(O)C4O)[C@@H]2CC[C@]1(OC1OC(CO)C(O)C(O)C1OC1OC(CO)C(O)C(O)C1O)C3
|
| 226 |
-
C=C(C)[C@H]1C2C(=O)O[C@H]1[C@H]1OC(=O)[C@@]34O[C@@H]3C[C@]2(O)[C@@]14C.CC(C)(O)[C@H]1C2C(=O)O[C@H]1[C@H]1OC(=O)[C@@]34O[C@@H]3C[C@]2(O)[C@@]14C
|
| 227 |
-
CC1(C(=O)O)C2CCC(O2)C1(C)C(=O)O
|
| 228 |
-
OCC1O[C@H](O)C(Cl)C(O)C1O
|
| 229 |
-
Cl.Cl.Cl.NCCC1CCCC(CCN)N1
|
| 230 |
-
Cl.Cl.Cl.NCCCCNCCCN
|
| 231 |
-
S=C([S-])N1CCCC1.[NH4+]
|
| 232 |
-
CC(=O)SCC[N+](C)(C)C.[Cl-]
|
| 233 |
-
NCCCS(=O)(=O)[O-].[Na+]
|
| 234 |
-
C[N+](C)(C)CC=O.[Cl-]
|
| 235 |
-
Cl.Cl.NCCSSCCN
|
| 236 |
-
Br.CCSC(=N)N
|
| 237 |
-
[Li]Cl
|
| 238 |
-
CSC(=N)N.CSC(=N)N.O=S(=O)(O)O
|
| 239 |
-
Cl.Cl.NCCCCN
|
| 240 |
-
O=c1ccc2c(OCCCCc3ccccc3)c3ccoc3cc2o1
|
| 241 |
-
CCCC[C@@]1(C2CCCC2)Cc2cc(OCC(=O)O)c(Cl)c(Cl)c2C1=O
|
| 242 |
-
C[C@]12CCC3C(CC[C@H]4C[C@H](O)CC[C@]34C)C1CCC2C(=O)CO
|
| 243 |
-
N#CC(=Cc1ccc(O)cc1)C(=N)C(C#N)C#N
|
| 244 |
-
N#CC(=Cc1ccc(O)c(O)c1)C(=O)NCCCNC(=O)C(C#N)=Cc1ccc(O)c(O)c1
|
| 245 |
-
COc1ccc(C=C(C#N)C#N)cc1
|
| 246 |
-
CC(C)(C)c1cc(C=C(C#N)C(N)=S)cc(C(C)(C)C)c1O
|
| 247 |
-
N#CC(=Cc1ccc(O)c(O)c1)C(N)=S
|
| 248 |
-
N#CC(C#N)=CC(=N)C(C#N)c1cc(O)c(O)c(O)c1
|
| 249 |
-
CC(C)(C)c1cc(C=C(C#N)C#N)cc(C(C)(C)C)c1O
|
| 250 |
-
CC(C)NP(=O)(NC(C)C)OP(=O)(NC(C)C)NC(C)C
|
| 251 |
-
O=C(O)CCC/C=C\C[C@@H]1[C@@H](/C=C/[C@H](O)C2Cc3ccccc3C2)[C@@H](F)C[C@@H]1O
|
| 252 |
-
CC(=O)C1CCC2C3CC=C4C[C@@H](OS(=O)(=O)[O-])CC[C@]4(C)C3CC[C@]12C.[Na+]
|
| 253 |
-
C1CNCCN1.COc1cc(C(O)CO)ccc1O.COc1cc(C(O)CO)ccc1O
|
| 254 |
-
CC1CCN=C(N)S1.Cl
|
| 255 |
-
NC(C(=O)O)c1cc(O)cc(O)c1
|
| 256 |
-
O=C1/C(=C/c2ccsc2)CC/C1=C\c1cccs1
|
| 257 |
-
C=C(C)C1CC=C(C(=O)O)CC1
|
| 258 |
-
CCCCCCCCCCC/C=C\C(C)(C)/C=C\CCCC(=O)O
|
| 259 |
-
CCCCCCC/C=C\CCCCCCCCC(=O)O
|
| 260 |
-
NC(C(=O)O)c1cc(=O)[nH]o1
|
| 261 |
-
C=C/C(C)=C/[C@@]1(C)SC(=O)C(C)C1=O
|
| 262 |
-
Br.C#CCOC(=O)C1=CCCN(C)C1
|
| 263 |
-
O=C(O)C1CN(CCCP(=O)(O)O)CCN1
|
| 264 |
-
Cl.O=C(O)C1=CCNCC1
|
| 265 |
-
CP(=O)([O-])C1=CC[NH2+]CC1
|
| 266 |
-
Cl.NCC(O)c1cc(O)c(O)cc1F
|
| 267 |
-
Cl.O=C(O)Cc1c[nH]cn1
|
| 268 |
-
Br.Br.N=C(N)SCCc1c[nH]cn1
|
| 269 |
-
COC(=O)C(C)(N)Cc1ccc(O)cc1.Cl
|
| 270 |
-
Cl.NC1C=CC=C(C(=O)O)C1
|
| 271 |
-
NC(Cc1cccc(-c2ccccc2CP(=O)(O)O)c1)C(=O)O
|
| 272 |
-
Cl.O[Si](CCCN1CCCCC1)(c1ccc(F)cc1)C1CCCCC1
|
| 273 |
-
C[C@@H](N)C1CCC(C(=O)Nc2ccncc2)CC1.Cl.Cl
|
| 274 |
-
CNCCc1ccc(O)c(O)c1.Cl
|
| 275 |
-
Br.CCCN(CCC)CCc1ccc(O)c(O)c1
|
| 276 |
-
C[C@@H](CN1CCC(Cc2ccccc2)CC1)[C@@H](O)c1ccc(O)cc1.Cl
|
| 277 |
-
O=C(O)CNC(=O)C(CS)Cc1ccccc1
|
| 278 |
-
C#CCN[C@H](C)Cc1ccccc1.Cl
|
| 279 |
-
Cl.NCCc1ccc(S(=O)(=O)F)cc1
|
| 280 |
-
CC(N)c1cccc(Cl)c1Cl.Cl
|
| 281 |
-
NC(=O)c1cccc(N)c1
|
| 282 |
-
CC(=O)O.N=C(N)N/N=C/c1c(Cl)cccc1Cl
|
| 283 |
-
Br.CCCN(CCC)[C@@H]1CCc2ccc(O)cc2C1
|
| 284 |
-
Br.CCCN(CCC)C1CCc2cccc(O)c2C1
|
| 285 |
-
CCCN(CCc1ccccc1)C1CCc2c(O)cccc2C1.Cl
|
| 286 |
-
CCCN(CCC)[C@H]1CCc2c(F)ccc(O)c2C1.Cl
|
| 287 |
-
C[N+](C)(C)CC#CCOC(=O)Nc1cccc(Cl)c1.[Cl-]
|
| 288 |
-
Cl.N=C(N)N=C(N)Nc1cccc(Cl)c1
|
| 289 |
-
Cl.Clc1cc(I)cc(Cl)c1NC1=NCCN1
|
| 290 |
-
S=C(Nc1ccccc1)Nc1nccs1
|
| 291 |
-
N=C(N)N=C(N)Nc1ccccc1
|
| 292 |
-
Brc1c(Br)c(Br)c2[nH]nnc2c1Br
|
| 293 |
-
CN1CCc2cc(Cl)c(O)cc2C(c2ccccc2)C1.Cl
|
| 294 |
-
Br.Cc1cccc(C2CN(C)CCc3c2cc(O)c(O)c3Cl)c1
|
| 295 |
-
Br.C=CCN1CCc2cc(O)c(O)cc2C(c2ccccc2)C1
|
| 296 |
-
Cl.OC1CCCCC1N1CCC(c2ccccc2)CC1
|
| 297 |
-
Cc1ccccc1/C=C/C1=NCCN1.O=C(O)C(=O)O
|
| 298 |
-
O=C([O-])C[C@H](O)CC(O)/C=C/c1c(C2CC2)nc2ccccc2c1-c1ccc(F)cc1.O=C([O-])C[C@H](O)C[C@H](O)/C=C/c1c(C2CC2)nc2ccccc2c1-c1ccc(F)cc1.[Ca+2]
|
| 299 |
-
C[N+](C)(C)CCCCCC[N+](C)(C)C.[Cl-].[Cl-]
|
| 300 |
-
[Cl-].c1ccc(C[P+](c2ccccc2)(c2ccccc2)c2ccccc2)cc1
|
| 301 |
-
CN(C)c1ccc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccccc2)cc1.[Cl-]
|
| 302 |
-
CCCCCCCCCCCCCC[N+](C)(C)Cc1ccccc1.[Cl-]
|
| 303 |
-
CC[N+](CC)(CC)Cc1ccccc1.[Cl-]
|
| 304 |
-
CCc1c(C(=O)[O-])c(=O)cnn1-c1ccc(Cl)cc1.[K+]
|
| 305 |
-
O=P([O-])([O-])CN(CP(=O)([O-])[O-])CP(=O)([O-])O.[Na+].[Na+].[Na+].[Na+].[Na+]
|
| 306 |
-
O=S(=O)([O-])c1cc(S(=O)(=O)[O-])c2ccc3c(S(=O)(=O)[O-])cc(S(=O)(=O)[O-])c4ccc1c2c43.[Na+].[Na+].[Na+].[Na+]
|
| 307 |
-
COc1ccc([N+](=O)[O-])cc1[O-].[Na+]
|
| 308 |
-
CCc1cccc(CC)c1N(COC)C(=O)CS(=O)(=O)[O-].[Na+]
|
| 309 |
-
O=S([O-])O.[Na+]
|
| 310 |
-
COCc1c(-c2ccc(F)cc2)c(=CC[C@@H](O)C[C@@H](O)CC(=O)[O-])c(C(C)C)nc1=C(C)C.[Na+]
|
| 311 |
-
[Na+].[O-]c1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl
|
| 312 |
-
CCc1cccc(CC)c1NC(=O)C(=O)[O-].[Na+]
|
| 313 |
-
Nc1ccc(S(=O)(=O)[N-]c2nccs2)cc1.[Na+]
|
| 314 |
-
Nc1ccc2c(S(=O)(=O)O)cccc2c1S(=O)(=O)[O-].[Na+]
|
| 315 |
-
CCN(CCC#N)c1ccc(/N=N/c2ccc([N+](=O)[O-])cc2)cc1
|
| 316 |
-
B.C1COCCN1
|
| 317 |
-
N#CBr
|
| 318 |
-
C=CC(=O)OCC(Br)CBr
|
| 319 |
-
CCc1c(Br)c(Br)c(Br)c(Br)c1Br
|
| 320 |
-
Brc1cc(Br)cc(Br)c1
|
| 321 |
-
Nc1c(Br)cc([N+](=O)[O-])cc1Br
|
| 322 |
-
Brc1ccccn1
|
| 323 |
-
Brc1c(Br)c(Br)c(Oc2c(Br)c(Br)c(Br)c(Br)c2Br)c(Br)c1Br
|
| 324 |
-
O=C1OC2(c3ccccc31)c1ccc(O)c(Br)c1Oc1c2ccc(O)c1Br
|
| 325 |
-
BrCC(Br)c1ccccc1
|
| 326 |
-
CCCCCCCCCCBr
|
| 327 |
-
C#CC(O)CCC
|
| 328 |
-
CN(C)c1ccc(C(=C2C=CC(=[N+](C)C)C=C2)c2ccccc2)cc1.O=C([O-])C(=O)O
|
| 329 |
-
C=CC(=C)CC/C=C(\C)CCC=C(C)C
|
| 330 |
-
CC(C)=CCCC(C)O
|
| 331 |
-
CC(C)=NNC(N)=S
|
| 332 |
-
C[As+](C)(C)CC(=O)[O-]
|
| 333 |
-
COCC(=O)N(c1c(C)cccc1C)C(C)C(=O)OC
|
| 334 |
-
CCOC(=O)C(C)O
|
| 335 |
-
C/C(=N/[Si](C)(C)C)O[Si](C)(C)C
|
| 336 |
-
CCO[Si](C)(C)OCC
|
| 337 |
-
C[SiH](C)O[SiH](C)C
|
| 338 |
-
CC(=O)[CH-]C(C)=O.CC(=O)[CH-]C(C)=O.[AlH3].[CH2-]C(=O)CC(C)=O
|
| 339 |
-
CC(=O)[CH-]C(C)=O.CC(=O)[CH-]C(C)=O.[CH2-]C(=O)CC(C)=O.[Cr]
|
| 340 |
-
CC(=O)[CH-]C(C)=O.CC(=O)[CH-]C(C)=O.[CH2-]C(=O)CC(C)=O.[Fe]
|
| 341 |
-
CC(C)=CCC[C@H](C)CC=O
|
| 342 |
-
CCC(=O)OC/C=C(\C)CCC=C(C)C
|
| 343 |
-
CC(C)=CCCC(C)CCOC(=O)C(C)C
|
| 344 |
-
C=CC/C=C/C/C=C/C/C=C/C
|
| 345 |
-
C=C(C)CCCC
|
| 346 |
-
C=C(C)CCO
|
| 347 |
-
C=C[Si](C)(C)Cl
|
| 348 |
-
C=C1CC[C@H]2O[C@]2(C)CC[C@@H]2[C@@H]1CC2(C)C
|
| 349 |
-
C=CC(=O)O[C@@H]1C[C@H]2CC[C@]1(C)C2(C)C
|
| 350 |
-
C=CC(=O)OCCOC1C2CCC1C1CC=CC12
|
| 351 |
-
C=CC(C)(C)O
|
| 352 |
-
C=CC(C)C#N
|
| 353 |
-
C=CC=CC=CCCCCC
|
| 354 |
-
C=Cc1ccncc1
|
| 355 |
-
C=COCCCC
|
| 356 |
-
C=COCCCl
|
| 357 |
-
C1=CCCC1
|
| 358 |
-
c1ccc(-c2ccccc2-c2ccccc2)cc1
|
| 359 |
-
c1ccc2c(c1)CCC2
|
| 360 |
-
c1ccc([Sb](c2ccccc2)c2ccccc2)cc1
|
| 361 |
-
c1cc[nH]c1
|
| 362 |
-
C1CCSC1
|
| 363 |
-
c1ccc2c(c1)-c1ccc3ccc4cccc5cc-2c1c3c45
|
| 364 |
-
C=C(C)C1CCC(C)CC1O
|
| 365 |
-
CCC=C(C)C=O
|
| 366 |
-
CC(=O)CC(N)=O
|
| 367 |
-
CCCC(C)CCC(C)=O
|
| 368 |
-
CCCCCCCCC(C)=O
|
| 369 |
-
CC(=O)O[C@@H]1C[C@@H]2CC[C@@]1(C)C2(C)C
|
| 370 |
-
CC(=O)OCC=C(C)CCC=C(C)CCC=C(C)C
|
| 371 |
-
CCc1cc(C)cc(CC)c1-c1c(OC(=O)C(C)(C)C)n2n(c1=O)CCOCC2
|
| 372 |
-
CC(C)(C)C(O)C(Oc1ccc(-c2ccccc2)cc1)n1cncn1
|
| 373 |
-
CC(C)(C)C1=CC(=O)C(C(C)(C)C)=CC1=O
|
| 374 |
-
CSc1nc(=O)c(C(C)(C)C)n[nH]1
|
| 375 |
-
CCC(C)c1cc(C(C)(C)C)cc(-n2nc3ccccc3n2)c1O
|
| 376 |
-
Cc1cc(C)c(O)c(C(C)(C)C)c1
|
| 377 |
-
CC(C)(C)c1cc(CCC(=O)NCCCCCCNC(=O)CCc2cc(C(C)(C)C)c(O)c(C(C)(C)C)c2)cc(C(C)(C)C)c1O
|
| 378 |
-
CC(C)(C)c1cc(CCC(=O)OCCSCCOC(=O)CCc2cc(C(C)(C)C)c(O)c(C(C)(C)C)c2)cc(C(C)(C)C)c1O
|
| 379 |
-
CC(C)(C)c1ccc(OC2CCCCC2O)cc1
|
| 380 |
-
CC(c1cc(C(C)(C)C)cc(C(C)(C)C)c1O)c1cc(C(C)(C)C)cc(C(C)(C)C)c1O
|
| 381 |
-
CC(CC(C)(C)C)CC(C)(C)CC(C)(C)C
|
| 382 |
-
CC(C)(C)N.CCCC(NC(C)C(=O)N1C(C(=O)O)C[C@@H]2CCCC[C@@H]21)C(=O)OCC
|
| 383 |
-
CCC(C)(OOC(C)(C)C)OOC(C)(C)C
|
| 384 |
-
CC(C)(COCC1CO1)COCC1CO1
|
| 385 |
-
CC(C)C(=O)C(C)C
|
| 386 |
-
CC(C)C(=O)OCc1ccccc1
|
| 387 |
-
CC(C)C(=O)Cl
|
| 388 |
-
CC(C)c1ccccc1N
|
| 389 |
-
CC(C)CBr
|
| 390 |
-
CC(C)CCOCCC(C)C
|
| 391 |
-
CC=CC(=O)OCC(C)C
|
| 392 |
-
COCCc1ccc(OCC(O)CNC(C)C)cc1
|
| 393 |
-
CC(C)OP(OC(C)C)OC(C)C
|
| 394 |
-
CC(C=O)c1ccccc1
|
| 395 |
-
CC(c1ccccc1)(c1ccc(O)cc1)c1ccc(O)cc1
|
| 396 |
-
CC(c1ccc(O)cc1)c1ccc(O)cc1
|
| 397 |
-
CCC(C)C(=O)OC
|
| 398 |
-
CCC(=O)CC(C)CC
|
| 399 |
-
CCCC(C)C=O
|
| 400 |
-
CCOC(=O)CC(C)O
|
| 401 |
-
CCSC(C)CC1CC(=O)C(C(CC)=NOC/C=C/Cl)C(=O)C1
|
| 402 |
-
CC=CC(=O)C1=C(C)C=CCC1(C)C
|
| 403 |
-
C=C(/C=C/C)C1=C(C)CCCC1(C)C
|
| 404 |
-
CC=CCO
|
| 405 |
-
CC1(C)C(=O)N(CO)C(=O)N1CO
|
| 406 |
-
CC1(C)COCN1
|
| 407 |
-
CCC1(C)C(=O)N(Cl)C(=O)N1Cl
|
| 408 |
-
Cc1cc(C)[nH]c(=O)n1.O=C(Nc1ccc([N+](=O)[O-])cc1)Nc1ccc([N+](=O)[O-])cc1
|
| 409 |
-
CC1=CC[C@@H](C(C)(C)O)CC1
|
| 410 |
-
Cc1ccc(O)o1
|
| 411 |
-
Cc1c[nH]c(=O)[nH]c1=O
|
| 412 |
-
Cc1cc(C)c(C=O)c(C)c1
|
| 413 |
-
CC(C)=CC1CC(C)CCO1
|
| 414 |
-
CNC(=O)Oc1cc(C)c(SC)c(C)c1
|
| 415 |
-
Cc1cccc(N(C)C)c1
|
| 416 |
-
Cc1cccc(OP(=O)(Oc2cccc(C)c2)Oc2cccc(C)c2)c1
|
| 417 |
-
CSc1ccc(O)cc1C
|
| 418 |
-
Cc1ccc2[nH]nnc2c1
|
| 419 |
-
Cc1ccc(C=O)cc1
|
| 420 |
-
Cc1ccc(C(=O)Cl)cc1
|
| 421 |
-
CC1CCC(C(C)(C)O)CC1
|
| 422 |
-
Cc1ccc(S(=O)(=O)C(I)I)cc1
|
| 423 |
-
COc1cc(C(C)C)ccc1C
|
| 424 |
-
Cc1ccc(CCl)cc1
|
| 425 |
-
Cc1ccc2nc3sc(=O)sc3nc2c1
|
| 426 |
-
Cc1cccc(C)c1C
|
| 427 |
-
Cc1cccc(C)n1
|
| 428 |
-
C=Cc1cccc(C)c1
|
| 429 |
-
CC1CCCCC1
|
| 430 |
-
CCc1ccccc1C
|
| 431 |
-
Cc1cncc(C)c1
|
| 432 |
-
CC1CNCC(C)O1
|
| 433 |
-
Cc1cnccn1
|
| 434 |
-
COC[C@H](C)N(C(=O)CCl)c1c(C)csc1C
|
| 435 |
-
CC1(C)[C@H]2CC[C@]1(C)C(=O)C2
|
| 436 |
-
Cc1ccc(N=Nc2c(O)ccc3ccccc23)c(C)c1
|
| 437 |
-
Cc1cccc2c1ccc1ccccc12
|
| 438 |
-
Cc1ccc2sc3cccc(C)c3c2c1
|
| 439 |
-
CCC(=O)O
|
| 440 |
-
CCC(=O)OCC=Cc1ccccc1
|
| 441 |
-
CCC(C)c1ccccc1
|
| 442 |
-
CCC(O)CO
|
| 443 |
-
CCc1c(C)cc(N)c(CC)c1N
|
| 444 |
-
CCc1cccc(CC)c1
|
| 445 |
-
CCC1CCCCC1
|
| 446 |
-
CCc1ccc(C)nc1
|
| 447 |
-
CCCC(O)C(O)CO
|
| 448 |
-
CCCCCCCC(C)=O
|
| 449 |
-
CCCCCCCCc1ccc(Nc2ccc(CCCCCCCC)cc2)cc1
|
| 450 |
-
CCCCCCCCCCc1ccccc1
|
| 451 |
-
CCCCCCCCCCCCCc1ccccc1
|
| 452 |
-
CCCCCCCCCCCCCCCCCO
|
| 453 |
-
CCCCCCS
|
| 454 |
-
CCCCN(C)C
|
| 455 |
-
CCCCOP(=O)(CCCC)OCCCC
|
| 456 |
-
CCCCOP(OCCCC)OCCCC
|
| 457 |
-
CCN(CC)C(=O)Cl
|
| 458 |
-
CCN(CC)c1cccc(C)c1
|
| 459 |
-
CCN(CC)c1ccc(/N=N/c2ccccc2)cc1
|
| 460 |
-
CCN(CC)c1ccc(C=O)cc1
|
| 461 |
-
CCN(CC)c1ccc2c(c1)Oc1cc(N(CC)CC)ccc1C21OC(=O)c2ccccc21
|
| 462 |
-
CCn1c2ccccc2c2ccccc21
|
| 463 |
-
CCOB(OCC)OCC
|
| 464 |
-
CCOC(/C=C(\C)CCC=C(C)C)OCC
|
| 465 |
-
CCOC(=O)OC(=O)OCC
|
| 466 |
-
CC=Cc1ccc(OCC)c(O)c1
|
| 467 |
-
CCCCOCC
|
| 468 |
-
CCOCOCC
|
| 469 |
-
CCOP(=O)(OCC)SCCSCC
|
| 470 |
-
CCOP(=S)(OCC)SCS(=O)(=O)C(C)(C)C
|
| 471 |
-
CCOP(=S)(OCC)SCS(=O)(=O)CC
|
| 472 |
-
CC(Cl)CN(C)C.Cl
|
| 473 |
-
Cl.Cl.Nc1ccc(N)cc1
|
| 474 |
-
CN(C)CCCl.Cl
|
| 475 |
-
Cl.ClCCN1CCOCC1
|
| 476 |
-
Cl.ClCCNCCCl
|
| 477 |
-
CCOC(=O)CN.Cl
|
| 478 |
-
CC1(C)C(C(=O)OCc2cccc(Oc3ccccc3)c2)[C@@H]1C=C(Cl)Cl
|
| 479 |
-
Cl[C@H]1[C@H](Cl)[C@@H](Cl)[C@H](Cl)[C@@H](Cl)[C@H]1Cl
|
| 480 |
-
Cl[Sn](c1ccccc1)(c1ccccc1)c1ccccc1
|
| 481 |
-
CC1(C)C(C(=O)O[C@H](C#N)c2cccc(Oc3ccccc3)c2)[C@@H]1/C=C(\Cl)C(F)(F)F
|
| 482 |
-
Cl/C=C\CCl
|
| 483 |
-
CCCCCCCCCCCC(=O)Cl
|
| 484 |
-
O=C(Cl)OCCOC(=O)Cl
|
| 485 |
-
ClC(c1ccccc1)(c1ccccc1)c1ccccc1
|
| 486 |
-
O=S(=O)(C(Cl)(Cl)Cl)C(Cl)(Cl)Cl
|
| 487 |
-
C#CC1=C(C#C)C(=O)C(Cl)=C(Cl)C1=O
|
| 488 |
-
Cc1cc(Cl)ccc1OC(C)C(=O)O
|
| 489 |
-
CC(Oc1ccc(Cl)cc1Cl)C(=O)O
|
| 490 |
-
CC(C)N.O=C(O)COc1ccc(Cl)cc1Cl
|
| 491 |
-
Clc1ccc2c(Cl)ccnc2c1
|
| 492 |
-
N#CCc1ccc(Cl)cc1
|
| 493 |
-
Clc1ccc(-c2ccccc2)cc1
|
| 494 |
-
COc1cc2ncnc(Nc3cccc(Cl)c3)c2cc1OC
|
| 495 |
-
Clc1cccc(Cl)n1
|
| 496 |
-
Clc1cccnc1
|
| 497 |
-
Clc1nc2ccccc2s1
|
| 498 |
-
ClC1=C(Cl)[C@]2(Cl)[C@H]3C[C@@H](Cl)C(Cl)[C@H]3[C@@]1(Cl)C2(Cl)Cl
|
| 499 |
-
ClC1=C(Cl)[C@]2(Cl)[C@H]3C[C@H](Cl)C(Cl)[C@H]3[C@@]1(Cl)C2(Cl)Cl
|
| 500 |
-
CC[Hg+].Oc1[n-]c(O)c2c1C1(Cl)C(Cl)=C(Cl)C2(Cl)C1(Cl)Cl
|
| 501 |
-
Clc1cc(Cl)c(-c2c(Cl)c(Cl)cc(Cl)c2Cl)cc1Cl
|
| 502 |
-
Clc1ccc(Oc2ccccc2)cc1
|
| 503 |
-
O=C(NCc1ncc(C(F)(F)F)cc1Cl)c1c(Cl)cccc1Cl
|
| 504 |
-
ClC(Cl)=C(c1ccc(Cl)cc1)c1ccccc1Cl
|
| 505 |
-
NC(=O)CCl
|
| 506 |
-
ClCc1ccccc1CCl
|
| 507 |
-
OCCCCl
|
| 508 |
-
CCCCCCCCCCCCS(=O)(=O)Cl
|
| 509 |
-
C=CC(=O)N(C)C
|
| 510 |
-
CN(C)CCN1CCOCC1
|
| 511 |
-
Cn1cccc1
|
| 512 |
-
CN1CCN(C)CC1
|
| 513 |
-
Cn1ccnc1
|
| 514 |
-
CN1CN(C)CN(C)C1
|
| 515 |
-
CO[Si](CCC1CCC2OC2C1)(OC)OC
|
| 516 |
-
CO[Si](CCCNCCNCCN)(OC)OC
|
| 517 |
-
CO[Si](OC)(c1ccccc1)c1ccccc1
|
| 518 |
-
CCCCCCCC(OC)OC
|
| 519 |
-
CC=Cc1ccc(OC)c(OC)c1
|
| 520 |
-
COc1ccc(C#N)cc1
|
| 521 |
-
COc1ccc(OC)c(C=O)c1
|
| 522 |
-
COc1cccc(OC)c1O
|
| 523 |
-
COc1ccccc1C=O
|
| 524 |
-
CCNc1nc(NC(C)(C)C)nc(OC)n1
|
| 525 |
-
COc1nccnc1CC(C)C
|
| 526 |
-
COc1ccc(OC)c(N=Nc2c(O)ccc3ccccc23)c1
|
| 527 |
-
COS(=O)(=O)c1ccc(C)cc1
|
| 528 |
-
CS(C)(=O)=O
|
| 529 |
-
COC(=O)N(C(=O)N1COC2(C(=O)OC)Cc3cc(Cl)ccc3C2=N1)c1ccc(OC(F)(F)F)cc1
|
| 530 |
-
N#Cc1ccc(C/C(=N\NC(=O)Nc2ccc(OC(F)(F)F)cc2)c2cccc(C(F)(F)F)c2)cc1
|
| 531 |
-
CC(=NNC(=O)Nc1cc(F)cc(F)c1)c1ncccc1C(=O)O
|
| 532 |
-
O=C(O)c1cn(-c2ccc(F)cc2)c2cc(N3CCNCC3)c(F)cc2c1=O
|
| 533 |
-
Oc1c(I)cc(I)cc1I
|
| 534 |
-
Oc1ccc(I)cc1
|
| 535 |
-
N#C/C=C/c1ccccc1
|
| 536 |
-
CC(O)C#N
|
| 537 |
-
N#Cc1cc([N+](=O)[O-])ccc1N
|
| 538 |
-
N#Cc1ccc(O)cc1
|
| 539 |
-
N#Cc1ccccc1Cl
|
| 540 |
-
N#Cc1ccccc1N
|
| 541 |
-
N#CCCO
|
| 542 |
-
N#CCCOCCC#N
|
| 543 |
-
NC(=O)C1CO1
|
| 544 |
-
C=CCOC(N)=O
|
| 545 |
-
Nc1cc([N+](=O)[O-])cc(Cl)c1O
|
| 546 |
-
Nc1cc([N+](=O)[O-])ccc1Cl
|
| 547 |
-
CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N)cc2)cc1
|
| 548 |
-
Nc1ccc(Oc2cccc(Oc3ccc(N)cc3)c2)cc1
|
| 549 |
-
Nc1ccc([N+](=O)[O-])cc1Cl
|
| 550 |
-
Nc1cccnc1N
|
| 551 |
-
Nc1ccc(Cl)cc1C(=O)c1ccccc1
|
| 552 |
-
NCc1ccco1
|
| 553 |
-
COP(N)(=S)OC
|
| 554 |
-
Cl[Dy](Cl)Cl.O.O.O.O.O.O
|
| 555 |
-
Cl[Eu](Cl)Cl.O.O.O.O.O.O
|
| 556 |
-
O.O.O.O.O.O.[Cl-].[Cl-].[Cl-].[Sc+3]
|
| 557 |
-
O=C(C(O)CO)C(O)CO
|
| 558 |
-
C[C@]12CC[C@H](O)C[C@H]1CC[C@@H]1[C@@H]2C[C@@H](O)[C@]2(C)[C@@H](C3=CC(=O)OC3)CC[C@]12O
|
| 559 |
-
CCOC(=O)C(O)C(O)C(=O)OCC
|
| 560 |
-
O[Si](c1ccccc1)(c1ccccc1)c1ccccc1
|
| 561 |
-
O=[N+]([O-])c1cc(Cl)ccc1Cl
|
| 562 |
-
Nc1ccc(Cl)c([N+](=O)[O-])c1
|
| 563 |
-
Nc1ccc(F)c([N+](=O)[O-])c1
|
| 564 |
-
O=[N+]([O-])c1ccc([N+](=O)[O-])cc1
|
| 565 |
-
COC(=O)c1ccc([N+](=O)[O-])cc1
|
| 566 |
-
O=[N+]([O-])c1ccc(CO)cc1
|
| 567 |
-
O=C(c1ccccc1)C(Cl)Cl
|
| 568 |
-
CCOC(OCC)C(=O)c1ccccc1
|
| 569 |
-
CC(=O)c1ccc(-c2ccccc2)cc1
|
| 570 |
-
CC(=O)c1ccccc1N
|
| 571 |
-
CC(=O)OC1CCCCC1
|
| 572 |
-
C=CC(=O)OCCN(CC)CC
|
| 573 |
-
COC(=O)C=Cc1ccccc1
|
| 574 |
-
O=C(c1ccccc1)c1ccc(Cl)cc1
|
| 575 |
-
CCCC(=O)OC(OC(=O)CCC)OC(=O)CCC
|
| 576 |
-
CCCCCCCC(=O)OCC
|
| 577 |
-
CC/C=C\C/C=C\C/C=C\CCCCCCCC(=O)OC
|
| 578 |
-
CN(CCCC(=O)c1cccnc1)N=O
|
| 579 |
-
O=C(Cl)c1ccc(Cl)cc1
|
| 580 |
-
O=C(Cl)c1ccc(Cl)cc1Cl
|
| 581 |
-
O=C(Cl)Oc1ccccc1
|
| 582 |
-
NC(=O)c1ccccc1Cl
|
| 583 |
-
NC(=O)c1ccc(N)nc1
|
| 584 |
-
CCOC(=O)c1ccccc1S(=O)(=O)NC(=O)Nc1nc(Cl)cc(OC)n1
|
| 585 |
-
COc1cc(OC)nc(NC(=O)NS(=O)(=O)Nc2ccccc2C(=O)N(C)C)n1
|
| 586 |
-
CCc1cccc(C)c1N(C(=O)C(=O)O)C(C)COC
|
| 587 |
-
C/C(=C\c1ccc(C(=O)O)cc1)c1ccc2c(c1)C(C)(C)CCC2(C)C
|
| 588 |
-
O=C(O)CN(CCCN(CC(=O)O)CC(=O)O)CC(=O)O
|
| 589 |
-
CCOc1nc(F)cc2nc(S(=O)(=O)Nc3c(Cl)cccc3C(=O)OC)nn12
|
| 590 |
-
COCCC(=O)OC
|
| 591 |
-
CCOC=C(C(=O)OCC)C(=O)OCC
|
| 592 |
-
CCOC(=O)C1=NN(c2ccc(Cl)cc2Cl)C(C)(C(=O)OCC)C1
|
| 593 |
-
CCCCCCCOC(=O)c1ccccc1C(=O)OCCCCCCC
|
| 594 |
-
O=C=NCc1cccc(CN=C=O)c1
|
| 595 |
-
Cc1cc(=O)[nH]c(=O)[nH]1
|
| 596 |
-
O=c1cccc[nH]1
|
| 597 |
-
Cn1sccc1=O
|
| 598 |
-
Cn1c(=O)[nH]c2ncn(C)c2c1=O
|
| 599 |
-
O=C1Cc2ccccc2C1
|
| 600 |
-
Oc1ccc(O)[nH]1
|
| 601 |
-
CC(C)(C)c1n[nH]c(=O)n(N)c1=O
|
| 602 |
-
O=C1OCc2ccccc21
|
| 603 |
-
O=C1C=CC(=O)c2ccccc21
|
| 604 |
-
CCCCNc1ccc(NCCCC)c2c1C(=O)c1ccccc1C2=O
|
| 605 |
-
Nc1c(Oc2ccccc2)cc(O)c2c1C(=O)c1ccccc1C2=O
|
| 606 |
-
O=C1C=CC(=O)N1c1cccc(N2C(=O)C=CC2=O)c1
|
| 607 |
-
O=C1c2ccccc2C(=O)c2c(Cl)cccc21
|
| 608 |
-
C[C@]12CC[C@@H](O)C[C@@H]1CC[C@@H]1[C@@H]2CC[C@]2(C)C(=O)CC[C@@H]12
|
| 609 |
-
O=C1OC(=O)c2ccc3c4c(ccc1c24)C(=O)OC3=O
|
| 610 |
-
CC(=O)C=O
|
| 611 |
-
O=CC1CC1
|
| 612 |
-
CCN(CC)c1ccc(C=O)c(O)c1
|
| 613 |
-
Cc1cc(C)c(C(=O)P(=O)(c2ccccc2)c2ccccc2)c(C)c1
|
| 614 |
-
COP(=O)(CCC(=O)NCO)OC
|
| 615 |
-
CC1=CP(=O)(c2ccccc2)CC1
|
| 616 |
-
CCOP(=S)(OCC)SCCS(=O)(=O)CC
|
| 617 |
-
CCS(=O)(=O)c1cccnc1S(=O)(=O)NC(=O)Nc1nc(OC)cc(OC)n1
|
| 618 |
-
CCc1cccc(C)c1N(C(=O)CS(=O)(=O)O)C(C)COC
|
| 619 |
-
CCOCN(C(=O)CS(=O)(=O)O)c1c(C)cccc1CC
|
| 620 |
-
CN(CCCC(O)c1cccnc1)N=O
|
| 621 |
-
OC(Cc1ccccc1Cl)(Cn1cncn1)C1(Cl)CC1
|
| 622 |
-
Cc1ccc(C2OC[C@@H]3OC(c4ccc(C)cc4)O[C@H]([C@H](O)CO)[C@@H]3O2)cc1
|
| 623 |
-
O=c1[nH]cccc1O
|
| 624 |
-
CCCCCCCCSCc1cc(C)c(O)c(CSCCCCCCCC)c1
|
| 625 |
-
Oc1cccc(-c2ccccc2)c1
|
| 626 |
-
Oc1cc2ccccc2cc1O
|
| 627 |
-
C=Cc1ccc(O)c(OC)c1
|
| 628 |
-
CCCCCc1ccc(O)cc1
|
| 629 |
-
Cc1c(Cl)c(=O)oc2cc(O)ccc12
|
| 630 |
-
Oc1ccc2c(c1)CCC2
|
| 631 |
-
COc1cccc(O)c1
|
| 632 |
-
Oc1ccccc1Br
|
| 633 |
-
O=S(=O)(c1ccc(OCc2ccccc2)cc1)c1ccccc1O
|
| 634 |
-
O=c1cc[nH]cc1
|
| 635 |
-
Cc1ccc(O)cn1
|
| 636 |
-
CC(=O)Nc1ccc(N=Nc2cc(C)ccc2O)cc1
|
| 637 |
-
CC(C)(c1ccc(O)c(-c2ccccc2)c1)c1ccc(O)c(-c2ccccc2)c1
|
| 638 |
-
Oc1ccc(C2(c3ccc(O)c(C4CCCCC4)c3)CCCCC2)cc1C1CCCCC1
|
| 639 |
-
Oc1cccc2c(O)cccc12
|
| 640 |
-
OCCC(O)CO
|
| 641 |
-
OCCCc1cccnc1
|
| 642 |
-
OCCCS
|
| 643 |
-
OCCN1CN(CCO)CN(CCO)C1
|
| 644 |
-
Cn1[nH]nnc1=S
|
| 645 |
-
OC(Cc1ccccc1Cl)(Cn1[nH]cnc1=S)C1(Cl)CC1
|
| 646 |
-
SCc1ccco1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/train_model.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import subprocess
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
def train_model(data_path: str, config_path: str, save_model_path: str, target_columns=None):
    """Train a classification model by invoking the ``chemprop train`` CLI.

    Args:
        data_path: CSV file handed to ``--data-path``. The command also passes
            ``--splits-column split``, so the CSV is expected to carry a
            ``split`` column.
        config_path: Configuration file handed to ``--config-path``.
        save_model_path: Directory handed to ``--save-dir``. A timestamped
            ``train_<timestamp>.log`` file is written into the same directory.
        target_columns: Optional iterable of target column names. When omitted,
            the twelve ``NR-*`` / ``SR-*`` columns that were previously
            hard-coded are used.

    Raises:
        subprocess.CalledProcessError: If the chemprop command exits with a
            non-zero status (``check=True``).
    """
    # Local import: the module's top-level imports only provide subprocess and datetime.
    from pathlib import Path

    if target_columns is None:
        target_columns = (
            "NR-AhR", "NR-AR", "NR-AR-LBD", "NR-Aromatase", "NR-ER", "NR-ER-LBD",
            "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53",
        )

    # Join path components instead of concatenating strings, so an empty or
    # slash-terminated save path cannot produce "/train_....log" or "dir//train_....log".
    save_dir = Path(save_model_path)
    # Make sure the log file's directory exists before the subprocess starts.
    save_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    logfile = save_dir / f"train_{timestamp}.log"

    command = [
        "chemprop", "train",
        "--data-path", data_path,
        "--config-path", config_path,
        "--splits-column", "split",
        "--logfile", str(logfile),
        "--task-type", "classification",
        "--target-columns", *target_columns,
        "--save-dir", save_model_path,
    ]

    # Argument list (no shell) so paths are passed verbatim; check=True
    # surfaces a failed training run as an exception.
    subprocess.run(command, check=True)
|
train.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
from src.data import clean_smiles_in_csv, get_combined_dataset_csv
|
| 5 |
+
from src.train_model import train_model
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Paths used by the training pipeline.
data_path = "./data/combined_clean.csv"
raw_data_path = "./data/combined_not_clean.csv"
config_path = "./config/config.toml"
save_model_path = "./logs"

# load and clean data
print("Loading Datasets...")
load_dotenv()
# TOKEN is read from the environment / .env file and forwarded to the dataset loader.
token = os.getenv("TOKEN")
target_cols = ["NR-AhR","NR-AR","NR-AR-LBD","NR-Aromatase","NR-ER","NR-ER-LBD","NR-PPAR-gamma","SR-ARE","SR-ATAD5","SR-HSE","SR-MMP","SR-p53"]

# The data directory is git-ignored, so it does not exist on a fresh clone;
# create it before anything tries to write the CSV files into it.
os.makedirs(os.path.dirname(data_path), exist_ok=True)

get_combined_dataset_csv(token, save_path=raw_data_path)
clean_smiles_in_csv(raw_data_path, data_path, "smiles", target_cols)

# train model
print("Starting Training...")
train_model(data_path, config_path, save_model_path)
|