How to use REAL Space API¶
General Information¶
Try this notebook in Google Colab: Open in Colab
The search tool API is based on a snapshot of the Enamine internal synthons and reactions. All the server logic uses the fragment nature of REAL space and internal knowledge of the rules for combining the fragments to facilitate the search. In other words, there is no enumerated list of REAL structures that can be searched using the API. All retrieved structures are created on the fly based on the user input.
This API is used by the REAL search tool GUI, and most users should try the GUI first to understand how it works before switching to direct API usage. Indeed, browsing REAL reaction schemes and descriptions is only possible using the GUI.
The tool contains all public REAL reactions with more than one million structures each. The size of the searchable REAL space (sREAL and mREAL, designated in API requests as reaction_types 0 and 1050, respectively) is ~9 trillion structures (as of Jan 2026), ~2 trillion of which have MW below 500 Da. Note that a big chunk of these numbers comes from a dozen huge reactions, such as the reaction with id = 275592. This is due to the three-component nature of that reaction and the large number of synthons in each role. The biggest two-component reactions have just a few billion molecules. The only four-component reaction available in the tool is the Ugi reaction, with a size of about 10¹⁴ structures. The problem with it is that the vast majority of the structures have MW > 500 Da, and the small structures all look very similar. That is the main reason why this reaction is not part of the REAL release, but rather of the unREAL space.
The reactions not covered by this API contain ~6 million REAL structures in total.
The differences between the public REAL version and the one available here can be summarized as follows. Note that the first factor is the major one; the others are listed merely for completeness.
- during preparation of the public release we filter huge reactions to limit their fraction and to increase chemical diversity within the space
- the public release uses the reagents available at the time of the space preparation; this tool contains all reagents that were in stock at some point and hence were used for the creation of REAL synthons
- some reactions are discarded from the public release because of issues with the available space-search software
Within the tool, an RSN code is used as an identifier together with the SMILES string. It contains the id of the reaction and of all synthons used to enumerate the structure. An RSN code generated by the tool can be easily recognized and parsed by Enamine. We guarantee that the correct SMILES can be independently generated from the RSN code without additional information. Note that this convention may not hold for other fragment space search tools.
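Although parsing is guaranteed only on the Enamine side, the visible convention (a prefix letter plus the reaction id, then synthon ids, joined by "____") can be sketched as follows; the exact grammar is an assumption based on the example codes in this document:

```python
# Minimal sketch of splitting an RSN code into its parts, based on the
# convention visible in this document's examples; the exact grammar is
# an assumption, not an official specification.
def parse_rsn(rsn: str) -> dict:
    parts = rsn.split("____")
    head, synthon_ids = parts[0], parts[1:]
    return {
        "space_prefix": head[0],        # e.g. "s" or "m" (assumed meaning)
        "reaction_id": int(head[1:]),   # reaction id
        "synthon_ids": [int(s) for s in synthon_ids],
    }

print(parse_rsn("s11____10391112____5456408"))
```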
If you have any questions or found any bugs, please contact Dmytro Radchenko
Authentication and Authorization¶
To facilitate the REAL API usage within this notebook, it is easier to generate and use an API key in place of an OAuth2 Bearer token. The easiest way to do this is via the Swagger UI available at API docs. To do so, follow these steps from the docs page:
- Authenticate yourself
- click the green Authorize button in the top right corner of the page
- in the popup window under the section "OAuth2 authorization (OAuth2, authorizationCode)" click "Authorize" without entering any information
- you may be redirected to the auth server and asked for your credentials - please enter your login/password, follow instructions if there are requests for password reset, email confirmation, etc.
- after a successful login you will be taken back to the API docs page, now with valid OAuth2 authorization
- you can log out anytime by clicking the green Logout button in the auth modal
- close auth modal to proceed to the next step
- Get your API key
- find "/api/v1/account/apikey/create" endpoint named "Create Api Key" under the "API key manipulation" section of the docs
- you may use "Filter by tag" text input field to quickly locate the endpoint - just enter "key" in the field
- open the endpoint details and then click on the "Try it out" button
- click the blue "Execute" button to make a request
- if you received the server response "OAuth2 token has expired. Please update your token.", please re-authenticate yourself as described in the first step above
- a successful response will contain your generated API key - please save it for future use
- Use API key authorization in docs (Optional)
- log out from OAuth2 authorization in the Authorization popup window
- similar to the first step above, under the "APIKeyHeader (apiKey)" section enter your generated API key in the "Value" text field
- click the green "Authorize" button and close the modal window
- now you can use the API docs with API key authorization to perform requests for as long as the API key is valid
- try clicking the "List Api Keys" endpoint and execute the request as you did in the second step
- you should see part of your generated API key, together with its creation/expiration dates
- if your API key is compromised, please delete it using "Delete Api Key" endpoint under the same section
# check if notebook is in google colab
use_colab = True
try:
import google.colab.userdata as userdata
from google.colab.userdata import SecretNotFoundError
except ModuleNotFoundError:
use_colab = False
if use_colab:
YOUR_API_KEY = userdata.get("REAL_API_KEY") # provide your API key here
# root server URL, typically https://real.enamine.net, but may differ if you have the API server hosted elsewhere
try:
API_SERVER_URL = userdata.get("API_SERVER_URL")
except SecretNotFoundError:
API_SERVER_URL = "https://real.enamine.net"
try:
FILE_SERVER_URL = userdata.get("FILE_SERVER_URL")
except SecretNotFoundError:
FILE_SERVER_URL = "https://real.enamine.net"
else:
from dotenv import load_dotenv
import os
# Try to load from .env file
load_dotenv()
YOUR_API_KEY = os.getenv("REAL_API_KEY", "ENTER_YOUR_API_KEY_HERE")
API_SERVER_URL = os.getenv("API_SERVER_URL", "https://real.enamine.net")
FILE_SERVER_URL = os.getenv("FILE_SERVER_URL", API_SERVER_URL)
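The later cells spell out the request headers inline; as a small optional sketch, you can build the common headers once and reuse them (the placeholder key below is illustrative - use the YOUR_API_KEY value loaded above):

```python
# Common JSON headers used by every request in this notebook.
# The placeholder API key is for illustration only.
YOUR_API_KEY = "ENTER_YOUR_API_KEY_HERE"  # replace with the key loaded above

COMMON_HEADERS = {
    "Content-Type": "application/json",
    "X-API-KEY": YOUR_API_KEY,
}
```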
Lookup for a structure in REAL¶
Single SMILES¶
It is often beneficial to know whether a structure belongs to the REAL Compounds. This information may be useful when analyzing newly generated structures, to filter out those that cannot be synthesized and delivered under the standard terms and conditions for REAL compounds. Another useful example is gaining insight into how the structure can be (or was) synthesized, i.e. knowing the building blocks and the reaction used for the enumeration of the REAL compound.
To get this information you may use the API docs to format and send requests, similar to what you did in Authentication and Authorization. In this notebook, however, we will use the requests Python package.
Let's take a random Enamine stock compound to play with the endpoints - Z2193655754. You may change it to whatever structure you prefer.
structure_of_interest_smiles = "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1"
Let's start by importing the required packages and storing the endpoint constants and the structure's SMILES. Here we define two endpoint URLs, both for the search of a single SMILES in the REAL space. The difference is in the stereochemistry of the compound to look for: one endpoint allows you to search for the exact stereoisomer specified by the SMILES string, the other for all stereoisomers of the structure. The other possible parameter is reaction_types, an array containing constants that correspond to reaction types as follows:
- 0 - the most straightforward, often 2-component reactions, typically the cheapest
- 1050 - more complex reactions; products are typically more expensive than those of type 0
- 1051 - legacy reactions, not part of the current REAL version (removed from REAL some time ago)
- 1055 - reactions which were candidates for REAL but eventually did not pass; they can still be synthesized by Enamine as custom synthesis
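For convenience, the constants above can be kept as a small lookup table (descriptions paraphrased from the list above):

```python
# reaction_type constants with short descriptions paraphrased from the docs
REACTION_TYPES = {
    0: "sREAL: straightforward, often 2-component, typically the cheapest",
    1050: "mREAL: more complex reactions, typically more expensive",
    1051: "legacy reactions removed from REAL",
    1055: "REAL candidates, available via custom synthesis",
}

# e.g. restrict a request to the current searchable REAL space (sREAL + mREAL)
current_real = [t for t in REACTION_TYPES if t in (0, 1050)]
print(current_real)  # [0, 1050]
```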
import requests
import json
# single smiles search URLs
SINGLE_SMILES_EXACT_STEREO_SEARCH_URL = f"{API_SERVER_URL}/api/v1/space/real/search-structure/single/exact-stereo"
SINGLE_SMILES_ANY_STEREO_SEARCH_URL = f"{API_SERVER_URL}/api/v1/space/real/search-structure/single/any-stereo"
Here is how you search the structure in the up-to-date REAL space:
response = requests.post(
SINGLE_SMILES_EXACT_STEREO_SEARCH_URL,
json={"smiles": structure_of_interest_smiles, "reaction_types": [0,1050, 1051]},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: list = response.json()
The response contains a list of found REAL molecules with the following properties:
- smiles, in a ChemAxon-like format, i.e. cxsmiles
- sntDt, an estimation of the date when the compound became available from REAL
- vSynt, the list of virtual syntheses which can be used to obtain the molecule
- rgn, the list of the reagents used to create the searched molecule
- rsn, the list of RSN-codes, all of which encode the searched molecule. Each RSN code represents a different protocol which may be used with the above reagents.
- query_smiles, the smiles provided to the api in the request
- space, estimation of the category of the compound based on reaction type only
Here is what the response obtained from the API endpoint looks like:
import json
print("Structures found:", len(response_data))
print(json.dumps(response_data, indent=4))
Structures found: 1
[
{
"smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"sntDt": "2015-05-25T11:58:02",
"vSynt": [
{
"rgn": [
{
"code": "EN300-219065",
"smiles": "Cl.CNC(=O)C1CC=2C=CC=CC2N1"
},
{
"code": "EN300-82844",
"smiles": "O=C(O)C=1C=CC=C(N1)OCC(F)(F)F"
}
],
"rsn": [
"s11____10391112____5456408",
"s1626____22060942____22074828"
]
}
],
"query_smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"space": "sREAL"
}
]
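Given a response shaped like the one above, a short sketch walks the vSynt entries and lists the reagent codes and RSN codes for each virtual synthesis (the sample entry copies fields from the printed response):

```python
# Sample hit mirroring the response fields documented above.
sample_hit = {
    "smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
    "vSynt": [
        {
            "rgn": [
                {"code": "EN300-219065", "smiles": "Cl.CNC(=O)C1CC=2C=CC=CC2N1"},
                {"code": "EN300-82844", "smiles": "O=C(O)C=1C=CC=C(N1)OCC(F)(F)F"},
            ],
            "rsn": ["s11____10391112____5456408", "s1626____22060942____22074828"],
        }
    ],
    "space": "sREAL",
}

# For each virtual synthesis, collect the reagent codes and RSN codes.
for synthesis in sample_hit["vSynt"]:
    reagent_codes = [reagent["code"] for reagent in synthesis["rgn"]]
    print(sample_hit["space"], reagent_codes, synthesis["rsn"])
```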
The same request to the any-stereo endpoint gives a somewhat different response JSON, with an additional boolean field "exact" showing whether the found structure exactly matches the stereochemistry of the query structure.
response = requests.post(
SINGLE_SMILES_ANY_STEREO_SEARCH_URL,
json={"smiles": structure_of_interest_smiles, "reaction_types": [0,1050, 1051]},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: list = response.json()
print("Structures found:", len(response_data))
print(json.dumps(response_data, indent=4))
Structures found: 2
[
{
"smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"sntDt": "2015-05-25T11:58:02",
"vSynt": [
{
"rgn": [
{
"code": "EN300-219065",
"smiles": "Cl.CNC(=O)C1CC=2C=CC=CC2N1"
},
{
"code": "EN300-82844",
"smiles": "O=C(O)C=1C=CC=C(N1)OCC(F)(F)F"
}
],
"rsn": [
"s1626____22060942____22074828",
"s11____10391112____5456408"
]
}
],
"query_smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"space": "sREAL",
"exact": true
},
{
"smiles": "CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"sntDt": "2022-11-30T17:55:17",
"vSynt": [
{
"rgn": [
{
"code": "EN300-881857",
"smiles": "CNC(=O)[C@@H]1CC2=CC=CC=C2N1"
},
{
"code": "EN300-82844",
"smiles": "O=C(O)C=1C=CC=C(N1)OCC(F)(F)F"
}
],
"rsn": [
"s1626____22847552____22074828",
"s11____22847536____5456408",
"s527____22847550____5456412"
]
}
],
"query_smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"space": "sREAL",
"exact": false
}
]
Batch search¶
There are also two endpoints for the batch search of query structures. The response JSON is the same as with the corresponding single-SMILES endpoints. The request is a bit different, with query SMILES provided as a list instead of a single string. Let's add one more query SMILES (Z1426524208) and see how the batch API works:
BATCH_SMILES_EXACT_STEREO_SEARCH_URL = f"{API_SERVER_URL}/api/v1/space/real/search-structure/batch/exact-stereo"
BATCH_SMILES_ANY_STEREO_SEARCH_URL = f"{API_SERVER_URL}/api/v1/space/real/search-structure/batch/any-stereo"
another_structure_of_interest_smiles = 'O=C(Cn1cc(-c2ccccc2)nn1)N1CCC(N2CCCC2)C1'
smiles_list = [structure_of_interest_smiles, another_structure_of_interest_smiles]
response = requests.post(
BATCH_SMILES_EXACT_STEREO_SEARCH_URL,
json={"smiles_list": smiles_list, "reaction_types": [0,1050]},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_batch_data: list = response.json()
print("Structures found:", len(response_batch_data))
print(json.dumps(response_batch_data, indent=4))
Structures found: 2
[
{
"smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"sntDt": "2015-05-25T11:58:02",
"vSynt": [
{
"rgn": [
{
"code": "EN300-219065",
"smiles": "Cl.CNC(=O)C1CC=2C=CC=CC2N1"
},
{
"code": "EN300-82844",
"smiles": "O=C(O)C=1C=CC=C(N1)OCC(F)(F)F"
}
],
"rsn": [
"s11____10391112____5456408",
"s1626____22060942____22074828"
]
}
],
"query_smiles": "CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1",
"space": "sREAL"
},
{
"smiles": "O=C(Cn1cc(-c2ccccc2)nn1)N1CCC(N2CCCC2)C1",
"sntDt": "2012-07-19T15:02:00",
"vSynt": [
{
"rgn": [
{
"code": "EN300-61758",
"smiles": "C1CCN(C1)C2CCNC2"
},
{
"code": "EN300-107888",
"smiles": "O=C(O)CN1C=C(N=N1)C=2C=CC=CC2"
}
],
"rsn": [
"s11____25831198____7116776",
"s22____2910282____7116778"
]
},
{
"rgn": [
{
"code": "EN300-61758",
"smiles": "C1CCN(C1)C2CCNC2"
},
{
"code": "EN300-69260",
"smiles": "[N-]=[N+]=NCC(=O)O"
},
{
"code": "EN300-19670",
"smiles": "C#CC=1C=CC=CC1"
}
],
"rsn": [
"m274860____15217226____12536600____12655630"
]
}
],
"query_smiles": "O=C(Cn1cc(-c2ccccc2)nn1)N1CCC(N2CCCC2)C1",
"space": "sREAL"
}
]
Error Handling for Invalid SMILES in Batch Searches¶
Starting from 2026-01-27, batch SMILES search endpoints (both /batch/exact-stereo and /batch/any-stereo) implement robust error handling for invalid or problematic SMILES. This means that invalid or charged SMILES in the batch will not cause the entire request to fail. Instead, the API processes each SMILES independently:
- Valid SMILES: processed normally and return search results as expected
- Charged molecules: returned with an "error" field containing "No charged structures in REAL" (since charged structures are not available in REAL Space)
- Invalid/malformed SMILES: returned with an "error" field containing "Smiles standardization failed: [error details]"
This behavior allows you to submit large batches of SMILES without worrying that a single invalid entry will break the entire request. Each result in the response will contain either:
- Full search data (smiles, sntDt, vSynt, etc.) for valid structures
- An "error" field with null data fields for problematic SMILES
Here's an example demonstrating this behavior with a mixed batch:
# Example with mixed valid, charged, and invalid SMILES
mixed_smiles_list = [
"FCCN1CCN(CC1)C(=O)N2CCOCC2", # Valid SMILES
"[Cl-].CCOC(=O)c1c(C)c(C)sc1NC(=O)C[N+]1(C)CCOCC1", # Charged molecule
"NotAValidSMILES", # Invalid SMILES
another_structure_of_interest_smiles # Valid SMILES
]
response = requests.post(
BATCH_SMILES_EXACT_STEREO_SEARCH_URL,
json={"smiles_list": mixed_smiles_list, "reaction_types": [0,1050]},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
mixed_batch_data: list = response.json()
# Print results showing both successful searches and errors
print(json.dumps(mixed_batch_data, indent=4))
[
{
"smiles": "O=C(Cn1cc(-c2ccccc2)nn1)N1CCC(N2CCCC2)C1",
"sntDt": "2012-07-19T15:02:00",
"vSynt": [
{
"rgn": [
{
"code": "EN300-61758",
"smiles": "C1CCN(C1)C2CCNC2"
},
{
"code": "EN300-107888",
"smiles": "O=C(O)CN1C=C(N=N1)C=2C=CC=CC2"
}
],
"rsn": [
"s11____25831198____7116776",
"s22____2910282____7116778"
]
},
{
"rgn": [
{
"code": "EN300-61758",
"smiles": "C1CCN(C1)C2CCNC2"
},
{
"code": "EN300-69260",
"smiles": "[N-]=[N+]=NCC(=O)O"
},
{
"code": "EN300-19670",
"smiles": "C#CC=1C=CC=CC1"
}
],
"rsn": [
"m274860____15217226____12536600____12655630"
]
}
],
"query_smiles": "O=C(Cn1cc(-c2ccccc2)nn1)N1CCC(N2CCCC2)C1",
"space": "sREAL"
},
{
"smiles": "O=C(N1CCOCC1)N1CCN(CCF)CC1",
"sntDt": "2023-02-23T00:00:00",
"vSynt": [
{
"rgn": [
{
"code": "EN300-316571",
"smiles": "Cl.Cl.FCCN1CCNCC1"
},
{
"code": "EN300-21372",
"smiles": "O=C(Cl)N1CCOCC1"
}
],
"rsn": [
"s68____22162794____22163998"
]
},
{
"rgn": [
{
"code": "EN300-316571",
"smiles": "Cl.Cl.FCCN1CCNCC1"
},
{
"code": "EN300-110025",
"smiles": "O=C(N1C=CN=C1)N2CCOCC2"
}
],
"rsn": [
"s487____14925160____28909612"
]
}
],
"query_smiles": "FCCN1CCN(CC1)C(=O)N2CCOCC2",
"space": "sREAL"
},
{
"smiles": "[Cl-].CCOC(=O)c1c(C)c(C)sc1NC(=O)C[N+]1(C)CCOCC1",
"query_smiles": "[Cl-].CCOC(=O)c1c(C)c(C)sc1NC(=O)C[N+]1(C)CCOCC1",
"error": "No charged structures in REAL"
},
{
"smiles": "NotAValidSMILES",
"query_smiles": "NotAValidSMILES",
"error": "Smiles standardization failed: NotAValidSMILES"
}
]
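A convenient way to post-process such a mixed response is to partition it on the "error" field; the sample list below mimics the response shape shown above:

```python
# Sample entries mimicking the mixed batch response shown above.
mixed_batch_data = [
    {"smiles": "O=C(Cn1cc(-c2ccccc2)nn1)N1CCC(N2CCCC2)C1", "space": "sREAL"},
    {"smiles": "NotAValidSMILES",
     "error": "Smiles standardization failed: NotAValidSMILES"},
]

# Split into successful hits and failed entries.
ok = [e for e in mixed_batch_data if "error" not in e]
failed = [e for e in mixed_batch_data if "error" in e]
print(len(ok), "found,", len(failed), "failed")
```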
By RSN code¶
Sometimes you may have the RSN codes for REAL compounds and want to know the structures. This section describes the API endpoints that provide this information.
%%bash
echo m11____9192732____19875708 > RSN_LIST.txt
echo m280212____19665688____19600570 >> RSN_LIST.txt
BATCH_RSN_SEARCH_URL = f"{API_SERVER_URL}/api/v1/space/real/search-structure/batch/rsn"
rsn_list = []
with open("RSN_LIST.txt") as rsn_file:
for line in rsn_file:
rsn_list.append(line.strip())
response = requests.post(
BATCH_RSN_SEARCH_URL,
json={"rsn_list": rsn_list},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_batch_data: list = response.json()
print(json.dumps(response_batch_data, indent=4))
# uncomment the following lines if you want to save the response in a tsv file
# with open("RSN_LIST_res.txt", "w") as out_file:
# for row in response_batch_data:
# out_file.write("\t".join(row) + "\n")
[
[
"m11____9192732____19875708",
"Cc1ccc(CNC(=O)c2cccc3c(I)n[nH]c23)c(-n2cc(Cl)cn2)n1"
],
[
"m280212____19665688____19600570",
"O=C(c1nccn2nccc12)N1CCc2cccc(-c3noc(=O)[nH]3)c2C1"
]
]
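Since the batch RSN endpoint returns [rsn, smiles] pairs, turning them into a lookup dict is a one-liner (pairs copied from the output above):

```python
# [rsn, smiles] pairs as returned by the batch RSN endpoint (copied from above)
response_batch_data = [
    ["m11____9192732____19875708",
     "Cc1ccc(CNC(=O)c2cccc3c(I)n[nH]c23)c(-n2cc(Cl)cn2)n1"],
    ["m280212____19665688____19600570",
     "O=C(c1nccn2nccc12)N1CCc2cccc(-c3noc(=O)[nH]3)c2C1"],
]

# rsn -> smiles lookup
rsn_to_smiles = dict(response_batch_data)
print(len(rsn_to_smiles), "codes resolved")
```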
Search analogs for a structure¶
Another way to search REAL is via the analog search endpoints. Analog search includes three different types of searches, described below. Each type consists of two endpoints: one estimating the size of the space defined by the request, and one performing the enumeration of the target structures. The enumerated structures are then available as a downloadable file.
Synthon SMARTS search¶
This type allows substructure search in the synthon space by specifying a single SMARTS pattern. The search starts with a substructure match of the synthons in the reactions defined by the "reactions", "reaction_types", and "num_components" request parameters. The synthons matching the SMARTS are then combined with valid counterparts in the corresponding reactions. After that, for the space size estimation, the number of synthon combinations is calculated, taking into account the MW of the final structure (grouped in bins of 5 Da width). For the enumeration task, the synthon combinations are supplied to the enumeration job together with enumeration strategy parameters and filters used to limit the size and increase the diversity of the target structure space. The idea of having two endpoints is to first run an estimation and then decide whether the search parameters need to change before running the enumeration.
In this section we will use a fragment of compound Z2193655754 (from the section above) as a SMARTS query. The example request is shown below.
smarts_of_interest = 'CNC(=O)C1Cc2ccccc2N1'
SYNTHON_SEARCH_ESTIMATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/sss/estimate/snt"
SYNTHON_SEARCH_ENUMERATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/sss/enumerate/snt"
response = requests.post(
SYNTHON_SEARCH_ESTIMATION_URL,
json={
"smarts": smarts_of_interest,
"reactions": [{"id": 11,"roles": [1]}, {"id": 527, "roles": [1, 2]}],
"reaction_types": [0, 1050],
"num_components": [2, 3],
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
# the format of the response is [{reactionId: int, distribution: [{mw, int, cnt: int}, ...]}, ...]
response_data: list = response.json()
import functools
# here is a summary of the response
summary = [
    {
        "reactionId": d.get("reactionId"),
        "distributionSummary": {
            "len": len(d.get("distribution")),
            "sumCnt": sum(val.get("cnt") for val in d.get("distribution")),
        },
    }
    for d in response_data
]
print(json.dumps(summary, indent=4))
# uncomment the following line if you want to print the full response json as is
# print(json.dumps(response_data, indent=4))
[
{
"reactionId": 11,
"distributionSummary": {
"len": 89,
"sumCnt": 55646
}
},
{
"reactionId": 527,
"distributionSummary": {
"len": 84,
"sumCnt": 19893
}
}
]
The meaning of the parameters in the request json:
- smarts - synthon SMARTS to search
- reactions - a list of reaction id and roles to search for
- reaction_types - same as in "Lookup for a structure in REAL" section
- num_components - number of components in the reactions, e.g. [2] for two-component reactions
From a performance point of view, the smaller the reaction list defined by the search parameters, the faster the search. So please do not add reactions, reaction_types, or num_components that you don't need.
As for the enumeration, there are two additional parameters in request json:
- filter
- max_enum - maximum allowed number of compounds to enumerate
- min_mw - minimal allowed molecular weight for the structure
- max_mw - maximum allowed molecular weight for the structure
- strategy
- type - currently one of two values 'full' or 'weighted', see below
- snt_max - maximum number of structures enumerated on the same synthon smiles, used only for 'weighted' strategy
The filters are used to limit the number of enumerated compounds. The 'full' strategy allows the enumeration to proceed until all possible synthon combinations are checked, the max_enum number is reached, or a job timeout occurs. The 'weighted' strategy additionally limits the number of occurrences of each synthon SMILES and should be used to increase diversity. Thus, specifying snt_max = 50 with the 'weighted' strategy allows the enumeration of at most 50 random structures on any single synthon SMILES. In this case, the number of enumerated structures may be significantly lower than the estimated number.
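To make the 'weighted' strategy concrete, here is an illustrative sketch (not the server implementation) that caps the number of structures kept per key synthon at snt_max; the server picks randomly, while this sketch keeps the first combinations for determinism:

```python
from collections import Counter

def weighted_cap(combinations, snt_max):
    """Keep at most snt_max combinations per key synthon (illustrative only)."""
    seen = Counter()
    kept = []
    for key_synthon, counterpart in combinations:
        if seen[key_synthon] < snt_max:
            seen[key_synthon] += 1
            kept.append((key_synthon, counterpart))
    return kept

# Synthon "A" matches 5 counterparts, "B" matches 2; with snt_max=3,
# "A" is capped at 3 while "B" keeps both.
combos = [("A", i) for i in range(5)] + [("B", i) for i in range(2)]
print(len(weighted_cap(combos, snt_max=3)))  # 5
```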
response = requests.post(
SYNTHON_SEARCH_ENUMERATION_URL,
json={
"smarts": smarts_of_interest,
"reactions": [{"id": 11,"roles": [1]}, {"id": 527, "roles": [1, 2]}],
"reaction_types": [0, 1050],
"num_components": [2, 3],
"filter": {
"max_enum": 1000,
"min_mw": 0,
"max_mw": 500
},
"strategy": {
"type": "weighted",
"snt_max": 10
}
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
print(json.dumps(response_data, indent=4))
enumeration_job_id = response_data.get("id")
{
"id": "e3f5238c-9409-4205-9fc4-5ace87fb2226",
"status": "CREATED",
"details": {
"priority": 1,
"created": "2026-01-28T16:02:29.831378"
}
}
The response JSON contains general information about the submitted job and, most importantly, the id of the enumeration job. Using this id you can check the enumeration status and get the download link.
import time
CHECK_ENUMERATION_STATUS_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/check-enumeration"
def get_link_to_enumeration_file(enumeration_job_id: str) -> str:
response_check = requests.get(f"{CHECK_ENUMERATION_STATUS_URL}?enum_id={enumeration_job_id}", headers={"X-API-KEY": YOUR_API_KEY})
response_data_check: dict = response_check.json()
link_to_file_check = response_data_check.setdefault("details", {}).get("link")
if not link_to_file_check:
print("Waiting for enumeration to complete...")
time.sleep(2)
return get_link_to_enumeration_file(enumeration_job_id)
print(json.dumps(response_data_check, indent=4))
return link_to_file_check
link_to_file = get_link_to_enumeration_file(enumeration_job_id)
Waiting for enumeration to complete...
{
"id": "e3f5238c-9409-4205-9fc4-5ace87fb2226",
"status": "COMPLETED",
"details": {
"size": 20,
"created": "2026-01-28T16:02:29.831378",
"finished": "2026-01-28T14:02:30.097490",
"link": "/api/v1/space/real/analogs/access_enumeration_file?enum_id=e3f5238c-9409-4205-9fc4-5ace87fb2226"
}
}
The link points to a zip archive of a tab-delimited SMILES file with smiles and rsn columns. Note that the response must be in status "COMPLETED" for the link to appear - this may take several seconds. Please give it some time and repeat the request to the check-enumeration endpoint if the enumeration status is e.g. "IN PROGRESS".
response = requests.get(f"{FILE_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
from io import BytesIO
import zipfile
import pandas as pd
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
for zipped_file_name in zf.namelist():
with zf.open(zipped_file_name) as zipped_file:
df = pd.read_csv(zipped_file, delimiter="\t")
df.head(5)
| SMILES | RSN | |
|---|---|---|
| 0 | CNC(=O)C1Cc2ccccc2N1C(=O)c1cc(OC)c(OC(F)F)c(OC)c1 | m11____10391112____6202374 |
| 1 | CNC(=O)C1Cc2ccccc2N1C(=O)CCc1ncc(-c2ccc(Cl)cc2... | m11____10391112____231009 |
| 2 | CNC(=O)C1Cc2ccccc2N1C(=O)C1CCN(C(=O)N(C)Cc2ccc... | m11____10391112____3002638 |
| 3 | CNC(=O)C1Cc2ccccc2N1C(=O)c1ccnc(CN(C)C)c1 | m11____10391112____26273432 |
| 4 | CNC(=O)C1Cc2ccccc2N1C(=O)Cc1cc(C)no1 | m11____10391112____22135580 |
Target SMILES substructure search¶
This search type allows substructure search using a query SMILES. The search starts with the decomposition of the SMILES into synthons, followed by a substructure search of the obtained synthons in the synthon space, in the same way as described above. The request JSON differs from the previous one in that reactions is replaced by reaction_ids, and smarts by smiles.
TARGET_SS_SEARCH_ESTIMATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/sss/estimate/rxn"
TARGET_SS_SEARCH_ENUMERATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/sss/enumerate/rxn"
response = requests.post(
TARGET_SS_SEARCH_ESTIMATION_URL,
json={
"smiles": structure_of_interest_smiles,
"reaction_ids": [11, 527],
"reaction_types": [0, 1050],
"num_components": [2, 3],
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: list = response.json()
print(json.dumps(response_data, indent=4))
[
{
"reactionId": 11,
"distribution": [
{
"mw": 410,
"cnt": 2
},
{
"mw": 375,
"cnt": 2
},
{
"mw": 425,
"cnt": 2
}
]
},
{
"reactionId": 527,
"distribution": [
{
"mw": 375,
"cnt": 1
},
{
"mw": 410,
"cnt": 1
},
{
"mw": 425,
"cnt": 1
}
]
}
]
The enumeration request JSON uses the same additional parameters as the synthon SMARTS type.
response = requests.post(
TARGET_SS_SEARCH_ENUMERATION_URL,
json={
"smiles": structure_of_interest_smiles,
"reaction_ids": [11, 527],
"reaction_types": [0, 1050],
"num_components": [2, 3],
"filter": {
"max_enum": 1000,
"min_mw": 0,
"max_mw": 500
},
"strategy": {
"type": "weighted",
"snt_max": 10
}
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
enumeration_job_id = response_data.get("id")
# wait until the enumeration is completed
link_to_file = get_link_to_enumeration_file(enumeration_job_id)
Waiting for enumeration to complete...
{
"id": "756208d7-f6bb-44ff-aa8c-25dd4fd8c1ae",
"status": "COMPLETED",
"details": {
"size": 6,
"created": "2026-01-28T16:02:34.639958",
"finished": "2026-01-28T14:02:34.744426",
"link": "/api/v1/space/real/analogs/access_enumeration_file?enum_id=756208d7-f6bb-44ff-aa8c-25dd4fd8c1ae"
}
}
response = requests.get(f"{FILE_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
from io import BytesIO
import zipfile
import pandas as pd
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
for zipped_file_name in zf.namelist():
with zf.open(zipped_file_name) as zipped_file:
df = pd.read_csv(zipped_file, delimiter="\t")
df.head(5)
| SMILES | RSN | |
|---|---|---|
| 0 | CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)F)n1 | m11____10391112____5456408 |
| 1 | CNC(=O)C1Cc2ccccc2N1C(=O)c1nc(OCC(F)(F)F)ccc1Cl | m11____10391112____17512102 |
| 2 | CNC(=O)C1Cc2ccccc2N1C(=O)c1cc2ccccc2c(OCC(F)(F... | m11____10391112____5453286 |
| 3 | CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cccc(OCC(F)(F)... | m11____22847536____5456408 |
| 4 | CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1nc(OCC(F)(F)F)... | m11____22847536____17512102 |
Target SMILES similarity search¶
This search type allows similarity search using a query SMILES. The search starts with the decomposition of the SMILES into synthons, followed by a similarity search of the obtained synthons in the synthon space, in the same way as described above. For the synthon search, RDKit fingerprints are used with a Tanimoto similarity threshold of 0.5. The found synthons are then combined with valid counterparts in the corresponding reactions, and the final enumerated structures are filtered by Tanimoto similarity to the target molecule, with a cut-off specified in the filter parameter.
TARGET_SIM_SEARCH_ESTIMATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/sim/estimate"
TARGET_SIM_SEARCH_ENUMERATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/sim/enumerate"
response = requests.post(
TARGET_SIM_SEARCH_ESTIMATION_URL,
json={
"smiles": structure_of_interest_smiles,
"reaction_ids": [11, 527],
"reaction_types": [0, 1050],
"num_components": [2, 3],
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: list = response.json()
summary = [
    {
        "reactionId": d.get("reactionId"),
        "distributionSummary": {
            "len": len(d.get("distribution")),
            "sumCnt": sum(val.get("cnt") for val in d.get("distribution")),
        },
    }
    for d in response_data
]
print(json.dumps(summary, indent=4))
[
{
"reactionId": 11,
"distributionSummary": {
"len": 57,
"sumCnt": 3397
}
},
{
"reactionId": 527,
"distributionSummary": {
"len": 53,
"sumCnt": 244
}
}
]
Enumeration request with min similarity of 0.9:
response = requests.post(
TARGET_SIM_SEARCH_ENUMERATION_URL,
json={
"smiles": structure_of_interest_smiles,
"reaction_ids": [11, 527],
"reaction_types": [0, 1050],
"num_components": [2, 3],
"filter": {
"max_enum": 1000,
"min_mw": 0,
"max_mw": 500,
"fp_sim": 0.9
},
"strategy": {
"type": "weighted",
"snt_max": 10
}
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
enumeration_job_id = response_data.get("id")
# wait until the enumeration is completed
link_to_file = get_link_to_enumeration_file(enumeration_job_id)
Waiting for enumeration to complete...
{
"id": "af817a2c-cee6-43f9-829e-6781b391355a",
"status": "COMPLETED",
"details": {
"size": 21,
"created": "2026-01-28T16:02:38.870619",
"finished": "2026-01-28T14:02:39.535638",
"link": "/api/v1/space/real/analogs/access_enumeration_file?enum_id=af817a2c-cee6-43f9-829e-6781b391355a"
}
}
response = requests.get(f"{FILE_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
for zipped_file_name in zf.namelist():
with zf.open(zipped_file_name) as zipped_file:
df = pd.read_csv(zipped_file, delimiter="\t")
df.head(5)
| SMILES | RSN | Similarity | |
|---|---|---|---|
| 0 | CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cccc(OCC(C)NC(... | m11____22847536____21473196 | 0.919 |
| 1 | CNC(=O)C1Cc2ccccc2N1C(=O)c1cccc(OCC(C)NC(=O)OC... | m11____10391112____21473196 | 0.919 |
| 2 | CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cccc(OC(F)(F)F)n1 | m11____22847536____21846050 | 0.946 |
| 3 | CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cc(OC)cc(OC)n1 | m11____22847536____21615720 | 0.919 |
| 4 | CNC(=O)C1Cc2ccccc2N1C(=O)c1cc(OC)cc(OC)n1 | m11____10391112____21615720 | 0.919 |
Note that the enumeration file for similarity search contains an additional Similarity column - the Tanimoto similarity to the query molecule.
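With the Similarity column present, you can post-filter the downloaded table, e.g. keep only close analogs; the rows below are copied from the table above for illustration:

```python
import pandas as pd

# Two rows copied from the similarity-search output above.
df = pd.DataFrame({
    "SMILES": ["CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cccc(OC(F)(F)F)n1",
               "CNC(=O)[C@@H]1Cc2ccccc2N1C(=O)c1cc(OC)cc(OC)n1"],
    "RSN": ["m11____22847536____21846050", "m11____22847536____21615720"],
    "Similarity": [0.946, 0.919],
})

# Keep only analogs with Tanimoto similarity >= 0.94, most similar first.
close = df[df["Similarity"] >= 0.94].sort_values("Similarity", ascending=False)
print(len(close))  # 1
```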
Delete enumeration file¶
The enumeration files are stored on the server for one week and then automatically deleted. If you would like to delete a file right after the enumeration, here is the request:
DELETE_ENUMERATION_FILE_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/delete_enumeration_file"
response = requests.get(f"{DELETE_ENUMERATION_FILE_URL}?enum_id={enumeration_job_id}", headers={"X-API-KEY": YOUR_API_KEY})
response_data: dict = response.json()
print(json.dumps(response_data, indent=4))
{
"id": "af817a2c-cee6-43f9-829e-6781b391355a",
"status": "DELETED",
"details": {}
}
Now if you try to access the file, it won't work anymore:
response = requests.get(f"{API_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
print(f"Status: {response.status_code}, json: {response.json()}")
Status: 410, json: {'detail': 'Could not get enumeration file'}
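When reading enumeration files programmatically, it can help to branch on the HTTP status code before parsing the body. A minimal sketch (the helper name `check_enumeration_status` is hypothetical, not part of the API): the 410 above indicates the file is gone, while 401/403 typically indicate authentication problems.

```python
def check_enumeration_status(status_code: int) -> str:
    """Map status codes from access_enumeration_file to a human-readable state."""
    if status_code == 200:
        return "ok"
    if status_code == 410:
        return "gone"          # file was deleted or expired after one week
    if status_code in (401, 403):
        return "unauthorized"  # missing or invalid X-API-KEY
    return "error"
```

Usage: `check_enumeration_status(response.status_code)` before calling `response.content`.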
Automatic representative REAL analogs search¶
This search type implements the search algorithm described in this blog post. Under the hood it uses all three search methods described above. Briefly, an attempt is made to find analogs; if any approach fails, the SMILES/SMARTS is simplified and the process is repeated. This way an enumerated REAL subspace is generated, filtered according to the provided MW limits, and the result is returned as a link to a file together with enumeration details. Due to the iterative nature of the search and the fact that it uses all REAL reactions, it may take more time than the other searches, up to minutes.
AUTO_SEARCH_ENUMERATION_URL = f"{API_SERVER_URL}/api/v1/space/real/analogs/auto/enumerate"
response = requests.post(
AUTO_SEARCH_ENUMERATION_URL,
json={
"smiles": "COC=1N=CN=C2CCN(CC12)C(=O)NCCN3CCCC3=O",
"filter": {"min_mw": 200, "max_mw": 600},
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
print(json.dumps(response_data, indent=4))
link_to_file = response_data.setdefault("details", {}).get("link")
{
"id": "52056440-aa61-4a76-8941-ed3a02986fd5",
"status": "COMPLETED",
"details": {
"size": 3054,
"created": "2026-01-28T16:02:42.712951",
"finished": "2026-01-28T16:03:04.558796",
"link": "/api/v1/space/real/analogs/access_enumeration_file?enum_id=52056440-aa61-4a76-8941-ed3a02986fd5&auto=1",
"avg_sim": 0.556,
"dist_sim": {
"0.6": 650,
"0.65": 124,
"0.55": 761,
"0.5": 483,
"0.45": 459,
"0.75": 52,
"0.8": 32,
"0.85": 19,
"0.7": 39,
"0.9": 7,
"0.4": 425,
"0.35": 3
},
"mode_data": [
{
"enumerated_size": 2065,
"estimated_size": 96013,
"snt_max": 50,
"type": "TGT"
},
{
"enumerated_size": 992,
"estimated_size": 11959,
"snt_max": 62,
"type": "MMS"
}
],
"est_total": 107972
}
}
Among the details for the automatic search, there is summary information about the similarity of the structures enumerated to the query SMILES.
import matplotlib.pyplot as plt
sorted_sim = sorted(response_data.get("details").get("dist_sim").items())
plt.bar([k for k, v in sorted_sim], [v for k, v in sorted_sim])
<BarContainer object of 12 artists>
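Note that the `dist_sim` keys are strings, so plain `sorted()` orders them lexicographically; that happens to give the right order for these particular bins, but sorting by numeric value is safer in general. A small sketch using the distribution from the response above:

```python
# dist_sim keys are strings; sorting them as floats is safer than lexicographic order
dist_sim = {"0.6": 650, "0.65": 124, "0.55": 761, "0.5": 483, "0.45": 459,
            "0.75": 52, "0.8": 32, "0.85": 19, "0.7": 39, "0.9": 7,
            "0.4": 425, "0.35": 3}  # values from the response above

sorted_sim = sorted(dist_sim.items(), key=lambda kv: float(kv[0]))
print([k for k, _ in sorted_sim])
```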
As with other search approaches, the structures can be obtained from the link:
link_to_file = response_data.setdefault("details", {}).get("link")
response = requests.get(f"{FILE_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
    for zipped_file_name in zf.namelist():
        with zf.open(zipped_file_name) as zipped_file:
            df = pd.read_csv(zipped_file, delimiter="\t")
df.head(5)
| | SMILES | RSN | Mode | Similarity |
|---|---|---|---|---|
| 0 | Cc1ncnc2c1CN(C(=O)N1CCN(C(=O)OC(C)(C)C)C3(CC3)... | m68____23321436____30097764 | TGT | 0.613 |
| 1 | C#CCCN1CCN(C(=O)N2CCc3ncnc(C)c3C2)CC1 | m68____23321436____27816314 | TGT | 0.675 |
| 2 | Cc1ncnc2c1CN(C(=O)N1CCN(c3cccc4sccc34)CC1)CC2 | m68____23321436____26735642 | TGT | 0.562 |
| 3 | Cc1ncnc2c1CN(C(=O)NC1CCN(C(=O)OC(C)(C)C)C1)CC2 | m68____23321436____21690000 | TGT | 0.629 |
| 4 | Cc1ncnc2c1CN(C(=O)N(C)CCN(C)C(=O)OC(C)(C)C)CC2 | m68____23321436____26735670 | TGT | 0.636 |
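Since the file also carries a Mode column, a quick `groupby` shows how each search mode contributed to the combined enumeration. A sketch with illustrative values (the Mode labels here are assumed for the example, not taken row-by-row from the file above):

```python
import pandas as pd

# Illustrative subset of the combined enumeration file: Mode and Similarity columns
df = pd.DataFrame({
    "Mode": ["TGT", "TGT", "MMS", "MMS", "MMS"],
    "Similarity": [0.613, 0.675, 0.562, 0.629, 0.636],
})

# Count of structures and mean similarity per search mode
per_mode = df.groupby("Mode")["Similarity"].agg(["count", "mean"])
print(per_mode)
```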
Enamine in-stock analogs¶
For 4.3M Enamine in-stock compounds, these automatic searches have already been executed and the data is available. Note that there is no need to pass an API key in the request, since the data is public:
PUBLIC_ANALOGS_DATA_URL = f"{API_SERVER_URL}/api/v1/space/real/public/analog-details"
response = requests.get(
PUBLIC_ANALOGS_DATA_URL,
params={
"catalog_id": "Z3574095764"
},
)
response_data: dict = response.json()
print(json.dumps(response_data, indent=4))
[
{
"id": 15161890,
"smiles": "CCN1CCN(CC1(C)C)C(=O)NC(C)CCC=2C=CN=CC2",
"catalog_id": "Z3574095764",
"updt": "2025-10-07 22:56:09.569000",
"details": {
"size": 10321,
"avg_sim": 0.362,
"dist_sim": {
"0.2": 1573,
"0.3": 1884,
"0.4": 1180,
"0.5": 632,
"0.6": 156,
"0.7": 24,
"0.8": 4,
"0.15": 12,
"0.25": 1760,
"0.35": 1886,
"0.45": 782,
"0.55": 354,
"0.65": 63,
"0.75": 10,
"0.85": 1
},
"est_total": 1600938,
"mode_data": [
{
"type": "TGT",
"snt_max": 50,
"estimated_size": 55979,
"enumerated_size": 2408
},
{
"type": "MMS",
"snt_max": 12,
"estimated_size": 1544959,
"enumerated_size": 7916
}
]
},
"last_id": 15161890
},
{
"id": 10551172,
"smiles": "CCN1CCN(CC1(C)C)C(=O)NC(C)CCC=2C=CN=CC2",
"catalog_id": "Z3574095764",
"updt": "2025-07-09 14:53:05.485636",
"details": {
"size": 10019,
"avg_sim": 0.359,
"dist_sim": {
"0.2": 1505,
"0.3": 1874,
"0.4": 1153,
"0.5": 583,
"0.6": 120,
"0.7": 20,
"0.8": 2,
"0.15": 7,
"0.25": 1793,
"0.35": 1829,
"0.45": 782,
"0.55": 292,
"0.65": 48,
"0.75": 10,
"0.85": 1
},
"est_total": 1533845,
"mode_data": [
{
"type": "TGT",
"snt_max": 50,
"estimated_size": 54299,
"enumerated_size": 2213
},
{
"type": "MMS",
"snt_max": 12,
"estimated_size": 1479546,
"enumerated_size": 7810
}
]
},
"last_id": 15161890
},
{
"id": 5990702,
"smiles": "CCN1CCN(CC1(C)C)C(=O)NC(C)CCC=2C=CN=CC2",
"catalog_id": "Z3574095764",
"updt": "2025-04-08 15:32:09.107773",
"details": {
"size": 9835,
"avg_sim": 0.358,
"dist_sim": {
"0.2": 1532,
"0.3": 1810,
"0.4": 1147,
"0.5": 560,
"0.6": 113,
"0.7": 17,
"0.8": 1,
"0.9": 2,
"0.15": 11,
"0.25": 1747,
"0.35": 1822,
"0.45": 745,
"0.55": 268,
"0.65": 56,
"0.75": 4
},
"est_total": 1492000,
"mode_data": [
{
"type": "TGT",
"snt_max": 50,
"estimated_size": 53351,
"enumerated_size": 2113
},
{
"type": "MMS",
"snt_max": 12,
"estimated_size": 1438649,
"enumerated_size": 7727
}
]
},
"last_id": 15161890
}
]
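The endpoint returns several historical enumeration runs for the same compound. To work with the most recent one, the entries can be sorted by the `updt` timestamp. A sketch over a reduced copy of the response shown above:

```python
# Illustrative subset of the response above: one dict per historical run
response_data = [
    {"id": 15161890, "updt": "2025-10-07 22:56:09.569000", "details": {"size": 10321}},
    {"id": 10551172, "updt": "2025-07-09 14:53:05.485636", "details": {"size": 10019}},
    {"id": 5990702,  "updt": "2025-04-08 15:32:09.107773", "details": {"size": 9835}},
]

# The timestamps are ISO-like strings, so lexicographic order equals chronological order
latest = max(response_data, key=lambda entry: entry["updt"])
print(latest["id"], latest["details"]["size"])
```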
import matplotlib.pyplot as plt
sorted_sim = sorted(response_data[0].get("details").get("dist_sim").items())
plt.bar([k for k, v in sorted_sim], [v for k, v in sorted_sim])
<BarContainer object of 15 artists>
It is also possible to get data for a list of in-stock compound analogs:
PUBLIC_ANALOGS_DATA_BATCH_URL = f"{API_SERVER_URL}/api/v1/space/real/public/batch-analog-details"
response = requests.post(
PUBLIC_ANALOGS_DATA_BATCH_URL,
json={
"catalog_ids": ["Z3574095764", "Z9240163731"]
},
headers={
"Content-Type": "application/json",
}
)
response_data: dict = response.json()
print(json.dumps(response_data, indent=4))
[
{
"id": 15161890,
"smiles": "CCN1CCN(CC1(C)C)C(=O)NC(C)CCC=2C=CN=CC2",
"catalog_id": "Z3574095764",
"updt": "2025-10-07 22:56:09.569000",
"details": {
"size": 10321,
"avg_sim": 0.362,
"dist_sim": {
"0.2": 1573,
"0.3": 1884,
"0.4": 1180,
"0.5": 632,
"0.6": 156,
"0.7": 24,
"0.8": 4,
"0.15": 12,
"0.25": 1760,
"0.35": 1886,
"0.45": 782,
"0.55": 354,
"0.65": 63,
"0.75": 10,
"0.85": 1
},
"est_total": 1600938,
"mode_data": [
{
"type": "TGT",
"snt_max": 50,
"estimated_size": 55979,
"enumerated_size": 2408
},
{
"type": "MMS",
"snt_max": 12,
"estimated_size": 1544959,
"enumerated_size": 7916
}
]
},
"last_id": 15161890
},
{
"id": 15265830,
"smiles": "C=CCC(NC(C)=O)C(=O)NC1(COC)CN(C1)C(=O)C=2C=C(C)N=C3C2N=C(C)N3C",
"catalog_id": "Z9240163731",
"updt": "2025-10-09 21:02:38.581851",
"details": {
"size": 1987,
"avg_sim": 0.623,
"dist_sim": {
"0.5": 182,
"0.6": 689,
"0.7": 231,
"0.8": 18,
"0.45": 17,
"0.55": 602,
"0.65": 151,
"0.75": 96,
"0.85": 1
},
"est_total": 891727,
"mode_data": [
{
"type": "TGT",
"snt_max": 71,
"estimated_size": 838753,
"enumerated_size": 994
},
{
"type": "MMS",
"snt_max": 71,
"estimated_size": 52974,
"enumerated_size": 994
}
]
},
"last_id": 15265830
}
]
The response of these public endpoints is just a description of the enumeration results. The file itself is available at the following URL:
catalog_id = "Z9240163731"
PUBLIC_ANALOGS_FILE_URL = f"{FILE_SERVER_URL}/public-enum-files/{catalog_id}"
response = requests.get(PUBLIC_ANALOGS_FILE_URL)
file_content = response.content
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
    for zipped_file_name in zf.namelist():
        with zf.open(zipped_file_name) as zipped_file:
            df = pd.read_csv(zipped_file, delimiter="\t")
df.head(5)
| | SMILES | RSN | Mode | Similarity |
|---|---|---|---|---|
| 0 | CN(Cc1cc(Br)c(F)cc1F)C(=O)c1cc(C2CC2)nc2c1nc1n... | m11____25844392____323832 | TGT | 0.676 |
| 1 | O=C(c1cc(C2CC2)nc2c1nc1n2CCCCC1)N1CCc2ncc(C(F)... | m11____22186334____323832 | TGT | 0.688 |
| 2 | COC(=O)c1ccccc1C1CN(C(=O)c2cc(C3CC3)nc3c2nc2n3... | m11____22166432____323832 | TGT | 0.702 |
| 3 | CC1CC(=O)CC(C2CC2)N1C(=O)c1cc(C2CC2)nc2c1nc1n2... | m11____10973556____323832 | TGT | 0.718 |
| 4 | C[C@@H]1Cn2nccc2CN1C(=O)c1cc(C2CC2)nc2c1nc1n2C... | m11____28644182____323832 | TGT | 0.680 |
Frequently Asked Questions¶
Note: The examples in this section use RDKit for molecule manipulation and visualization. Make sure it is installed (e.g. by running pip install rdkit) before running the code below.
#!uv add rdkit
#!pip install rdkit
Similarity search question¶
Q: Some people believe that no similarity searching is done at the synthon level, only a substructure search followed by a similarity search at the level of enumerated structures. Can you please clarify?
A: Let's run an enumeration via API and check if the obtained structures are indeed substructures of the query molecule.
# this is our example query, you can try different one
structure_of_interest_smiles_sim = "CCCC(C)(COC)NC(=O)c1ccccc1F"
# 0.5 similarity cutoff may be considered low, but with RDKitFingerprint it is not
response = requests.post(
TARGET_SIM_SEARCH_ENUMERATION_URL,
json={
"smiles": structure_of_interest_smiles_sim,
"reaction_ids": [22],
"reaction_types": [0],
"num_components": [2],
"filter": {
"max_enum": 1000,
"min_mw": 0,
"max_mw": 500,
"fp_sim": 0.5
},
"strategy": {
"type": "weighted",
"snt_max": 3
}
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
enumeration_job_id_sim = response_data.get("id")
link_to_file_sim = get_link_to_enumeration_file(enumeration_job_id_sim)
Waiting for enumeration to complete...
{
"id": "0c05f083-cfde-45bf-a9e6-598df4cb6b20",
"status": "COMPLETED",
"details": {
"size": 195,
"created": "2026-01-28T16:03:07.656027",
"finished": "2026-01-28T14:03:07.914318",
"link": "/api/v1/space/real/analogs/access_enumeration_file?enum_id=0c05f083-cfde-45bf-a9e6-598df4cb6b20"
}
}
from itertools import islice
response = requests.get(f"{FILE_SERVER_URL}{link_to_file_sim}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
rows = []
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
    for zipped_file_name in zf.namelist():
        with zf.open(zipped_file_name) as zipped_file:
            next(zipped_file)  # skip the header line
            for line in islice(zipped_file, 10_000):
                row = line.decode().strip().split("\t")
                rows.append(row)
print(rows[0] if rows else "No rows found")
['CC(C)CC(C)(CO)NC(=O)c1ccc(C2CC2)cc1F', 'm22____8845330____18906454', '0.703']
from rdkit import Chem
from rdkit.Chem import Draw
query = Chem.MolFromSmiles(structure_of_interest_smiles_sim)
query
# sort so that structures WITHOUT a substructure match come first (False < True)
rows.sort(key=lambda x: Chem.MolFromSmiles(x[0]).HasSubstructMatch(query))
Draw.MolsToGridImage([Chem.MolFromSmiles(d[0]) for d in islice(rows, 12)], molsPerRow=6, legends=[f"{'SS' if Chem.MolFromSmiles(d[0]).HasSubstructMatch(query) else 'Not SS'} - {d[2]}" for d in islice(rows, 12)], subImgSize=(200, 200))
As can be seen from the images, some molecules are not matched by the query as substructures: some contain fluorine in the meta- rather than the para-position, and some lack substituents in the amine part.
Auto-Search question¶
Q: Is there a way to exclude the SNT search from AutoSearch to save computational resources and speed up the search?
A: Yes, you can use the auto_mode parameter to specify which search methods to use. By default, all three methods are used: "SNT" (synthon SMARTS search), "MMS" (target SMILES substructure search), and "TGT" (target SMILES similarity search). You can exclude any of them to speed up the search.
# this is our example query, you can try different one
structure_of_interest_smiles_auto = "CNC(=O)c1ccccc1F"
# Run search with custom filter and all search modes
response = requests.post(
AUTO_SEARCH_ENUMERATION_URL,
json={
"smiles": structure_of_interest_smiles_auto,
"filter": {"min_mw": 400, "max_mw": 700},
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
link_to_file = response_data.setdefault("details", {}).get("link")
from collections import Counter
response = requests.get(f"{FILE_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
rows = []
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
    for zipped_file_name in zf.namelist():
        with zf.open(zipped_file_name) as zipped_file:
            next(zipped_file)  # skip the header line
            for line in islice(zipped_file, 10_000):
                row = line.decode().strip().split("\t")
                rows.append(row)
# Let's calculate how many structures were enumerated by each method
counter = Counter()
counter.update(d[2] for d in rows)
print(counter)
Counter({'TGT': 3874, 'MMS': 3864, 'SNT': 1254})
# Now, exclude SNT from auto_mode to skip this search method and check that the
# enumerated structures are indeed missing the molecules previously found by it
response = requests.post(
AUTO_SEARCH_ENUMERATION_URL,
json={
"smiles": structure_of_interest_smiles_auto,
"auto_mode": ["MMS", "TGT"],
"filter": {"min_mw": 400, "max_mw": 700},
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: dict = response.json()
link_to_file = response_data.setdefault("details", {}).get("link")
response = requests.get(f"{FILE_SERVER_URL}{link_to_file}", headers={"X-API-KEY": YOUR_API_KEY})
file_content = response.content
rows = []
with zipfile.ZipFile(BytesIO(file_content), mode="r") as zf:
    for zipped_file_name in zf.namelist():
        with zf.open(zipped_file_name) as zipped_file:
            next(zipped_file)  # skip the header line
            for line in islice(zipped_file, 10_000):
                row = line.decode().strip().split("\t")
                rows.append(row)
counter = Counter()
counter.update(d[2] for d in rows)
print(counter)
Counter({'TGT': 3874, 'MMS': 3867})
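Beyond comparing the mode counters, the two runs can be compared directly by their RSN identifiers to see which structures disappeared when SNT was excluded. A sketch over illustrative rows in the same three-column format (SMILES, RSN, Mode) as the downloaded files:

```python
# Illustrative rows in the format of the downloaded enumeration files
rows_all = [
    ["SMILES_A", "m22____1____2", "SNT"],
    ["SMILES_B", "m22____3____4", "TGT"],
    ["SMILES_C", "m22____5____6", "MMS"],
]
rows_no_snt = [
    ["SMILES_B", "m22____3____4", "TGT"],
    ["SMILES_C", "m22____5____6", "MMS"],
]

# RSN is the second column; the set difference gives structures lost without SNT
missing = {r[1] for r in rows_all} - {r[1] for r in rows_no_snt}
print(missing)
```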
Synthon Id - SMILES correlation¶
Q: Given a synthon ID, is there an API that returns the SMARTS for that synthon?
A: There is an endpoint which returns synthon SMILES based on a synthon id. It is used in the GUI as a helper call to pre-populate inputs on the MSS page. It is not in the public API documentation since it was intended for use via the GUI rather than directly. Still, you can use it to get the SMILES for a numeric synthon id as follows:
SYNTHON_SMILES_BY_ID_URL = f"{API_SERVER_URL}/api/v1/space/real/utils/get-synthons-by-id"
response = requests.post(
SYNTHON_SMILES_BY_ID_URL,
json=[23844188,18715594],
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response.json()
[{'id': 23844188, 'roleInReaction': 1, 'sSmiles': '[U]NC1(CC=C)COC1'},
{'id': 18715594, 'roleInReaction': 2, 'sSmiles': 'FCCOc1ccc(C(=O)[U])c(F)c1'}]
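In the returned sSmiles, [U] marks the synthon attachment point. To inspect such a synthon in generic cheminformatics tools, one option is to swap the marker for a dummy atom first (a simple string-level sketch; this convention is an assumption based on the output above):

```python
# [U] is the attachment-point marker in synthon SMILES; replacing it with the
# dummy atom [*] gives a SMILES that standard toolkits accept for visualization
synthon_smiles = "[U]NC1(CC=C)COC1"  # from the response above
display_smiles = synthon_smiles.replace("[U]", "[*]")
print(display_smiles)
```

The resulting SMILES can then be parsed and drawn with RDKit, e.g. `Chem.MolFromSmiles(display_smiles)`.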
Pre-filtering structures for a batch structure lookup¶
Q: If we submit a list of SMILES strings and the service doesn't like one of them, there are no results except for a single error message. What is the best way to filter out such a structure?
A: Starting from 2026-01-27, batch SMILES search endpoints handle invalid SMILES gracefully (see the "Error Handling for Invalid SMILES in Batch Searches" section above). However, if you want to pre-filter your SMILES list to minimize potential issues, you can apply the following filters:
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.rdBase import BlockLogs
from rdkit.Chem import Descriptors
# this is our example invalid smiles list
smiles_list_with_invalid_structures = [
"CCCC(C)(COC)NC(=O)c1ccccc1F", # this one is correct
"C[NH3+]", # this is an ion, need to be neutralized first
"CC[N+](CC)(CC)CC.[Cl-]", # this is a salt which can't be neutral
"C[S]1c2ccccc2Sc2ccccc12", # this SMILES with incorrect valence which do not produce None when converted to mol
"CCN(CC)C(=O)CCN=[N+]=N" # this is an incorrect charged "azide"
]
filtered_smiles_list_with_invalid_structures = []
fr = rdMolStandardize.FragmentRemover()
un = rdMolStandardize.Uncharger()
bl = BlockLogs()
for smiles in smiles_list_with_invalid_structures:
    mol = Chem.MolFromSmiles(smiles)
    if mol is not None and Chem.GetFormalCharge(mol) == 0 and Descriptors.NumRadicalElectrons(mol) == 0:
        has_invalid_aromatic = any(
            bond.GetBondType() == Chem.BondType.AROMATIC and not bond.GetIsAromatic()
            for bond in mol.GetBonds()
        )
        if has_invalid_aromatic:
            continue
        fr.removeInPlace(mol)
        un.unchargeInPlace(mol)
        standardized_smiles = Chem.MolToSmiles(mol)
        if Chem.GetFormalCharge(mol) == 0 and "." not in standardized_smiles:
            filtered_smiles_list_with_invalid_structures.append(standardized_smiles)
            continue
    print(f"Removing SMILES: {smiles}")
del bl
print(f"Filtered smiles list: {filtered_smiles_list_with_invalid_structures}")
response = requests.post(
BATCH_SMILES_ANY_STEREO_SEARCH_URL,
json={
"smiles_list": filtered_smiles_list_with_invalid_structures,
"reaction_types": [0,1050],
},
headers={
"Content-Type": "application/json",
"X-API-KEY": YOUR_API_KEY
}
)
response_data: list = response.json()
print("Response json:", json.dumps(response_data, indent=4))
Removing SMILES: C[NH3+]
Removing SMILES: CC[N+](CC)(CC)CC.[Cl-]
Removing SMILES: C[S]1c2ccccc2Sc2ccccc12
Removing SMILES: CCN(CC)C(=O)CCN=[N+]=N
Filtered smiles list: ['CCCC(C)(COC)NC(=O)c1ccccc1F']
Response json: [
{
"smiles": "CCCC(C)(COC)NC(=O)c1ccccc1F",
"query_smiles": "CCCC(C)(COC)NC(=O)c1ccccc1F"
}
]