Sonderamine Sonderamine site logo. Just a bunch of amines.

Chemistry stories, research advice, and more!


Chemistry Scripts/Programs (C#, Python)

molecular_weight_calculator.py

Github Link/Download

                    
import re

# Per-element weights keyed by lower-case element symbol. The entry point
# reports sums of these values in g/mol.
element_mw_list = {
    'h': 1.00784,
    'he': 4.002602,
    'li': 6.941,
    'be': 9.0121831,
    'b': 10.811,
    'c': 12.0107,
    'n': 14.0067,
    'o': 15.9994,
    'f': 18.998403163,
    'ne': 20.1797,
    'na': 22.98976928,
    'mg': 24.3050,
    'al': 26.9815385,
    'si': 28.0855,
    'p': 30.973761998,
    's': 32.065,
    'cl': 35.453,
    'ar': 39.948,
    'k': 39.0983,
    'ca': 40.078,
    'sc': 44.955908,
    'ti': 47.867,
    'v': 50.9415,
    'cr': 51.9961,
    'mn': 54.938044,
    'fe': 55.845,
    'co': 58.933194,
    'ni': 58.6934,
    'cu': 63.546,
    'zn': 65.38,
    'ga': 69.723,
    'ge': 72.630,
    'as': 74.921595,
    'se': 78.971,
    'br': 79.904,
    'kr': 83.798,
    'rb': 85.4678,
    'sr': 87.62,
    'y': 88.90584,
    'zr': 91.224,
    'nb': 92.90637,
    'mo': 95.95,
    'tc': 98.0,
    'ru': 101.07,
    'rh': 102.90550,
    'pd': 106.42,
    'ag': 107.8682,
    'cd': 112.414,
    'in': 114.818,
    'sn': 118.710,
    'sb': 121.760,
    'te': 127.60,
    'i': 126.90447,
    'xe': 131.293,
    'cs': 132.90545196,
    'ba': 137.327,
    'la': 138.90547,
    'ce': 140.116,
    'pr': 140.90766,
    'nd': 144.242,
    'pm': 145.0,
    'sm': 150.36,
    'eu': 151.964,
    'gd': 157.25,
    'tb': 158.92535,
    'dy': 162.500,
    'ho': 164.93033,
    'er': 167.259,
    'tm': 168.93422,
    'yb': 173.054,
    'lu': 174.9668,
    'hf': 178.49,
    'ta': 180.94788,
    'w': 183.84,
    're': 186.207,
    'os': 190.23,
    'ir': 192.217,
    'pt': 195.084,
    'au': 196.966569,
    'hg': 200.592,
    'tl': 204.38,
    'pb': 207.2,
    'bi': 208.98040,
    'po': 209.0,
    'at': 210.0,
    'rn': 222.0,
    'fr': 223.0,
    'ra': 226.0,
    'ac': 227.0,
    'th': 232.0377,
    'pa': 231.03588,
    'u': 238.02891,
    'np': 237.0,
    'pu': 244.0,
    'am': 243.0,
    'cm': 247.0,
    'bk': 247.0,
    'cf': 251.0,
    'es': 252.0,
    'fm': 257.0,
    'md': 258.0,
    'no': 259.0,
    'lr': 262.0,
    'rf': 267.0,
    'db': 270.0,
    'sg': 271.0,
    'bh': 270.0,
    'hs': 277.0,
    'mt': 278.0,
    'ds': 281.0,
    'rg': 282.0,
    'cn': 285.0,
    'nh': 286.0,
    'fl': 289.0,
    'mc': 290.0,
    'lv': 293.0,
    'ts': 294.0,
    'og': 294.0
}
# Loop flag read by the `while` in the script entry point; this script never
# sets it to False.
indefinite_entry = True



def format_formula(compound: str) -> list[str]:
    """Tokenise a molecular formula into alternating element / count strings.

    The result is a flat list such as ``['h', '2', 'o', '1']``: every element
    symbol (lower-case, as keyed in ``element_mw_list``) is followed by its
    count, with ``'1'`` inserted where the formula gives none.

    Two reading modes are used, because lower-casing a formula throws away the
    information that separates ``CO`` (carbon + oxygen) from ``Co`` (cobalt):

    * If ``compound`` contains at least one capital letter it is read with
      standard notation: a capital starts a symbol and at most one lower-case
      letter continues it. If the two-letter symbol is unknown, the capital is
      tried on its own.
    * If ``compound`` has no capitals, symbols are matched longest-first, so
      ``nacl`` reads as Na + Cl. All-lower-case input is inherently ambiguous
      (``co`` is read as cobalt).

    Characters that are not part of a recognised symbol or number are ignored.

    Args:
        compound: The formula as typed by the user.

    Returns:
        The token list, or ``None`` (after printing an error) when the input
        is empty or nothing in it could be recognised.
    """
    if compound is None or not compound.strip():
        print("[ERROR]: An invalid compound was entered.")
        return None

    raw_tokens: list[str] = []
    if any(char.isupper() for char in compound):
        for symbol, digits in re.findall(r"([A-Z][a-z]?)|(\d+)", compound):
            if digits:
                raw_tokens.append(digits)
                continue
            key = symbol.lower()
            if key in element_mw_list:
                raw_tokens.append(key)
            elif key[0] in element_mw_list:
                # e.g. "Cx": keep the carbon, drop the stray letter.
                raw_tokens.append(key[0])
    else:
        # Longest symbols first, otherwise 'n' would win over 'na'.
        symbols = sorted(element_mw_list, key=len, reverse=True)
        token_pattern = "|".join(symbols) + "|\\d+"
        raw_tokens = re.findall(token_pattern, compound.lower())

    formatted_compound: list[str] = []
    for index, entry in enumerate(raw_tokens):
        formatted_compound.append(entry)
        if entry.isalpha():
            is_last = index + 1 == len(raw_tokens)
            if is_last or raw_tokens[index + 1].isalpha():
                # No explicit count follows this element.
                formatted_compound.append('1')

    if not formatted_compound:
        print("[ERROR]: Could not correctly format compound.")
        return None
    return formatted_compound


def calculate_formula_mw(compound: list[str]) -> float:
    """Sum the weights of a tokenised formula.

    Args:
        compound: Alternating element / count tokens as produced by
            ``format_formula`` (e.g. ``['h', '2', 'o', '1']``).

    Returns:
        The total weight, ``0.0`` (after printing an error) when no known
        element contributed, or ``None`` (after printing an error) when
        ``compound`` is ``None``.
    """
    if compound is None:
        print("[ERROR]: Can't proceed due to incorrectly formatted compound.")
        return None

    total_mw: float = 0.000
    for index, token in enumerate(compound):
        mw = element_mw_list.get(token)
        if mw is None:
            # Count tokens and unknown symbols contribute nothing themselves.
            continue

        # An element with no numeric token after it counts once, so a list
        # that was not padded by format_formula cannot raise here.
        count = 1
        if index + 1 < len(compound) and compound[index + 1].isdecimal():
            count = int(compound[index + 1])
        total_mw = total_mw + (mw * count)

    if total_mw <= 0.0:
        print("[ERROR]: Failed to calculate molecular weight of the choosen compound.")
    return total_mw



if __name__ == "__main__":
    # Interactive loop: prompt for a formula, print its weight, repeat.
    while indefinite_entry:
        try:
            user_compound = str(input("Enter a compound's molecular formula: "))
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user pressed Ctrl-C: leave quietly
            # instead of ending in a traceback.
            print()
            break

        compound_formula = format_formula(user_compound)
        compound_weight = calculate_formula_mw(compound_formula)

        # Both helpers report their own errors; only print a real result.
        if compound_weight is not None and compound_weight > 0.0:
            print("Molecular weight of ", user_compound.upper(), ":  ", round(compound_weight, 5), " g/mol\n")

                    
                  

chemical_equation_balancer.py

Github Link/Download

                    
import re
import numpy
import sympy

# Loop flag for the interactive prompt in the entry point; nothing in this
# script sets it to False.
indefinite_entry = True


def separate_chemical_equation(equation: str) -> tuple[list[str], list[str]]:
    """Split ``"A + B -> C + D"`` into its reactant and product terms.

    Letter case is preserved whenever the input contains a capital letter,
    because case is what separates ``Co`` (cobalt) from ``CO`` (carbon
    monoxide). Input with no capitals at all is upper-cased so that
    formulas typed in lower case (``h2 + o2 -> h2o``) are still recognised.

    Args:
        equation: The equation text; the two sides are separated by ``->``
            and the terms on each side by ``+``.

    Returns:
        ``(reactants, products)`` as two lists of stripped term strings, or
        ``None`` (after printing an error) when the text is empty, does not
        contain exactly one ``->``, or has an empty term on either side.
    """
    if equation is None or not equation.strip():
        print("[ERROR]: An invalid equation was entered.")
        return None

    has_capitals = any(char.isupper() for char in equation)
    normalized = equation if has_capitals else equation.upper()

    sides = normalized.split("->")
    if len(sides) != 2:
        print("[ERROR] An invalid equation was entered.")
        return None

    reactants = [item.strip() for item in sides[0].split("+")]
    products = [item.strip() for item in sides[1].split("+")]

    # An empty term (e.g. "H2 + -> H2O") would become an all-zero matrix column.
    if not all(reactants) or not all(products):
        print("[ERROR]: Could not separate the chemical reactants or products.")
        return None
    return reactants, products


def format_chemical_equation(rxn_segment: list[str]) -> list:
    """Turn each term of one equation side into a list of atom counts.

    Every term may start with an integer coefficient, followed by a formula
    made of element symbols (a capital plus optional lower-case letters),
    optional subscripts, and parenthesised groups with an optional
    multiplier, e.g. ``2Ca(OH)2``.

    Args:
        rxn_segment: The term strings of one side of the equation.

    Returns:
        One list per term, each holding dicts of the form
        ``{"coeffecient": count, "atom": symbol_lowercase}`` where ``count``
        already includes group multipliers and the term's leading
        coefficient. An element that occurs several times in a term yields
        several dicts. Returns ``None`` (after printing an error) when
        ``rxn_segment`` is ``None``.
    """
    if rxn_segment is None:
        print("[ERROR]: An invalid equation half was entered.")
        return None

    token_pattern = re.compile(
        r'(?P<atom>[A-Z][a-z]*)(?P<count>\d*)|(?P<open>\()|(?P<close>\))(?P<mult>\d*)'
    )
    formatted_equation: list = []

    for part in rxn_segment:
        chemical_match = re.match(r'^(\d+)?\s*(.*)', part.strip())
        if not chemical_match:
            continue

        coeffecient = int(chemical_match.group(1)) if chemical_match.group(1) else 1
        chemical_formula = chemical_match.group(2)

        # groups[-1] collects the atoms of the innermost open parenthesis;
        # groups[0] is the formula itself.
        groups: list = [[]]
        for token in token_pattern.finditer(chemical_formula):
            if token.group("atom"):
                count = int(token.group("count")) if token.group("count") else 1
                groups[-1].append({"coeffecient": count, "atom": token.group("atom").lower()})
            elif token.group("open"):
                groups.append([])
            elif len(groups) > 1:
                # Closing parenthesis: scale the group and merge it outward.
                multiplier = int(token.group("mult")) if token.group("mult") else 1
                closed_group = groups.pop()
                for entry in closed_group:
                    entry["coeffecient"] *= multiplier
                groups[-1].extend(closed_group)
            # A ')' with no matching '(' is ignored.

        # Groups left open by a missing ')' are kept with multiplier 1.
        organized_atoms: list = [entry for group in groups for entry in group]
        for entry in organized_atoms:
            entry["coeffecient"] *= coeffecient

        formatted_equation.append(organized_atoms)

    return formatted_equation


def construct_equation_matrix(rxn_reactants: list, rxn_products: list):
    """Build the element-by-molecule matrix of a reaction.

    Rows follow the alphabetically sorted atom names found in either side;
    columns follow the molecules, reactants first and then products. Each
    cell accumulates the atom's count in that molecule, added for reactants
    and subtracted for products.

    Args:
        rxn_reactants: Per-molecule lists of ``{"coeffecient", "atom"}`` dicts.
        rxn_products: Same structure for the product side.

    Returns:
        A ``sympy`` matrix, or ``None`` (after printing an error) when either
        argument is ``None``.
    """
    if rxn_reactants is None or rxn_products is None:
        print("[ERROR]: The reaction products or reactants cannot be used.")
        return None

    molecules = rxn_reactants + rxn_products
    atom_names = sorted({entry['atom'] for molecule in molecules for entry in molecule})
    row_of = {atom: row for row, atom in enumerate(atom_names)}

    equation_matrix = sympy.zeros(len(atom_names), len(molecules))
    reactant_count = len(rxn_reactants)
    for column, molecule in enumerate(molecules):
        # Reactant columns are positive, product columns negative.
        sign = 1 if column < reactant_count else -1
        for entry in molecule:
            equation_matrix[row_of[entry['atom']], column] += sign * entry['coeffecient']

    return equation_matrix


def solve_chemical_equation(chemical_matrix):
    """Derive integer balancing coefficients from the reaction matrix.

    The first basis vector of the matrix's null space is scaled by the least
    common multiple of its denominators so that every entry is an integer.

    Args:
        chemical_matrix: The matrix built by ``construct_equation_matrix``.

    Returns:
        A ``sympy`` column matrix with one coefficient per molecule, or
        ``None`` (after printing a message) when the matrix is ``None``, the
        null space is empty, or the vector contains a zero or negative entry
        and so does not describe a balanced reaction.
    """
    if chemical_matrix is None:
        print("[ERROR]: An invalid chemical equation matrix was passed.")
        return None

    null_space = chemical_matrix.nullspace()
    if not null_space:
        print("[WARNING]: No possible solutions were found.")
        return None

    solution = null_space[0]
    denominator = sympy.lcm([val.q for val in solution])
    balanced_coeffecients = solution * denominator

    # A zero or negative coefficient would be printed like a coefficient of
    # 1, presenting an unbalanced equation as solved.
    if any(val <= 0 for val in balanced_coeffecients):
        print("[WARNING]: No solution with strictly positive coefficients was found.")
        return None
    return balanced_coeffecients

def format_solved_chemical_equation(final_coeffecients, raw_reactants: list, raw_products: list):
    """Render the balanced equation as ``"aA + bB -> cC"``.

    A coefficient of 1 or less is not printed. When the user typed a
    coefficient in front of a term (``2H2``), the solved coefficient applies
    to that whole typed term, so the two are multiplied and printed as one
    number in front of the bare formula. All coefficients are then divided
    by their greatest common divisor.

    Args:
        final_coeffecients: One integer-like coefficient per molecule,
            reactants first (indexable, e.g. a list or a sympy column matrix).
        raw_reactants: Reactant terms as typed.
        raw_products: Product terms as typed.

    Returns:
        The formatted equation, or ``None`` (after printing an error) when
        any argument is ``None``.
    """
    from functools import reduce
    from math import gcd

    if final_coeffecients is None or raw_reactants is None or raw_products is None:
        print("[ERROR]: Could not format final chemical equation.")
        return None

    all_molecules = raw_reactants + raw_products
    formulas: list[str] = []
    totals: list[int] = []
    has_typed_coefficient = False
    for index, molecule in enumerate(all_molecules):
        typed_digits, bare_formula = re.match(r'\s*(\d*)\s*(.*)', molecule, re.DOTALL).groups()
        typed = int(typed_digits) if typed_digits else 1
        has_typed_coefficient = has_typed_coefficient or bool(typed_digits)

        # Terms without a typed coefficient are printed exactly as given.
        formulas.append(bare_formula if typed_digits else molecule)
        totals.append(int(final_coeffecients[index]) * typed)

    # Multiplying in typed coefficients can introduce a common factor;
    # without them the solver's output is left untouched.
    divisor = (reduce(gcd, totals, 0) or 1) if has_typed_coefficient else 1

    formatted_parts: list[str] = []
    for total, formula in zip(totals, formulas):
        reduced = total // divisor
        display_coeffecient = str(reduced) if reduced > 1 else ""
        formatted_parts.append(f"{display_coeffecient}{formula}")

    num_reactants = len(raw_reactants)
    reactants_side = " + ".join(formatted_parts[:num_reactants])
    products_side = " + ".join(formatted_parts[num_reactants:])

    return f"{reactants_side} -> {products_side}"


if __name__ == "__main__":
    # Interactive loop: prompt for an equation, print the balanced form, repeat.
    while indefinite_entry:
        print("Equation format: A + B -> C + D")
        try:
            user_equation = str(input("Enter a chemical equation: "))
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user pressed Ctrl-C: leave quietly.
            print()
            break

        separated_equations = separate_chemical_equation(user_equation)
        if separated_equations is None:
            # The helper already reported the problem; ask again.
            continue
        raw_reactants, raw_products = separated_equations

        formatted_reactants = format_chemical_equation(raw_reactants)
        formatted_products = format_chemical_equation(raw_products)

        raw_chemical_matrix = construct_equation_matrix(formatted_reactants, formatted_products)
        balanced_chemical_matrix = solve_chemical_equation(raw_chemical_matrix)
        if balanced_chemical_matrix is None:
            # No usable solution; the solver already printed why.
            continue
        solved_user_equation = format_solved_chemical_equation(balanced_chemical_matrix, raw_reactants, raw_products)

        print("Solved Chemical Equation:  ", solved_user_equation, "\n")
                    
                  

cplc.cs

Github Link/Download

                    
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text.RegularExpressions;
using System.Text;


namespace CLPC
{
    /// <summary>
    /// Console tool that pulls the 1H NMR listings, 13C NMR listings and "calculated for"
    /// formulas out of a paper saved as plain text, then warns when the counts derived from
    /// the spectra differ from the carbon / hydrogen counts of the formula.
    /// Listings and formulas are paired purely by their order of appearance in the text.
    /// </summary>
    internal class CLPC_Main
    {
        // Capacity shared by every per-compound array in this class.
        private const int max_compounds = 1024;

        // Every 1H NMR match starts with this text; it is skipped so the "1H" of the
        // header is not read as a one-proton integration.
        private const string hnmr_header = "1H NMR";

        public static int[] compound_nmr_protons = new int[max_compounds];
        public static int[] compound_nmr_carbons = new int[max_compounds];

        public static int[] compound_c_composition = new int[max_compounds];
        public static int[] compound_h_composition = new int[max_compounds];

        // "2H" in "(d, 2H)"; the word boundary keeps "5 Hz" and "C6H5" from matching.
        private static readonly Regex proton_integration_regex = new Regex(@"([0-9]+)\s?H\b");

        // Carbon / hydrogen with an optional count. The look-ahead rejects two-letter
        // symbols that merely start with the same capital (Cl, Ca, Hg, ...).
        private static readonly Regex carbon_count_regex = new Regex(@"C(?![a-z])\s*([0-9]*)");
        private static readonly Regex hydrogen_count_regex = new Regex(@"H(?![a-z])\s*([0-9]*)");


        /// <summary>Prompts for the paper's path, runs every extraction step and prints the comparison.</summary>
        static void Main()
        {
            const double version = 0.55;
            Console.WriteLine($"======CPLC======\n--Version: {version}--\n--By: Rim032--\n");

            Console.WriteLine("Drag or drop your the .txt file of your research paper.");
            string paper_path = format_file_location(Console.ReadLine());

            if (string.IsNullOrEmpty(paper_path) || !File.Exists(paper_path))
            {
                return;
            }
            string paper_lines = File.ReadAllText(paper_path);

            string[] proton_nmr_lines = obtain_hnmr_data(paper_lines);
            check_hnmr_data(proton_nmr_lines);
            string[] carbon_nmr_lines = obtain_cnmr_data(paper_lines);
            check_cnmr_data(carbon_nmr_lines);


            string[] rough_compound_formulas = obtain_compound_formulas(paper_lines);
            rufisfy_compound_formulas(rough_compound_formulas);
            compare_compounds_with_nmr(rough_compound_formulas, proton_nmr_lines, carbon_nmr_lines);

            // Keep the console window open until the user presses enter.
            Console.ReadLine();
        }

        /// <summary>
        /// Adds up the proton integrations ("2H", "3H", ...) of each 1H NMR listing and stores
        /// the total in <c>compound_nmr_protons</c> at the listing's index.
        /// </summary>
        /// <param name="nmr_data">Listings as returned by <c>obtain_hnmr_data</c>; unused slots are null.</param>
        private static void check_hnmr_data(string[] nmr_data)
        {
            if (nmr_data == null)
            {
                return;
            }

            int limit = Math.Min(nmr_data.Length, compound_nmr_protons.Length);
            for (int l = 0; l < limit; l++)
            {
                string listing = nmr_data[l];
                if (listing == null)
                {
                    continue;
                }

                if (listing.StartsWith(hnmr_header, StringComparison.Ordinal))
                {
                    listing = listing.Substring(hnmr_header.Length);
                }

                foreach (Match integration in proton_integration_regex.Matches(listing))
                {
                    int protons;
                    if (int.TryParse(integration.Groups[1].Value, out protons))
                    {
                        compound_nmr_protons[l] = compound_nmr_protons[l] + protons;
                    }
                }
            }
        }

        /// <summary>
        /// Counts, for each 13C NMR listing, the characters in the range '/'..'9' that are
        /// directly followed by '.', and stores the count in <c>compound_nmr_carbons</c>.
        /// </summary>
        /// <param name="nmr_data">Listings as returned by <c>obtain_cnmr_data</c>; unused slots are null.</param>
        private static void check_cnmr_data(string[] nmr_data)
        {
            if (nmr_data == null)
            {
                return;
            }

            int limit = Math.Min(nmr_data.Length, compound_nmr_carbons.Length);
            for (int l = 0; l < limit; l++)
            {
                string listing = nmr_data[l];
                if (listing == null)
                {
                    continue;
                }

                // NOTE(review): a decimal in the header (e.g. "100.6 MHz") is counted as well;
                // confirm whether that is acceptable for the papers this is used on.
                for (int c = 0; c + 1 < listing.Length; c++)
                {
                    char current = listing[c];
                    if ((int)current > 46 && (int)current < 58 && listing[c + 1] == '.')
                    {
                        compound_nmr_carbons[l] = compound_nmr_carbons[l] + 1;
                    }
                }
            }
        }

        /// <summary>
        /// Reads the carbon and hydrogen counts out of each "calculated for ..." match and stores
        /// them in <c>compound_c_composition</c> / <c>compound_h_composition</c>. The first C and
        /// the first H of the text are used; a symbol without digits counts as 1.
        /// </summary>
        /// <param name="compound_data">Matches as returned by <c>obtain_compound_formulas</c>; unused slots are null.</param>
        private static void rufisfy_compound_formulas(string[] compound_data)
        {
            if (compound_data == null)
            {
                return;
            }

            int limit = Math.Min(compound_data.Length, compound_c_composition.Length);
            for (int i = 0; i < limit; i++)
            {
                string formula = compound_data[i];
                if (formula == null)
                {
                    continue;
                }

                int carbons;
                if (try_read_element_count(carbon_count_regex, formula, out carbons))
                {
                    compound_c_composition[i] = carbons;
                }

                int hydrogens;
                if (try_read_element_count(hydrogen_count_regex, formula, out hydrogens))
                {
                    compound_h_composition[i] = hydrogens;
                }
            }
        }

        // Finds the first match of element_regex in formula and parses its digit group (1 when empty).
        private static bool try_read_element_count(Regex element_regex, string formula, out int count)
        {
            count = 0;
            Match element = element_regex.Match(formula);
            if (!element.Success)
            {
                return false;
            }

            string digits = element.Groups[1].Value;
            if (digits.Length == 0)
            {
                count = 1;
                return true;
            }
            return int.TryParse(digits, out count);
        }

        /// <summary>Collects every "calculated for ... [" span of the paper text.</summary>
        private static string[] obtain_compound_formulas(string file)
        {
            return collect_matches(file, @"calculated for(.*?)\[");
        }

        /// <summary>Collects every span from "1H NMR" up to the next ")." of the paper text.</summary>
        private static string[] obtain_hnmr_data(string file)
        {
            return collect_matches(file, @"1H NMR(.*?)\)\.");
        }

        /// <summary>Collects every span from "13C NMR" up to the next ". " of the paper text.</summary>
        private static string[] obtain_cnmr_data(string file)
        {
            return collect_matches(file, @"13C NMR(.*?)\. ");
        }

        // Returns a fixed-size array holding the matches of pattern in file, in order of
        // appearance. Slots beyond the last match stay null; matches beyond the capacity
        // are dropped instead of overrunning the array.
        private static string[] collect_matches(string file, string pattern)
        {
            string[] matched_lines = new string[max_compounds];
            if (file == null)
            {
                return matched_lines;
            }

            int match_counter = 0;
            foreach (Match match in Regex.Matches(file, pattern, RegexOptions.None))
            {
                if (match_counter >= matched_lines.Length)
                {
                    break;
                }
                matched_lines[match_counter] = match.ToString();
                match_counter++;
            }

            return matched_lines;
        }

        /// <summary>
        /// Prints, for every index that has a formula and non-zero values in all four count
        /// arrays, the formula and both listings, followed by a warning for each count that
        /// differs between spectrum and formula.
        /// </summary>
        private static void compare_compounds_with_nmr(string[] compound_formulas, string[] proton_nmr_lines, string[] carbon_nmr_lines)
        {
            if (compound_formulas == null || proton_nmr_lines == null || carbon_nmr_lines == null)
            {
                return;
            }

            int limit = Math.Min(
                Math.Min(max_compounds, compound_formulas.Length),
                Math.Min(proton_nmr_lines.Length, carbon_nmr_lines.Length));

            for (int a = 0; a < limit; a++)
            {
                bool has_all_counts = compound_h_composition[a] != 0 && compound_c_composition[a] != 0 &&
                    compound_nmr_carbons[a] != 0 && compound_nmr_protons[a] != 0;
                if (compound_formulas[a] == null || !has_all_counts)
                {
                    continue;
                }

                Console.Write($"\n\n--------------------------------------------------------------------------------\nINTERNAL COMPOUND INDEX: {a + 1}\n{compound_formulas[a]}\n\n{proton_nmr_lines[a]}\n\n{carbon_nmr_lines[a]}\n\n");
                if (compound_c_composition[a] != compound_nmr_carbons[a])
                {
                    Console.Write("\nWARNING: # of spectra CARBONS isn't equal to the formula's carbons!");
                }

                if (compound_h_composition[a] != compound_nmr_protons[a])
                {
                    Console.Write("\nWARNING: # of spectra PROTONS isn't equal to the formula's protons!\n");
                }
                Console.Write("--------------------------------------------------------------------------------\n\n");
            }
        }

        /// <summary>Removes every double quote from a path; a null path becomes "".</summary>
        private static string format_file_location(string file)
        {
            if (file == null)
            {
                return "";
            }

            return file.Replace("\"", "");
        }
    }
}
                    
                  

molecuLex.py

Github Link/Download

                    
import time
import os
import re
import csv
import sys

import argparse
from rdkit import Chem
import pubchempy as pcp
from rdkit.ML.Descriptors import MoleculeDescriptors
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors


# NOTE(review): presumably here to switch on ANSI escape handling in the
# Windows console — confirm.
os.system('')
# Extension expected for ID files; not referenced anywhere else in the
# visible code.
desired_extension: str = ".txt"
# Labels used by print_log(); callers pass the index of the label they want.
log_types: list = [
    "LOG",      #0
    "ERROR",    #1
    "WARNING"   #2
]




def print_log(log_type: int, message: str) -> None:
    """Print ``message`` prefixed with the label at ``log_types[log_type]``.

    Nothing is printed when either argument is ``None``.
    """
    if log_type is not None and message is not None:
        label = log_types[log_type]
        print(f"[{label}] - {message}")


def create_local_id_list(min_id: int, max_id: int) -> list[str]:
    """Return the IDs ``min_id + 1`` through ``max_id`` as strings.

    The lower bound is exclusive and the upper bound inclusive, so
    ``(0, 3)`` yields ``['1', '2', '3']``. An error is logged and ``None``
    returned when ``max_id`` exceeds ``sys.maxsize``, ``min_id`` is negative
    or ``max_id`` is below 1.
    """
    if max_id > sys.maxsize:
        print_log(1, f"Too large of a maximum ID value. MAX INT SIZE: {sys.maxsize}")
        return

    bounds_too_small = min_id < 0 or max_id < 1
    if bounds_too_small:
        print_log(1, "Too small of a min or max CID to search through. Please choose positive integers.")
        return

    return [str(cid) for cid in range(min_id + 1, max_id + 1)]


def read_id_file(file: str) -> str:
    """Return the full text of the file at ``file``.

    An error is logged and ``None`` returned when ``file`` is ``None`` or
    does not point at an existing regular file.
    """
    if file is None or not os.path.isfile(file):
        print_log(1, "File is invalid or malformed.")
        return

    with open(file, "r") as id_file:
        return id_file.read()


def format_id_list(ids: str) -> list[str]:
    """Split a string of IDs separated by commas and/or whitespace.

    Any run of commas and whitespace counts as one separator, so ``"1, 2"``
    or a trailing newline no longer produces empty-string IDs.

    Args:
        ids: The raw text, e.g. ``"256,438,1024"`` or ``"133 194 23 5"``.

    Returns:
        The non-empty ID tokens in order (possibly an empty list), or
        ``None`` (after logging an error) when ``ids`` is ``None``.
    """
    if ids is None:
        print_log(1, "An invalid list of IDs was passed.")
        return

    return [token for token in re.split(r"[,\s]+", ids) if token]


def api_bar_print(id_list: list[str], bar_length: int, compound_index: int, compound_id: int) -> None:
    """Redraw the single-line progress bar on stdout.

    Args:
        id_list: All IDs being processed; its length is the 100% mark.
        bar_length: Width of the bar in characters.
        compound_index: Number of compounds handled so far.
        compound_id: The ID shown next to the bar.
    """
    total = len(id_list) if id_list else 0
    # An empty list has no meaningful progress; show 0% rather than divide by zero.
    api_progress = compound_index / total if total else 0.0
    # Keep the bar inside its frame even if the index runs past the list.
    api_progress = min(max(api_progress, 0.0), 1.0)

    filled_len = int(bar_length * api_progress)
    api_bar = "▓" * filled_len + "░" * (bar_length - filled_len)
    que_percent = int(api_progress * 100)
    sys.stdout.write(f"\rPROGRESS: [{api_bar}] {que_percent}% | Analyzing CID: {compound_id:<10}")
    sys.stdout.flush()



class MolecuLexParser():
    """Command-line workflow: read CIDs, fetch the compounds from PubChem,
    compute RDKit descriptors, check them against the four thresholds in
    ``drug_candidate_check`` and print and/or save the results.
    """

    def __init__(self):
        self.user_input: str = ""

        self.display_drug_attributes: bool = True
        self.application_version: float = 0.99
        self.last_updated_date: str = "2026/1/25"
        self.csv_file_name: str = "untitled_molecuLex_data"

        self.console_noprint: bool = False
        self.save_csv_data: bool = False
        self.record_full_data: bool = False
        self.disable_stat_report: bool = False

        self.violation_count: int = 0
        self.likely_drug_count: int = 0
        self.unlikely_drug_count: int = 0
        self.max_request_size: int = 256

        # First row is the CSV header; one row per compound is appended later.
        self.csv_data: list[list] = [
            ["Compound Name", "PubChem ID", "Molecular Weight (g/mol)", "H-Donors", "H-Acceptors", "LogP", "Drug Possibility", "Lipinski Violations"]]
        self.smiles_list: list[str] = []
        self.name_list: list[str] = []
        self.id_list: list[str] = []
        # CIDs of the compounds that were actually fetched, kept parallel to
        # smiles_list / name_list so a skipped compound cannot shift the IDs.
        self.fetched_id_list: list[str] = []


    def initialize_console_args(self) -> list[str]:
        """Print the banner, parse the command line and store the options.

        Returns:
            The list of CID strings to process. It is ``None`` when the ID
            source could not be read and ``""`` when no source was given.
        """
        # Raw strings: the art is full of backslashes that are not escapes.
        banner_lines = [
            r"     __       __            __                                __                           ",
            r"    /  \     /  |          /  |                              /  |                          ",
            r"    $$  \   /$$ |  ______  $$ |  ______    _______  __    __ $$ |        ______   __    __ ",
            r"    $$$  \ /$$$ | /      \ $$ | /      \  /       |/  |  /  |$$ |       /      \ /  \  /  |",
            r"    $$$$  /$$$$ |/$$$$$$  |$$ |/$$$$$$  |/$$$$$$$/ $$ |  $$ |$$ |      /$$$$$$  |$$  \/$$/ ",
            r"    $$ $$ $$/$$ |$$ |  $$ |$$ |$$    $$ |$$ |      $$ |  $$ |$$ |      $$    $$ | $$  $$<  ",
            r"    $$ |$$$/ $$ |$$ \__$$ |$$ |$$$$$$$$/ $$ \_____ $$ \__$$ |$$ |_____ $$$$$$$$/  /$$$$  \ ",
            r"    $$ | $/  $$ |$$    $$/ $$ |$$       |$$       |$$    $$/ $$       |$$       |/$$/ $$  |",
            r"    $$/      $$/  $$$$$$/  $$/  $$$$$$$/  $$$$$$$/  $$$$$$/  $$$$$$$$/  $$$$$$$/ $$/   $$/ ",
        ]
        print("\n\n" + "\n".join(banner_lines) + "\n    \n")
        print(f"{'—'*90}\n  MolecuLex\n    Version: {self.application_version}\n    Made by Rim032\n    Last Updated: {self.last_updated_date}\n{'—'*90}\n\n")

        console_parser = argparse.ArgumentParser()
        console_parser.add_argument("--fmin", type=int, help="Minimum value of CIDs to search through.")
        console_parser.add_argument("--fmax", type=int, help="Maximum value of CIDs to search through.")
        console_parser.add_argument("--file", help="The file path of a .txt containing CIDs formatted with commas or spaces.")
        console_parser.add_argument("--entry", help="A manually-entered string of CIDs.")

        console_parser.add_argument("--api_batch", type=int, help="Override the amount of requests made in one chunk at a time. Use with caution!")

        console_parser.add_argument("--noprint", action="store_true", help="Abstain from printing fetched data in console.")
        console_parser.add_argument("--nostat", action="store_true", help="Abstain from printing the short statistical summary regarding all of the fetched compounds.")

        # nargs="?" lets --format work as a bare flag while still accepting
        # the value older invocations had to supply.
        console_parser.add_argument("--format", nargs="?", const=True, default=None, help="Display a short example of PubChem ID entry and file formatting for this program.")
        console_parser.add_argument("--full", action="store_true", help="Record all substansial properties associated with a compound.")
        console_parser.add_argument("--save_csv", help="Save the fetched data in a CSV file.")

        args = console_parser.parse_args()
        if args.file is not None:
            self.user_input = read_id_file(args.file)
            self.user_input = format_id_list(self.user_input)
        elif args.fmax is not None and args.fmin is not None:
            self.user_input = create_local_id_list(args.fmin, args.fmax)
        elif args.entry is not None:
            self.user_input = format_id_list(args.entry)

        if args.api_batch is not None:
            if args.api_batch < 1:
                # A step of 0 or less cannot be used to walk the ID list.
                print_log(1, "Maximum API batch size must be a positive integer. Keeping the default.")
            else:
                self.max_request_size = args.api_batch
                print_log(2, "Maximum API batch size has been changed. This may ruin your work!")

        if args.format:
            print("Format your file with each ID separated by a comma or space. (Ex: 256,438,1024)(Ex 2: 133 194 23 5)")
        if args.save_csv is not None:
            self.save_csv_data = True
            self.csv_file_name = args.save_csv
        if args.full:
            self.csv_data = [["Compound Name", "PubChem ID", "Molecular Weight (g/mol)", "H-Donors", "H-Acceptors", "LogP", "Drug Possibility",
                         "Lipinski Violations", "Number of Atoms", "Molecular Formula", "Number of Bonds", "Number of Rings", "Topological Polar Surface Area",
                         "Rotatable Bonds", "Number of Heavy Atoms", "Number of Aromatic Rings", "Number of Saturated Rings", "Number of Aliphatic Rings",
                         "Number of Bridge Head Atoms", "Percent of sp^3 Hybridization", "Surface Area", "Max Partial Charge", "Hall Kier Alpha"]]
            self.record_full_data = True

        if args.nostat:
            self.disable_stat_report = True
        if args.noprint:
            self.console_noprint = True

        self.id_list = self.user_input
        return self.id_list


    def append_csv_molecule_data(self, molecule_data: dict) -> None:
        """Append one compound's values, in dict order, as a CSV row.

        The violations list is written as a ``"; "``-joined string (or
        ``"None"``). ``molecule_data`` itself is left unmodified.
        """
        if self.save_csv_data is False:
            return
        if molecule_data is None:
            print_log(1, f"Cannot append molecule data to CSV data list. Passed data is invalid.")
            return

        # Work on a copy so the caller's dict keeps its list of violations.
        row = dict(molecule_data)
        row["violations"] = "; ".join(row["violations"]) if row["violations"] else "None"
        self.csv_data.append(list(row.values()))


    def save_csv_molecule_data(self) -> None:
        """Write the collected rows to ``<csv_file_name>.csv`` when saving is enabled."""
        if self.save_csv_data is False:
            return
        try:
            with open(f"{self.csv_file_name}.csv", mode='w', newline='', encoding='utf-8') as csv_file:
                file_writer = csv.writer(csv_file, delimiter=',')
                file_writer.writerows(self.csv_data)
            print_log(0, f"Successfully saved data to {self.csv_file_name}.csv")
        except Exception as err:
            print_log(1, f"Failed to save CSV: {err}")


    def get_id_smiles(self) -> tuple[list[str], list[str]]:
        """Fetch the compounds for ``id_list`` from PubChem in batches.

        For every compound returned, its SMILES, IUPAC name and CID are
        appended to ``smiles_list``, ``name_list`` and ``fetched_id_list``.
        Any exception raised while fetching is logged and ends the fetch;
        data gathered up to that point is kept.

        Returns:
            ``(smiles_list, name_list)``, or ``None`` when ``id_list`` is ``None``.
        """
        if self.id_list is None:
            print_log(1, "An invalid list of formatted IDs was passed.")
            return

        comp_index: int = 0
        print(f"\n{'—'*20} DATA ACQUISITION PHASE: INITIATED {'—'*20}")
        print_log(0, f"Connecting to PubChem API for {len(self.id_list)} compounds...")

        try:
            for i in range(0, len(self.id_list), self.max_request_size):
                id_list_chunk = self.id_list[i : i + self.max_request_size]
                fetched_compounds = pcp.get_compounds(id_list_chunk, 'cid')

                for pub_compound in fetched_compounds:
                    requested_id = self.id_list[comp_index] if comp_index < len(self.id_list) else ""
                    api_bar_print(self.id_list, 30, comp_index, requested_id)

                    if pub_compound is not None:
                        self.smiles_list.append(pub_compound.smiles)
                        self.name_list.append(pub_compound.iupac_name)

                        # Prefer the CID reported by the record itself; fall
                        # back to the requested ID at this position.
                        fetched_cid = getattr(pub_compound, "cid", None)
                        self.fetched_id_list.append(str(fetched_cid) if fetched_cid is not None else requested_id)

                    comp_index = comp_index + 1
        except Exception as err:
            print("")
            print_log(1, f"API Issue. {err}.")
        print(f"\n{'—'*20} DATA ACQUISITION PHASE: COMPLETED {'—'*20}")

        return self.smiles_list, self.name_list


    def drug_candidate_check(self, mw: float, h_donors: int, h_acceptors: int, logp: float) -> tuple[list[str], bool]:
        """Check a compound against four thresholds and update the counters.

        A violation is recorded for a weight above 500, more than 5 H-bond
        donors, more than 10 H-bond acceptors, or a LogP above 5.

        Returns:
            ``(violations, is_drug_likely)``; the compound is reported as
            likely only when the list of violations is empty. When any
            argument is ``None`` an error is logged and a single
            placeholder entry is returned with ``False``.
        """
        if mw is None or h_donors is None or h_acceptors is None or logp is None:
            print_log(1, "Cannot check the drug properties of a compound. Invalid arguments were passed.")
            # A list, like every other return path: callers join and iterate it.
            return ["N/A Couldn't parse"], False
        violations: list[str] = []

        if mw > 500:
            violations.append(f"Molecular Weight (g/mol) > 500)")
        if h_donors > 5:
            violations.append(f"Hydrogen bond donors > 5)")
        if h_acceptors > 10:
            violations.append(f"Hydrogen bond acceptors > 10)")
        if logp > 5:
            violations.append(f"Octanol-water partition coefficient (LogP) > 5)")

        if len(violations) >= 1:
            self.violation_count = self.violation_count + len(violations)
            self.unlikely_drug_count = self.unlikely_drug_count + 1

            return violations, False
        self.likely_drug_count = self.likely_drug_count + 1
        return violations, True


    def parse_compound_properties(self) -> None:
        """Compute descriptors for every fetched compound, then report.

        Each compound is printed and queued for the CSV; afterwards the
        summary is printed and the CSV saved (both subject to their flags).
        """
        if self.smiles_list is None:
            print_log(1, "An invalid SMILES list was passed.")
            return

        # Use the CIDs recorded during fetching when they line up with the
        # SMILES; otherwise fall back to the requested IDs.
        if len(self.fetched_id_list) == len(self.smiles_list):
            compound_ids = self.fetched_id_list
        else:
            compound_ids = self.id_list

        for smiles, name, pc_id in zip(self.smiles_list, self.name_list, compound_ids):
            # A record without SMILES cannot be parsed; report it like any
            # other unparsable molecule.
            molecule = Chem.MolFromSmiles(smiles) if smiles else None
            if molecule is None:
                print_log(1, f"Could not parse a molecule. (CID: {pc_id})")
                continue

            try:
                molecule_data: dict = {
                    "name": name,
                    "id": pc_id,
                    "mw": round(Descriptors.MolWt(molecule), 5),
                    "h_donors": Descriptors.NumHDonors(molecule),
                    "h_acceptors": Descriptors.NumHAcceptors(molecule),
                    "log_p": round(Descriptors.MolLogP(molecule), 8)
                }

                violations, is_drug_likely = self.drug_candidate_check(molecule_data["mw"], molecule_data["h_donors"], molecule_data["h_acceptors"], molecule_data["log_p"])
                molecule_data.update({"is_drug_likely": is_drug_likely, "violations": violations})

                if self.record_full_data is True:
                    # Key order must match the extended CSV header.
                    molecule_data.update({"atom_number": molecule.GetNumAtoms(),
                                          "molecular_formula": rdMolDescriptors.CalcMolFormula(Chem.AddHs(molecule)),
                                          "bond_number": molecule.GetNumBonds(),
                                          "ring_number": molecule.GetRingInfo().NumRings(),
                                          "tpsa": Descriptors.TPSA(molecule),
                                          "rot_bond_number": Descriptors.NumRotatableBonds(molecule),
                                          "heavy_atoms": molecule.GetNumHeavyAtoms(),
                                          "aromatic_ring_number": Descriptors.NumAromaticRings(molecule),
                                          "sat_ring_number": Descriptors.NumSaturatedRings(molecule),
                                          "aliph_ring_number": Descriptors.NumAliphaticRings(molecule),
                                          "bridge_head_number": rdMolDescriptors.CalcNumBridgeheadAtoms(molecule),
                                          "sp3_percent": Descriptors.FractionCSP3(molecule),
                                          "surface_area": round(Descriptors.LabuteASA(molecule), 8),
                                          "partial_charge": round(Descriptors.MaxAbsPartialCharge(molecule), 8),
                                          "hk_alpha": round(Descriptors.HallKierAlpha(molecule), 8)
                                          })

                self.print_molecule_info(molecule_data)
                self.append_csv_molecule_data(molecule_data)
            except Exception as err:
                print_log(1, f"Compound descriptor error. {err}.")

        self.print_molecule_summary()
        self.save_csv_molecule_data()


    def print_molecule_info(self, molecule_data: dict) -> None:
        """Print one compound's core properties as a boxed card."""
        if self.console_noprint is True or molecule_data is None:
            return

        mw: float = molecule_data["mw"]
        log_p: float = molecule_data["log_p"]
        h_acceptors: int = molecule_data["h_acceptors"]
        h_donors: int = molecule_data["h_donors"]
        is_drug_likely: bool = molecule_data["is_drug_likely"]
        # Some records carry no name; show a placeholder instead of
        # aborting the card half-drawn.
        name: str = molecule_data["name"] or "N/A"

        WIDTH: int = 62
        LABEL_W: int = 20
        DATA_W: int = 30

        try:
            tag = "[ LIKELY DRUG CANDIDATE ]" if is_drug_likely else "[ UNLIKELY DRUG CANDIDATE ]"
            print(f"\n╔{'═' * (WIDTH-2)}╗")

            molecule_id = molecule_data["id"]
            prefix = f"• CID ({molecule_id}): "
            available_space = (WIDTH - 4) - len(prefix)

            # Truncate long names so the title stays inside the frame.
            if len(name) > available_space:
                display_name = name[:available_space - 2] + ".."
            else:
                display_name = name
            full_content = f"{prefix}{display_name}"

            print(f"║ {full_content:<{WIDTH - 4}} ║")
            print(f"╠{'═' * (WIDTH-2)}╣")

            print(f"║ { 'Molecular Weight:':<{LABEL_W}}{mw:>{DATA_W}.4f} g/mol {' ':<2}║")
            print(f"║ { 'LogP:':<{LABEL_W}}{log_p:>{DATA_W}.4f} {' ':<8}║")
            print(f"║ { 'H-Donors:':<{LABEL_W}}{h_donors:>{DATA_W}} {' ':<8}║")
            print(f"║ { 'H-Acceptors:':<{LABEL_W}}{h_acceptors:>{DATA_W}} {' ':<8}║")

            if self.display_drug_attributes is True:
                print(f"╟{'─' * (WIDTH-2)}╢")
                print(f"║ {tag:^{WIDTH-4}} ║")

                for rule_violator in molecule_data["violations"]:
                    clean_violation = (rule_violator[:WIDTH-10] + '..') if len(rule_violator) > (WIDTH-10) else rule_violator
                    print(f"║ ! {clean_violation:<{WIDTH-5}}║")
            print(f"╚{'═' * (WIDTH-2)}╝")
        except Exception as err:
            print_log(1, f"Printing issue. {err}.")


    def print_molecule_summary(self) -> None:
        """Print the boxed totals: requested IDs, violations, likely / unlikely counts."""
        if self.disable_stat_report is True or self.id_list is None:
            return

        WIDTH: int = 62
        LABEL_W: int = 20
        DATA_W: int = 30

        try:
            print("\n\n")
            print(f"\n╔{'═' * (WIDTH-2)}╗")
            full_content = f"+++ SUMMARY REPORT +++"

            print(f"║ {full_content:<{WIDTH - 4}} ║")
            print(f"╠{'═' * (WIDTH-2)}╣")

            print(f"║ { 'Total Compounds:':<{LABEL_W}}{len(self.id_list):>{DATA_W}}\t   {' ':<2}║")
            print(f"║ { 'Total Violations:':<{LABEL_W}}{self.violation_count:>{DATA_W}}\t   {' ':<2}║")
            print(f"║ { '# Likely Drugs:':<{LABEL_W}}{self.likely_drug_count:>{DATA_W}}\t   {' ':<2}║")
            print(f"║ { '# Unlikely Drugs:':<{LABEL_W}}{self.unlikely_drug_count:>{DATA_W}}\t   {' ':<2}║")

            print(f"╚{'═' * (WIDTH-2)}╝")
        except Exception as err:
            print_log(1, f"Printing issue. {err}.")



def shutdown_cli() -> None:
    """Log that no CIDs were supplied, wait four seconds, then exit the process."""
    farewell_messages = (
        "No CID data was entered by the user... Type --help for more information.",
        "Exiting program...",
    )
    for message in farewell_messages:
        print_log(0, message)

    # Leave the messages on screen for a moment before the window closes.
    time.sleep(4)
    sys.exit()



if __name__ == "__main__":
    # Entry point: parse arguments, fetch, analyse, report the elapsed time.
    application_start_time = time.perf_counter()

    parser = MolecuLexParser()
    parser_input = parser.initialize_console_args()
    # Covers None (unreadable source), "" (no source given) and an empty list.
    if not parser_input:
        shutdown_cli()

    parser.get_id_smiles()
    parser.parse_compound_properties()

    application_end_time = time.perf_counter()
    # ".2f" gives two decimals; the previous ":2f" only set a minimum width.
    print_log(0, f"Search and analysis completed (Execution Time: {(application_end_time-application_start_time):.2f} seconds)...")
    input("\nPress enter to exit...")
                    
                  

Sonderamine - 2026

Contact Information:
sonderamine@gmail.com