Source code for docktopus.gnina_engine

import logging
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


class GninaDockingEngine:
    """
    GNINA-specific docking engine implementation.

    This class provides an interface to the GNINA docking engine, which
    combines traditional molecular docking with deep learning-based scoring
    using convolutional neural networks (CNNs). GNINA is particularly
    effective for structure-based drug design and virtual screening.

    GNINA features:
        - Traditional Vina scoring function
        - CNN-based pose scoring and affinity prediction
        - Support for flexible docking
        - Automatic binding site detection
        - Multiple output poses with comprehensive scoring

    Attributes:
        gnina_path (str): Path to GNINA executable
        work_dir (Path): Directory for docking outputs
        receptor_format (str): Expected receptor file format ("pdb")
        ligand_format (str): Expected ligand file format ("sdf")
        exhaustiveness (int): Search exhaustiveness parameter
        num_modes (int): Number of binding modes to generate
        cpu (int): Number of CPU cores to use
        seed (int): Random seed for reproducibility
        logger (logging.Logger): Logger instance for engine events
    """

    # Search-box edge lengths (Angstroms) used when a box center is supplied
    # without an explicit box size.
    DEFAULT_BOX_SIZE: Tuple[float, float, float] = (30.0, 30.0, 30.0)

    def __init__(self,
                 gnina_path: str,
                 work_dir: str,
                 seed: int = 0,
                 exhaustiveness: int = 16,
                 num_modes: int = 9,
                 cpu: int = 4):
        """
        Initialize GNINA docking engine.

        Args:
            gnina_path (str): Path to GNINA executable. Must point to an
                existing file.
            work_dir (str): Directory for docking outputs. Will be created
                if it doesn't exist.
            seed (int, optional): Random seed for reproducibility.
                Defaults to 0.
            exhaustiveness (int, optional): Search exhaustiveness (higher
                values give more thorough but slower searches).
                Defaults to 16.
            num_modes (int, optional): Number of binding modes to generate.
                Defaults to 9.
            cpu (int, optional): Number of CPU cores to use for docking.
                Defaults to 4.

        Raises:
            FileNotFoundError: If GNINA executable is not found at the
                specified path
        """
        # Validate before touching the filesystem so that a failed
        # construction does not leave an empty work directory behind.
        if not Path(gnina_path).exists():
            raise FileNotFoundError(f"GNINA executable not found at {gnina_path}")

        self.gnina_path = gnina_path
        self.work_dir = Path(work_dir)
        self.work_dir.mkdir(parents=True, exist_ok=True)

        self.receptor_format = "pdb"
        self.ligand_format = "sdf"

        self.seed = seed
        self.exhaustiveness = exhaustiveness
        self.num_modes = num_modes
        self.cpu = cpu

        # Configure logging
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

    def dock(self,
             receptor_file: str,
             ligand_file: str,
             box_size: Optional[Tuple[float, float, float]] = (30.0, 30.0, 30.0),
             box_center: Optional[Tuple[float, float, float]] = None,
             output_prefix: Optional[str] = None
             ) -> Dict[str, Any]:
        """
        Perform docking using GNINA.

        Checks that both input files exist, runs the GNINA executable as a
        subprocess, then parses the per-pose scores from the log file GNINA
        wrote.

        Args:
            receptor_file (str): Path to prepared receptor file (PDB format)
            ligand_file (str): Path to prepared ligand file (SDF format)
            box_size (Optional[Tuple[float, float, float]], optional):
                (x,y,z) dimensions of search box in Angstroms. Only used
                when box_center is given; None falls back to
                DEFAULT_BOX_SIZE. Defaults to (30.0, 30.0, 30.0).
            box_center (Optional[Tuple[float, float, float]], optional):
                (x,y,z) coordinates of docking box center. If None, the box
                is derived from the ligand via ``--autobox_ligand``.
                Defaults to None.
            output_prefix (Optional[str], optional): Prefix for output
                files. If None, uses the ligand filename stem.
                Defaults to None.

        Returns:
            Dict[str, Any]: Dictionary containing docking results with keys:
                - output_file: Path to SDF file with docked poses
                - log_file: Path to GNINA log file with detailed output
                - scores: List of dictionaries, one per pose, with keys
                  pose, affinity, intramol, cnn_pose, cnn_affinity
                  (empty if the log could not be parsed)

        Raises:
            FileNotFoundError: If the receptor or ligand file does not exist
            subprocess.CalledProcessError: If GNINA execution fails
            ValueError: If box_center or box_size does not have exactly
                three components

        Note:
            - Receptor should be in PDB format with polar hydrogens
            - Ligand should be in SDF format with all hydrogens
              (GNINA is run with ``--addH off``)
            - All poses are saved in a single SDF file
        """
        # Fail early with a clear message instead of letting GNINA exit
        # with an opaque non-zero status on a missing input.
        for label, path in (("Receptor", receptor_file), ("Ligand", ligand_file)):
            if not self.precheck(path):
                raise FileNotFoundError(f"{label} file not found: {path}")

        if output_prefix is None:
            output_prefix = Path(ligand_file).stem

        output_sdf = self.work_dir / f"{output_prefix}_docked.sdf"
        output_log = self.work_dir / f"{output_prefix}_docked.log"

        cmd = self._build_command(
            receptor_file, ligand_file, output_sdf, output_log,
            box_size, box_center,
        )

        # Run docking
        try:
            process = subprocess.run(cmd, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            self.logger.error("Docking failed: %s", e.stderr)
            raise
        self.logger.info("%s", process.stdout)

        # Parse scores from log file
        scores = self._parse_scores(output_log)
        self.logger.info("Calculated scores from %s", output_log)

        return {
            "output_file": str(output_sdf),
            "log_file": str(output_log),
            "scores": scores
        }

    def _build_command(self,
                       receptor_file: str,
                       ligand_file: str,
                       output_sdf: Path,
                       output_log: Path,
                       box_size: Optional[Tuple[float, float, float]],
                       box_center: Optional[Tuple[float, float, float]]
                       ) -> List[str]:
        """Assemble the GNINA argument list for a single docking run."""
        cmd = [
            self.gnina_path,
            "-r", str(receptor_file),
            "-l", str(ligand_file),
            "-o", str(output_sdf),
            "--log", str(output_log),
            "-q",  # Quiet mode
            "--seed", str(self.seed),  # Reproducibility
            "--exhaustiveness", str(self.exhaustiveness),
            "--num_modes", str(self.num_modes),
            "--cpu", str(self.cpu),
            "--addH", "off"  # Assume inputs are properly protonated
        ]

        if box_center is None:
            # No explicit box: let GNINA derive it from the ligand itself.
            cmd.extend(["--autobox_ligand", str(ligand_file)])
            return cmd

        # box_size is annotated Optional, so tolerate None instead of
        # failing on subscripting it.
        if box_size is None:
            box_size = self.DEFAULT_BOX_SIZE

        # Tuple unpacking raises ValueError on a wrong number of components.
        center_x, center_y, center_z = box_center
        size_x, size_y, size_z = box_size
        cmd.extend([
            "--center_x", str(center_x),
            "--center_y", str(center_y),
            "--center_z", str(center_z),
            "--size_x", str(size_x),
            "--size_y", str(size_y),
            "--size_z", str(size_z)
        ])
        return cmd

    def _parse_scores(self, log_file: Path) -> List[Dict[str, Any]]:
        """
        Parse docking scores from GNINA log file.

        Reads the results table line by line and keeps every row whose
        first column is an integer mode number followed by at least four
        numeric columns.

        Args:
            log_file (Path): Path to GNINA log file containing docking
                results

        Returns:
            List[Dict[str, Any]]: List of score dictionaries, one for each
            pose, in the order they appear in the log. Each dictionary
            contains:
                - pose: Pose number as printed by GNINA (1-based)
                - affinity: Vina binding affinity in kcal/mol
                - intramol: Intramolecular energy in kcal/mol
                - cnn_pose: CNN pose score (per the GNINA documentation,
                  a 0-1 score where higher is better)
                - cnn_affinity: CNN affinity prediction (per the GNINA
                  documentation, in pK units rather than kcal/mol)

        Note:
            - This method is best-effort and does not raise: if the log
              cannot be read or a score row is malformed, the error is
              logged and an empty list is returned
            - Rows with fewer than five columns are ignored
        """
        scores: List[Dict[str, Any]] = []
        try:
            with open(log_file) as f:
                for line in f:
                    # Skip header lines and empty lines
                    if not line.strip() or '---' in line or 'mode |' in line:
                        continue

                    # A score row starts with the integer pose/mode number
                    parts = line.split()
                    if len(parts) >= 5 and parts[0].isdigit():
                        scores.append({
                            "pose": int(parts[0]),
                            "affinity": float(parts[1]),
                            "intramol": float(parts[2]),
                            "cnn_pose": float(parts[3]),
                            "cnn_affinity": float(parts[4])
                        })
        except (OSError, ValueError) as e:
            # OSError: log missing/unreadable. ValueError: non-numeric
            # column or undecodable bytes. Partial results are discarded
            # so callers never see a truncated pose list.
            self.logger.error("Failed to parse scores: %s", e)
            return []

        return scores

    def precheck(self, file_path: str) -> bool:
        """
        Check if the provided file path exists.

        This method performs a simple existence check. It is called by
        ``dock`` on the receptor and ligand paths before GNINA is
        launched.

        Args:
            file_path (str): Path to the file to check

        Returns:
            bool: True if the path exists, False otherwise

        Note:
            - Only checks existence, not file validity
            - Does not verify file format or content
        """
        return Path(file_path).exists()