import subprocess
from pathlib import Path
from typing import Optional, Dict, Tuple, List
import logging
class GninaDockingEngine:
    """
    GNINA-specific docking engine implementation.

    This class wraps the GNINA command-line program, which combines
    traditional molecular docking with deep learning-based scoring using
    convolutional neural networks (CNNs). Each call to :meth:`dock` runs the
    executable once as a subprocess, writes the docked poses and a log file
    into ``work_dir``, and parses the per-pose score table from that log.

    Attributes:
        gnina_path (str): Path to GNINA executable
        work_dir (Path): Directory for docking outputs
        receptor_format (str): Expected receptor file format ("pdb")
        ligand_format (str): Expected ligand file format ("sdf")
        exhaustiveness (int): Search exhaustiveness parameter
        num_modes (int): Number of binding modes to generate
        cpu (int): Number of CPU cores to use
        seed (int): Random seed for reproducibility
        logger (logging.Logger): Logger instance for engine events
    """

    # Search-box edge lengths used when a caller explicitly passes
    # ``box_size=None`` together with a ``box_center``.
    _DEFAULT_BOX_SIZE = (30.0, 30.0, 30.0)

    def __init__(self,
                 gnina_path: str,
                 work_dir: str,
                 seed: int = 0,
                 exhaustiveness: int = 16,
                 num_modes: int = 9,
                 cpu: int = 4):
        """
        Initialize GNINA docking engine.

        Args:
            gnina_path (str): Path to GNINA executable. Must point to an
                existing file system entry.
            work_dir (str): Directory for docking outputs. Will be created
                (including parents) if it doesn't exist.
            seed (int, optional): Random seed passed to GNINA via ``--seed``.
                Defaults to 0.
            exhaustiveness (int, optional): Value passed to GNINA via
                ``--exhaustiveness``. Defaults to 16.
            num_modes (int, optional): Value passed to GNINA via
                ``--num_modes``. Defaults to 9.
            cpu (int, optional): Value passed to GNINA via ``--cpu``.
                Defaults to 4.

        Raises:
            FileNotFoundError: If nothing exists at ``gnina_path``.
        """
        # Validate before touching the file system so that a bad executable
        # path does not leave an empty work directory behind.
        if not Path(gnina_path).exists():
            raise FileNotFoundError(f"GNINA executable not found at {gnina_path}")

        self.gnina_path = gnina_path
        self.work_dir = Path(work_dir)
        self.work_dir.mkdir(parents=True, exist_ok=True)

        self.receptor_format = "pdb"
        self.ligand_format = "sdf"

        self.seed = seed
        self.exhaustiveness = exhaustiveness
        self.num_modes = num_modes
        self.cpu = cpu

        # Configure logging
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.INFO)

    def dock(self,
             receptor_file: str,
             ligand_file: str,
             box_size: Optional[Tuple[float, float, float]] = (30.0, 30.0, 30.0),
             box_center: Optional[Tuple[float, float, float]] = None,
             output_prefix: Optional[str] = None
             ) -> Dict[str, object]:
        """
        Perform docking using GNINA.

        Both input files are checked with :meth:`precheck` first; then GNINA
        is run once and the score table is read back from its log file.

        Args:
            receptor_file (str): Path to prepared receptor file.
            ligand_file (str): Path to prepared ligand file.
            box_size (Optional[Tuple[float, float, float]], optional): (x,y,z)
                dimensions of the search box. Only used when ``box_center``
                is given; ``None`` falls back to (30.0, 30.0, 30.0).
                Defaults to (30.0, 30.0, 30.0).
            box_center (Optional[Tuple[float, float, float]], optional):
                (x,y,z) coordinates of the docking box center. If None, the
                box is derived from the ligand itself by passing
                ``--autobox_ligand <ligand_file>`` and ``box_size`` is
                ignored. Defaults to None.
            output_prefix (Optional[str], optional): Prefix for output files.
                If None, uses the ligand filename stem. Defaults to None.

        Returns:
            Dict[str, object]: Dictionary containing docking results with keys:
                - output_file: Path (str) to ``<prefix>_docked.sdf``
                - log_file: Path (str) to ``<prefix>_docked.log``
                - scores: List of per-pose dictionaries as returned by
                  :meth:`_parse_scores` (keys ``pose``, ``affinity``,
                  ``intramol``, ``cnn_pose``, ``cnn_affinity``). Empty if the
                  log could not be read or contained no score rows.

        Raises:
            FileNotFoundError: If the receptor or ligand file does not exist.
            ValueError: If ``box_center`` or ``box_size`` does not have
                exactly three components.
            subprocess.CalledProcessError: If GNINA exits with a non-zero
                status.

        Note:
            - GNINA is invoked with ``--addH off``, i.e. inputs are assumed
              to be already protonated.
            - All poses are written to a single SDF file inside ``work_dir``.
        """
        # Fail early with a clear error instead of a GNINA process failure.
        for label, path in (("Receptor", receptor_file), ("Ligand", ligand_file)):
            if not self.precheck(path):
                raise FileNotFoundError(f"{label} file not found: {path}")

        if output_prefix is None:
            output_prefix = Path(ligand_file).stem
        output_sdf = self.work_dir / f"{output_prefix}_docked.sdf"
        output_log = self.work_dir / f"{output_prefix}_docked.log"

        cmd = self._build_command(receptor_file, ligand_file,
                                  output_sdf, output_log,
                                  box_size, box_center)

        # Run docking
        try:
            process = subprocess.run(cmd,
                                     check=True,
                                     capture_output=True,
                                     text=True)
        except subprocess.CalledProcessError as e:
            self.logger.error("Docking failed: %s", e.stderr)
            raise
        self.logger.info(process.stdout)

        # Parse scores from log file
        scores = self._parse_scores(output_log)
        self.logger.info("Calculated scores from %s", output_log)
        self.logger.debug("Parsed scores: %s", scores)

        return {
            "output_file": str(output_sdf),
            "log_file": str(output_log),
            "scores": scores
        }

    def _build_command(self,
                       receptor_file: str,
                       ligand_file: str,
                       output_sdf: Path,
                       output_log: Path,
                       box_size: Optional[Tuple[float, float, float]],
                       box_center: Optional[Tuple[float, float, float]]
                       ) -> List[str]:
        """Assemble the GNINA argument list for one docking run."""
        cmd = [
            str(self.gnina_path),
            "-r", str(receptor_file),
            "-l", str(ligand_file),
            "-o", str(output_sdf),
            "--log", str(output_log),
            "-q",  # Quiet mode
            "--seed", str(self.seed),  # Reproducibility
            "--exhaustiveness", str(self.exhaustiveness),
            "--num_modes", str(self.num_modes),
            "--cpu", str(self.cpu),
            "--addH", "off"  # Assume inputs are properly protonated
        ]

        # Without an explicit center, let GNINA derive the box from the ligand.
        if box_center is None:
            cmd.extend(["--autobox_ligand", str(ligand_file)])
            return cmd

        # The signature allows box_size=None; use the default box rather than
        # crashing on None[0].
        if box_size is None:
            box_size = self._DEFAULT_BOX_SIZE

        # Tuple unpacking raises ValueError on anything but three components.
        center_x, center_y, center_z = box_center
        size_x, size_y, size_z = box_size
        cmd.extend([
            "--center_x", str(center_x),
            "--center_y", str(center_y),
            "--center_z", str(center_z),
            "--size_x", str(size_x),
            "--size_y", str(size_y),
            "--size_z", str(size_z)
        ])
        return cmd

    def _parse_scores(self, log_file: Path) -> List[Dict[str, float]]:
        """
        Parse docking scores from GNINA log file.

        A line is treated as a score row when its first whitespace-separated
        field consists of digits and it has at least five fields; the first
        five fields are read as pose number, affinity, intramolecular energy,
        CNN pose score and CNN affinity, in that order.

        Args:
            log_file (Path): Path to GNINA log file containing docking results

        Returns:
            List[Dict[str, float]]: One dictionary per parsed row, in file
            order, with keys:
                - pose: Pose number (int, as written in the log)
                - affinity: Second column of the row
                - intramol: Third column of the row
                - cnn_pose: Fourth column of the row
                - cnn_affinity: Fifth column of the row

        Note:
            - Parsing is best-effort and never raises: if the log cannot be
              opened or read, the error is logged and an empty list is
              returned.
            - A row whose fields are not numeric is skipped on its own; rows
              parsed before and after it are kept.
        """
        scores: List[Dict[str, float]] = []
        try:
            with open(log_file, encoding="utf-8", errors="replace") as handle:
                for line in handle:
                    # Skip header lines and empty lines
                    if not line.strip() or '---' in line or 'mode |' in line:
                        continue

                    fields = line.split()
                    if len(fields) < 5 or not fields[0].isdigit():
                        continue

                    try:
                        row = {
                            "pose": int(fields[0]),
                            "affinity": float(fields[1]),
                            "intramol": float(fields[2]),
                            "cnn_pose": float(fields[3]),
                            "cnn_affinity": float(fields[4])
                        }
                    except ValueError:
                        # Some other log line that merely starts with a
                        # number; do not let it discard the real score rows.
                        self.logger.debug(
                            "Skipping unparsable score line: %r", line)
                        continue
                    scores.append(row)
        except OSError as e:
            self.logger.error(f"Failed to parse scores: {e}")
            return []
        return scores

    def precheck(self, file_path: str) -> bool:
        """
        Check if the provided file path exists.

        This is a plain existence check. :meth:`dock` calls it for the
        receptor and the ligand before launching GNINA.

        Args:
            file_path (str): Path to the file to check

        Returns:
            bool: True if the path exists, False otherwise

        Note:
            - Only checks existence, not file validity
            - Does not verify file format or content
        """
        return Path(file_path).exists()