# YASARA MACRO # TOPIC: 5. Structure prediction # TITLE: Docking many ligands to a receptor to perform virtual screening # REQUIRES: Structure # AUTHOR: Elmar Krieger # LICENSE: GPL # DESCRIPTION: This macro runs VINA or AutoDock to dock multiple ligands against a receptor and saves a sorted table of their binding energies, as well as the corresponding complexes # Parameter section - adjust as needed, but NOTE that some changes only take effect # if you start an entirely new docking job, not if you continue an existing one. # ================================================================================= # You can either set the target structure by clicking on Options > Macro > Set target, # by providing it as command line argument (see docs at Essentials > The command line), # or by uncommenting the line below and specifying it directly. #MacroTarget '/home/myname/projects/docking/1sdf' # Docking method, either VINA (CPU only) or AutoDockLGA (runs on the GPU if enabled at Options > Processors > Set compute GPU) method='VINA' # Number of docking runs per ligand (maximally 999, 0 lets YASARA choose based on the number of CPUs) runs=0 # Number of best poses per ligand saved and reported bestposes=5 # Set to 1 to keep the ligand completely rigid (alternatively you can provide # the ligand as a *.yob file and fix certain dihedral angles only). rigid=0 # A selection of receptor residues whose side-chains should be kept flexible, e.g. # flexres='Lys 91 Leu 100', or (if the receptor is not a monomer) # flexres='Res Lys 91 Mol A or Res Leu 100 Mol B'. # Alternatively you can provide the receptor as a *.sce or *.yob file with fixed # atoms, which gives better control (e.g. you can keep only part of a side-chain # or even a terminal backbone flexible). flexres='' # Sort results either by ligand number ('ligandnum'), binding energy ('bindnrg') or by # efficiency ('effi'), i.e. binding energy per heavy atom, which addresses the bias towards # larger ligands in screening. sortby='bindnrg' # Force field used for charge assignment (AutoDock only, not VINA) and to determine which ligand bonds can rotate. # Do not change to an in vacuo force field (NOVA), all other force fields should work too (not tested). ForceField AMBER03 # Uncomment below to set a certain random seed (a YASARA command, no '=') # RandomSeed 1000 # Normally no change required below this point # ============================================ if !runs # For an optimum speed/accuracy tradeoff, we select at least 8 runs, but more if CPUs are available cpus=Processors runs=cpus while runs<8 runs=runs+cpus # Sanity checks if MacroTarget=='' RaiseError "This macro requires a target. Either edit the macro file or click Options > Macro > Set target to choose a target structure" if runs>999 RaiseError 'Too many docking runs selected, (runs) would take forever' if runs2 ShowMessage 'Your scene contains (Objects) objects, while only the receptor and the docking cell are expected.' Wait ContinueButton # Load the ligands and determine the maximum radius for type in 'yob','pdb','sdf' filename='(MacroTarget)_ligands.(type)' exists=FileSize (filename) if exists ligandlist()=Load(type) (filename) break ligands=count ligandlist if not ligands RaiseError 'No ligands were found at (MacroTarget)_ligands.* Please follow the instructions in the "Recipes" section exactly, especially when setting the macro target' if ligands==1 # Maybe ligands are separate molecules, not objects ligandlist()=SplitObj (ligandlist1) ligands=count ligandlist if ligands==1 # If there is really just one ligand, raise an error (otherwise dock_play won't work) RaiseError 'Only a single ligand was found at (filename), you need at least two for screening' # Determine maximum ligand radius radiuslist()=RadiusObj (join ligandlist) radiusmax=max radiuslist # Rename objects to make sure they can be identified later, ligands are 'Waiting' to be docked. NameObj (receptor),Receptor NameObj (join ligandlist),Waiting RemoveObj (join ligandlist) # Keep selected side-chains flexible if flexres!='' FixObj Receptor FreeAtom Res (flexres) and Sidechain Obj Receptor FixRes Cys Atom SG with bond to Atom SG or Ala Pro and Obj Receptor # Add a cell if not already present cellfound=CountObj SimCell if !cellfound if scene RaiseError 'The receptor scene (scefilename) does not contain a cell' Cell Auto,Extension=(radiusmax*2) # Warn if the cell is too large x,y,z=Cell if method!='VINA' and (x>384 or y>384 or z>384) # AutoDock now uses dynamic memory allocation for grids up to 1024x1024x1024 (int MAX_GRID_PTS=1025 in autocomm.h) # Given the standard grid spacing of 0.375 A, these are 384 A. VINA doesn't have a compile-time limit ShowMessage "A cell axis is longer than 384 A, which may reduce docking accuracy. Consider focusing on the active site, or remove unusually long ligands from the library" Wait ContinueButton HideMessage # The segment name C*** is used to tag ligand Conformations, and cannot be used for the receptor hit=ListAtom Segment C??? Obj Receptor if hit MarkAtom (hit) segname=SegAtom (hit) RaiseError 'Segment names starting with "C" are unfortunately reserved to identify ligand conformations, please click "Edit > Name > Segment", double-click object (receptor) on the right, and click OK to set an empty segment name' # The molecule name 'l' is reserved for the ligand hit=ListAtom Mol l if hit RaiseError 'The receptor must not contain a molecule named "l", please click Edit > Name > Molecule and try again' # Docking is done without periodic boundaries Boundary Wall Longrange None # Do we have a checkpoint file from a previous run? checkpointfilename='(MacroTarget)_checkpoint.sce' restarted=FileSize (checkpointfilename) if restarted LoadSce (checkpointfilename) # Loop over the ligands for i=1 to ligands # Has this ligand been docked in a previous run? ligand=ligandlist(i) ligname=NameObj (ligand) if ligname=='Done' or ligname=='Broken' ShowMessage 'Ligand (i) has been docked before...' continue HideMessage # Place only the ligand in the soup, flexible receptor parts are always removed AddObj (ligand) RemoveObj (receptor) # Check that ligand is not internally disconnected conatoms=CountAtom all with connection to 1 if conatoms!=Atoms # Ligand has bonds missing, cannot be docked failure=1 else # See if the ligand can be simulated or is broken. Try three times. # This must be done without receptor, which may not have been cleaned yet. OnError Continue for j=1 to 4 # Initialize force field parameters, but do not stop if it fails failure=Sim Init if !failure # All OK break if failure==434 # User pressed escape OnError Stop RaiseError "Screening was interrupted. All progress has been saved, so you can simply run this macro again to continue" if j==1 # Try to retype the bonds TypeBond all,all if j==2 # Try to re-add the hydrogens DelHydObj (ligand) failure=CleanObj (ligand) if failure==768 # Molecule disappeared during cleaning, don't leave an empty object slot obj=BuildAtom C SwapObj (obj),(ligand) break if j==3 # Try to rebuild the geometry InflateObj (ligand) OnError Stop AddObj (receptor) if failure # This compound is hopeless, skip it NameObj (ligand),Broken BFactorObj (ligand),-9999 SegObj (ligand),C001 else # This compound is OK NameObj (ligand),Docking # Get the ligand compound name and display it at the top ligandname=CompoundMol Obj (ligand) LabelAll 'Screening ligand (i)/(ligands), (ligandname)',Height=0.5,Color=Yellow,Y=4,Z=2 # Move the ligand out of the cell, so that it does not block the view cellpos()=PosObj SimCell PosObj (ligand),(cellpos1),(cellpos2+y*0.5+radiusmax),(cellpos3) # Perform rigid docking if requested if rigid FixObj (ligand) # Align ligand with the cell (to end up with the same local atom coordinates independent # of cell orientation, which are used to calculate the checksum for the *.adr file) TransferObj (ligand),SimCell,Local=Keep # Perform the docking Experiment Docking Method (method) ReceptorObj (receptor) LigandObj (ligand) Runs (runs) # We only want a single result, the best complex ClusterRMSD 1000 # In case of docking failures (e.g. cell too small) don't stop, but assign this binding energy to the ligand [kcal/mol] FailureEnergy -9998 # Result file number must be separated with '_', in case MacroTarget also ends with number # If we exceed 999, an additional digit will be inserted. # If PDB files are needed too, replace .yob with .pdb to get both ResultFile (MacroTarget)_(000+i).yob # Uncomment below to set the number of energy evaluations (AutoDock ga_num_evals): # DockPar ga_num_evals 25000000 # Uncomment the two lines below to provide your own atom parameters (VdW radii etc.) # GridPar parameter_file /Path/To/Custom/AD4_parameters.dat # DockPar parameter_file /Path/To/Custom/AD4_parameters.dat Experiment On Wait ExpEnd UnlabelAll NameObj (ligand),Done RemoveObj (ligand) RecepFlex NameObj RecepFlex,RecFlex(ligand) # Save checkpoint SaveSce (checkpointfilename) SurfPar Molecular=Gaussian # Save a log file with an analysis Console Off RecordLog (MacroTarget) print 'Ligand screening result' print '=======================' print print '(ligands) different ligands were docked with (runs) runs each to the receptor object (receptor) yielding the following results,' print 'sorted by binding energy [more positive energies indicate stronger binding, and negative energies mean no binding].' print '"Effi" is the binding efficiency [binding energy per heavy atom], "Con.Surf" is the molecular contact surface.' print 'The best (bestposes) poses of each ligand are reported.' print print "Lig.|Effi[kcal/(mol*Atom)]|Bind.energy[kcal/mol]|Dissoc. constant [pM]| Con.Surf[A^2] | Name | Contacting receptor residues | SMILES" print '----+---------------------+---------------------+---------------------+---------------+------+------------------------------+-------------------------' doneobjlist()=ListObj Done idx=1 for i=1 to ligands obj=ligandlist(i) if obj in doneobjlist # Ligand was docked successfully AddObj (obj) for j=001 to bestposes resultlist(idx),bindnrglist(idx),effilist(idx)=DockingResult i,'Obj (receptor)','Segment C(j) Obj (ligandlist(i))' idx=idx+1 RemoveObj (obj) else # Ligand could not be docked resultlist(idx),bindnrglist(idx),effilist(idx)=DockingResult i,'','' idx=idx+1 if !(i%5) ShowMessage 'Analyzing docking results, (100*i/ligands)% completed...' Wait 1 # Keep only one pose per object and get the ligand SMILES strings, add them to the result list. idx=1 for i=1 to ligands obj=ligandlist(i) if obj in doneobjlist AddObj (obj) DelRes Segment !C001 Obj (obj) SaveSMI (obj),(MacroTarget).smi # Read the first string as 'smiles' for smiles in file (MacroTarget).smi break RemoveObj (obj) for j=1 to bestposes resultlist(idx)=resultlist(idx)+' | '+smiles idx=idx+1 else idx=idx+1 if !(i%10) ShowMessage 'Creating ligand SMILES strings, (100*i/ligands)% completed...' Wait 1 DelFile (MacroTarget).smi # Load original scene without hydrogens on flexible side-chains LoadSce (checkpointfilename) if sortby!='ligandnum' # Sort results by binding energy or binding efficiency SortResults 'resultlist','(sortby)list' for i=1 to ligands NameObj (ligandlist(i)) and not Broken,Ligand(i) for i=1 to count resultlist print (resultlist(i)) print rndseed=RandomSeed print 'The random seed used during docking was (rndseed).' fof=ForceField print 'Point charges and dihedral barriers were obtained from the (fof) force field.' StopLog HideMessage # Temporarily delete all except the bestposes selection='Segment C??? and not Segment' for i=001 to bestposes selection=selection+' C(i)' DelObj SimCell Broken RenumberObj all # Rename ligand molecules so that the compound info can be kept, but don't use receptor molecule names objlist()=ListObj Ligand?????? recmolnamelist()=NameMol Obj Receptor for range in '"A" to "Z"','"a" to "z"','"0" to "9"' for molname=(range) if molname not in recmolnamelist ligmolnamelist(count ligmolnamelist+1)=molname for i=1 to count objlist obj=objlist(i) AddObj (obj) DelRes (selection) and Obj (obj) # To save disk space and avoid busting the atom limit, save only the bestposes SaveSce (MacroTarget) for i=1 to count objlist NameMol Obj (objlist(i)),(ligmolnamelist(1+(i-1)%count ligmolnamelist)) # Make each ligand a separate object, keep atom numbers and shift objects ahead SplitObj (join objlist),Center=No,Keep=AtomNum # Save SDF file with the best ligand poses, choose original coordinate system that matches the receptor SaveSDF Ligand??????,(MacroTarget)_bestposes,Transform=Yes # Save PDB file with the receptor and best ligand poses SavePDB all,(MacroTarget)_bestposes,Transform=Yes # Bring back the complete scene LoadSce (MacroTarget) Console On # Exit YASARA if this macro was provided as command line argument in console mode and not included from another macro if runWithMacro and ConsoleMode and !IndentationLevel Exit # EXTRACT DOCKING RESULT # ====================== # Returns a result string and binding energy, extracted from atomic BFactor and Property. # 'num' is a sequential number, 'recsel' selects the receptor, 'ligsel' the ligand. def DockingResult num,recsel,ligsel if ligsel=='' bindnrg=-9999 consurf=0,0 compound='Ligand (num)' effi=0 else # Get the binding energy from the B-factor... bindnrg=BFactorAtom (ligsel) # ...and the dissociation constant from the atomic property. dissconst=PropAtom (ligsel) # Clear surface environment to calculate contact surface RemoveEnvAll # Calculate contact surface in both directions to average result, must be done during simulation # so that receptor and ligand are in the same coordinate system Sim Pause Sim On consurf1=ConSurfAtom (recsel),(ligsel) consurf2=ConSurfAtom (ligsel),(recsel) Sim Off # Get compound name compound=CompoundMol (ligsel) if compound=='' compound=NameRes (ligsel) compound=compound+' ' # Get receptor residues that contact the ligand reslist()=ListRes (recsel) with distance<4.0 from (ligsel),Format='MOLNAME RESNAME RESNUM' # Calculate ligand efficiency, i.e. binding energy divided by heavy atoms heavyatoms=CountAtom Element !H (ligsel) effi=bindnrg/heavyatoms if bindnrg<=0 dissconst=' None' else dissconst=00000000000000.0000+dissconst if bindnrg<=-9999 reslist='Broken molecule, no docking possible' elif bindnrg<=-9998 reslist='Docking failed. Check if cell is too small for ligand' elif !count reslist reslist='No residues in contact' else reslist=join reslist result='(0000+num)| (000000.0000+effi) | (000000.0000+bindnrg) | (dissconst) | (000000.00+mean consurf) | (compound) | (reslist)' return result,bindnrg,effi # SORT KEYLIST BY VALUES, HIGHEST FIRST # ===================================== # Lists are passed by reference, see Yanaconda doc section 'Calls to user-defined functions..' def SortResults keylist,vallist caller (keylist),(vallist) elements=count (keylist) do sorted=1 for i=1 to elements-1 if (vallist)(i)<(vallist)(i+1) swap=(vallist)(i) (vallist)(i)=(vallist)(i+1) (vallist)(i+1)=swap swap=(keylist)(i) (keylist)(i)=(keylist)(i+1) (keylist)(i+1)=swap sorted=0 while not sorted