################################################################################
#Reindexing Script - calls upon pre-existing functionality in CSPy 
#reindexes crystal structure files to match ordering in a reference molecule
###################################################################################


#################################################################################
#perform necessary imports
###################################################################################
from cspy.crystal import Crystal
from cspy.chem import Molecule
from zipfile import ZipFile
from multiprocessing import Pool
from functools import partial
###################################################################################


################################################################################
#define function
###############################################################################


#Zip archive that reindex() extracts structures from when no zip_path is given.
#Replace the placeholder with the real path before running.
DEFAULT_STRUCTURES_ZIP = '<zip file of crystal structures>'


def reindex(struc, ref_mol, zip_path=None):
    """Reindex the molecules of one crystal structure to match ref_mol.

    The structure file is extracted from the zip archive into the current
    working directory. Each molecule returned by ``asym_mols()`` is overlaid
    with the reference molecule and written to ``new_<root>_mol_<i>_.xyz``.
    The saved molecules are then loaded back and substituted into the crystal
    with ``replace_molecules``; the result is written to
    ``replaced_<root>.res``. ``<root>`` is the part of ``struc`` before the
    first '.'.

    Parameters
    ----------
    struc : str
        Name of the structure file inside the zip archive.
    ref_mol : Molecule
        Reference molecule carrying the desired atom ordering.
    zip_path : str, optional
        Path of the zip archive holding the structures. Defaults to
        DEFAULT_STRUCTURES_ZIP.

    Returns
    -------
    None
        Output is written to disk. If the crystal cannot be loaded or any
        overlay fails, a message is printed and the structure is skipped, so
        one bad structure does not abort a whole batch.
    """
    if zip_path is None:
        zip_path = DEFAULT_STRUCTURES_ZIP

    #the archive only needs to be open for the extraction itself
    with ZipFile(zip_path) as structures_zip:
        structures_zip.extract(struc)
    name_root = struc.split('.')[0]

    #get the asymmetric unit of the crystal structure
    try:
        structure = Crystal.load(struc)
    except Exception as exc:
        print('load crystal fail:', struc, exc)
        return
    molecules = structure.asym_mols()

    #reindex the asymmetric unit molecules - by overlaying with reference
    mol_names = []
    for i, old_molecule in enumerate(molecules):
        new_name = 'new_' + name_root + '_mol_' + str(i) + '_.xyz'
        try:
            overlay = ref_mol.overlay(old_molecule)
            new_mol = overlay[0]
            new_mol.save(new_name)
        except Exception as exc:
            #the replacement needs every molecule, so give up on this
            #structure rather than load a missing or stale xyz file later
            print('overlay fail:', struc, exc)
            return
        mol_names.append(new_name)

    #Using molecular replacement in CSPy, replace molecules in original with
    #the reindexed molecules. The crystal is loaded afresh for this step, as
    #the workflow has always done.
    try:
        original_structure = Crystal.load(struc)
    except Exception as exc:
        print('load crystal fail:', struc, exc)
        return

    #the reindexed molecules are read back from the xyz files written above
    replace_mols = [Molecule.load(load_name) for load_name in mol_names]
    new_structure = original_structure.replace_molecules(replace_mols, reorder_to='other')
    replaced_name = 'replaced_' + name_root + '.res'
    new_structure.to_shelx_file(replaced_name)








#######################################################################
#Running the process section
#######################################################################

#Inputs for the run - replace the placeholder values before running.
#STRUCTURES_ZIP should name the same archive that reindex() extracts from.
REFERENCE_MOLECULE_XYZ = '<reference molecule xyz file>'
STRUCTURES_ZIP = '<zip file of crystal structures>'
#number of worker processes in the pool
N_PROCESSES = 40

#The guard stops the run from being started again when multiprocessing
#re-imports this module in its worker processes (spawn start method), and
#lets reindex() be imported elsewhere without launching the whole job.
if __name__ == '__main__':
    #read in structure file of reference molecule with desired indexing
    ref_mols = Molecule.load(REFERENCE_MOLECULE_XYZ)

    #list the structure files in the archive, skipping directory entries
    with ZipFile(STRUCTURES_ZIP) as structures_zip:
        struc_list = [info.filename for info in structures_zip.infolist()
                      if not info.is_dir()]

    #Parallelised over cores, read in structures one at a time and reindex the file
    with Pool(N_PROCESSES) as pool:
        reindexer = partial(reindex, ref_mol=ref_mols)
        jobs = pool.map(reindexer, struc_list)

