"""

Jack Doyle

30th May 2022


This is a similar script to previous entries in that it extracts a given number of molecules from a cif file.

The main differences here are that we now:

a) perform the operation on all cif files in a given directory in a single script
b) only return the molecular units whose centroids live in the primitive cell (we can find the rest by simple
translations later and we don't need the csd api for this)

"""

from ccdc import io

import sys
import os
import pickle

def main():
    """Extract the molecules of every cif file in a directory and pickle them.

    Command line arguments:
        sys.argv[1]: directory containing the cif files.
        sys.argv[2]: optional integer size threshold, forwarded to
            MolListFromPacking as ``mol_size``; no filtering when omitted.

    Each cif file is read, packed, converted to plain tuples, prefixed with
    the cell lengths and angles, and handed to WriteMols. WriteMols receives
    the bare file name (not the joined path), so the output is created
    relative to the current working directory.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError.
        sys.exit('usage: ' + sys.argv[0] + ' CIF_DIR [MIN_SIZE]')
    cif_dir = sys.argv[1]
    min_size = int(sys.argv[2]) if len(sys.argv) > 2 else None

    for file_name in sorted(os.listdir(cif_dir)):
        cif_path = os.path.join(cif_dir, file_name)
        # Only regular *.cif files are crystal structures. Everything else
        # (sub-directories, the out.txt log, previously written .pkl files)
        # must not be fed to the cif reader.
        if not file_name.lower().endswith('.cif') or not os.path.isfile(cif_path):
            continue
        crys = ReadCifFile(cif_path)
        prim = GetMolsInPrimitiveCell(crys)
        mols = MolListFromPacking(prim, mol_size = min_size)
        output_mols = CleanMols(mols)
        # add cell lengths and angles
        augmented_output_mols = AppendAnglesAndLengths(crys, output_mols)
        WriteMols(file_name, augmented_output_mols)


def ReadCifFile(cif_file):
    '''Open cif_file with the CSD crystal reader and return its first crystal entry.'''
    # Index 0: only the first structure stored in the file is used.
    return io.CrystalReader(cif_file, format = 'cif')[0]

def GetMolsInPrimitiveCell(crystal):
    """Return the packing of molecules whose centroid lies in the primitive cell of the crystal.

    The packing is requested for the box spanning (0, 0, 0) to (1, 1, 1)
    with the 'CentroidIncluded' inclusion rule.
    """
    cell_box = ((0, 0, 0), (1, 1, 1))
    return crystal.packing(box_dimensions = cell_box, inclusion = 'CentroidIncluded')

def MolListFromPacking(packing, mol_size = None):
    """Return the list of molecules in a packing, optionally dropping small ones.

    Args:
        packing: object whose ``components`` attribute iterates over the
            molecules; each molecule exposes an ``atoms`` sequence.
        mol_size: minimum number of atoms (inclusive) a molecule needs in
            order to be returned. With None (or 0) every molecule is kept.
            This is relevant when some "molecules" have been incorrectly
            split into several components.

    Returns:
        List of the retained molecules, in packing order.

    Side effect:
        Appends the atom count of every retained molecule to ./out.txt, one
        per line, framed by two '************' marker lines.
    """
    if mol_size:
        # Inclusive threshold: a molecule of exactly mol_size atoms is kept.
        mol_list = [mol for mol in packing.components if len(mol.atoms) >= mol_size]
    else:
        mol_list = list(packing.components)
    sizes = [len(mol.atoms) for mol in mol_list]

    with open('./out.txt', 'a') as writer:
        writer.write('************' + '\n')
        for s in sizes:
            writer.write(str(s) + '\n')
        writer.write('************' + '\n')
    return mol_list

def CleanMols(mol_list):
    """Turn csd molecule objects into plain Python data.

    Every molecule becomes a list of (atomic symbol, [coordinates]) tuples,
    one tuple per atom, in the order of the molecule's ``atoms``.
    """
    return [
        [(atom.atomic_symbol, list(atom.coordinates)) for atom in molecule.atoms]
        for molecule in mol_list
    ]

def AppendAnglesAndLengths(crystal, cleaned_mols):
    """Return a new list: cell lengths and cell angles entries followed by the cleaned molecules.

    The first element is ("lengths", [...]) and the second ("angles", [...]),
    both read from the crystal; the molecules follow in their given order.
    """
    header = [
        ("lengths", list(crystal.cell_lengths)),
        ("angles", list(crystal.cell_angles)),
    ]
    return header + list(cleaned_mols)

def WriteMols(cif_file, mols_out):
    '''Write the output molecules to a pickle file named after the cif file.

    cif_file: name (or path) of the source cif file; its extension, whatever
        its length, is replaced by '.pkl' to build the output name.
    mols_out: picklable object to store.

    Returns None.
    '''
    # splitext strips the real extension; slicing off a fixed four
    # characters mangled names such as 'x.mmcif' or extension-less names.
    stem, _ext = os.path.splitext(cif_file)
    out = stem + '.pkl'
    with open(out, 'wb') as f:
        pickle.dump(mols_out, f)
    return None



# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()