#
# Basic test rig for MAFpt_ATTACK_DB.py
# Please set MAFpt_YAML_ROOT & MAFpt_YAML_FILE to your unit test versions
#
import os
import pandas as pd


import MAFpt_r_params as rpar
import MAFpt_ATTACK_DB_v2 as ATTACK

from scipy.spatial import distance
#import sklearn.cluster as cluster

import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
# https://scikit-learn-extra.readthedocs.io/en/latest/install.html
# Needs a working C compiler do later
#from sklearn_extra.cluster import KMedoids
#import matplotlib.pyplot as plt
# https://scikit-learn.org/stable/
from sklearn.mixture import GaussianMixture

# https://scikit-learn.org/stable/
from sklearn.cluster import Birch

import numpy as np
import itertools

# from scipy import linalg
import matplotlib.pyplot as plt
#import matplotlib.axes.Axes as ax
import matplotlib as mpl

def SortFunc(e):
  """Sort key giving a case-insensitive ordering.

  e: an object with a ``lower()`` method (the group-name strings in this rig).
  Returns whatever ``e.lower()`` returns.
  """
  lowered = e.lower()
  return lowered

# #######################################
#  START: Initialising ATTACK_DB
#
#

print("<< STARTING ATTACK DB TEST INITIALISE")

# Locate the run-parameter YAML file.
#
# MAFpt_YAML_ROOT / MAFpt_YAML_FILE (see the header of this file) take
# precedence.  When a variable is unset or empty we fall back to the values
# this rig previously hard-coded: the current working directory and the
# unit-test parameter file.
YAML_Root = os.environ.get('MAFpt_YAML_ROOT') or os.getcwd()
YAML_File = os.environ.get('MAFpt_YAML_FILE') or "/MAFpt_ATTACK_DB_TEST_RunParams.yaml"

# The path is built by plain concatenation, so make sure a separator sits
# between the root and the file name (the default file name already has one).
if not YAML_File.startswith(("/", os.sep)) and not YAML_Root.endswith(("/", os.sep)):
    YAML_File = "/" + YAML_File

p_obj = rpar.MAFpt_r_params(YAML_Root + YAML_File)

# MAFpt_r_ck_load() returning 0 is treated as "parameter file not loaded"
ck_load = p_obj.MAFpt_r_ck_load()

if ck_load == 0:
    print("MAFpt: Unable to access parameter file")
    # SystemExit rather than exit(): exit() is only injected by the site
    # module and is intended for interactive use.
    raise SystemExit(1)

# Run parameters this rig does not currently read (kept for reference):
#   RUN_HELP, RUN_SUMM_STATS, RUN_TESTS, RUN_ATT_SRC, RUN_CVE_SRC

# Bind the reader once; every value below comes from the loaded parameter file
ReadParam = p_obj.MAFpt_r_read

# Download / re-index switches
DOWNLOAD_ATTACK = ReadParam("RUN_DOWNLOAD_ATTACK")
REINDEX_ATTACK = ReadParam("RUN_REINDEX_ATTACK")

# Data locations
ATTACK_LOCAL_FILE_ROOT = ReadParam("RUN_ATTACK_LOCAL_FILE_ROOT")
ATTACK_TAXII_SERVER = ReadParam("RUN_ATTACK_TAXII_SERVER")
ATTACK_LOCAL_COPY = ReadParam("RUN_ATTACK_LOCAL_COPY")
ATTACK_CVE_SEARCH = ReadParam("RUN_ATTACK_CVE_SEARCH")

# Index parameters handed to MAFpt_ATTACK_DB
ATTACK_MAIN_INDEX = ReadParam("RUN_ATTACK_MAIN_INDEX")
ATTACK_SUB_INDEX = ReadParam("RUN_ATTACK_SUB_INDEX")
ATTACK_CVE_REF_INDEX = ReadParam("RUN_ATTACK_CVE_REF_INDEX")
ATTACK_TTP_INDEX = ReadParam("RUN_ATTACK_TTP_INDEX")
ATTACK_TACTIC_INDEX = ReadParam("RUN_ATTACK_TACTIC_INDEX")
ATTACK_TECH_TO_TACTIC_INDEX = ReadParam("RUN_ATTACK_TECH_TO_TACTIC_INDEX")
ATTACK_REL_INDEX = ReadParam("RUN_ATTACK_REL_INDEX")
ATTACK_TACTIC_BIN_INDEX = ReadParam("RUN_ATTACK_TACTIC_BIN_INDEX")
ATTACK_TTP_BIN_INDEX = ReadParam("RUN_ATTACK_TTP_BIN_INDEX")
        

  
# Constructor arguments, in the positional order MAFpt_ATTACK_DB is called with
AttackDbArgs = (
    DOWNLOAD_ATTACK,
    ATTACK_TAXII_SERVER,
    ATTACK_LOCAL_FILE_ROOT,
    ATTACK_LOCAL_COPY,
    REINDEX_ATTACK,
    ATTACK_CVE_SEARCH,
    ATTACK_MAIN_INDEX,
    ATTACK_SUB_INDEX,
    ATTACK_CVE_REF_INDEX,
    ATTACK_TTP_INDEX,
    ATTACK_TACTIC_INDEX,
    ATTACK_TECH_TO_TACTIC_INDEX,
    ATTACK_REL_INDEX,
    ATTACK_TACTIC_BIN_INDEX,
    ATTACK_TTP_BIN_INDEX,
)
ATTACK_obj = ATTACK.MAFpt_ATTACK_DB(*AttackDbArgs)

print("<< STARTING ATTACK DB TEST APPLICATION")

# Return value is not used by this rig
ATTACK_obj.GetSchema()

# #######################################
#  END ATTACK_DB Initialise
#
#

# #######################################
#  START: Prepare summary information
#
#
                         
# Groups
print("Getting a list of groups")
GroupsList = ATTACK_obj.GetListOfGroups()
print(f"There are {len(GroupsList)} groups")

# All TTP entries (techniques, tools and malware together)
print("Getting a list of TTP, techniques, tools and malware")
TTPList = ATTACK_obj.GetListOfTTP()
print(f"There are {len(TTPList)} TTP")

# Techniques, overall and then per domain
print("Getting a list of techniques")
TechList = ATTACK_obj.GetListOfTTPOfType("attack-pattern")
# The per-domain lookups below work because only techniques have a domain
# recorded.  Once domains are added for malware and tools, switch to
# GetListOfSelectedTTP(self, domain, level, type).
print(f"There are {len(TechList)} techniques")
TTPEntList = ATTACK_obj.GetListOfTTPInDomain("Enterprise")
print(f"There are {len(TTPEntList)} Enterprise Techniques")
TTPMobList = ATTACK_obj.GetListOfTTPInDomain("Mobile")
print(f"There are {len(TTPMobList)} Mobile Techniques")
TTPPreList = ATTACK_obj.GetListOfTTPInDomain("Pre-ATT&CK")
print(f"There are {len(TTPPreList)} Pre-ATT&CK Techniques")

# Malware
print("Getting a list of malware")
MalList = ATTACK_obj.GetListOfTTPOfType("malware")
print(f"There are {len(MalList)} malwares")

# Tools
print("Getting a list of tools")
ToolList = ATTACK_obj.GetListOfTTPOfType("tool")
print(f"There are {len(ToolList)} tools")

# Tactics
print("Getting a list of Tactics")
TacticList = ATTACK_obj.GetListOfTactics()
print(f"There are {len(TacticList)} Tactics")

# Split the tactics by the domain GetTacticDomain() reports for each one
# (enterprise-attack | mobile-attack | pre-attack).  Names are collected for
# the Enterprise tactics only.
EntTacticList = []
EntTacticNameList = []
MobTacticList = []
PreTacticList = []

for ThisTactic in TacticList:
    TacDom = ATTACK_obj.GetTacticDomain(ThisTactic)
    if TacDom == "enterprise-attack":
        EntTacticList.append(ThisTactic)
        EntTacticNameList.append(ATTACK_obj.GetTacticName(ThisTactic))
    elif TacDom == "mobile-attack":
        MobTacticList.append(ThisTactic)
    elif TacDom == "pre-attack":
        PreTacticList.append(ThisTactic)

# Show the Enterprise tactics (ids, then names)
print("List of tactics for Enterprise")
print(EntTacticList)
print(EntTacticNameList)

# Fetched but not used further in the visible part of this rig
ListOfDicts = ATTACK_obj.GetListOfTacticsWithMatrix()

# Translate every group id into its name (original case is kept)
GroupNameList = [ATTACK_obj.GetGroupName(NextGID) for NextGID in GroupsList]
        
# For each technique within Initial Access (TA0001), show the groups that use it.
# The technique list is fetched once and reused for both the summary line and
# the loop.
TechList = ATTACK_obj.GetTechForTactic("TA0001")
print("TA0001 technique list is " + str(TechList))

# TechGroupCount[i] is the number of groups returned for TechList[i]
TechGroupCount = []
for ThisTech in TechList:
    print("This technique is " + ThisTech)
    GroupList = ATTACK_obj.GetGroupForTTP(ThisTech)
    TechGroupCount.append(len(GroupList))
    print(str(GroupList))

# TODO: Display the number of groups that use each of the Initial Access techniques

# TODO: Display how many Initial Access techniques are used by each group


# #######################################
#  END summary information preparation
#
#

# NOTE: the rig stops here; every section below this line is unreachable
# until this early exit is removed.
raise SystemExit(0)

# ###############################
#  START TACTICS
#
#  Convert to binary vector representations
#  Write out as csv
#  Display graphical view
#  Display dendogram (hamming, ward)
#  Check for groups with the same tactic set
#  Display groups from R Agnes based clustering
#  Display groups from R RForest based clustering
#
#
# ###############################
#  Build a list of strings of 1 and 0s to represent group 'fingerprint'
#       EntFGroupTacticList  - Enterprise tactic binary vector list
#
print("Building group tactic binary vector list")

# Order groups case-insensitively by name (to match the MITRE website order)
GroupNameList.sort(key=SortFunc)

# One binary vector per group: position i is 1 when the group uses
# EntTacticList[i], otherwise 0.
# REVIEW: groups with no Enterprise tactics are currently kept, as an
# all-zero row.
EntTacBinList = []
ValidGroupList = []

for NextGName in GroupNameList:
    # Start from "no tactics present"
    EntTacBin = [0] * len(EntTacticList)

    # All tactics for this group, narrowed to the Enterprise domain
    GroupTacticList = ATTACK_obj.GetTacticsForGROUP(NextGName)
    GroupEntTacticList = [
        ThisTactic
        for ThisTactic in GroupTacticList
        if ATTACK_obj.GetTacticDomain(ThisTactic) == "enterprise-attack"
    ]

    # Flag every Enterprise tactic the group uses
    for GroupEntTactic in GroupEntTacticList:
        EntTacBin[EntTacticList.index(GroupEntTactic)] = 1

    ThisGroupHasNoTactics = not GroupEntTacticList
    if ThisGroupHasNoTactics:
        print("No Tactics found for Group " + str(NextGName))

    # The group is recorded either way; make the two appends conditional on
    # ThisGroupHasNoTactics being False to drop groups without tactics.
    EntTacBinList.append(EntTacBin)
    ValidGroupList.append(NextGName)

# Export the Enterprise tactic matrix as CSV and draw it as a heatmap.
#
# Wrap the vectors in a DataFrame so they can be written out with labels
EntTacBinDF = pd.DataFrame(EntTacBinList)
# Label the columns with the tactics and the rows with the group names
EntTacBinDF.columns = EntTacticList
EntTacBinDF.index = ValidGroupList
# Write out CSV (the file name is appended directly to ATTACK_LOCAL_FILE_ROOT,
# with no separator added here)
EntTacBinDF.to_csv(ATTACK_LOCAL_FILE_ROOT + "MAFpt_ATTACK_ENT_TACTIC_BIN.csv")

# Alternative export kept for reference: convert the list of lists to an
# array and write it with numpy instead.
# EntTacBinArr=np.array(EntTacBin,  dtype=np.int32)
# np.savetxt(ATTACK_LOCAL_FILE_ROOT + "EntTacBin.csv", EntTacBinArr, delimiter=",", fmt='%1i')

# Earlier quick-look view of the matrix, kept for reference
#plt.title('All vector view')
#mpl.pyplot.matshow(EntTacBinList)
#plt.show()

# Tick spacing reference:
# https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781849513265/3/ch03lvl1sec48/controlling-tick-spacing

# New figure for the heatmap; the rc calls set the tick label font size
fig=plt.figure(figsize=[10, 17])
plt.rc('xtick', labelsize=8) 
plt.rc('ytick', labelsize=8) 
# Heatmap from a DataFrame:
# https://stackoverflow.com/questions/12286607/making-heatmap-from-pandas-dataframe

plt.pcolor(EntTacBinDF)
# Previous tick placement, offset by 0.5 (y labels on every 4th row):
#plt.yticks(np.arange(0.5, len(EntTacBinDF.index), 4), EntTacBinDF.index)
#plt.xticks(np.arange(0.5, len(EntTacBinDF.columns), 1), EntTacBinDF.columns,  rotation=90)

# One tick per row (group name) and per column (tactic), at integer positions.
# NOTE(review): integer positions look like cell edges rather than cell
# centres for pcolor; the 0.5 offset variant above may have been intended.
plt.yticks(np.arange(len(EntTacBinDF.index)), EntTacBinDF.index)
#ax.set_yticklabels(EntTacBinDF.index, rotation=0, fontsize=8)
plt.xticks(np.arange(len(EntTacBinDF.columns)),  EntTacBinDF.columns, rotation=90)

#ax.set_xticklabels(y_ticks, rotation=0, fontsize=8)
#plt.yticks(np.arange(len(EntTacBinDF.index)), EntTacBinDF.index)
#plt.xticks(np.arange(len(EntTacBinDF.columns)), EntTacBinDF.columns,  rotation=90)
# Save the figure before showing it
plt.savefig(ATTACK_LOCAL_FILE_ROOT + "MAFpt_ATTACK_ENT_TACTIC_BIN.jpg")
plt.show()

# Hard-coded cluster membership (group names), one list per cluster.
# The plots built from these lists are titled 'RAgnesClusterResult_K7'.
Cluster01 = [
    "admin@338", "APT12", "BlackTech", "CopyKittens",
    "DarkHydrus", "Elderwood", "Gallmaker", "Mofang",
    "TA459", "The White Company", "Windshift", "WIRTE",
]
Cluster02 = [
    "APT-C-36", "APT18", "APT19", "APT29",
    "Carbanak", "Cobalt Group", "Deep Panda", "FIN10",
    "Gorgon Group", "PittyTiger", "Putter Panda", "Rancor",
    "RTM", "Sharpshooter", "Threat Group-1314", "Whitefly",
]
Cluster03 = [
    "APT1", "Dust Storm", "Equation", "Group5",
    "Poseidon Group", "Sowbug", "Strider", "Winnti Group",
]
Cluster04 = [
    "APT16", "APT17", "APT30", "BlackOasis",
    "Bouncing Golf", "Charming Kitten", "Cleaver", "Dragonfly",
    "DragonOK", "GCMAN", "Lotus Blossom", "Moafee",
    "NEODYMIUM", "Orangeworm", "PROMETHIUM", "Scarlet Mimic",
    "SilverTerrier", "Taidoor", "Thrip",
]
Cluster05 = [
    "APT28", "APT39", "APT41", "Axiom",
    "Blue Mockingbird", "BRONZE BUTLER", "Darkhotel", "Dragonfly 2.0",
    "FIN6", "Leviathan", "menuPass", "Night Dragon",
    "Patchwork", "Rocke", "Silence", "Stolen Pencil",
    "TEMP.Veles",
]
Cluster06 = [
    "APT3", "APT32", "APT33", "FIN8",
    "Frankenstein", "Gamaredon Group", "Honeybee", "Ke3chang",
    "Kimsuky", "Lazarus Group", "MuddyWater", "OilRig",
    "Sandworm Team", "Soft Cell", "Stealth Falcon", "Threat Group-3390",
    "Tropic Trooper", "Turla", "Wizard Spider",
]
Cluster07 = [
    "APT37", "APT38", "Dark Caracal", "DarkVishnya",
    "FIN4", "FIN5", "FIN7", "Inception",
    "Leafminer", "Machete", "Magic Hound", "Molerats",
    "Naikon", "PLATINUM", "Suckfly", "TA505",
]

# Clusters in numeric order; position 0 holds cluster 1
ClusterList = [
    Cluster01,
    Cluster02,
    Cluster03,
    Cluster04,
    Cluster05,
    Cluster06,
    Cluster07,
]

# Per-cluster tactic vectors, plus a flat GroupName -> ClusterNum table
ClusterBinList = []
ClusterDictList = []
ClusterNum = 0
for ClusterNum, ThisCluster in enumerate(ClusterList, start=1):
    # Fetch each member's tactic vector via its position in ValidGroupList
    ThisCluster_Bin = [
        EntTacBinList[ValidGroupList.index(ThisGroup)]
        for ThisGroup in ThisCluster
    ]
    ClusterDictList.extend(
        {'GroupName': ThisGroup, 'ClusterNum': ClusterNum}
        for ThisGroup in ThisCluster
    )
    ClusterBinList.append(ThisCluster_Bin)

    # Heatmap of this cluster: rows are groups, columns are tactics.
    # Heatmap reference:
    # https://stackoverflow.com/questions/12286607/making-heatmap-from-pandas-dataframe
    fig = plt.figure(figsize=[14, 17])
    plt.title('RAgnesClusterResult_K7 Cluster ' + str(ClusterNum))
    plt.rc('xtick', labelsize=12)
    plt.rc('ytick', labelsize=12)
    plt.pcolor(ThisCluster_Bin)
    plt.yticks(np.arange(len(ThisCluster)), ThisCluster)
    plt.xticks(
        np.arange(len(EntTacBinDF.columns)),
        EntTacBinDF.columns,
        rotation=90,
    )

    # Save before showing
    plt.savefig(
        ATTACK_LOCAL_FILE_ROOT
        + "MAFpt_ATTACK_ENT_TACTIC_GROUP_CLUSTER"
        + str(ClusterNum)
        + ".jpg"
    )
    plt.show()

# Group-to-cluster assignments as CSV
EntTacGroupClusterDF = pd.DataFrame(ClusterDictList)
EntTacGroupClusterDF.to_csv(
    ATTACK_LOCAL_FILE_ROOT + "MAFpt_ATTACK_ENT_TACTIC_GROUP_CLUSTER.csv"
)


# Stop here; the statements after this call do not run
exit(0)
# And a dendrogram
# We will use Hamming distance as a measure
# And "Ward" as linkage method
#
#DistMatrix=distance.pdist(EntTacBinList, 
#                                    'hamming')
                                    
#plt.title('Hamming dendogram')
#plt.figure()
#dendrogram = sch.dendrogram(sch.linkage(DistMatrix, method='ward'))
#plt.show()

# Report sets of groups that share an identical Enterprise tactic vector.
#
# Rows are bucketed by their values in a single pass over EntTacBinDF, instead
# of repeatedly deep-copying and dropping rows from a working DataFrame.
# Dicts keep insertion order (Python 3.7+), so sets are reported in order of
# their first member and members appear in DataFrame order.
# Assumes the row labels (group names) are unique and the cells hold no NaN.
DupGroups = {}
for ThisIndex, row in EntTacBinDF.iterrows():
    DupGroups.setdefault(tuple(row), []).append(ThisIndex)

for DupList in DupGroups.values():
    # Print duplicate set if duplicates found
    if len(DupList) > 1:
        print("<< Duplicates found")
        print(DupList)

#print("Check complete")
        
# Cluster label (1..3) per group; entry i is applied to ValidGroupList[i].
# The plot titles attribute these labels to an R Agnes clustering run.
RAgnesClusterResult_K3=[1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 3,3, 2, 1, 2, 1, 3, 3, 1, 3, 2, 3, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 1, 3, 1, 2, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3,  3, 3, 1, 3, 2, 3, 1, 2, 2, 3, 2, 3, 1, 2, 3, 1, 3, 1, 3, 3, 1, 3, 1, 3, 3, 1, 2, 3, 1, 3, 3, 2, 1, 1, 1, 3]

# Group names and tactic vectors, split by cluster label
Cluster_1 = []
Cluster_1_Bin = []
Cluster_2 = []
Cluster_2_Bin = []
Cluster_3 = []
Cluster_3_Bin = []
ClusterMembers = {
    1: (Cluster_1, Cluster_1_Bin),
    2: (Cluster_2, Cluster_2_Bin),
    3: (Cluster_3, Cluster_3_Bin),
}

# Indexing (rather than zip) keeps the IndexError if there are more labels
# than groups.  Labels other than 1, 2 or 3 are ignored.
for IndexCount, NextClust in enumerate(RAgnesClusterResult_K3):
    if NextClust in ClusterMembers:
        ClusterNames, ClusterBins = ClusterMembers[NextClust]
        ClusterNames.append(ValidGroupList[IndexCount])
        ClusterBins.append(EntTacBinList[IndexCount])

# matshow() opens a new figure when no fignum is given, so the title has to be
# set after it; setting it first puts the title on a separate, empty figure.
for NextClust, (ClusterNames, ClusterBins) in ClusterMembers.items():
    print(str(ClusterNames))
    plt.matshow(ClusterBins)
    plt.title('RAgnesClusterResult_K3 Cluster ' + str(NextClust))
    plt.show()

# Raw label vector as pasted from the clustering output, kept for reference:
#1 2 2 1 2 2 3 2 3 3 2 2 2 3 2 2 1 1 2 3 2 1 2 2 2 2 1 2 2 3 1 1 1 2 2 2 3 2 3 2 2 3 1 2 1 2 2 3 2
# [50] 2 2 3 2 2 3 1 1 2 2 2 3 3 1 3 2 2 2 2 2 2 2 2 1 2 3 1 3 2 2 2 1 2 1 2 3 1 2 3 1 3 3 2 1 1 1 3
#
# Cluster label (1..3) per group; entry i is applied to ValidGroupList[i]
RRForestClusterResult_K3=[1, 2, 2, 1, 2, 2, 3, 2, 3, 3, 2, 2, 2, 3, 2, 2, 1, 1, 2, 3, 2, 1, 2, 2, 2, 2, 1, 2, 2, 3, 1, 1, 1, 2, 2, 2, 3, 2, 3, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2, 2, 2, 3, 2, 2, 3, 1, 1, 2, 2, 2, 3, 3, 1, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 1, 3, 2, 2, 2, 1, 2, 1, 2, 3, 1, 2, 3, 1, 3, 3, 2, 1, 1, 1, 3]

# Group names and tactic vectors, split by cluster label
Cluster_1 = []
Cluster_1_Bin = []
Cluster_2 = []
Cluster_2_Bin = []
Cluster_3 = []
Cluster_3_Bin = []
ClusterMembers = {
    1: (Cluster_1, Cluster_1_Bin),
    2: (Cluster_2, Cluster_2_Bin),
    3: (Cluster_3, Cluster_3_Bin),
}

# Indexing (rather than zip) keeps the IndexError if there are more labels
# than groups.  Labels other than 1, 2 or 3 are ignored.
for IndexCount, NextClust in enumerate(RRForestClusterResult_K3):
    if NextClust in ClusterMembers:
        ClusterNames, ClusterBins = ClusterMembers[NextClust]
        ClusterNames.append(ValidGroupList[IndexCount])
        ClusterBins.append(EntTacBinList[IndexCount])

# matshow() opens a new figure when no fignum is given, so the title has to be
# set after it; setting it first puts the title on a separate, empty figure.
for NextClust, (ClusterNames, ClusterBins) in ClusterMembers.items():
    print(str(ClusterNames))
    plt.matshow(ClusterBins)
    plt.title('RRForestClusterResult_K3 Cluster ' + str(NextClust))
    plt.show()

# #######################################
#  END Tactics
#
#

# ###############################
#  START TTPs
#
#  Convert to binary vector representations
#  Write out as csv
#  Display graphical view
#  Display dendogram (hamming, ward)
#  Check for groups with the same tactic set
#  Display groups from R Agnes based clustering
#  Display groups from R RForest based clustering
#
#
# ###############################
#  Build a list of strings of 1 and 0s to represent group 'fingerprint'
#       EntFGroupTacticList  - Enterprise tactic binary vector list
#
print("Building group TTP binary vector list")

# Order groups case-insensitively by name (to match the MITRE website order)
GroupNameList.sort(key=SortFunc)

# Column order of the TTP vectors: Enterprise techniques, then malware, then tools
EntTTPList = [*TTPEntList, *MalList, *ToolList]
print("EntTTPList has " + str(len(EntTTPList)) + "Entries")

# One binary vector per group: position i is 1 when the group uses EntTTPList[i].
# Groups for which no matching TTP is found are left out of both lists.
EntTTPBinList = []
ValidGroupList = []

for NextGName in GroupNameList:
    # Start from "no TTP present"
    EntTTPBin = [0] * len(EntTTPList)

    # All TTP for this group, as returned for level "Top" with flag "Y"
    GroupTTPList = ATTACK_obj.GetTTPForGROUPByLevel(NextGName, "Top", "Y")
    if NextGName == "admin@338":
        # Debug aid for one known group
        print("The Group list is " + str(GroupTTPList))

    # Keep entries whose reported domain is Enterprise, malware or tool
    GroupEntTTPList = [
        ThisTTP
        for ThisTTP in GroupTTPList
        if ATTACK_obj.GetTTPDomain(ThisTTP) in ("Enterprise", "malware", "tool")
    ]

    # Flag every kept TTP in the vector
    for GroupEntTTP in GroupEntTTPList:
        EntTTPBin[EntTTPList.index(GroupEntTTP)] = 1

    if not GroupEntTTPList:
        print("No TTP found for Group " + str(NextGName))
        continue

    EntTTPBinList.append(EntTTPBin)
    ValidGroupList.append(NextGName)

# Export the Enterprise TTP matrix as CSV
#
# Wrap the vectors in a DataFrame so they can be written out with labels
EntTTPBinDF = pd.DataFrame(EntTTPBinList)
# Label the columns with the TTPs and the rows with the group names
EntTTPBinDF.columns = EntTTPList
EntTTPBinDF.index = ValidGroupList
# Write out CSV (the file name is appended directly to ATTACK_LOCAL_FILE_ROOT)
EntTTPBinDF.to_csv(ATTACK_LOCAL_FILE_ROOT + "MAFpt_ATTACK_ENT_TTP_BIN.csv")

# Quick look at the whole matrix.
# matshow() opens a new figure when no fignum is given, so the title is set
# after it; setting it first puts the title on a separate, empty figure.
plt.matshow(EntTTPBinList)
plt.title('All vector view')
plt.show()

#
# K = 3
#
# Hard-coded cluster membership (group names) for the three-cluster result

C01 = [
    "admin@338", "APT-C-36", "APT12", "APT16",
    "APT17", "APT18", "APT19", "APT29",
    "APT30", "APT33", "APT38", "Axiom",
    "BlackOasis", "BlackTech", "Blue Mockingbird", "Bouncing Golf",
    "Carbanak", "Charming Kitten", "Cleaver", "CopyKittens",
    "Dark Caracal", "Darkhotel", "DarkHydrus", "DarkVishnya",
    "Deep Panda", "Dragonfly", "DragonOK", "Dust Storm",
    "Elderwood", "Equation", "FIN10", "FIN4",
    "FIN5", "FIN6", "FIN7", "FIN8",
    "Gallmaker", "GCMAN", "Gorgon Group", "Group5",
    "Leafminer", "Leviathan", "Lotus Blossom", "Machete",
    "Moafee", "Mofang", "Molerats", "Naikon",
    "NEODYMIUM", "Night Dragon", "Orangeworm", "PittyTiger",
    "PLATINUM", "Poseidon Group", "PROMETHIUM", "Putter Panda",
    "Rancor", "RTM", "Scarlet Mimic", "Sharpshooter",
    "Silence", "SilverTerrier", "Sowbug", "Stolen Pencil",
    "Strider", "Suckfly", "TA459", "TA505",
    "TEMP.Veles", "The White Company", "Threat Group-1314", "Thrip",
    "Whitefly", "Windshift", "Winnti Group", "WIRTE",
    "Wizard Spider",
]
C02 = [
    "APT1", "APT3", "APT32", "APT39",
    "APT41", "Dragonfly 2.0", "Ke3chang", "menuPass",
    "OilRig", "Soft Cell", "Threat Group-3390", "Turla",
]
C03 = [
    "APT28", "APT37", "BRONZE BUTLER", "Cobalt Group", "Frankenstein",
    "Gamaredon Group", "Honeybee", "Inception", "Kimsuky", "Lazarus Group",
    "Magic Hound", "MuddyWater", "Patchwork", "Rocke", "Sandworm Team",
    "Stealth Falcon", "Tropic Trooper",
]

# Plot each cluster's TTP vectors.  A group missing from ValidGroupList
# raises ValueError from .index().
K3BinList = []
for K3Num, K3Cluster in enumerate((C01, C02, C03), start=1):
    K3Bin = [EntTTPBinList[ValidGroupList.index(ThisGroup)] for ThisGroup in K3Cluster]
    K3BinList.append(K3Bin)
    # matshow() opens a new figure when no fignum is given, so the title is
    # set after it; setting it first puts the title on a separate, empty figure.
    plt.matshow(K3Bin)
    plt.title('RAgnesClusterResult_K3 Cluster ' + str(K3Num))
    plt.show()

# Keep the per-cluster names available
C01_Bin, C02_Bin, C03_Bin = K3BinList

# Stop here; the statements after this line do not run
raise SystemExit(0)
#
# K = 14
#
# Hard-coded cluster membership (group names) for the fourteen-cluster result

C01 = [
    "admin@338", "APT-C-36", "APT12", "APT19", "APT29",
    "BlackTech", "Dark Caracal", "Darkhotel", "DarkHydrus",
    "Elderwood", "FIN4", "FIN7", "Gallmaker", "Gorgon Group",
    "Machete", "Mofang", "Molerats", "Naikon", "PLATINUM", "Poseidon Group",
    "Rancor", "RTM", "Sharpshooter", "Sowbug", "TA459", "TA505",
    "The White Company", "Whitefly", "Windshift",
]
C02 = ["APT1", "Ke3chang"]
C03 = [
    "APT16", "APT17", "APT18", "APT30",
    "APT38", "Axiom", "BlackOasis", "Blue Mockingbird",
    "Bouncing Golf", "Carbanak", "Charming Kitten", "Cleaver",
    "CopyKittens", "DarkVishnya", "Deep Panda", "Dragonfly",
    "DragonOK", "Dust Storm", "Equation", "FIN10",
    "FIN5", "GCMAN", "Group5", "Leafminer",
    "Lotus Blossom", "Moafee", "NEODYMIUM", "Night Dragon",
    "Orangeworm", "PittyTiger", "PROMETHIUM", "Putter Panda",
    "Scarlet Mimic", "SilverTerrier", "Stolen Pencil", "Strider",
    "Suckfly", "TEMP.Veles", "Threat Group-1314", "Thrip",
    "Winnti Group", "WIRTE",
]
C04 = ["APT28"]
C05 = ["APT3", "APT39", "Dragonfly 2.0"]
C06 = ["APT32"]
C07 = [
    "APT33", "FIN6", "FIN8", "Leviathan",
    "Silence", "Wizard Spider",
]
C08 = [
    "APT37", "Cobalt Group", "Frankenstein", "Gamaredon Group",
    "Honeybee", "Inception", "Kimsuky", "Magic Hound",
    "MuddyWater", "Patchwork", "Rocke", "Sandworm Team",
    "Stealth Falcon", "Tropic Trooper",
]
C09 = ["APT41"]
C10 = ["BRONZE BUTLER"]
C11 = ["Lazarus Group"]
C12 = ["menuPass", "Soft Cell", "Threat Group-3390"]
C13 = ["OilRig"]
C14 = ["Turla"]

K14ClusterList = [C01, C02, C03, C04, C05, C06, C07,
                  C08, C09, C10, C11, C12, C13, C14]

# Plot each cluster's TTP vectors.  Driving all fourteen from one loop makes
# every cluster use its own member list (C11 previously iterated C10) and its
# own number in the title (every title previously said "Cluster 1").
# A group missing from ValidGroupList raises ValueError from .index().
K14BinList = []
for K14Num, K14Cluster in enumerate(K14ClusterList, start=1):
    K14Bin = [EntTTPBinList[ValidGroupList.index(ThisGroup)] for ThisGroup in K14Cluster]
    K14BinList.append(K14Bin)
    # matshow() opens a new figure when no fignum is given, so the title is
    # set after it; setting it first puts the title on a separate, empty figure.
    plt.matshow(K14Bin)
    plt.title('RAgnesClusterResult_K14 Cluster ' + str(K14Num))
    plt.show()

# Keep the per-cluster names available
(C01_Bin, C02_Bin, C03_Bin, C04_Bin, C05_Bin, C06_Bin, C07_Bin,
 C08_Bin, C09_Bin, C10_Bin, C11_Bin, C12_Bin, C13_Bin, C14_Bin) = K14BinList



# And a dendrogram
# Hamming distance is used as the measure and "ward" as the linkage method.
#
# NOTE(review): this sits in the TTP section but is computed from the tactic
# vectors (EntTacBinList) - confirm whether EntTTPBinList was intended.
# NOTE(review): the SciPy documentation describes 'ward' linkage as correctly
# defined only for Euclidean pairwise distances - confirm Hamming is intended.
DistMatrix = distance.pdist(EntTacBinList, 'hamming')

# Create the figure first so the dendrogram and its title share one figure;
# titling before plt.figure() puts the title on a different figure.
plt.figure()
dendrogram = sch.dendrogram(sch.linkage(DistMatrix, method='ward'))
plt.title('Hamming dendogram')
plt.show()

# Report sets of groups that share an identical vector in EntTacBinDF.
#
# NOTE(review): this sits in the TTP section but checks the tactic DataFrame
# (EntTacBinDF) - confirm whether EntTTPBinDF was intended.
#
# Rows are bucketed by their values in a single pass, instead of repeatedly
# deep-copying and dropping rows from a working DataFrame.  Dicts keep
# insertion order (Python 3.7+), so sets are reported in order of their first
# member and members appear in DataFrame order.
# Assumes the row labels (group names) are unique and the cells hold no NaN.
DupGroups = {}
for ThisIndex, row in EntTacBinDF.iterrows():
    DupGroups.setdefault(tuple(row), []).append(ThisIndex)

for DupList in DupGroups.values():
    # Print duplicate set if duplicates found
    if len(DupList) > 1:
        print("<< Duplicates found")
        print(DupList)

#print("Check complete")
        
# Cluster label (1..3) per group; entry i is applied to ValidGroupList[i].
# The plot titles attribute these labels to an R Agnes clustering run.
RAgnesClusterResult_K3=[1, 2, 1, 1, 2, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 3,3, 2, 1, 2, 1, 3, 3, 1, 3, 2, 3, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 1, 3, 1, 2, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 3, 3, 3,  3, 3, 1, 3, 2, 3, 1, 2, 2, 3, 2, 3, 1, 2, 3, 1, 3, 1, 3, 3, 1, 3, 1, 3, 3, 1, 2, 3, 1, 3, 3, 2, 1, 1, 1, 3]

# NOTE(review): by this point ValidGroupList has been rebuilt by the TTP
# section (groups with no TTP are left out) while EntTacBinList still holds
# one row per group, so the two may no longer line up index-for-index -
# confirm which pairing is intended.

# Group names and vectors, split by cluster label
Cluster_1 = []
Cluster_1_Bin = []
Cluster_2 = []
Cluster_2_Bin = []
Cluster_3 = []
Cluster_3_Bin = []
ClusterMembers = {
    1: (Cluster_1, Cluster_1_Bin),
    2: (Cluster_2, Cluster_2_Bin),
    3: (Cluster_3, Cluster_3_Bin),
}

# Indexing (rather than zip) keeps the IndexError if there are more labels
# than groups.  Labels other than 1, 2 or 3 are ignored.
for IndexCount, NextClust in enumerate(RAgnesClusterResult_K3):
    if NextClust in ClusterMembers:
        ClusterNames, ClusterBins = ClusterMembers[NextClust]
        ClusterNames.append(ValidGroupList[IndexCount])
        ClusterBins.append(EntTacBinList[IndexCount])

# matshow() opens a new figure when no fignum is given, so the title has to be
# set after it; setting it first puts the title on a separate, empty figure.
for NextClust, (ClusterNames, ClusterBins) in ClusterMembers.items():
    print(str(ClusterNames))
    plt.matshow(ClusterBins)
    plt.title('RAgnesClusterResult_K3 Cluster ' + str(NextClust))
    plt.show()

# ###############################################
#  END MAIN CODE
#
#
    
    

#GroupNameList.sort()



# ###############################
#  Build a list of strings of 1 and 0s to represent group 'fingerprint'
#      FPrintListOfLists  - TTP binary vector list
#      FGroupTacticList  - Tactic binary vector list
#
print("Building group finger print list")

# One binary vector per group: TTP fingerprints and tactic fingerprints
FPrintListOfLists, FGroupTacticList = [], []

# Per-group statistics rows and the (lower-cased) names used to order them
GroupStatsList, GroupNameList = [], []

# Longest group name seen; used to align the printed table
GroupNameMaxLen = 0

# Running totals over all groups (ints are immutable, so chaining is safe)
TotGroupTopTTPCount = TotGroupSubTTPCount = 0
TotEntTTPCount = TotEntTTPTopCount = TotEntTTPSubCount = 0
TotMobTTPCount = TotMobTTPTopCount = TotMobTTPSubCount = 0
TotPreTTPCount = TotPreTTPTopCount = TotPreTTPSubCount = 0
TotMalTTPCount = TotToolTTPCount = 0

# For every group: build a binary TTP vector (s) and a binary tactic vector (t),
# and accumulate per-group and overall TTP counts split by domain and level.
for NextGID in GroupsList:
    # One slot per entry of TTPList / TacticList, initially all 0 (absent).
    # A slot is set to 1 when the group uses the TTP / tactic at that position.
    s=[0]*len(TTPList)
    t=[0]*len(TacticList)

    # The lookups below are keyed by group name, so convert the GID first
    GroupName=ATTACK_obj.GetGroupName(NextGID)
    # Stored lower-cased so the alphabetical listing printed later ignores case
    GroupNameList.append(GroupName.lower())
    # Keep the longest name length to tidy up the printed table later
    GroupNameMaxLen=max(GroupNameMaxLen, len(GroupName))

    GroupTTPList=ATTACK_obj.GetTTPForGROUP(GroupName)
    GroupTopTTPList=ATTACK_obj.GetTTPForGROUPByLevel(GroupName, "Top", "N")
    GroupSubTTPList=ATTACK_obj.GetTTPForGROUPByLevel(GroupName, "Sub", "N")

    GroupTopTTPCount=len(GroupTopTTPList)
    TotGroupTopTTPCount+=GroupTopTTPCount
    GroupSubTTPCount=len(GroupSubTTPList)
    TotGroupSubTTPCount+=GroupSubTTPCount

    # Per-group counters, reset for each group
    EntTTPCount=EntTTPTopCount=EntTTPSubCount=0
    MobTTPCount=MobTTPTopCount=MobTTPSubCount=0
    PreTTPCount=PreTTPTopCount=PreTTPSubCount=0
    MalTTPCount=ToolTTPCount=0

    for ThisTTP in GroupTTPList:
        TTPDomain=ATTACK_obj.GetTTPDomain(ThisTTP)
        level=ATTACK_obj.GetTTPLevel(ThisTTP)
        # Any level other than "Top" is counted as a sub-technique
        IsTop=(level == "Top")

        if TTPDomain == "Enterprise":
            EntTTPCount+=1
            TotEntTTPCount+=1
            if IsTop:
                EntTTPTopCount+=1
                TotEntTTPTopCount+=1
            else:
                EntTTPSubCount+=1
                TotEntTTPSubCount+=1
        elif TTPDomain == "Mobile":
            MobTTPCount+=1
            TotMobTTPCount+=1
            if IsTop:
                MobTTPTopCount+=1
                TotMobTTPTopCount+=1
            else:
                MobTTPSubCount+=1
                TotMobTTPSubCount+=1
        elif TTPDomain == "Pre-ATT&CK":
            PreTTPCount+=1
            TotPreTTPCount+=1
            # Bump the per-group counter and the total once each; previously
            # the total was bumped twice and the per-group counter never.
            if IsTop:
                PreTTPTopCount+=1
                TotPreTTPTopCount+=1
            else:
                PreTTPSubCount+=1
                TotPreTTPSubCount+=1
        elif TTPDomain == "tool":
            ToolTTPCount+=1
            TotToolTTPCount+=1
        elif TTPDomain == "malware":
            MalTTPCount+=1
            TotMalTTPCount+=1

    # One statistics row per group, keyed by the lower-cased group name
    GroupStatDict={'GroupName': GroupName.lower(),
                   'TopTTP': GroupTopTTPCount,
                   'SubTTP': GroupSubTTPCount,
                   'EnterpriseTTP': EntTTPCount,
                   'EnterpriseTTPTop': EntTTPTopCount,
                   'EnterpriseTTPSub': EntTTPSubCount,
                   'MobileTTP': MobTTPCount,
                   'MobileTTPTop': MobTTPTopCount,
                   'MobileTTPSub': MobTTPSubCount,
                   'PreTTP': PreTTPCount,
                   'PreTTPTop': PreTTPTopCount,
                   'PreTTPSub': PreTTPSubCount,
                   'Malware': MalTTPCount,
                   'Tool': ToolTTPCount,
                  }
    GroupStatsList.append(GroupStatDict)

    GroupTacticList=ATTACK_obj.GetTacticsForGROUP(GroupName)

    # Mark each TTP the group uses; the slot is the TTP's position in TTPList
    ThisGroupHasNoTTP=True
    for GroupTTP in GroupTTPList:
        s[TTPList.index(GroupTTP)]=1
        ThisGroupHasNoTTP=False
    if ThisGroupHasNoTTP:
        print("No TTP found for Group " + str(GroupName))

    # Same for tactics, positioned as in TacticList
    ThisGroupHasNoTactics=True
    for GroupTactic in GroupTacticList:
        t[TacticList.index(GroupTactic)]=1
        ThisGroupHasNoTactics=False
    if ThisGroupHasNoTactics:
        print("No Tactics found for Group " + str(GroupName))

    FPrintListOfLists.append(s)
    FGroupTacticList.append(t)
 #   FPrintString="".join(s)
#    FPrintStringList.append(FPrintString)

# End for NextGID in GroupsList:
#GroupNameMaxLen=0
# Print one table row of TTP counts per group, in alphabetical order of the
# lower-cased group name, followed by a TOTALS row.
GroupNameList.sort()

# Index the stats rows by group name once instead of scanning the list for
# every row.  setdefault keeps the first row for a name, matching what a
# first-match linear search would return.
StatsByGroupName={}
for GroupStatDict in GroupStatsList:
    StatsByGroupName.setdefault(GroupStatDict["GroupName"], GroupStatDict)

# Keys of the counts in printed order; each inner tuple is one visually
# separated column group.  The Pre sub-technique column reads 'PreTTPSub'
# (it previously read 'MobileTTPSub' by mistake).
StatColumnGroups=(
    ('TopTTP', 'SubTTP'),
    ('EnterpriseTTP', 'EnterpriseTTPTop', 'EnterpriseTTPSub'),
    ('MobileTTP', 'MobileTTPTop', 'MobileTTPSub'),
    ('PreTTP', 'PreTTPTop', 'PreTTPSub'),
    ('Malware', 'Tool'),
)


def _FormatStatsRow(Label, CountGroups):
    """Return one table row: padded label, then zero-padded counts.

    Label is padded with spaces to GroupNameMaxLen.  CountGroups is a
    sequence of sequences of ints; counts within a group are printed as
    adjacent "[0000]" cells and groups are separated by three spaces.
    """
    Cells=["".join("[" + '{:04}'.format(Count) + "]" for Count in Group)
           for Group in CountGroups]
    return "[" + Label.ljust(GroupNameMaxLen) + "]" + "   ".join(Cells)


print("Group stats: TTP Tots/ Name /Ent / Mob / Pre /Mal/Tool")
for GroupName in GroupNameList:
    GroupStatDict=StatsByGroupName[GroupName]
    print(_FormatStatsRow(GroupStatDict['GroupName'],
                          [[GroupStatDict[Key] for Key in Keys]
                           for Keys in StatColumnGroups]))

print(_FormatStatsRow("TOTALS", (
    (TotGroupTopTTPCount, TotGroupSubTTPCount),
    (TotEntTTPCount, TotEntTTPTopCount, TotEntTTPSubCount),
    (TotMobTTPCount, TotMobTTPTopCount, TotMobTTPSubCount),
    (TotPreTTPCount, TotPreTTPTopCount, TotPreTTPSubCount),
    (TotMalTTPCount, TotToolTTPCount),
)))

# ###################################
#  Not used
#
#i=0
#j=len(FPrintListOfLists)
#ThisFP=[]
#LastFP=[]
#for Fprint in FPrintListOfLists:
    #LastFP=ThisFP
    #ThisFP=Fprint
    
    #if not i ==0:
        #print("LastFp is " + str( LastFP))
        #print("\n")
        #print("ThisFp is " + str( ThisFP))
        #print("\n")

        #hamdist=distance.hamming(LastFP, 
         #                                      ThisFP)
        #jacdist=distance.jaccard(LastFP, 
        #                                       ThisFP)
                                           
        #print("Hamming distance is " + str(hamdist))
#        print("Jaccard distance is " + str(jacdist))
    
#    i=i+1
#FPrintListOfLists=[]
#FGroupTacticList=[]
# Select which set of binary vectors the analysis below works on
# (here: the per-group tactic vectors) and show it as a matrix image.
ThisBinaryVectorList = FGroupTacticList

plt.matshow(ThisBinaryVectorList)
plt.show()

# The script stops here; the clustering code below is not executed
# while this exit is in place.
exit(0)

# ########################
#  Create a distance matrix based on Hamming
#   Useful in a number of algorithms that do not support Hamming directly
#  
#  A square form is also produced here, required for some algorithms
#
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html#scipy.spatial.distance.pdist
print("<<< Distance Matrix >>>")

# Condensed pairwise Hamming distances between the binary vectors
DistMatrix = distance.pdist(ThisBinaryVectorList, metric='hamming')
print(DistMatrix)

# Square, symmetric form of the same distances
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.squareform.html#scipy.spatial.distance.squareform
SQDistMatrix = distance.squareform(DistMatrix)
print(SQDistMatrix)

# ########################
#  Investigate Agglomerative clustering approach
#   
#  

# https://scikit-learn.org/stable/modules/clustering.html#hierarchical-clustering        
# https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html#sklearn.cluster.AgglomerativeClustering
# “ward”, “complete”, “average”, “single”
print("<<< Agglomerative Clustering >>>")

# Complete-linkage clustering into 5 clusters on the precomputed (square)
# Hamming distance matrix.
# scikit-learn renamed the ``affinity`` argument to ``metric`` (deprecated in
# 1.2, removed in 1.4).  Try the current name first and fall back to the old
# one so the script runs on either side of the rename.
try:
    AggloModel = AgglomerativeClustering(n_clusters=5, linkage="complete",
                                         metric='precomputed')
except TypeError:
    AggloModel = AgglomerativeClustering(n_clusters=5, linkage="complete",
                                         affinity='precomputed')

clustering = AggloModel.fit(SQDistMatrix)
print(str(clustering.labels_))
#clustering =AgglomerativeClustering(n_clusters=15, linkage="complete", 
#                                affinity = 'precomputed').fit_predict(SQDistMatrix)
#print(str(clustering))

# ########################
#  Investigate Kmedoids clustering approach (Kmeans only supports Euclidean)
#   
#  https://scikit-learn-extra.readthedocs.io/en/latest/generated/sklearn_extra.cluster.KMedoids.html
#  https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html
#

# KMedoids is currently disabled (its import is commented out at the top of
# the file), so only the banner is printed.
print("<<< KMedoids Clustering >>>")

#clustering = KMedoids(metric = "hamming", n_clusters=5, random_state=0).fit(FPrintListOfLists)
#print(str(clustering.labels_))


# ########################
#  Dendrogram built from the condensed Hamming distance matrix
#
# https://towardsdatascience.com/an-introduction-to-clustering-algorithms-in-python-123438574097
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.dendrogram.html

print("<<< Create Dendrogram >>>")

# NOTE(review): SciPy documents the 'ward' method as correctly defined only
# for Euclidean distances, while DistMatrix holds Hamming distances - confirm
# this combination is intended.
WardLinkage = sch.linkage(DistMatrix, method='ward')
dendrogram = sch.dendrogram(WardLinkage)
#plt.figure()
#dendrogram = sch.dendrogram(sch.linkage(DistMatrix, method='ward'))
#plt.show()

#sch.set_link_color_palette(['m', 'c', 'y', 'k'])
#fig, axes = plt.subplots(1, 2, figsize=(8, 3))
#dn1 = sch.dendrogram(Z, ax=axes[0], above_threshold_color='y',
#                           orientation='top')
#dn2 = sch.dendrogram(Z, ax=axes[1],
#                           above_threshold_color='#bcbddc',
#                           orientation='right')
#sch.set_link_color_palette(None)  # reset to default after use
#plt.show()

# create clusters
#hc = AgglomerativeClustering(n_clusters=4, affinity = 'euclidean', linkage = 'ward')
# save clusters for chart
#y_hc = hc.fit_predict(points)

# https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html
#clustering = AgglomerativeClustering(n_clusters=6,  linkage="complete", 
#                                affinity = 'precomputed').fit(DistMatrix)

#print(str(clustering.labels_))

# https://scikit-learn.org/stable/modules/generated/sklearn.cluster.Birch.html#sklearn.cluster.Birch
print(" <<<< Birch computation >>>>> ")
# Birch clustering of the binary vectors into 6 clusters.
# fit_predict() fits the model and returns the training labels in one step.
# The earlier Birch(n_clusters=None) instance was overwritten before use, and
# the separate fit() / predict() calls duplicated this work with their
# results discarded, so they are dropped.
brc = Birch(n_clusters=6)
BClusters = brc.fit_predict(ThisBinaryVectorList)
print(BClusters)
print(" <<<< Birch computation done >>>>> ")

print("Begin model based clustering")
# Fit a Gaussian mixture for every covariance type and for 1..9 components,
# record each model's BIC and keep the model with the lowest BIC.
# https://scikit-learn.org/stable/auto_examples/mixture/plot_gmm_selection.html
# https://scikit-learn.org/stable/modules/mixture.html

FPrint_arr = np.array(ThisBinaryVectorList)
# np.inf rather than the np.infty alias, which NumPy 2.0 removed
lowest_bic = np.inf
bic = []
n_components_range = range(1, 10)
cv_types = ['spherical', 'tied', 'diag', 'full']
for cv_type in cv_types:
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        gmm = GaussianMixture(n_components=n_components,
                              covariance_type=cv_type)
        gmm.fit(FPrint_arr)
        ThisBIC = gmm.bic(FPrint_arr)
        print("This BIC is for " + str(cv_type) + " and " +str(n_components) + " BIC "+ str(ThisBIC))
        bic.append(ThisBIC)
        # The lower the BIC the better, per the scikit-learn documentation:
        # https://scikit-learn.org/stable/modules/generated/sklearn.mixture.GaussianMixture.html#sklearn.mixture.GaussianMixture.bic
        if ThisBIC < lowest_bic:
            lowest_bic = ThisBIC
            lowest_n = n_components
            best_gmm = gmm

# Bar chart of the BIC of every fitted model, one bar colour per covariance
# type, with a '*' drawn above the bar of the lowest-BIC model.
bic = np.array(bic)
color_iter = itertools.cycle(['navy', 'turquoise', 'cornflowerblue',
                              'darkorange'])
clf = best_gmm
bars = []

ComponentCount = len(n_components_range)
ComponentTicks = np.array(n_components_range)
BicMin = bic.min()
BicMax = bic.max()

plt.figure(figsize=(8, 6))
spl = plt.subplot(2, 1, 1)
for i, (cv_type, color) in enumerate(zip(cv_types, color_iter)):
    # Shift each covariance type's bars sideways so they sit next to each other
    xpos = ComponentTicks + .2 * (i - 2)
    # bic is ordered by covariance type, then by number of components
    TypeScores = bic[i * ComponentCount:(i + 1) * ComponentCount]
    bars.append(plt.bar(xpos, TypeScores, width=.2, color=color))
plt.xticks(n_components_range)
plt.ylim([BicMin * 1.01 - .01 * BicMax, BicMax])
plt.title('BIC score per model')

# Horizontal position of the '*' marking the lowest BIC
BestIndex = bic.argmin()
xpos = np.mod(BestIndex, ComponentCount) + .65 +\
    .2 * np.floor(BestIndex / ComponentCount)
plt.text(xpos, BicMin * 0.97 + .03 * BicMax, '*', fontsize=14)
spl.set_xlabel('Number of components')
spl.legend([b[0] for b in bars], cv_types)

plt.show()



#gmm = GaussianMixture(n_components=10,  covariance_type='diag').fit(FPrint_arr)
# Report the component count of the lowest-BIC model and the cluster label it
# assigns to each vector.  lowest_n is an int, so it must be converted before
# string concatenation (it previously raised TypeError).
print("<<< GMM for " + str(lowest_n) + " components >>>")
labels = best_gmm.predict(FPrint_arr)
#plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis');
print(labels)
