
#__author__ = "Chris Maidens"
#__copyright__ = "Copyright (C) 2020 Chris Maidens"
#__license__ = "No license granted"
#__version__ = "0.1"
import os

import MAFpt_r_params as rpar

import MAFpt_ATTACK_DB_v2 as ATTACK

import pandas as pd

import numpy as np
from sklearn.neighbors import NearestNeighbors

#from sklearn.neighbors import NearestNeighbors
import random
from random import sample
from numpy.random import uniform
#mport numpy as np
from math import isnan

from scipy.spatial import distance


#import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram

from sklearn.cluster import AgglomerativeClustering

def hopkinsTEST(X, test_count=30, portion=0.1, verbose=True):
    """Estimate the Hopkins statistic of X using normalised Hamming distance.

    The estimate is the mean over ``test_count`` independent trials.  In each
    trial ``m = int(portion * n)`` rows are drawn from X without replacement
    and paired with ``m`` random 0/1 vectors of length d (every vector has
    probability 1/2**d).  For each pair the function records

    * w: Hamming distance from the sampled row to its nearest *other* row, and
    * u: Hamming distance from the random vector to its nearest row of X,

    and the trial statistic is ``sum(u**d) / (sum(u**d) + sum(w**d))``.

    Args:
        X: 2-D table (DataFrame, ndarray or nested list) with n rows and
            d columns.  The random reference vectors are binary, so the
            columns are expected to hold 0/1 values.
        test_count: number of independent trials to average (default 30).
        portion: fraction of the rows sampled per trial (default 0.1).
        verbose: when True (default) print the per-sample minimum distances,
            as the original implementation did.

    Returns:
        The averaged statistic as a float in [0, 1].  A trial in which every
        u and w distance is zero has no defined ratio; it contributes 0, the
        same convention ``hopkins`` uses for a NaN result.

    Raises:
        ValueError: if X is not a non-empty 2-D table, if ``test_count`` is
            smaller than 1, or if ``portion`` does not select between 1 and
            n rows.

    All randomness comes from the stdlib ``random`` module, so calling
    ``random.seed(...)`` beforehand makes the result reproducible.
    """
    data = np.asarray(X)
    if data.ndim != 2 or data.shape[1] == 0:
        raise ValueError("X must be a 2-D table with at least one column")
    if test_count < 1:
        raise ValueError("test_count must be at least 1")

    n, d = data.shape
    m = int(portion * n)
    if m < 1 or m > n:
        raise ValueError(
            "portion=" + str(portion) + " selects " + str(m) + " of " + str(n)
            + " rows; it must select between 1 and " + str(n)
        )

    trial_stats = []
    for _trial in range(test_count):
        u_sum = 0.0
        w_sum = 0.0

        # Positions (not labels) are sampled, so duplicate index labels in a
        # DataFrame cannot cause more than the sampled row to be excluded.
        for sample_no, row_pos in enumerate(random.sample(range(n), m)):
            # Normalised Hamming distance = fraction of differing columns.
            row_dist = (data != data[row_pos]).mean(axis=1)
            row_dist[row_pos] = np.inf  # never match the sampled row itself
            # 1.0 is the largest possible distance (and the value used when
            # there is no other row to compare against).
            w_min = min(1.0, float(row_dist.min()))
            if verbose and w_min != 0:
                print("Min w for " + str(sample_no) + "is " + str(w_min))

            random_point = np.array([random.randint(0, 1) for _bit in range(d)])
            u_min = float((data != random_point).mean(axis=1).min())
            if verbose:
                print("Min u for " + str(sample_no) + "is " + str(u_min))

            u_sum += u_min ** d
            w_sum += w_min ** d

        total = u_sum + w_sum
        trial_stats.append(u_sum / total if total > 0 else 0.0)

    return sum(trial_stats) / test_count
    
def hopkins(X):
    """Estimate the Hopkins statistic of a DataFrame using Manhattan distance.

    A sample of ``m = int(0.1 * n)`` rows is drawn without replacement.  For
    each sampled row the function measures

    * u: distance from a point drawn uniformly inside the per-column
      [min, max] box of X to its nearest row of X, and
    * w: distance from the sampled row to its nearest *other* row of X
      (the second neighbour returned, because the first is the row itself).

    Args:
        X: pandas DataFrame of numeric values with n rows and d columns.

    Returns:
        ``sum(u) / (sum(u) + sum(w))``.  If that ratio is NaN (all distances
        zero) the distance lists are printed and 0 is returned.

    Raises:
        ValueError: if X has fewer than 10 rows, so that the 10% sample
            would be empty.
    """
    values = X.values
    n, d = values.shape
    m = int(0.1 * n)  # heuristic sample size: 10% of the rows
    if m < 1:
        raise ValueError("hopkins needs at least 10 rows to draw a 10% sample")

    nbrs = NearestNeighbors(n_neighbors=1, metric="manhattan").fit(values)

    # The bounding box of the data does not change between iterations.
    lower = values.min(axis=0)
    upper = values.max(axis=0)

    rand_X = sample(range(0, n, 1), m)

    ujd = []
    wjd = []
    for row_pos in rand_X:
        # A random point is not part of the fitted data, so its nearest
        # neighbour is the first (and only) neighbour requested.
        random_point = uniform(lower, upper, d).reshape(1, -1)
        u_dist, _ = nbrs.kneighbors(random_point, 1, return_distance=True)
        ujd.append(u_dist[0][0])

        # A sampled row is in the fitted data: neighbour 0 is the row itself
        # at distance 0, so take neighbour 1.
        w_dist, _ = nbrs.kneighbors(values[row_pos].reshape(1, -1), 2, return_distance=True)
        wjd.append(w_dist[0][1])

    H = sum(ujd) / (sum(ujd) + sum(wjd))
    if isnan(H):
        print(ujd, wjd)
        H = 0

    return H

def hopkins2(X, portion=0.1, seed=247):
    """Estimate the Hopkins statistic of a numpy array (Manhattan distance).

    Args:
        X: numpy array of shape (n_samples, n_features).
        portion: fraction of the rows used as the sample (default 0.1).
        seed: seed for the random draws (default 247).  A private
            ``numpy.random.RandomState`` is used, so the caller's global
            numpy random state is left untouched.

    Returns:
        ``U / (U + W)`` where U is the sum of ``u**d`` over the random points
        (u = distance to the nearest row of X) and W is the sum of ``w**d``
        over the sampled rows (w = distance to the nearest *other* row).
        The result is NaN when every distance is zero.

    Raises:
        ValueError: if ``portion`` selects no rows.
    """
    n, d = X.shape  # rows, columns
    m = int(portion * n)
    print("n is " + str(n))
    print("d is " + str(d))
    print("m is " + str(m))
    if m < 1:
        raise ValueError("portion selects no rows; increase portion or supply more data")

    rng = np.random.RandomState(seed)
    nbrs = NearestNeighbors(n_neighbors=1, metric="manhattan").fit(X)

    # u: m points drawn uniformly inside the per-column [min, max] box of X,
    # each matched to its single nearest row.
    rand_X = rng.uniform(X.min(axis=0), X.max(axis=0), size=(m, d))
    u_dist = nbrs.kneighbors(rand_X, return_distance=True)[0]

    # w: m distinct rows of X; neighbour 0 is the row itself, so column 1
    # holds the distance to the nearest other row.
    idx = rng.choice(n, size=m, replace=False)
    w_dist = nbrs.kneighbors(X[idx, :], 2, return_distance=True)[0][:, 1]

    U = (u_dist ** d).sum()
    W = (w_dist ** d).sum()
    H = U / (U + W)
    return H

# --- Run configuration -------------------------------------------------------
# Locate the YAML run-parameter file.
# NOTE(review): the two environment lookups below have no effect on the run:
# YAML_File is reassigned twice and YAML_Root is replaced by the current
# working directory before either value is used. Confirm whether the
# environment variables are still meant to be honoured.
YAML_Root = os.environ.get('MAFpt_YAML_ROOT')
YAML_File = os.environ.get('MAFpt_YAML_FILE')
YAML_File="MAFpt_RunParams.yaml"

YAML_Root = os.getcwd()
# The leading "/" matters: the path is built by plain string concatenation.
YAML_File="/MAFpt_ATTACK_DB_TEST_RunParams.yaml"

# Parameter reader for the YAML file at <cwd>/MAFpt_ATTACK_DB_TEST_RunParams.yaml.
p_obj = rpar.MAFpt_r_params(YAML_Root + YAML_File)

# Read every RUN_* setting that the MAFpt_ATTACK_DB constructor below takes.
DOWNLOAD_ATTACK=p_obj.MAFpt_r_read("RUN_DOWNLOAD_ATTACK")
REINDEX_ATTACK=p_obj.MAFpt_r_read("RUN_REINDEX_ATTACK")
ATTACK_LOCAL_FILE_ROOT = p_obj.MAFpt_r_read('RUN_ATTACK_LOCAL_FILE_ROOT')
ATTACK_TAXII_SERVER = p_obj.MAFpt_r_read('RUN_ATTACK_TAXII_SERVER')
ATTACK_LOCAL_COPY = p_obj.MAFpt_r_read('RUN_ATTACK_LOCAL_COPY')
ATTACK_CVE_SEARCH = p_obj.MAFpt_r_read('RUN_ATTACK_CVE_SEARCH')
ATTACK_MAIN_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_MAIN_INDEX')
ATTACK_SUB_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_SUB_INDEX')
ATTACK_CVE_REF_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_CVE_REF_INDEX')
ATTACK_TTP_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_TTP_INDEX')
ATTACK_TACTIC_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_TACTIC_INDEX')
ATTACK_TECH_TO_TACTIC_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_TECH_TO_TACTIC_INDEX')
ATTACK_REL_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_REL_INDEX')    
ATTACK_TACTIC_BIN_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_TACTIC_BIN_INDEX')   
ATTACK_TTP_BIN_INDEX = p_obj.MAFpt_r_read('RUN_ATTACK_TTP_BIN_INDEX') 

# Build the ATT&CK database wrapper. Arguments are positional, so their order
# must match the MAFpt_ATTACK_DB constructor (defined in MAFpt_ATTACK_DB_v2,
# not visible here).
ATTACK_obj = ATTACK.MAFpt_ATTACK_DB(DOWNLOAD_ATTACK,
                         ATTACK_TAXII_SERVER,
                         ATTACK_LOCAL_FILE_ROOT,
                         ATTACK_LOCAL_COPY,
                         REINDEX_ATTACK,
                         ATTACK_CVE_SEARCH, 
                         ATTACK_MAIN_INDEX, 
                         ATTACK_SUB_INDEX, 
                         ATTACK_CVE_REF_INDEX, 
                         ATTACK_TTP_INDEX, 
                         ATTACK_TACTIC_INDEX, 
                         ATTACK_TECH_TO_TACTIC_INDEX, 
                         ATTACK_REL_INDEX, 
                         ATTACK_TACTIC_BIN_INDEX, 
                         ATTACK_TTP_BIN_INDEX) 
                         

# Select the 14 "TA00xx" columns of the tactic index and export them as CSV.
# NOTE(review): the column names look like ATT&CK tactic IDs and the frame
# name suggests 0/1 membership flags; confirm against GetTacticBinIndex().
TacticBinDF=ATTACK_obj.GetTacticBinIndex()
X_Test=TacticBinDF[["TA0043", "TA0042", "TA0001", "TA0002", "TA0003", "TA0004", "TA0005", "TA0006", "TA0007", "TA0008", "TA0009", "TA0011", "TA0010", "TA0040"]]
# The output path is root + file name with no separator inserted, so
# ATTACK_LOCAL_FILE_ROOT presumably ends with a path separator (TODO confirm).
X_Test.to_csv(ATTACK_LOCAL_FILE_ROOT + "MAFpt_ENT_TACTICS_BIN_122023.csv")

# The script stops here with exit status 0: every statement after this line
# is currently unreachable.
exit(0)

# Find the columns of X_Test that hold the same value in every row.
# The first row is kept as the reference (Init). T is a Series keyed by column
# name: it starts at 1 everywhere and is set to 0 for a column as soon as some
# row differs from the reference in that column.
InitFlag = 0
Init = []
T = []
for index, Xrow in X_Test.iterrows():
    if InitFlag != 0:
        for Sindex, Svalue in Xrow.items():
            # A column already marked 0 has been ruled out; skip it.
            if T[Sindex] == 1 and not Svalue == Init[Sindex]:
                T[Sindex] = 0
        continue

    # First row seen: store it as the reference and mark every column as
    # "constant so far".
    InitFlag = 1
    Init = Xrow
    T = pd.Series(
        [1] * len(X_Test.columns.values.tolist()),
        index=X_Test.columns.values.tolist(),
    )

print("T is now " + str(T))

# Print the minimum and maximum vectors (from X_Test).
# Rows are compared column by column in column order, and the first column
# that differs decides the comparison, i.e. the ordering is lexicographic.
# NOTE: the exit(0) call earlier in this script stops execution before this
# section is reached.

# Flags record whether the first row has been taken as the initial candidate.
minFlag=0
maxFlag=0
minRow=[]
maxRow=[]

# Pass 1: find the lexicographically smallest row.
for index, Xrow in X_Test.iterrows():
    print("The next row is " + str(Xrow))
    
    if minFlag == 0:
        # The first row is the initial minimum candidate.
        minRow=Xrow  
        minFlag=1
        continue
        
    # Assume this row is the new minimum until a column proves otherwise.
    MinFound=1
    for Sindex, Svalue in Xrow.items():
        print("minRow is " + str(minRow[Sindex]))
        print("SIndex is " + str(Sindex))
        print("SValue is " + str(Svalue))
        if Svalue > minRow[Sindex]:
            # First differing column is larger: this row is not the minimum.
            MinFound=0
            break
        if Svalue < minRow[Sindex]:
            # First differing column is smaller: this row is the new minimum.
            break
    
    # Also reached when the row equals the current minimum in every column.
    if MinFound==1:
        print("Min Row found")
        minRow=Xrow
        
    #break
       
# Pass 2: find the lexicographically largest row (mirror image of pass 1).
for index, Xrow in X_Test.iterrows():
    print("The next row is " + str(Xrow))
    
    if maxFlag == 0:
        # The first row is the initial maximum candidate.
        maxRow=Xrow
        maxFlag=1
        continue
        
    # Assume this row is the new maximum until a column proves otherwise.
    MaxFound=1
    for Sindex, Svalue in Xrow.items():
        print("maxRow is " + str(maxRow[Sindex]))
        print("SIndex is " + str(Sindex))
        print("SValue is " + str(Svalue))
        if Svalue < maxRow[Sindex]:
            # First differing column is smaller: this row is not the maximum.
            print("No Max")
            MaxFound=0
            break
        if Svalue > maxRow[Sindex]:
            # First differing column is larger: this row is the new maximum.
            break    
    
    # Also reached when the row equals the current maximum in every column.
    if MaxFound==1:
        print("Max Row found")
        maxRow=Xrow
        
    #break

            
print("the minimum row is " + str(minRow))
print("the maximum row is " + str(maxRow))
        




# Dump the same 14 columns that were selected into X_Test, in full.
print(TacticBinDF[["TA0043", "TA0042", "TA0001", "TA0002", "TA0003", "TA0004", "TA0005", "TA0006", "TA0007", "TA0008", "TA0009", "TA0011", "TA0010", "TA0040"]].to_string())

#Val=hopkins(TacticBinDF[["TA0043", "TA0042", "TA0001", "TA0002", "TA0003", "TA0004", "TA0005", "TA0006", "TA0007", "TA0008", "TA0009", "TA0011", "TA0010", "TA0040"]])
#Val=hopkinsTEST(TacticBinDF[["TA0043", "TA0042", "TA0001", "TA0002", "TA0003", "TA0004", "TA0005", "TA0006", "TA0007", "TA0008", "TA0009", "TA0011", "TA0010", "TA0040"]])

#print("The value is " + str(Val))

# https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html
#model = AgglomerativeClustering(metric='manhattan', linkage='average',  distance_threshold=0, n_clusters=None)
#model = model.fit(TacticBinDF[["TA0043", "TA0042", "TA0001", "TA0002", "TA0003", "TA0004", "TA0005", "TA0006", "TA0007", "TA0008", "TA0009", "TA0011", "TA0010", "TA0040"]])
#print("The number of clusters is " + str(model.n_clusters))
