####################################################
# Researcher: Sara Schwarz
# Advisor: Dr. Jose Ortiz
# VERSION #2
# Program Objective:
#
# In this work we seek to develop algorithms (brute-force approach, TRW, etc.) to detect network and port
# scanners in the traffic of large-scale networks, such as those of Internet Service Providers,
# Big Data research centers, and Science DMZ networks implemented in research institutions, using network
# flows. This specific program follows the threshold random walk algorithm: it reads the network flows
# and records, for each source IP, the number of successful connections and failed connections.
#   Successful connection => The three-way handshake was completed, i.e. the
#                            Acknowledgment from the destination node was received.
#   Failed connection     => The three-way handshake was not completed, i.e. no
#                            answer was received and the flow only has the Synchronization.
# Later, the ratio between failed and successful connections is compared with a threshold to classify
# the source IPs as either suspicious scanners or not.
# To run this algorithm we use high-performance computing methods such as Map and Reduce,
# specifically the Pool class of Python's multiprocessing library.
###############################################################

from silk import *
import multiprocessing as mp

# Flow repository settings. They live at module level so that worker
# processes (which only run Analisis) and main() see the same values.
START_DATE = "2018/06/1"
END_DATE = "2018/06/30"
SILK_CONFIG_FILE = "/etc/silk/conf-v9/silk.conf"
DATA_ROOTDIR = "/home/scratch/flow/rwflowpack/"


def _list_flow_files():
    """Return the names of all flow files in the configured date range as a list."""
    return list(FGlob(classname="all", type="all",
                      start_date=START_DATE, end_date=END_DATE,
                      site_config_file=SILK_CONFIG_FILE,
                      data_rootdir=DATA_ROOTDIR))


def _split_files(files, parts):
    """Split `files` into `parts` lists whose lengths differ by at most one."""
    blocksize = len(files) // parts
    chunks = [files[i * blocksize:(i + 1) * blocksize] for i in range(parts)]
    # Fewer than `parts` files are left over; hand them out one per chunk.
    for i, leftover in enumerate(files[blocksize * parts:]):
        chunks[i].append(leftover)
    return chunks


def _is_suspicious(failed, successful, p_ratio, p_counter):
    """Return True when the failed/successful counts exceed the thresholds."""
    if successful:
        # Same test as failed / successful > p_ratio, but without the
        # truncation of integer division (5 failed / 2 successful used to
        # truncate to 2 and was not flagged with p_ratio = 2).
        return failed > p_ratio * successful
    # Only failed connections: compare the raw count instead of a ratio.
    return failed > p_counter


def Analisis(files=None):
    """Count failed and successful connections per source IP.

    Args:
        files: iterable of flow file names to read. When omitted, every
            file in the configured date range is read.

    Returns:
        dict mapping each source IP (as a string) to a two-item list
        [failed connections, successful connections].
    """
    if files is None:
        files = _list_flow_files()

    sampleHash = {}
    for filename in files:
        flow_file = silkfile_open(filename, READ)
        try:
            for rec in flow_file:
                sip = str(rec.sip)
                if ':' in sip:
                    # Addresses written with ':' (IPv6) are not analysed.
                    continue
                counts = sampleHash.setdefault(sip, [0, 0])
                # A flow carrying the Acknowledgment flag counts as a
                # successful connection; anything else counts as failed.
                # NOTE(review): any record without an ACK flag, including
                # non-TCP traffic if present in the data, is counted as
                # failed -- confirm this is intended.
                if 'A' in str(rec.tcpflags):
                    counts[1] += 1
                else:
                    counts[0] += 1
        finally:
            flow_file.close()
    return sampleHash


def merge_list(list_hash):
    """Merge per-worker results into a single dictionary.

    Args:
        list_hash: iterable of dicts shaped like the return value of
            Analisis (source IP -> [failed, successful]).

    Returns:
        A new dict in which the counts of every source IP are summed across
        all input dicts. The inputs are not modified.
    """
    merged = {}
    for sip_hash in list_hash:
        for sip, arr in sip_hash.items():
            totals = merged.setdefault(sip, [0, 0])
            totals[0] += arr[0]
            totals[1] += arr[1]
    return merged


def main():
    """Scan the configured flow files in parallel and print how many source
    IPs are classified as suspicious."""
    p_ratio = 2      # threshold for the failed/successful ratio (chosen value)
    p_counter = 100  # threshold for the failed count when a source IP has
                     # no successful connections at all
    process_num = 2

    # Map step: each worker process analyses its own share of the files.
    files_list = _split_files(_list_flow_files(), process_num)
    pool = mp.Pool(processes=process_num)
    try:
        fileHash = pool.map(Analisis, files_list)
    finally:
        # Release the worker processes even if a worker raised.
        pool.close()
        pool.join()

    # Reduce step: combine the per-worker dictionaries.
    sip_connections_list = merge_list(fileHash)

    # Keep only the source IPs that exceed a threshold; the rest are ignored.
    sipList = {"sipList": [
        {sip: counts}
        for sip, counts in sip_connections_list.items()
        if _is_suspicious(counts[0], counts[1], p_ratio, p_counter)
    ]}

    print(len(sipList["sipList"]))


if __name__ == "__main__":
    main()