123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
####################################################
# Researcher: Sara Schwarz
# Advisor: Dr. Jose Ortiz
# VERSION #2
# Program Objective:
#
# In this work we seek to develop algorithms (brute-force approach, TRW, etc.) to detect network and port
# scanners in the traffic of large-scale networks such as those of Internet Service Providers,
# Big Data research centers, and Science DMZ networks implemented in research institutions, using network
# flows. This specific program follows the threshold random walk algorithm: it reads the network flows
# and records, for each source IP, the number of successful connections and failed connections.
#   Successful connection => the three-way handshake was completed, i.e. the
#       acknowledgment from the destination node was received.
#   Failed connection => the three-way handshake was not completed: no answer was
#       received, so the flow only carries the synchronization (SYN).
# Later, the ratio of failed to successful connections is compared with a threshold to classify the source IPs as
# either suspicious scanners or not.
# To run this algorithm we use high-performance computing methods such as Map and Reduce,
# specifically the Pool class of Python's multiprocessing library.
###############################################################
-
- from silk import *
- import multiprocessing as mp
-
-
def Analisis(files=None):
    """Tally failed and successful connections per IPv4 source address.

    Every flow record of the given SiLK flow files is classified by its TCP
    flags: a record whose flags contain ACK ('A') counts as a successful
    connection, any other record counts as a failed one.  Records whose
    source address prints with a ':' (IPv6) are skipped.

    Args:
        files: Iterable of flow file names to read, e.g. one worker's share
            when this function is dispatched through ``Pool.map``.  When
            omitted, the files are discovered with ``FGlob`` using the
            module-level names ``startDate`` and ``endDate``, which must
            then be defined by the caller's module.

    Returns:
        dict mapping each source IP (as a string) to a two-element list
        ``[failed_connections, successful_connections]``.
    """
    if files is None:
        # Legacy zero-argument call: scan the whole configured date range.
        files = FGlob(classname="all", type="all", start_date=startDate, end_date=endDate, site_config_file="/etc/silk/conf-v9/silk.conf", data_rootdir="/home/scratch/flow/rwflowpack/")

    sampleHash = {}  # sip -> [failed, successful]
    for filename in files:
        flow_file = silkfile_open(filename, READ)
        try:
            for rec in flow_file:
                sip = str(rec.sip)
                if ':' in sip:
                    # IPv6 textual form; only IPv4 sources are analysed.
                    continue
                # Slot 1 (successful) when the ACK flag is present,
                # slot 0 (failed) otherwise.
                slot = 1 if 'A' in str(rec.tcpflags) else 0
                counts = sampleHash.get(sip)
                if counts is None:
                    counts = sampleHash[sip] = [0, 0]
                counts[slot] += 1
        finally:
            # Release the file handle promptly; a date range can span
            # a large number of flow files.
            flow_file.close()
    return sampleHash
-
-
-
def merge_list(list_hash):
    """Combine per-worker tallies into a single dictionary.

    Args:
        list_hash: Iterable of dicts, each mapping a source IP to a
            two-element sequence ``[failed, successful]``.

    Returns:
        A new dict mapping every source IP seen in any input dict to a
        fresh list holding the element-wise sums of its tallies.  The
        input dicts and their lists are left untouched.
    """
    merged = {}
    for partial in list_hash:
        for address, tally in partial.items():
            totals = merged.get(address)
            if totals is None:
                # First sighting: copy the pair so later sums never
                # mutate the caller's list.
                merged[address] = [tally[0], tally[1]]
            else:
                totals[0] += tally[0]
                totals[1] += tally[1]
    return merged
-
-
-
def _split_into_chunks(items, parts):
    """Split *items* into *parts* lists whose lengths differ by at most one."""
    # Floor division keeps the slice bounds integral on every interpreter.
    block_size = len(items) // parts
    chunks = [items[i * block_size:(i + 1) * block_size] for i in range(parts)]
    # The remainder holds fewer than *parts* items; hand one to each of
    # the first chunks.
    for index, leftover in enumerate(items[parts * block_size:]):
        chunks[index].append(leftover)
    return chunks


def _is_suspicious(failed, successful, p_ratio, p_counter):
    """Return True when a source's tallies exceed the scanner thresholds."""
    if successful != 0:
        # Cross-multiplied form of ``failed / successful > p_ratio``.
        # Dividing two ints truncates under Python 2 (5 / 2 == 2), which
        # would hide ratios just above the threshold.
        return failed > p_ratio * successful
    # No successful connection at all: fall back to an absolute count.
    return failed > p_counter


def main():
    """Detect suspicious scanners in one month of flows and print their count.

    The flow files for the configured date range are split across a pool of
    worker processes, each worker tallies failed/successful connections per
    source IP, the partial tallies are merged, and every source whose
    failed-to-successful ratio exceeds ``p_ratio`` (or, when it has no
    successful connection, whose failed count exceeds ``p_counter``) is
    collected.  The number of collected sources is printed.
    """
    startDate = "2018/06/1"
    endDate = "2018/06/30"
    p_ratio = 2      # threshold for the failed-to-successful ratio (chosen value)
    p_counter = 100  # threshold on failed connections when none succeeded
    process_num = 2

    # Materialise the FGlob result so it can be sliced into per-worker chunks.
    files1 = list(FGlob(classname="all", type="all", start_date=startDate, end_date=endDate, site_config_file="/etc/silk/conf-v9/silk.conf", data_rootdir="/home/scratch/flow/rwflowpack/"))
    files_list = _split_into_chunks(files1, process_num)

    pool = mp.Pool(processes=process_num)
    try:
        # Map step: one chunk of file names per call.
        fileHash = pool.map(Analisis, files_list)
    finally:
        # Always shut the workers down, even if a worker raised.
        pool.close()
        pool.join()

    # Reduce step: sum the per-worker tallies per source IP.
    sip_connections_list = merge_list(fileHash)

    # Sources that do not reach a threshold are simply left out.
    sipList = {"sipList": []}
    for sip, tally in sip_connections_list.items():
        if _is_suspicious(tally[0], tally[1], p_ratio, p_counter):
            sipList["sipList"].append({sip: tally})

    # Parenthesised single-argument form prints the same under Python 2 and 3.
    print(len(sipList["sipList"]))
-
-
# Script entry point: run the scan detection only when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    main()
|