Source Code for network and port scanner, TRW algorithm, and reduction method implementations.

trw_map.py 5.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. ####################################################
  2. # Researcher: Sara Schwarz
  3. # Advisor: Dr. Jose Ortiz
  4. # VERSION #2
  5. # Program Objective:
  6. #
  7. #In this work we seek to develop algorithms (brute-force approach, TRW, etc.) to detect network and port
  8. #scanners in large-scale network traffic, such as that of Internet Service Providers,
  9. #Big Data research centers, and Science DMZ networks implemented in research institutions, using network
  10. #flows. This specific program follows the threshold random walk (TRW) algorithm: it reads the network flows
  11. #and records, for each source IP, the number of successful connections and failed connections.
  12. # Successful connections => The three-way handshake was completed, i.e. the
  13. # Acknowledgment from the destination node was received.
  14. # Failed connections => The three-way handshake was not completed, i.e. no answer
  15. # was received, so the flow only carries the Synchronization (SYN) flag.
  16. #Later, the ratio between failed and successful connections is compared with a threshold to classify the source IPs as
  17. #either suspicious scanners or not.
  18. #To run this algorithm we will be using high-performance methods for computing such as Map and Reduce,
  19. #specifically Python's Pool Class Library.
  20. ###############################################################
  21. from silk import *
  22. import multiprocessing as mp
  23. def Analisis():
  24. counter = 0 # borrar luego
  25. counter_files=0
  26. sampleHash={} #hash contains each sip with their dip and failed and succesful connections
  27. flow_counter = 0
  28. for filename in FGlob(classname="all", type="all", start_date=startDate, end_date=endDate, site_config_file="/etc/silk/conf-v9/silk.conf", data_rootdir="/home/scratch/flow/rwflowpack/"):
  29. counter_files +=1
  30. for rec in silkfile_open(filename, READ):#reading the flow file
  31. flow_counter += 1
  32. if (':' in str(rec.sip)):
  33. continue
  34. else:
  35. connection = [0] * 2 #array to contain the amount of failed connections and amount of succesful conenctions
  36. sip = str(rec.sip)
  37. flags = str(rec.tcpflags)
  38. #print sip, flags
  39. #counter +=1
  40. #verify if the network flow contains the Acknowledgment flag, which will imply a succesful connection
  41. if 'A' in flags:
  42. connection[1]=1 #succesful conections
  43. else:
  44. connection [0] =1 #failed conections
  45. if sip in sampleHash:
  46. sampleHash[sip][0]+= connection[0]
  47. sampleHash[sip][1]+= connection[1]
  48. else:
  49. sampleHash[sip] = [connection[0], connection[1]]
  50. #print sampleHash
  51. # print "flows", flow_counter
  52. # print counter_files
  53. return sampleHash
  54. def merge_list(list_hash):
  55. sampleHash = {}
  56. for sip_hash in list_hash:
  57. #print sip_hash
  58. for sip, arr in sip_hash.items():
  59. if sip in sampleHash:
  60. sampleHash[sip][0]+= arr[0]
  61. sampleHash[sip][1]+= arr[1]
  62. else:
  63. sampleHash[sip] = [arr[0], arr[1]]
  64. return sampleHash
  65. def main():
  66. startDate = "2018/06/1"
  67. endDate = "2018/06/30"
  68. p_ratio = 2 #threshold of the ratio. This ratio is chosen.
  69. p_counter = 100 #if there are only failed connections, rather than the ratio,
  70. #the total number of failed connection is compared to this threshold.
  71. process_num = 2
  72. pool = mp.Pool(processes=process_num)
  73. files1 = FGlob(classname="all", type="all", start_date=startDate, end_date=endDate, site_config_file="/etc/silk/conf-v9/silk.conf", data_rootdir="/home/scratch/flow/rwflowpack/")
  74. #****************************************
  75. #****************************************
  76. files1 = [x for x in files1] #change files1 from FGlob object to a list
  77. #print len(files1)
  78. files_list = []
  79. quenepas_blocksize = len(files1) / process_num
  80. for x in range(process_num):
  81. files_list.append(files1[0:quenepas_blocksize])
  82. files1 = files1[quenepas_blocksize:]
  83. for i in files1:
  84. files_list[files1.index(i)].append(i)
  85. #****************************************
  86. #****************************************
  87. fileHash = pool.map(Analisis, files_list) # FGlob(classname="all", type="all", start_date=startDate, end_date=endDate, site_config_file="/etc/silk/conf-v9/silk.conf", data_rootdir="/home/scratch/flow/rwflowpack/"))
  88. #print fileHash[0]['136.145.231.48']
  89. sip_connections_list = merge_list(fileHash)
  90. #print sip_connections_list['136.145.231.48']
  91. #print sip_connections_list
  92. counter = 1
  93. sipList = {"sipList":[]}
  94. for sip in sip_connections_list:
  95. #print counter, sip, "sip0", sip_connections_list[sip][0], "sip1", sip_connections_list[sip] [1]
  96. counter +=1
  97. #compares the ratio of succesful connections to failed connections with a given threshold
  98. #If the amount is larger than the threshold it is added to a list of suspicious sips.
  99. if (sip_connections_list[sip][1] != 0) and ((sip_connections_list[sip][0] / sip_connections_list[sip][1]) > p_ratio):
  100. hash = {sip:sip_connections_list[sip]}
  101. #print hash
  102. sipList["sipList"].append(hash)
  103. continue
  104. elif (sip_connections_list[sip][1] == 0 and sip_connections_list[sip][0] > p_counter):
  105. hash = {sip:sip_connections_list[sip]}
  106. #print hash
  107. sipList["sipList"].append(hash)
  108. continue
  109. #If it does not reach the threshold, the network flow is ignored.
  110. else:
  111. continue
  112. counter = 0
  113. for i in sipList["sipList"]:
  114. counter +=1
  115. print counter
  116. if __name__== "__main__":
  117. main()