Lightweight Vulnerability Scanner for Resource-constrained Organizations

webapp.py (17 KB)

import dash
from dash import dcc, html, dash_table
from dash.dependencies import Input, Output, State, ALL
import pandas as pd
from datetime import datetime as dt
import plotly.express as px
import plotly.graph_objs as go
import os
import json  # needed to parse pattern-matching callback ids in update_treemap

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.abspath(os.path.join(BASE_DIR, "..", "data"))
detailed_scan_csv = os.path.join(DATA_DIR, "detailed_scan_results.csv")
openvas_csv = os.path.join(DATA_DIR, "openvasscan.csv")

# Load and prepare the datasets
df = pd.read_csv(detailed_scan_csv)
vulnerability_data = pd.read_csv(openvas_csv)

# Prepare grouped vulnerability data, one row per (IP, NVT, severity)
grouped_data = vulnerability_data.groupby(['IP', 'NVT Name', 'Severity']).first().reset_index()
grouped_data['Details'] = grouped_data.apply(
    lambda row: f"CVSS: {row['CVSS']}\nSeverity: {row['Severity']}\nSummary: {row['Summary']}\nSolution Type: {row['Solution Type']}",
    axis=1
)

# List of unique IPs for the dropdown
unique_ips = vulnerability_data['IP'].unique().tolist()
unique_ips.insert(0, 'All')

# Convert Timestamp to datetime and sort
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df.sort_values('Timestamp', inplace=True)

# Extract unique timestamps
unique_timestamps = df['Timestamp'].unique()

# Prepare data for the timeline graph, grouped by day
df['Date'] = df['Timestamp'].dt.date
ip_count_over_time = df.groupby('Date')['IP'].nunique().reset_index()
ip_count_over_time.columns = ['Date', 'IP_Count']

# Create the Plotly timeline graph
timeline_fig = px.line(ip_count_over_time, x='Date', y='IP_Count', title='Number of IPs Over Time')
timeline_fig.update_layout(
    xaxis_title="Date",
    yaxis_title="IP Count"
)

# Initialize the Dash app
app = dash.Dash(__name__)

# Convert timestamps to strings for slider display
timestamp_options = [{'label': str(ts), 'value': ts} for ts in df['Timestamp'].unique()]
timestamp_values = [ts.value for ts in df['Timestamp']]


def style_status_badge(status):
    """Prefix a status label with a coloured emoji badge for the data table."""
    emoji_map = {
        'Added': '🟩',
        'Removed': '🟥',
        'Still Active': '⚪'
    }
    return f"{emoji_map.get(status, '⬜')} {status}"
app.layout = html.Div([
    dcc.Tabs(id="tabs", children=[
        dcc.Tab(label='Overview', children=[
            html.Div([
                dcc.RangeSlider(
                    id='time-range-slider',
                    min=0,
                    max=len(unique_timestamps) - 1,
                    value=[0, len(unique_timestamps) - 1],
                    marks={i: {'label': str(ts)[:10]} for i, ts in enumerate(unique_timestamps)},
                    step=1,
                    allowCross=False
                ),
                dash_table.DataTable(
                    id='table',
                    # One column per scan field, plus a markdown-rendered Status column for the badges
                    columns=[{"name": i, "id": i} for i in df.columns]
                            + [{"name": "Status", "id": "Status", "presentation": "markdown"}],
                    sort_action='native',
                    filter_action='native',
                    style_table={'overflowX': 'auto'},
                    style_data_conditional=[{'if': {'column_id': 'Status'}, 'textAlign': 'center', 'width': '120px'}]
                ),
                html.Div([
                    dcc.Graph(
                        id='timeline-graph',
                        figure=timeline_fig
                    ),
                    dcc.Graph(id='open-ports-bar-chart')
                ], style={'display': 'flex', 'flex-direction': 'row'}),
                html.Div([
                    dcc.Graph(id='severity-pie-chart')
                ], style={'display': 'flex', 'flex-direction': 'row'}),
                html.Div([
                    dcc.Graph(id='ip-change-bar-chart'),
                    dash_table.DataTable(
                        id='ip-change-table',
                        columns=[
                            {"name": "IP", "id": "IP"},
                            {"name": "Status", "id": "Status"}
                        ],
                        sort_action='native',
                        filter_action='native',
                        style_table={'overflowX': 'auto'}
                    )
                ], style={'display': 'flex', 'flex-direction': 'row'}),
                html.Div(id='summary-section', style={'padding': '20px'})
            ])
        ]),
        dcc.Tab(label='Vulnerability Analysis', children=[
            html.Div([
                dcc.Dropdown(
                    id='severity-dropdown',
                    options=[{'label': s, 'value': s} for s in ['All', 'High', 'Medium', 'Low']],
                    value='All'
                ),
                dcc.Dropdown(
                    id='ip-dropdown',
                    options=[{'label': ip, 'value': ip} for ip in unique_ips],
                    value='All'
                ),
                dcc.Graph(id='vulnerability-treemap'),
                html.Div(id='details-and-ip-output'),
                html.Div(id='clicked-ip', style={'display': 'none'})
            ])
        ]),
        dcc.Tab(label='Port Heatmap', children=[
            html.Div([
                dcc.Graph(id='ip-port-heatmap', style={'height': '700px', 'width': '100%'}),
                html.Div([
                    html.P("🟦 = Port is Open"),
                    html.P("⬜ = Port is Closed"),
                    html.P("Each row represents a host (IP), and each column is a port."),
                    html.P("This heatmap shows which ports are open on each host at the selected time.")
                ], style={
                    'padding': '10px',
                    'backgroundColor': '#f9f9f9',
                    'border': '1px solid #ccc',
                    'marginTop': '10px',
                    'borderRadius': '5px'
                })
            ])
        ])
    ])
])
@app.callback(
    [Output('table', 'data'),
     Output('table', 'style_data_conditional'),
     Output('timeline-graph', 'figure'),
     Output('open-ports-bar-chart', 'figure'),
     Output('severity-pie-chart', 'figure'),
     Output('ip-port-heatmap', 'figure'),
     Output('ip-change-bar-chart', 'figure'),
     Output('ip-change-table', 'data'),
     Output('summary-section', 'children')],
    [Input('time-range-slider', 'value')]
)
def update_overview_tab(time_range):
    start_index, end_index = time_range
    start_timestamp = unique_timestamps[start_index]
    end_timestamp = unique_timestamps[end_index]

    # Filter data within the selected time range
    filtered_df = df[(df['Timestamp'] >= start_timestamp) & (df['Timestamp'] <= end_timestamp)].copy()

    # Separate copy for the table, so badge columns do not leak into the charts
    filtered_df_selected = filtered_df.copy()

    # Determine IPs in the time range
    all_ips = set(filtered_df['IP'])

    # Get the IP set of the scan immediately before the selected range
    if start_index > 0:
        prev_timestamp = unique_timestamps[start_index - 1]
    else:
        prev_timestamp = start_timestamp
    prev_ips = set(df[df['Timestamp'] == prev_timestamp]['IP'])

    new_ips = all_ips - prev_ips
    removed_ips = prev_ips - all_ips
    existing_ips = all_ips.intersection(prev_ips)

    # Add dummy rows for removed IPs (with NaNs or placeholders)
    removed_rows = pd.DataFrame({
        "IP": list(removed_ips),
        "Hostname": "", "MAC Address": "", "Protocol": "", "Port": "", "Name": "",
        "State": "", "Product": "", "Version": "", "Extra Info": "",
        "Timestamp": pd.NaT, "Date": None
    })
    filtered_df_selected = pd.concat([filtered_df_selected, removed_rows], ignore_index=True)

    # Build status dictionary for badges
    status_dict = {}
    for ip in new_ips:
        status_dict[ip] = 'Added'
    for ip in removed_ips:
        status_dict[ip] = 'Removed'
    for ip in existing_ips:
        status_dict[ip] = 'Still Active'

    # Assign and badge
    filtered_df_selected['Status'] = filtered_df_selected['IP'].map(status_dict).fillna('Unknown')
    filtered_df_selected['Status'] = filtered_df_selected['Status'].apply(style_status_badge)

    # Conditional row formatting based on the 'Status' column.
    # 'contains' is used because the emoji badge prefix (e.g. "🟩 Added") means
    # an exact match on the plain status text would never apply.
    style = [
        {
            'if': {'filter_query': '{Status} contains "Added"'},
            'borderLeft': '4px solid green',
            'backgroundColor': '#eaf7ea'  # very light green background
        },
        {
            'if': {'filter_query': '{Status} contains "Removed"'},
            'borderLeft': '4px solid red',
            'backgroundColor': '#fcebea'  # very light red background
        },
        {
            'if': {'filter_query': '{Status} contains "Still Active"'},
            'borderLeft': '4px solid lightgray'
        }
    ]

    # Update timeline graph, grouped by day
    filtered_df['Date'] = filtered_df['Timestamp'].dt.date
    ip_count_over_time = filtered_df.groupby('Date')['IP'].nunique().reset_index()
    ip_count_over_time.columns = ['Date', 'IP_Count']
    timeline_fig = px.line(ip_count_over_time, x='Date', y='IP_Count', title='Number of IPs Over Time')
    timeline_fig.update_layout(
        xaxis_title="Date",
        yaxis_title="IP Count"
    )

    # Open ports bar chart
    open_ports_count = filtered_df['Port'].value_counts().reset_index()
    open_ports_count.columns = ['Port', 'Count']
    open_ports_bar_chart = px.bar(open_ports_count, x='Port', y='Count', title='Distribution of Open Ports')
    open_ports_bar_chart.update_layout(
        xaxis_title="Port",
        yaxis_title="Count"
    )
    open_ports_bar_chart.update_traces(marker_color='blue', marker_line_color='darkblue', marker_line_width=1.5, opacity=0.8)

    # Severity pie chart
    severity_count = vulnerability_data['Severity'].value_counts().reset_index()
    severity_count.columns = ['Severity', 'Count']
    severity_pie_chart = px.pie(severity_count, names='Severity', values='Count', title='Severity Distribution')

    # IP-Port heatmap: binary open/closed, covering only the ports that were
    # actually scanned, sorted ascending
    all_ports = sorted(filtered_df['Port'].dropna().astype(int).unique().tolist())
    heatmap_df = (
        filtered_df[["IP", "Port"]]
        .dropna()
        .assign(value=1)
        .pivot_table(index="IP", columns="Port", values="value", fill_value=0)
    )
    heatmap_df.columns = heatmap_df.columns.astype(int)
    heatmap_df = heatmap_df.sort_index(axis=1)
    hover_text = [
        [f"IP: {ip}<br>Port: {port}<br>Status: {'Open' if val == 1 else 'Closed'}"
         for port, val in zip(heatmap_df.columns, row)]
        for ip, row in zip(heatmap_df.index, heatmap_df.values)
    ]

    # Generate heatmap
    ip_port_heatmap = go.Figure(data=go.Heatmap(
        z=heatmap_df.values,
        x=heatmap_df.columns,
        y=heatmap_df.index,
        text=hover_text,
        hoverinfo='text',
        colorscale=[[0, 'white'], [1, 'darkblue']],
        zmin=0,
        zmax=1,
        zsmooth=False,
        colorbar=dict(
            title='Port Status',
            tickvals=[0, 1],
            ticktext=['Closed (White)', 'Open (Blue)']
        )
    ))
    ip_port_heatmap.update_layout(
        title='Binary Heatmap - Which Ports Are Open on Which Hosts',
        xaxis_title='Port',
        yaxis_title='IP',
        height=600
    )

    # IP change table (new_ips, removed_ips and existing_ips were computed above,
    # relative to the scan immediately before the selected range)
    ip_change_data = []
    for ip in new_ips:
        ip_change_data.append({"IP": ip, "Status": "Added"})
    for ip in removed_ips:
        ip_change_data.append({"IP": ip, "Status": "Removed"})
    for ip in existing_ips:
        ip_change_data.append({"IP": ip, "Status": "Still Active"})

    # IP change bar chart
    ip_change_summary = {
        "Added": len(new_ips),
        "Removed": len(removed_ips),
        "Still Active": len(existing_ips)
    }
    ip_change_bar_chart = px.bar(
        x=list(ip_change_summary.keys()),
        y=list(ip_change_summary.values()),
        title="IP Changes Summary"
    )
    ip_change_bar_chart.update_layout(
        xaxis_title="Change Type",
        yaxis_title="Count"
    )
    ip_change_bar_chart.update_traces(marker_color='purple', marker_line_color='darkblue', marker_line_width=1.5, opacity=0.8)

    # Summary section
    total_unique_ips = len(df['IP'].unique())
    total_vulnerabilities = len(vulnerability_data)
    most_common_ports = filtered_df['Port'].value_counts().head(5).to_dict()
    most_dangerous_vulnerability = vulnerability_data.loc[vulnerability_data['CVSS'].idxmax()]
    most_common_vulnerability = vulnerability_data['NVT Name'].value_counts().idxmax()
    most_common_ip = df['IP'].value_counts().idxmax()
    average_cvss_score = vulnerability_data['CVSS'].mean()
    ips_with_most_vulnerabilities = vulnerability_data['IP'].value_counts().head(5).to_dict()
    summary_content = html.Div([
        html.H3("Summary of Interesting Data"),
        html.P(f"Total unique IPs: {total_unique_ips}"),
        html.P(f"Total vulnerabilities recorded: {total_vulnerabilities}"),
        html.P(f"Most dangerous vulnerability (highest CVSS score): {most_dangerous_vulnerability['NVT Name']} with CVSS score {most_dangerous_vulnerability['CVSS']}"),
        html.P(f"Most common vulnerability: {most_common_vulnerability}"),
        html.P(f"Most common IP: {most_common_ip}"),
        html.P(f"Average CVSS score: {average_cvss_score:.2f}"),
        html.H4("Most Common Ports:"),
        html.Ul([html.Li(f"Port {port}: {count} times") for port, count in most_common_ports.items()]),
        html.H4("IPs with the Most Vulnerabilities:"),
        html.Ul([html.Li(f"IP {ip}: {count} vulnerabilities") for ip, count in ips_with_most_vulnerabilities.items()])
    ])

    return (filtered_df_selected.to_dict('records'), style, timeline_fig, open_ports_bar_chart, severity_pie_chart,
            ip_port_heatmap, ip_change_bar_chart, ip_change_data, summary_content)
@app.callback(
    [Output('vulnerability-treemap', 'figure'),
     Output('clicked-ip', 'children')],
    [Input('severity-dropdown', 'value'),
     Input('ip-dropdown', 'value'),
     Input({'type': 'dynamic-ip', 'index': ALL}, 'n_clicks')],
    [State({'type': 'dynamic-ip', 'index': ALL}, 'id')]
)
def update_treemap(selected_severity, selected_ip, n_clicks, ip_ids):
    ctx = dash.callback_context

    # Determine if the callback was triggered by a related-IP link click
    if ctx.triggered and 'dynamic-ip' in ctx.triggered[0]['prop_id']:
        # prop_id looks like '{"index":"<ip>","type":"dynamic-ip"}.n_clicks';
        # recover the component id and extract the clicked IP
        triggered_info = ctx.triggered[0]
        button_id = triggered_info['prop_id'].split('}.')[0] + '}'
        clicked_ip = json.loads(button_id)['index']
    else:
        clicked_ip = None

    # Filter data based on severity, dropdown IP, or clicked related IP
    filtered_data = grouped_data.copy()
    filtered_data['CVSS'] = filtered_data['CVSS'].fillna(0)
    if selected_severity != 'All':
        filtered_data = filtered_data[filtered_data['Severity'] == selected_severity]
    if selected_ip != 'All':
        filtered_data = filtered_data[filtered_data['IP'] == selected_ip]
    if clicked_ip:
        filtered_data = filtered_data[filtered_data['IP'] == clicked_ip]
    filtered_data = filtered_data[filtered_data['CVSS'] > 0]

    fig = px.treemap(
        filtered_data,
        path=['IP', 'NVT Name'],
        values='CVSS',
        color='CVSS',
        color_continuous_scale='reds',
        hover_data=['Details']
    )
    return fig, ""  # Reset clicked-ip because of bug
# Callback to display details and related IPs
@app.callback(
    Output('details-and-ip-output', 'children'),
    [Input('vulnerability-treemap', 'clickData')]
)
def display_details_and_ips(clickData):
    if clickData is not None:
        clicked_vuln = clickData['points'][0]['label'].split('<br>')[0]
        details = clickData['points'][0]['customdata'][0]
        matching_ips = vulnerability_data[vulnerability_data['NVT Name'] == clicked_vuln]['IP'].unique()
        return html.Div([
            html.Pre(f'Details of Selected Vulnerability:\n{details}'),
            html.H4("Related IPs with the same vulnerability:"),
            html.Div([
                html.A(ip, href='#', id={'type': 'dynamic-ip', 'index': ip},
                       style={'marginRight': '10px', 'cursor': 'pointer'})
                for ip in matching_ips
            ])
        ])
    return 'Click on a vulnerability to see details and related IPs.'


if __name__ == '__main__':
    app.run(debug=True)
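
The dashboard reads both CSVs from a data/ directory one level above webapp.py. For a quick local run without real Nmap/OpenVAS exports, a minimal sketch along these lines can write placeholder files with the columns the app references; the helper name make_sample_data.py and all sample values are illustrative assumptions, not part of the project.

make_sample_data.py (illustrative helper)

# Hypothetical helper, not part of webapp.py: writes two tiny CSVs into ../data/
# with the column names webapp.py reads, so the dashboard can be opened locally.
import os
import pandas as pd

DATA_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "data"))
os.makedirs(DATA_DIR, exist_ok=True)

# Columns referenced by webapp.py for the detailed (Nmap-style) scan results
detailed = pd.DataFrame([
    {"IP": "192.0.2.10", "Hostname": "host-a", "MAC Address": "00:00:5e:00:53:01",
     "Protocol": "tcp", "Port": 22, "Name": "ssh", "State": "open",
     "Product": "OpenSSH", "Version": "8.9", "Extra Info": "",
     "Timestamp": "2024-01-01 12:00:00"},
    {"IP": "192.0.2.11", "Hostname": "host-b", "MAC Address": "00:00:5e:00:53:02",
     "Protocol": "tcp", "Port": 80, "Name": "http", "State": "open",
     "Product": "nginx", "Version": "1.24", "Extra Info": "",
     "Timestamp": "2024-01-02 12:00:00"},
])
detailed.to_csv(os.path.join(DATA_DIR, "detailed_scan_results.csv"), index=False)

# Columns referenced by webapp.py for the OpenVAS results
openvas = pd.DataFrame([
    {"IP": "192.0.2.10", "NVT Name": "Example vulnerability", "Severity": "High",
     "CVSS": 7.5, "Summary": "Placeholder finding for local testing.",
     "Solution Type": "VendorFix"},
])
openvas.to_csv(os.path.join(DATA_DIR, "openvasscan.csv"), index=False)

print(f"Wrote sample CSVs to {DATA_DIR}")

With the sample files in place, python webapp.py starts the dashboard on Dash's default address (http://127.0.0.1:8050).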