460 lines
19 KiB
Python
460 lines
19 KiB
Python
from openai import OpenAI
|
||
from django.conf import settings
|
||
from .models import Risk, Control
|
||
from weasyprint import HTML
|
||
from django.http import HttpResponse
|
||
from PIL import Image
|
||
import io
|
||
import base64
|
||
import matplotlib
|
||
matplotlib.use('Agg')
|
||
import matplotlib.pyplot as plt
|
||
from django.contrib.staticfiles.finders import find
|
||
import matplotlib.image as mpimg
|
||
# Site base URL taken from Django settings; the PDF helpers in this module
# prepend it to "/document/<id>/" to build the URL that gets rendered.
site_domain = settings.SITE_DOMAIN
|
||
import random
|
||
|
||
|
||
|
||
def extract_organization_details(organization):
    """Collect the populated fields of an organization into a plain dict.

    Walks ``organization._meta.get_fields()`` and, for every field other than
    ``name`` and ``email`` that exists as an attribute and holds a truthy
    value, stores that value under the field's stripped ``help_text`` (or the
    field name when no help text is available).

    Args:
        organization: Object exposing ``_meta.get_fields()`` and one
            attribute per field.

    Returns:
        dict: Mapping of help text / field name to the field's value.
    """
    skipped = {"name", "email"}
    details = {}

    for model_field in organization._meta.get_fields():
        attr = model_field.name
        if attr in skipped or not hasattr(organization, attr):
            continue

        current = getattr(organization, attr)
        if not current:
            # Falsy values (empty strings, 0, None, False) are left out.
            continue

        label = getattr(model_field, 'help_text', '').strip()
        details[label or attr] = current

    return details
|
||
|
||
def get_top_risk(organization):
    """Ask the chat model to pick the most critical risks for an organization.

    Every ``Risk`` row is rendered into the prompt together with the
    organization details returned by ``extract_organization_details``; the
    model is asked to answer with a comma-separated list of risk IDs.

    Args:
        organization: Model instance describing the company being assessed.

    Returns:
        list[int]: Risk IDs parsed from the reply, in the order the model
        listed them. Tokens that are not plain integers are skipped, so the
        list may contain fewer than 10 entries or be empty.
    """
    client = OpenAI(api_key=settings.OPENAI_API_KEY)

    all_risks = Risk.objects.all()

    risk_list = []
    for risk in all_risks:
        risk_list.append(f"""
        Risk ID: {risk.risk_id}
        Category: {risk.category}
        Name: {risk.risk_name}
        Primary Impact: {risk.primary_impact}
        Secondary Impact: {risk.secondary_impact}
        Tertiary Impact: {risk.tretiary_impact}
        Detection Difficulty: {risk.detection_difficulty}
        Recovery Complexity: {risk.recovery_complexity}
        Business Impact Severity: {risk.businnes_impact_severity}
        """)

    organization_details = extract_organization_details(organization)

    prompt = f"""
    You are an AI risk assessor. Based on the following company details and list of known risks,
    identify the 10 most critical risks for this company. Respond only with risk IDs.

    Company Details:
    {organization_details}

    List of Risks:
    {risk_list}

    Provide only the 10 most critical risk IDs in a simple comma-separated format, e.g "1,3,7,12,..."
    """

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": prompt}]
    )

    # The SDK types ``content`` as optional, so guard against None.
    content = response.choices[0].message.content or ""

    # Models commonly answer "1, 3, 7" or wrap the list in quotes like the
    # example in the prompt; trim whitespace, quotes and a trailing period
    # from each token so those IDs are not silently dropped.
    tokens = (token.strip(" \t\r\n\"'.") for token in content.split(","))

    # isdecimal() (rather than isdigit()) only admits characters that int()
    # can actually convert, so the conversion cannot raise.
    return [int(token) for token in tokens if token.isdecimal()]
|
||
|
||
def _parse_control_selection(reply, valid_control_ids, control_ids_seen, limit):
    """Extract ``(control_id, weight, likelihood)`` tuples from a model reply.

    A line is accepted only when it splits into exactly three ``:``-separated
    parts, every part contains at least one digit, the ID is in
    ``valid_control_ids`` and not yet in ``control_ids_seen``, and both
    scores are within 1..5. Non-digit characters inside a part are ignored.

    Args:
        reply: Raw text returned by the model.
        valid_control_ids: Set of control IDs that may be selected.
        control_ids_seen: Set of already selected IDs; updated in place.
        limit: Maximum number of tuples to accept from this reply.

    Returns:
        list[tuple[int, int, int]]: Newly accepted tuples, in reply order.
    """
    accepted = []
    for raw_line in reply.split("\n"):
        if len(accepted) >= limit:
            # Never collect more than the caller still needs.
            break

        parts = raw_line.strip().split(":")
        if len(parts) != 3:
            continue

        digits = ["".join(filter(str.isdigit, part)) for part in parts]
        if not all(digits):
            continue

        try:
            control_id, weight, likelihood = (int(value) for value in digits)
        except ValueError:
            # str.isdigit() admits characters (e.g. superscripts) int() rejects.
            continue

        if (
            control_id in valid_control_ids
            and control_id not in control_ids_seen
            and 1 <= weight <= 5
            and 1 <= likelihood <= 5
        ):
            accepted.append((control_id, weight, likelihood))
            control_ids_seen.add(control_id)

    return accepted


def get_controls_for_risk(risk, organization, max_retries=5):
    """Ask the chat model for exactly 10 scored controls that mitigate a risk.

    The model is first asked for 10 controls. If fewer than 10 valid, unique
    lines come back, follow-up requests ask only for the missing number,
    offering only controls that have not been selected yet.

    Args:
        risk: Object exposing ``risk_name``.
        organization: Model instance passed to
            ``extract_organization_details``.
        max_retries: Upper bound on follow-up requests after the initial one,
            so an uncooperative model cannot cause an endless loop of API
            calls.

    Returns:
        list[tuple[int, int, int]]: Ten ``(control_id, weight, likelihood)``
        tuples, or an empty list when 10 valid controls could not be obtained.
    """
    required_count = 10

    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    all_controls = Control.objects.all()
    organization_details = extract_organization_details(organization)
    control_list = [f"Control ID: {control.id}, Control Name: {control.name}" for control in all_controls]
    valid_control_ids = {control.id for control in all_controls}
    control_map = {control.id: control.name for control in all_controls}

    def fetch_controls(prompt):
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "system", "content": prompt}]
        )
        # The SDK types ``content`` as optional, so guard against None.
        return (response.choices[0].message.content or "").strip()

    prompt = f"""
    You are an expert in cybersecurity risk management. Given the risk "{risk.risk_name}" and its associated organization details "{organization_details}",
    your task is to select **exactly 10 unique controls** from the provided list that best mitigate this risk. Each control should be assigned:
    - A weight between **1 and 5** (1 = low impact, 5 = high impact).
    - A likelihood score between **1 and 5** (1 = rare occurrence, 5 = highly likely).

    ### Rules:
    1. **Each control ID must be unique** (no duplicates).
    2. **Only return control IDs, weights, and likelihood scores** in the exact format below.
    3. **Weights must be between 1 and 5** (1 = low impact, 5 = high impact).
    4. **Likelihood scores must be between 1 and 5** (1 = rare occurrence, 5 = highly likely).
    5. **Do NOT add explanations, descriptions, or extra text.**
    6. **Ensure that control IDs are randomly distributed and diverse across different categories.**
    ### Available Controls:
    {control_list}

    ### Expected Response Format (STRICTLY FOLLOW THIS FORMAT):
    <control_id> : <weight> : <likelihood>
    <control_id> : <weight> : <likelihood>

    ### Example Correct Response (NO DUPLICATES):
    12 : 5 : 2
    45 : 4 : 1

    ⚠️ **If you provide duplicate control IDs, your response will be rejected. Ensure all control IDs are unique.**
    ⚠️ **Follow the response format exactly. Any deviation will be considered invalid.**
    """

    control_ids_seen = set()
    selected_controls = _parse_control_selection(
        fetch_controls(prompt), valid_control_ids, control_ids_seen, required_count
    )

    attempts = 0
    while len(selected_controls) < required_count and attempts < max_retries:
        remaining_controls = valid_control_ids - control_ids_seen
        if not remaining_controls:
            # Nothing left to offer; another request cannot help.
            break

        attempts += 1
        missing_count = required_count - len(selected_controls)
        # Sorted so the prompt is deterministic rather than in set order.
        remaining_controls_list = [
            f"Control ID: {cid}, Control Name: {control_map[cid]}"
            for cid in sorted(remaining_controls)
        ]

        retry_prompt = f"""
        You are an expert in cybersecurity risk management. Given the risk "{risk.risk_name}" and the organization's details "{organization_details}",
        your task is to select **exactly {missing_count} unique controls** from the provided list that best mitigate this risk. Each control should be assigned:
        - A **weight** between **1 and 5** based on its effectiveness in reducing the risk.
        - A likelihood score between **1 and 5** (1 = rare occurrence, 5 = highly likely).

        ### Rules:
        1. **Each control ID must be unique** (no duplicates).
        2. **Only return control IDs, weights, and likelihood scores** in the exact format below.
        3. **Weights must be between 1 and 5** (1 = low impact, 5 = high impact).
        4. **Likelihood scores must be between 1 and 5** (1 = rare occurrence, 5 = highly likely).
        5. **Do NOT add explanations, descriptions, or extra text.**
        6. **Ensure that control IDs are diverse and well-distributed across different categories.**

        ### Available Controls:
        {remaining_controls_list}

        ### Expected Response Format (STRICTLY FOLLOW THIS FORMAT):
        <control_id> : <weight> : <likelihood>
        <control_id> : <weight> : <likelihood>

        ### Example Correct Response (NO DUPLICATES):
        12 : 4 : 5
        45 : 5 : 3

        ⚠️ **If you provide duplicate control IDs, your response will be rejected. Ensure all control IDs are unique.**
        ⚠️ **Follow the response format exactly. Any deviation will be considered invalid.**
        """

        selected_controls.extend(
            _parse_control_selection(
                fetch_controls(retry_prompt),
                valid_control_ids,
                control_ids_seen,
                missing_count,
            )
        )

    return selected_controls if len(selected_controls) == required_count else []
|
||
|
||
|
||
def generate_recommendations(risks_with_controls, organization):
    """Generate the report's Recommendations section with the chat model.

    Args:
        risks_with_controls: Risks and their controls; interpolated into the
            prompt via its string representation.
        organization: Model instance passed to
            ``extract_organization_details`` for context.

    Returns:
        str: The model's reply with surrounding whitespace stripped.
    """
    llm = OpenAI(api_key=settings.OPENAI_API_KEY)
    organization_details = extract_organization_details(organization)

    prompt = f"""
    You are an AI assistant tasked with generating the Recommendations section for a cybersecurity assessment report. Use the organization’s context and the list of risks with their proposed controls to produce concise, actionable, and prioritized guidance.

    Inputs:
    - Organization details:
    {organization_details}

    - Risks with controls (Python-like list of dicts). Each item includes:
    risk: id, name, category, risk_description (or similar)
    r_impact (inherent impact 1–5), r_likelihood (inherent likelihood 1–5), risk_score
    residual_impact, residual_likelihood, residual_risk_score (may be present)
    controls: list of controls, each with control__name, weight (1–5 effectiveness), likelihood (1–5 occurrence modifier)

    Task:
    1) Compute a priority score per control = weight × likelihood. Aggregate scores across all risks and cluster into 3–5 thematic areas that best match the actual controls and risk names (e.g., Access Control & MFA, Patch & Vulnerability Management, Vendor/Third-Party Risk Management, Network Security & Segmentation, Logging/Monitoring/Detection, Incident Response & BCDR, Ransomware Prevention & Recovery, Cryptography & Key Management). Do not invent themes without support in the inputs.
    2) For each chosen theme, produce 3–5 concrete actions derived from the highest-priority controls. Tailor to the organization_details where appropriate. Prefer steps that reduce both likelihood and impact.
    3) Each bullet should be 1–2 sentences: start with a clear, imperative recommendation, and (optionally) add a brief explanation or context. Still keep it concise and actionable.
    4) Use only the control__name for reference—do NOT include or reference control IDs, years (e.g., 2024), or quarter references (Q1, Q2, Q3, Q4) anywhere in the output.
    5) Do not introduce controls that are not represented in the provided controls list.

    Output format (STRICT):
    <2–3 sentence paragraph explaining that recommendations are prioritized by expected risk reduction based on the provided controls and aligned to the organization’s context.>

    <h3>Theme Title</h3>
    - Bullet 1 (1–2 sentences, no IDs, years, or quarters)
    - Bullet 2
    - Bullet 3
    - Bullet 4 (optional)
    - Bullet 5 (optional)

    Constraints:
    - 3–5 themed subsections, each with 3–5 bullets.
    - No preamble or postscript beyond the sections above.
    - Do NOT reference or display control IDs, years, or quarters in any form.

    Now produce the final Recommendations section using the actual inputs above.
    Risks with controls:
    {risks_with_controls}
    """

    system_message = {"role": "system", "content": prompt}
    completion = llm.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message],
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()
|
||
|
||
def generate_key_findings(document, top_10_risks):
    """Generate the report's "Key Findings" section with the chat model.

    Args:
        document: Object exposing ``organization`` (whose ``name`` is quoted
            in the prompt's fixed introduction).
        top_10_risks: Candidate risks; interpolated into the prompt via its
            string representation.

    Returns:
        str: The model's reply with surrounding whitespace stripped.
    """
    llm = OpenAI(api_key=settings.OPENAI_API_KEY)

    def describe_organization(org):
        # Local variant of the field extraction that only leaves out
        # ``email``, so the organization name is part of the details.
        hidden = {"email"}
        described = {}

        for model_field in org._meta.get_fields():
            attr = model_field.name
            if attr in hidden or not hasattr(org, attr):
                continue
            current = getattr(org, attr)
            if current:
                label = getattr(model_field, 'help_text', '').strip()
                described[label if label else attr] = current
        return described

    organization_details = describe_organization(document.organization)

    prompt = f"""
    You are an AI assistant tasked with generating a "Key Findings" section for a cybersecurity assessment report. Your output must be structured precisely, extracting and presenting the top 3 risks.

    From the following list of risks, select the 3 most critical for the organization and generate the as specified.

    List of risks:
    {top_10_risks}
    Organization details:
    {organization_details}

    Introduction: The description field must begin with the following exact text:
    "The assessment revealed several areas where { document.organization.name } faces heightened cybersecurity risks. These risks pose significant threats to operational continuity, sensitive data, and regulatory compliance. The top risks identified are:"

    Risk Presentation:
    Identify the top 3 risks from the list above.
    For each of these top 3 risks, present it as a bulleted item within the description field, following this format:
    "- [Risk Name]: [Concise, professionally phrased description of the risk's significance in context of the organization, likelihood, or impact.]"

    Description Derivation:
    The [Risk Name] part should be the actual name of the risk from the input data (e.g., {{ item.risk.name }}).
    The [Concise, professionally phrased description] part must be synthesized from the provided risk_description field (e.g., {{ item.risk_description }}) associated with that risk. Aim to create a polished, impactful summary that clearly explains the risk's context, severity, or contributing factors.

    Return it as plain text in the following format:
    Output Format(STRICT):
    Introduction
    - <b> Risk 1 </b> : Brief description of Risk 1
    - <b> Risk 2 </b> : Brief description of Risk 2
    - <b> Risk 3 </b> : Brief description of Risk 3
    """

    system_message = {"role": "system", "content": prompt}
    completion = llm.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message],
    )
    return completion.choices[0].message.content.strip()
|
||
|
||
|
||
def generate_pdf(document):
    """Render the document's web page to PDF and wrap it in an HTTP response.

    Args:
        document: Object exposing ``id``; the page at
            ``<site_domain>/document/<id>/`` is fetched and rendered.

    Returns:
        HttpResponse: ``application/pdf`` response served inline with the
        filename ``document_<id>.pdf``.
    """
    page_url = f"{site_domain}/document/{document.id}/"
    rendered = HTML(url=page_url).write_pdf()

    pdf_response = HttpResponse(rendered, content_type='application/pdf')
    disposition = f'inline; filename=document_{document.id}.pdf'
    pdf_response['Content-Disposition'] = disposition
    return pdf_response
|
||
|
||
def generate_first_page_image(document):
    """Render the document to PDF and return its first page as a JPEG stream.

    Args:
        document: Object exposing ``id``; the page at
            ``<site_domain>/document/<id>/`` is fetched and rendered.

    Returns:
        io.BytesIO: In-memory JPEG (quality 90) of page 1, rewound to the
        start so it can be read immediately.
    """
    page_url = f"{site_domain}/document/{document.id}/"
    rendered_pdf = HTML(url=page_url).write_pdf()

    # Imported here rather than at module level, as in the original.
    from pdf2image import convert_from_bytes
    pages = convert_from_bytes(rendered_pdf, first_page=1, last_page=1)
    cover = pages[0]

    jpeg_stream = io.BytesIO()
    cover.save(jpeg_stream, format="JPEG", quality=90)
    jpeg_stream.seek(0)
    return jpeg_stream
|
||
|
||
def calculate_aggregate_weight(controls):
    """Return the sum of the ``'weight'`` entries of the given controls.

    Args:
        controls: Iterable of mappings that each contain a ``'weight'`` key.

    Returns:
        The total weight; ``0`` for an empty iterable.
    """
    running_total = 0
    for entry in controls:
        running_total += entry['weight']
    return running_total
|
||
|
||
def calculate_aggregate_likelihood(controls):
    """Return the sum of the ``'likelihood'`` entries of the given controls.

    Args:
        controls: Iterable of mappings that each contain a ``'likelihood'``
            key.

    Returns:
        The total likelihood; ``0`` for an empty iterable.
    """
    running_total = 0
    for entry in controls:
        running_total += entry['likelihood']
    return running_total
|
||
|
||
def map_weight_to_impact_likelihood(total_weight, total_likelihood, max_weight):
    """Scale aggregate totals down to the 1.0-5.0 impact/likelihood range.

    Each total is divided by 10 and clamped to ``[1.0, 5.0]``.

    Args:
        total_weight: Aggregate control weight.
        total_likelihood: Aggregate control likelihood.
        max_weight: Accepted for interface compatibility; not used in the
            calculation.

    Returns:
        tuple[float, float]: ``(impact, likelihood)``.
    """
    def to_scale(total):
        at_least_one = max(1.0, total / 10.0)
        return min(5.0, at_least_one)

    scaled_impact = to_scale(total_weight)
    scaled_likelihood = to_scale(total_likelihood)
    return scaled_impact, scaled_likelihood
|
||
|
||
|
||
def _render_risk_matrix(likelihoods, impacts, risk_ids):
    """Plot labelled risk markers over the matrix image; return base64 PNG.

    Args:
        likelihoods: X coordinate of each risk marker.
        impacts: Y coordinate of each risk marker.
        risk_ids: Label drawn in the centre of each marker.

    Returns:
        str: Base64-encoded PNG of the rendered chart.

    Raises:
        FileNotFoundError: If the static background image cannot be located.
    """
    bg_img_path = find('img/graph_matrix.png')
    if bg_img_path is None:
        # Fail with a clear message instead of an obscure imread() error.
        raise FileNotFoundError(
            "Static file 'img/graph_matrix.png' could not be located"
        )
    bg_img = mpimg.imread(bg_img_path)

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.imshow(bg_img, extent=[0.0, 5.4, 0.0, 5.4], aspect='auto')

        ax.scatter(
            likelihoods, impacts,
            c="blue", edgecolors="white", s=500, alpha=0.9
        )

        for risk_id, x_pos, y_pos in zip(risk_ids, likelihoods, impacts):
            ax.annotate(
                str(risk_id),
                (x_pos, y_pos),
                color="white",
                fontsize=12,
                ha="center",
                va="center",
                weight="bold",
            )

        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])

        for side in ('top', 'right', 'left', 'bottom'):
            ax.spines[side].set_visible(False)

        # Save through the figure object rather than pyplot's implicit
        # "current figure", which may be a different figure.
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format="png", transparent=True, bbox_inches='tight', pad_inches=0)
            image_png = buffer.getvalue()
    finally:
        # Always release this specific figure, even if rendering failed;
        # pyplot keeps figures alive until they are explicitly closed.
        plt.close(fig)

    return base64.b64encode(image_png).decode("utf-8")


def generate_risk_graph(risks_with_controls):
    """Render the inherent-risk matrix chart.

    Args:
        risks_with_controls: Sequence of dicts with ``'impact'``,
            ``'likelihood'`` and ``'risk'`` (a mapping containing ``'id'``).

    Returns:
        str: Base64-encoded PNG of the chart.
    """
    impacts = [risk['impact'] for risk in risks_with_controls]
    likelihoods = [risk['likelihood'] for risk in risks_with_controls]
    risk_ids = [risk['risk']['id'] for risk in risks_with_controls]
    return _render_risk_matrix(likelihoods, impacts, risk_ids)


def generate_residual_risk_graph(risks_with_controls):
    """Render the residual-risk matrix chart.

    Args:
        risks_with_controls: Sequence of dicts with ``'risk'`` (a mapping
            containing ``'id'``) and optionally ``'residual_impact'`` and
            ``'residual_likelihood'``; missing values are plotted at 0.

    Returns:
        str: Base64-encoded PNG of the chart.
    """
    impacts = [risk.get('residual_impact', 0) for risk in risks_with_controls]
    likelihoods = [risk.get('residual_likelihood', 0) for risk in risks_with_controls]
    risk_ids = [risk['risk']['id'] for risk in risks_with_controls]
    return _render_risk_matrix(likelihoods, impacts, risk_ids)
|
||
|
||
def generate_demo_code(length=6):
    """Return a random demo code of ``length`` characters.

    The alphabet is upper-case letters and digits without the look-alike
    characters ``I``, ``O``, ``0`` and ``1``.

    Args:
        length: Number of characters to generate; values <= 0 give ``''``.

    Returns:
        str: The generated code.
    """
    # Local import so this block does not depend on the module's import list.
    # ``secrets`` draws from the OS CSPRNG; the ``random`` module is
    # documented as unsuitable for codes that must not be guessable.
    import secrets

    chars = 'ABCDEFGHJKLMNPQRSTUVWXYZ23456789'
    return ''.join(secrets.choice(chars) for _ in range(length))