Files
old-riskletpy/backend/core/utils.py

498 lines
20 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Clients, framework helpers and models used by the report utilities below.
from openai import OpenAI
from django.conf import settings
from .models import Risk, Control
from weasyprint import HTML
from django.http import HttpResponse
from PIL import Image
import io
import base64
import matplotlib
# Select the non-interactive Agg backend; this call is deliberately placed
# before the pyplot import so charts are rendered off-screen to PNG buffers.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from django.contrib.staticfiles.finders import find
import matplotlib.image as mpimg
# Base URL used by generate_pdf / generate_first_page_image to build the
# "<site_domain>/document/<id>/" link that gets rendered.
site_domain = settings.SITE_DOMAIN
import random
def extract_organization_details(organization, excluded_fields=("name", "email")):
    """Collect the populated fields of *organization* into a plain dict.

    Every field reported by ``organization._meta.get_fields()`` is inspected.
    A field is skipped when its name is in *excluded_fields*, when the
    instance has no attribute of that name, or when the attribute value is
    falsy (empty string, ``None``, ``0``, ...).

    Args:
        organization: Object exposing ``_meta.get_fields()`` and one
            attribute per field name.
        excluded_fields: Iterable of field names to leave out. Defaults to
            ``("name", "email")``, which is the original hard-coded set, so
            existing callers are unaffected. Callers needing a different
            exclusion set can pass one instead of copying this function.

    Returns:
        dict mapping each field's stripped ``help_text`` (or the field name
        when no help text is set) to the attribute value.
    """
    excluded = frozenset(excluded_fields)
    details = {}
    for field in organization._meta.get_fields():
        if field.name in excluded or not hasattr(organization, field.name):
            continue
        value = getattr(organization, field.name)
        if not value:
            continue
        # ``or ''`` guards against fields whose help_text is explicitly None.
        label = (getattr(field, 'help_text', '') or '').strip()
        details[label or field.name] = value
    return details
def get_top_risk(organization):
    """Ask the chat model for the 10 most critical risk IDs for *organization*.

    All ``Risk`` rows are rendered into a text listing and sent, together
    with the organization details from ``extract_organization_details``, in
    a single system prompt to the ``gpt-4o-mini`` model.

    Args:
        organization: Organization instance passed to
            ``extract_organization_details``.

    Returns:
        list[int]: the risk IDs parsed from the comma-separated reply, in
        the order returned. Tokens that are not plain integers are dropped,
        so the list may hold fewer than 10 entries (or be empty).
    """
    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    risk_list = []
    for risk in Risk.objects.all():
        # NOTE: `tretiary_impact` / `businnes_impact_severity` are the
        # attribute names read from the Risk objects; they are used as-is.
        risk_list.append(f"""
Risk ID: {risk.risk_id}
Category: {risk.category}
Name: {risk.risk_name}
Primary Impact: {risk.primary_impact}
Secondary Impact: {risk.secondary_impact}
Tertiary Impact: {risk.tretiary_impact}
Detection Difficulty: {risk.detection_difficulty}
Recovery Complexity: {risk.recovery_complexity}
Business Impact Severity: {risk.businnes_impact_severity}
""")
    organization_details = extract_organization_details(organization)
    prompt = f"""
You are an AI risk assessor. Based on the following company details and list of known risks,
identify the 10 most critical risks for this company. Respond only with risk IDs.
Company Details:
{organization_details}
List of Risks:
{risk_list}
Provide only the 10 most critical risk IDs in a simple comma-separated format, e.g "1,3,7,12,..."
"""
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": prompt}]
    )
    # The message content may be None; treat that as an empty reply.
    content = response.choices[0].message.content or ""
    risk_ids = []
    for token in content.split(","):
        # Strip whitespace, quotes and a trailing period before the digit
        # check: without this a reply such as "1, 3, 7" loses every ID after
        # the first, because " 3".isdigit() is False.
        token = token.strip(" \t\r\n\"'.")
        # isdecimal() only accepts characters that int() can convert.
        if token.isdecimal():
            risk_ids.append(int(token))
    return risk_ids
def get_controls_for_risk(risk, organization):
    """Ask the chat model to pick 10 weighted controls that mitigate *risk*.

    The model is prompted with every ``Control`` and must answer with lines
    of the form ``<control_id> : <weight> : <likelihood>``. Lines are kept
    only when the ID is a known control not yet selected and both scores are
    within 1..5. If the first reply yields fewer than 10 usable lines, the
    model is asked again for the missing number, choosing only from controls
    not yet selected.

    Compared with an unbounded retry loop, this implementation:
      * gives up after ``max_retries`` follow-up requests instead of calling
        the API forever when replies stay unusable;
      * stops collecting once 10 controls are gathered, so a reply with
        extra lines no longer pushes the count past 10 and yields ``[]``;
      * checks for an exhausted control pool before sending a retry request.

    Args:
        risk: Object with a ``risk_name`` attribute.
        organization: Organization instance passed to
            ``extract_organization_details``.

    Returns:
        list[tuple[int, int, int]]: exactly 10 ``(control_id, weight,
        likelihood)`` tuples, or ``[]`` when 10 valid controls could not be
        obtained.
    """
    required_count = 10
    # Upper bound on follow-up requests; each one is a paid API call.
    max_retries = 5

    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    all_controls = Control.objects.all()
    organization_details = extract_organization_details(organization)
    control_list = [f"Control ID: {control.id}, Control Name: {control.name}" for control in all_controls]
    control_map = {control.id: control.name for control in all_controls}
    valid_control_ids = set(control_map)

    selected_controls = []
    control_ids_seen = set()

    def fetch_controls(prompt):
        """Send *prompt* as a system message and return the stripped reply text."""
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "system", "content": prompt}]
        )
        return (response.choices[0].message.content or "").strip()

    def collect_controls(result):
        """Parse ``id : weight : likelihood`` lines of *result* into selected_controls."""
        for line in result.split("\n"):
            if len(selected_controls) >= required_count:
                break
            parts = line.strip().split(":")
            if len(parts) != 3:
                continue
            # Keep only the digits of each part so decorations such as
            # "Control 12" or "weight 4" are tolerated.
            digits = ["".join(filter(str.isdigit, part)) for part in parts]
            if not all(digits):
                continue
            try:
                control_id, weight, likelihood = (int(value) for value in digits)
            except ValueError:
                # str.isdigit accepts a few characters int() rejects.
                continue
            if (
                control_id in valid_control_ids
                and control_id not in control_ids_seen
                and 1 <= weight <= 5
                and 1 <= likelihood <= 5
            ):
                selected_controls.append((control_id, weight, likelihood))
                control_ids_seen.add(control_id)

    prompt = f"""
You are an expert in cybersecurity risk management. Given the risk "{risk.risk_name}" and its associated organization details "{organization_details}",
your task is to select **exactly {required_count} unique controls** from the provided list that best mitigate this risk. Each control should be assigned:
- A weight between **1 and 5** (1 = low impact, 5 = high impact).
- A likelihood score between **1 and 5** (1 = rare occurrence, 5 = highly likely).
### Rules:
1. **Each control ID must be unique** (no duplicates).
2. **Only return control IDs, weights, and likelihood scores** in the exact format below.
3. **Weights must be between 1 and 5** (1 = low impact, 5 = high impact).
4. **Likelihood scores must be between 1 and 5** (1 = rare occurrence, 5 = highly likely).
5. **Do NOT add explanations, descriptions, or extra text.**
6. **Ensure that control IDs are randomly distributed and diverse across different categories.**
### Available Controls:
{control_list}
### Expected Response Format (STRICTLY FOLLOW THIS FORMAT):
<control_id> : <weight> : <likelihood>
<control_id> : <weight> : <likelihood>
### Example Correct Response (NO DUPLICATES):
12 : 5 : 2
45 : 4 : 1
⚠️ **If you provide duplicate control IDs, your response will be rejected. Ensure all control IDs are unique.**
⚠️ **Follow the response format exactly. Any deviation will be considered invalid.**
"""
    collect_controls(fetch_controls(prompt))

    for _ in range(max_retries):
        if len(selected_controls) >= required_count:
            break
        remaining_controls = valid_control_ids - control_ids_seen
        if not remaining_controls:
            # Nothing left to offer the model; a further request cannot help.
            break
        missing_count = required_count - len(selected_controls)
        remaining_controls_list = [
            f"Control ID: {cid}, Control Name: {control_map[cid]}"
            for cid in sorted(remaining_controls)
        ]
        retry_prompt = f"""
You are an expert in cybersecurity risk management. Given the risk "{risk.risk_name}" and the organization's details "{organization_details}",
your task is to select **exactly {missing_count} unique controls** from the provided list that best mitigate this risk. Each control should be assigned:
- A **weight** between **1 and 5** based on its effectiveness in reducing the risk.
- A likelihood score between **1 and 5** (1 = rare occurrence, 5 = highly likely).
### Rules:
1. **Each control ID must be unique** (no duplicates).
2. **Only return control IDs, weights, and likelihood scores** in the exact format below.
3. **Weights must be between 1 and 5** (1 = low impact, 5 = high impact).
4. **Likelihood scores must be between 1 and 5** (1 = rare occurrence, 5 = highly likely).
5. **Do NOT add explanations, descriptions, or extra text.**
6. **Ensure that control IDs are diverse and well-distributed across different categories.**
### Available Controls:
{remaining_controls_list}
### Expected Response Format (STRICTLY FOLLOW THIS FORMAT):
<control_id> : <weight> : <likelihood>
<control_id> : <weight> : <likelihood>
### Example Correct Response (NO DUPLICATES):
12 : 4 : 5
45 : 5 : 3
⚠️ **If you provide duplicate control IDs, your response will be rejected. Ensure all control IDs are unique.**
⚠️ **Follow the response format exactly. Any deviation will be considered invalid.**
"""
        collect_controls(fetch_controls(retry_prompt))

    return selected_controls if len(selected_controls) == required_count else []
def generate_recommendations(risks_with_controls, organization):
    """Generate the report's "Recommendations" section with the chat model.

    The prompt embeds the organization details (from
    ``extract_organization_details``) and *risks_with_controls* verbatim and
    asks for themed, bulleted recommendations.

    The numeric ranges in the prompt are written with an explicit hyphen
    ("1-5", "3-5", "1-2", "2-3"). Without the separator the instructions
    read as "35 thematic areas" or "12 sentences", which asks the model for
    the wrong amount of output.

    Args:
        risks_with_controls: Risk/control data interpolated into the prompt
            via its string representation.
        organization: Organization instance passed to
            ``extract_organization_details``.

    Returns:
        str: the model's reply with surrounding whitespace removed (empty
        string when the reply has no content).
    """
    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    organization_details = extract_organization_details(organization)
    prompt = f"""
You are an AI assistant tasked with generating the Recommendations section for a cybersecurity assessment report. Use the organization's context and the list of risks with their proposed controls to produce concise, actionable, and prioritized guidance.
Inputs:
- Organization details:
{organization_details}
- Risks with controls (Python-like list of dicts). Each item includes:
risk: id, name, category, risk_description (or similar)
r_impact (inherent impact 1-5), r_likelihood (inherent likelihood 1-5), risk_score
residual_impact, residual_likelihood, residual_risk_score (may be present)
controls: list of controls, each with control__name, weight (1-5 effectiveness), likelihood (1-5 occurrence modifier)
Task:
1) Compute a priority score per control = weight × likelihood. Aggregate scores across all risks and cluster into 3-5 thematic areas that best match the actual controls and risk names (e.g., Access Control & MFA, Patch & Vulnerability Management, Vendor/Third-Party Risk Management, Network Security & Segmentation, Logging/Monitoring/Detection, Incident Response & BCDR, Ransomware Prevention & Recovery, Cryptography & Key Management). Do not invent themes without support in the inputs.
2) For each chosen theme, produce 3-5 concrete actions derived from the highest-priority controls. Tailor to the organization_details where appropriate. Prefer steps that reduce both likelihood and impact.
3) Each bullet should be 1-2 sentences: start with a clear, imperative recommendation, and (optionally) add a brief explanation or context. Still keep it concise and actionable.
4) Use only the control__name for reference—do NOT include or reference control IDs, years (e.g., 2024), or quarter references (Q1, Q2, Q3, Q4) anywhere in the output.
5) Do not introduce controls that are not represented in the provided controls list.
Output format (STRICT):
<2-3 sentence paragraph explaining that recommendations are prioritized by expected risk reduction based on the provided controls and aligned to the organization's context.>
<h3>Theme Title</h3>
- Bullet 1 (1-2 sentences, no IDs, years, or quarters)
- Bullet 2
- Bullet 3
- Bullet 4 (optional)
- Bullet 5 (optional)
Constraints:
- 3-5 themed subsections, each with 3-5 bullets.
- No preamble or postscript beyond the sections above.
- Do NOT reference or display control IDs, years, or quarters in any form.
Now produce the final Recommendations section using the actual inputs above.
Risks with controls:
{risks_with_controls}
"""
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": prompt}]
    )
    # The message content may be None; fall back to an empty string.
    return (response.choices[0].message.content or "").strip()
def generate_key_findings(document, top_10_risks):
    """Produce the "Key Findings" text of a report via the chat model.

    Args:
        document: Object whose ``organization`` attribute supplies the
            organization details and the name quoted in the introduction.
        top_10_risks: Risk data interpolated into the prompt via its string
            representation; the model is asked to pick the top 3 from it.

    Returns:
        str: the model's reply with surrounding whitespace removed.
    """
    client = OpenAI(api_key=settings.OPENAI_API_KEY)

    def _collect_details(org):
        # Local variant of the organization summary: only "email" is left
        # out, so the organization name is part of the details here.
        skipped = {"email"}
        collected = {}
        for model_field in org._meta.get_fields():
            if model_field.name in skipped:
                continue
            if not hasattr(org, model_field.name):
                continue
            current = getattr(org, model_field.name)
            if not current:
                continue
            hint = getattr(model_field, 'help_text', '').strip()
            collected[hint if hint else model_field.name] = current
        return collected

    organization_details = _collect_details(document.organization)
    prompt = f"""
You are an AI assistant tasked with generating a "Key Findings" section for a cybersecurity assessment report. Your output must be structured precisely, extracting and presenting the top 3 risks.
From the following list of risks, select the 3 most critical for the organization and generate the as specified.
List of risks:
{top_10_risks}
Organization details:
{organization_details}
Introduction: The description field must begin with the following exact text:
"The assessment revealed several areas where { document.organization.name } faces heightened cybersecurity risks. These risks pose significant threats to operational continuity, sensitive data, and regulatory compliance. The top risks identified are:"
Risk Presentation:
Identify the top 3 risks from the list above.
For each of these top 3 risks, present it as a bulleted item within the description field, following this format:
"- [Risk Name]: [Concise, professionally phrased description of the risk's significance in context of the organization, likelihood, or impact.]"
Description Derivation:
The [Risk Name] part should be the actual name of the risk from the input data (e.g., {{ item.risk.name }}).
The [Concise, professionally phrased description] part must be synthesized from the provided risk_description field (e.g., {{ item.risk_description }}) associated with that risk. Aim to create a polished, impactful summary that clearly explains the risk's context, severity, or contributing factors.
Return it as plain text in the following format:
Output Format(STRICT):
Introduction
- <b> Risk 1 </b> : Brief description of Risk 1
- <b> Risk 2 </b> : Brief description of Risk 2
- <b> Risk 3 </b> : Brief description of Risk 3
"""
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "system", "content": prompt}]
    )
    return completion.choices[0].message.content.strip()
def generate_pdf(document):
    """Render the document page to PDF and wrap it in an HTTP response.

    The page at ``<site_domain>/document/<document.id>/`` is loaded by
    WeasyPrint's ``HTML`` and converted with ``write_pdf()``.

    Returns:
        HttpResponse with content type ``application/pdf`` and an inline
        ``Content-Disposition`` naming the file ``document_<id>.pdf``.
    """
    page_url = f"{site_domain}/document/{document.id}/"
    rendered_pdf = HTML(url=page_url).write_pdf()

    pdf_response = HttpResponse(rendered_pdf, content_type='application/pdf')
    disposition = f'inline; filename=document_{document.id}.pdf'
    pdf_response['Content-Disposition'] = disposition
    return pdf_response
def generate_first_page_image(document):
    """Return the first page of the document's PDF as a JPEG byte stream.

    The page at ``<site_domain>/document/<document.id>/`` is rendered to PDF
    with WeasyPrint, then only page 1 is rasterized through ``pdf2image``.

    Returns:
        io.BytesIO holding the JPEG (quality 90), positioned at offset 0.
    """
    page_url = f"{site_domain}/document/{document.id}/"
    rendered_pdf = HTML(url=page_url).write_pdf()

    # Imported here rather than at module level, as in the original code.
    from pdf2image import convert_from_bytes

    pages = convert_from_bytes(rendered_pdf, first_page=1, last_page=1)
    cover = pages[0]

    jpeg_stream = io.BytesIO()
    cover.save(jpeg_stream, format="JPEG", quality=90)
    jpeg_stream.seek(0)  # rewind so callers read from the start
    return jpeg_stream
def calculate_aggregate_weight(controls):
    """Return the sum of the ``'weight'`` values over all *controls*.

    *controls* is an iterable of mappings; an empty iterable gives ``0``.
    """
    return sum(entry['weight'] for entry in controls)
def calculate_aggregate_likelihood(controls):
    """Return the sum of the ``'likelihood'`` values over all *controls*.

    *controls* is an iterable of mappings; an empty iterable gives ``0``.
    """
    return sum(entry['likelihood'] for entry in controls)
def map_weight_to_impact_likelihood(total_weight, total_likelihood, max_weight):
    """Scale aggregate totals down to the 1.0..5.0 risk-matrix range.

    Each total is divided by 10 and clamped to ``[1.0, 5.0]``.

    Args:
        total_weight: Aggregate control weight.
        total_likelihood: Aggregate control likelihood.
        max_weight: Accepted but not used by the computation.

    Returns:
        tuple ``(impact, likelihood)`` of floats.
    """
    floor, ceiling = 1.0, 5.0

    def _scaled(total):
        return min(ceiling, max(floor, total / 10.0))

    return _scaled(total_weight), _scaled(total_likelihood)
def _draw_risk_matrix_background(ax):
    """Paint a 5x5 risk matrix onto *ax* as colored, score-labelled cells.

    Cell ``(x, y)`` (likelihood on x, impact on y, both 1..5) is a unit
    square centred on the integer coordinates, filled according to its score
    ``x * y`` and labelled with that score. Ticks, grid and the top/right
    spines are hidden.
    """
    # (inclusive upper score bound, fill color), checked in ascending order;
    # scores above the last bound use `highest_band`.
    bands = (
        (2, '#1abc9c'),
        (4, '#2ecc71'),
        (9, '#f1c40f'),
        (15, '#f39c12'),
    )
    highest_band = '#e74c3c'

    def score_color(score: int) -> str:
        for upper_bound, fill in bands:
            if score <= upper_bound:
                return fill
        return highest_band

    ax.set_xlim(0.5, 5.5)
    ax.set_ylim(0.5, 5.5)
    ax.set_aspect('equal')

    for row in range(1, 6):
        for col in range(1, 6):
            cell_score = col * row
            cell = plt.Rectangle(
                (col - 0.5, row - 0.5), 1, 1,
                facecolor=score_color(cell_score), edgecolor='#dddddd', linewidth=1.0, zorder=0
            )
            ax.add_patch(cell)

            # Black text for scores 5..9, white for all other scores.
            if 5 <= cell_score <= 9:
                label_color = '#000000'
            else:
                label_color = '#ffffff'
            label_weight = 'normal' if cell_score < 15 else 'bold'
            ax.text(
                col, row, str(cell_score),
                ha='center', va='center', fontsize=9,
                color=label_color, alpha=0.95, zorder=1, fontweight=label_weight
            )

    ax.set_xlabel('Likelihood', labelpad=10)
    ax.set_ylabel('Impact', labelpad=10)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.tick_params(length=0)
    ax.grid(False)
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
def generate_risk_graph(risks_with_controls):
    """Plot risks on the 5x5 matrix and return the chart as base64 PNG.

    Each risk is drawn as a marker at ``(likelihood, impact)`` and labelled
    with its ``risk['risk']['id']``, on top of the background drawn by
    ``_draw_risk_matrix_background``.

    The figure is saved and closed through its own handle inside
    ``try/finally``. It is therefore released even when drawing or saving
    raises, and the function does not depend on pyplot's implicit "current
    figure".

    Args:
        risks_with_controls: Iterable of dicts with ``'impact'``,
            ``'likelihood'`` and ``'risk'`` (a dict containing ``'id'``).

    Returns:
        str: base64-encoded PNG image data (UTF-8 text).

    Raises:
        KeyError: if an entry lacks one of the required keys.
    """
    impacts = [risk['impact'] for risk in risks_with_controls]
    likelihoods = [risk['likelihood'] for risk in risks_with_controls]
    risk_ids = [risk['risk']['id'] for risk in risks_with_controls]

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        _draw_risk_matrix_background(ax)
        ax.scatter(
            likelihoods, impacts,
            c="#1f6feb", edgecolors="white", linewidths=1.5, s=420, alpha=0.95, zorder=3
        )
        for risk_id, x_pos, y_pos in zip(risk_ids, likelihoods, impacts):
            ax.annotate(
                str(risk_id),
                (x_pos, y_pos),
                color="white",
                fontsize=12,
                ha="center",
                va="center",
                weight="bold",
                zorder=4,
            )
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format="png", transparent=True, bbox_inches='tight', pad_inches=0.1)
            image_png = buffer.getvalue()
    finally:
        # Always release the figure; pyplot keeps figures alive until closed.
        plt.close(fig)
    return base64.b64encode(image_png).decode("utf-8")
def generate_residual_risk_graph(risks_with_controls):
    """Plot residual risks over the static matrix image; return base64 PNG.

    Each risk is drawn as a marker at ``(residual_likelihood,
    residual_impact)``, with a missing value treated as ``0``, and labelled
    with its ``risk['risk']['id']``. The background is the static file
    ``img/graph_matrix.png`` located through the staticfiles finders.

    The figure is saved and closed through its own handle inside
    ``try/finally``, so it is released even when drawing or saving raises. A
    missing background image is reported with an explicit error instead of
    failing inside ``imread``.

    Args:
        risks_with_controls: Iterable of dicts with ``'risk'`` (a dict
            containing ``'id'``) and optional ``'residual_impact'`` /
            ``'residual_likelihood'``.

    Returns:
        str: base64-encoded PNG image data (UTF-8 text).

    Raises:
        FileNotFoundError: if the static background image cannot be found.
        KeyError: if an entry lacks ``'risk'`` or its ``'id'``.
    """
    impacts = [risk.get('residual_impact', 0) for risk in risks_with_controls]
    likelihoods = [risk.get('residual_likelihood', 0) for risk in risks_with_controls]
    risk_ids = [risk['risk']['id'] for risk in risks_with_controls]

    bg_img_path = find('img/graph_matrix.png')
    if not bg_img_path:
        # find() returns None when no finder knows the file.
        raise FileNotFoundError("Static file 'img/graph_matrix.png' was not found")
    bg_img = mpimg.imread(bg_img_path)

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.imshow(bg_img, extent=[0.0, 5.4, 0.0, 5.4], aspect='auto')
        ax.scatter(
            likelihoods, impacts,
            c="blue", edgecolors="white", s=500, alpha=0.9
        )
        for risk_id, x_pos, y_pos in zip(risk_ids, likelihoods, impacts):
            ax.annotate(
                str(risk_id),
                (x_pos, y_pos),
                color="white",
                fontsize=12,
                ha="center",
                va="center",
                weight="bold",
            )
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        for side in ('top', 'right', 'left', 'bottom'):
            ax.spines[side].set_visible(False)
        with io.BytesIO() as buffer:
            fig.savefig(buffer, format="png", transparent=True, bbox_inches='tight', pad_inches=0)
            image_png = buffer.getvalue()
    finally:
        # Always release the figure; pyplot keeps figures alive until closed.
        plt.close(fig)
    return base64.b64encode(image_png).decode("utf-8")
def generate_demo_code(length=6):
    """Return a random code of *length* characters.

    Characters are drawn, with replacement, from upper-case letters and
    digits; the alphabet leaves out ``I``, ``O``, ``0`` and ``1``.
    """
    # NOTE(review): uses the `random` module; if these codes ever gate
    # access, `secrets` would be the appropriate source — confirm usage.
    alphabet = 'ABCDEFGHJKLMNPQRSTUVWXYZ23456789'
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)