import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


def compute_precision_recall_curve_simple(y_true, y_score, n_thresholds=100):
    """Compute precision and recall over evenly spaced score thresholds.

    A sample is predicted positive when its score is ``>=`` the threshold.
    Labels equal to 1 count as positives and labels equal to 0 as negatives;
    any other label value is ignored.

    Args:
        y_true: Array-like of ground-truth labels (0/1 or booleans).
        y_score: Array-like of scores, one per entry of ``y_true``.
        n_thresholds: Number of thresholds, evenly spaced over [0.0, 1.0].

    Returns:
        A tuple ``(recalls, precisions, thresholds)`` of NumPy arrays, each of
        length ``n_thresholds`` and aligned by index.

    Raises:
        ValueError: If ``y_true`` and ``y_score`` have different lengths.
    """
    y_true = np.asarray(y_true).ravel()
    y_score = np.asarray(y_score).ravel()

    # Fail loudly instead of silently evaluating only the overlapping prefix.
    if y_true.shape[0] != y_score.shape[0]:
        raise ValueError(
            "y_true and y_score must have the same length, got "
            f"{y_true.shape[0]} and {y_score.shape[0]}"
        )

    thresholds = np.linspace(0.0, 1.0, n_thresholds)

    # Label masks do not depend on the threshold, so build them once.
    is_positive = y_true == 1
    is_negative = y_true == 0
    n_positive = int(np.count_nonzero(is_positive))

    # Defaults double as the edge-case conventions:
    #   precision = 1.0 when nothing is predicted positive (tp + fp == 0)
    #   recall    = 0.0 when there are no actual positives (tp + fn == 0)
    precisions = np.ones(len(thresholds), dtype=float)
    recalls = np.zeros(len(thresholds), dtype=float)

    for i, threshold in enumerate(thresholds):
        predicted_positive = y_score >= threshold
        tp = int(np.count_nonzero(predicted_positive & is_positive))
        fp = int(np.count_nonzero(predicted_positive & is_negative))

        if tp + fp > 0:
            precisions[i] = tp / (tp + fp)
        # tp + fn is simply the number of actual positives.
        if n_positive > 0:
            recalls[i] = tp / n_positive

    return recalls, precisions, thresholds


def plot_all_precision_recall_curves(curve_data):
    """
    Draw one precision-recall curve per model on a single shared figure.

    curve_data: dictionary mapping 'Model Name' -> (recalls, precisions);
                the model name is used as the legend label.
    """
    plt.figure(figsize=(7, 7))

    # One line per model, with a marker at each threshold point.
    for label, (recall_values, precision_values) in curve_data.items():
        plt.plot(
            recall_values,
            precision_values,
            marker="o",
            linewidth=1.5,
            label=label,
        )

    # Axis labelling and limits; the y-range leaves headroom above 1.0.
    plt.title("Precision–Recall Curves")
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.xlim(0, 1)
    plt.ylim(0, 1.05)

    plt.grid(True, linestyle=":")
    plt.legend()
    plt.tight_layout()
    plt.show()


def main():
    """Train the classifiers on dataset.csv and plot their precision-recall curves.

    Reads ``dataset.csv`` from the current working directory, holds out 30% of
    the rows for testing, fits each model on the remainder, and plots every
    model's precision-recall curve (50 thresholds) on one figure.
    """
    frame = pd.read_csv("dataset.csv")

    feature_columns = [
        "Check_Time",
        "Challenge_Time",
        "Challenge_Errors",
        "Mouse_Path_Entropy",
        "Click_Speed",
        "Scroll_Count",
        "History_Captcha_Success",
        "History_Captcha_Count",
        "IP_Suspicious",
        "Device_Trust_Score",
    ]

    inputs = frame[feature_columns]
    target = frame["Is_Human"]

    # Fixed random_state keeps the split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        inputs, target, test_size=0.3, random_state=42
    )

    models = {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(
            n_estimators=100, max_depth=6, random_state=42
        ),
    }

    # ---- Fit every model on the training split ----
    for label, estimator in models.items():
        print("Training", label, "...")
        estimator.fit(X_train, y_train)

    def _curve_for(estimator):
        # Score the held-out rows using the second column of predict_proba.
        positive_scores = estimator.predict_proba(X_test)[:, 1]
        recalls, precisions, _ = compute_precision_recall_curve_simple(
            y_true=y_test,
            y_score=positive_scores,
            n_thresholds=50,
        )
        return recalls, precisions

    # ---- Compute and store all PR curves ----
    pr_curves = {label: _curve_for(estimator) for label, estimator in models.items()}

    # ---- Plot them all at once ----
    plot_all_precision_recall_curves(pr_curves)


# Run the training/plotting pipeline only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()