import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

from bayes_net_simple import BayesianInferenceClassifier
from bayes_net_complex import PerfectOracleBayesCaptcha

def main():
    """Fit several classifiers on ``dataset.csv`` and print their accuracies.

    Reads the CSV from the current working directory, holds out 30% of the
    rows as a test set (fixed seed 42), fits every model on the remaining
    rows, and prints a table with the train and test accuracy of each model.
    """
    frame = pd.read_csv('dataset.csv')

    # Columns fed to the models; 'Is_Human' is the column being predicted.
    feature_columns = [
        'Check_Time',
        'Challenge_Time',
        'Challenge_Errors',
        'Mouse_Path_Entropy',
        'Click_Speed',
        'Scroll_Count',
        'History_Captcha_Success',
        'History_Captcha_Count',
        'IP_Suspicious',
        'Device_Trust_Score',
    ]
    inputs = frame[feature_columns]
    labels = frame['Is_Human']

    X_train, X_test, y_train, y_test = train_test_split(
        inputs, labels, test_size=0.3, random_state=42,
    )

    # Display name -> unfitted estimator. Insertion order drives both the
    # progress messages and the row order of the final table.
    models = {
        # Reference point: always predicts the constant label 1.
        'Baseline': DummyClassifier(strategy='constant', constant=1),
        'Naive Bayes': GaussianNB(),
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'Decision Tree': DecisionTreeClassifier(random_state=42, max_depth=6),
        'Random Forest': RandomForestClassifier(
            n_estimators=100, max_depth=6, random_state=42,
        ),
        'Gradient Boosting': GradientBoostingClassifier(
            n_estimators=100,
            max_depth=3,
            learning_rate=0.05,
            random_state=42,
        ),
        'AdaBoost': AdaBoostClassifier(n_estimators=50, random_state=42),
        # Project-local classifier imported from bayes_net_simple.
        'BayesNet': BayesianInferenceClassifier(),
    }

    # Fit everything first so all "Training" messages precede "Testing".
    for name, estimator in models.items():
        print("Training", name, "...")
        estimator.fit(X_train, y_train)

    # Score each fitted model on the rows it was trained on and on the
    # held-out rows.
    accuracy = {}
    for name, estimator in models.items():
        print("Testing", name, "...")
        train_preds, test_preds = estimator.predict(X_train), estimator.predict(X_test)
        train_acc = accuracy_score(y_train, train_preds)
        test_acc = accuracy_score(y_test, test_preds)
        accuracy[name] = {'train': train_acc, 'test': test_acc}

    # Report: one row per model, in registration order.
    print('Model\t\t\tTrain Accuracy\tTest Accuracy')
    print('--------------------------------------------------------------')
    for model, acc in accuracy.items():
        print(f'{model:20} \t{acc["train"]:.4f}  \t{acc["test"]:.4f}')

def accuracy_score(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both inputs are converted to NumPy arrays and compared element by
    element *by position*. This makes the result independent of the
    container type: plain lists/tuples work, and two pandas Series are
    compared positionally rather than by index label (label-based
    comparison raises when the indices differ, e.g. a shuffled test split
    versus predictions carrying a fresh 0..n-1 index).

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        The accuracy as a ``float``; NaN when both inputs are empty.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    true_labels = np.asarray(y_true)
    predicted_labels = np.asarray(y_pred)

    # Refuse mismatched inputs explicitly: NumPy would otherwise broadcast
    # or collapse the comparison and yield a misleading score.
    if true_labels.shape != predicted_labels.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got "
            f"{true_labels.shape} and {predicted_labels.shape}"
        )

    # Accuracy is undefined without samples; return NaN without triggering
    # NumPy's empty-mean RuntimeWarning.
    if true_labels.size == 0:
        return float('nan')

    return float(np.mean(true_labels == predicted_labels))

# Run the model comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
