
    Ki                         d Z ddlmZmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZ d	Zd
eeej        f         deeej        f         fdZ G d dee	          ZdS )zOutlier/anomaly detection utilities for MySQL Connector/Python.

Provides a scikit-learn compatible wrapper using HeatWave to score anomalies.
    )OptionalUnionN)OutlierMixin)MyBaseMLModel)ML_TASK)	copy_dict)MySQLConnectionAbstractgh㈵>probreturnc                 |    t          j        | t          dt          z
            }t          j        |d|z
  z            S )z
    Compute logit (logodds) for a probability, clipping to avoid numerical overflow.

    Args:
        prob: Scalar or array of probability values in (0,1).

    Returns:
        logit-transformed probabilities.
       )npclipEPSlog)r
   results     O/var/www/html/analyses/venv/lib/python3.11/site-packages/mysql/ai/ml/outlier.py_get_logitsr   0   s4     WT3C((F6&AJ'(((    c            
          e Zd ZdZ	 	 	 ddedee         dee         dee         fdZde	e
j        ej        f         d	ej        fd
Zde	e
j        ej        f         d	ej        fdZde	e
j        ej        f         d	ej        fdZdS )MyAnomalyDetectora  
    MySQL HeatWave scikit-learn compatible anomaly/outlier detector.

    Flags samples as outliers when the probability of being an anomaly
    exceeds a user-tunable threshold.
    Includes helpers to obtain decision scores and anomaly probabilities
    for ranking.

    Args:
        db_connection (MySQLConnectionAbstract): Active MySQL DB connection.
        model_name (str, optional): Custom model name in the database.
        fit_extra_options (dict, optional): Extra options for fitting.
        score_extra_options (dict, optional): Extra options for scoring/prediction.

    Attributes:
        boundary: Decision threshold boundary in logit space. Derived from
            trained model's catalog info

    Methods:
        predict(X): Predict outlier/inlier labels.
        score_samples(X): Compute anomaly (normal class) logit scores.
        decision_function(X): Compute signed score above/below threshold for ranking.
    Ndb_connection
model_namefit_extra_optionsscore_extra_optionsc                     t          j        | |t          j        ||           t	          |          | _        d| _        dS )a2  
        Initialize an anomaly detector instance with threshold and extra options.

        Args:
            db_connection: Active MySQL DB connection.
            model_name: Optional model name in DB.
            fit_extra_options: Optional extra fit options.
            score_extra_options: Optional extra scoring options.

        Raises:
            ValueError: If outlier_threshold is not in (0,1).
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        )r   r   N)r   __init__r   ANOMALY_DETECTIONr   r   boundary)selfr   r   r   r   s        r   r   zMyAnomalyDetector.__init__W   sN    , 	%!/	
 	
 	
 	
 $--@#A#A )-r   Xr   c                 \    t          j        |                     |          dk     dd          S )a  
        Predict outlier/inlier binary labels for input samples.

        Args:
            X: Samples to predict on.

        Returns:
            ndarray: Values are -1 for outliers, +1 for inliers, as per scikit-learn convention.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
        g        r   )r   wheredecision_function)r    r!   s     r   predictzMyAnomalyDetector.predictw   s+    6 x..q11C7Q???r   c                 ,   |                      |          }| j        o|                                 }|t          d          |d         d                             dd          }|t          d          t          d|z
            | _        || j        z
  S )a  
        Compute signed distance to the outlier threshold.

        Args:
            X: Samples to predict on.

        Returns:
            ndarray: Score > 0 means inlier, < 0 means outlier; |value| gives margin.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
            ValueError:
                If the provided model info does not provide threshold
        Nz Model does not exist in catalog.model_metadatatraining_paramsanomaly_detection_thresholdzzTrained model is outdated and does not support threshold. Try retraining or using an existing, trained model with MyModel.g      ?)score_samplesr   get_model_info
ValueErrorgetr   )r    r!   sample_scores
model_info	thresholds        r   r%   z#MyAnomalyDetector.decision_function   s    . **1--= ,,..J! !CDDD"#345FGKK-t I   W   (i88DMt},,r   c                     | j                             || j                  }t          |d                             d                                                     S )aJ  
        Compute normal probability logit score for each sample.
        Used for ranking, thresholding.

        Args:
            X: Samples to score.

        Returns:
            ndarray: Logit scores based on "normal" class probability.

        Raises:
            DatabaseError:
                If provided options are invalid or unsupported,
                or if the model is not initialized, i.e., fit or import has not
                been called
                If a database connection issue occurs.
                If an operational error occurs during execution.
        )options
ml_resultsc                     | d         d         S )Nprobabilitiesnormal )xs    r   <lambda>z1MyAnomalyDetector.score_samples.<locals>.<lambda>   s    Q/9 r   )_modelr&   r   r   applyto_numpy)r    r!   r   s      r   r+   zMyAnomalyDetector.score_samples   sU    , $$Q0H$II< U99::XZZ
 
 	
r   )NNN)__name__
__module____qualname____doc__r	   r   strdictr   r   pd	DataFramer   ndarrayr&   r%   r+   r8   r   r   r   r   >   s        6 %),0.2. ... SM. $D>	.
 &d^. . . .@@rz)*@ 
@ @ @ @:+-rz)*+- 
+- +- +- +-Z
rz)*
 

 
 
 
 
 
r   r   )rA   typingr   r   numpyr   pandasrD   sklearn.baser   mysql.ai.ml.baser   mysql.ai.ml.modelr   mysql.ai.utilsr   mysql.connector.abstractsr	   r   floatrF   r   r   r8   r   r   <module>rP      s  :  # " " " " " " "         % % % % % % * * * * * * % % % % % % $ $ $ $ $ $ = = = = = =
)eE2:-. )5
9J3K ) ) ) )_
 _
 _
 _
 _
| _
 _
 _
 _
 _
r   