# Source code for Scikit-Learn Exporter

from __future__ import absolute_import

import sys, os
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
sys.path.append(BASE_DIR)

import PMML44 as pml
import pre_process as pp
from datetime import datetime
import math
import metadata
from enums import *

def skl_to_pmml(pipeline, col_names, target_name='target', pmml_f_name='from_sklearn.pmml', model_name=None, description=None):

    """
    Exports scikit-learn pipeline object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names. A numpy array is converted
        to a plain list before use.
    target_name : String
        Name of the target column. (Default='target')
    pmml_f_name : String
        Name of the pmml file. (Default='from_sklearn.pmml')
    model_name : string (optional)
        Name of the model
    description : string (optional)
        Description of the model

    Returns
    -------
    Generates a PMML object and exports it to `pmml_f_name`

    Raises
    ------
    TypeError
        If the final estimator cannot be read from ``pipeline.steps``
        (for example when a bare estimator is passed instead of a Pipeline).

    """
    try:
        model = pipeline.steps[-1][1]
    except (AttributeError, IndexError, KeyError, TypeError) as err:
        # Keep the historical exception type, but preserve the original cause.
        raise TypeError("Exporter expects pipeleine_instance and not an estimator_instance") from err

    import numpy as np
    if isinstance(col_names, np.ndarray):
        col_names = col_names.tolist()

    # Every step except the last one is treated as preprocessing.
    ppln_sans_predictor = pipeline.steps[:-1]
    trfm_dict_kwargs = dict()
    derived_col_names = col_names
    categoric_values = tuple()
    mining_imp_val = tuple()
    if ppln_sans_predictor:
        pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names, model)
        trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
        derived_col_names = pml_pp['derived_col_names']
        col_names = pml_pp['preprocessed_col_names']
        categoric_values = pml_pp['categorical_feat_values']
        mining_imp_val = pml_pp['mining_imp_values']

    PMML_kwargs = get_PMML_kwargs(model,
                                  derived_col_names,
                                  col_names,
                                  target_name,
                                  mining_imp_val,
                                  categoric_values,
                                  model_name)

    pmml = pml.PMML(
        version=PMML_SCHEMA.VERSION.value,
        Header=get_header(description),
        DataDictionary=get_data_dictionary(model, col_names, target_name, categoric_values),
        **trfm_dict_kwargs,
        **PMML_kwargs
    )
    # The context manager guarantees the file is flushed and closed,
    # even if the export itself raises.
    with open(pmml_f_name, "w") as pmml_file:
        pmml.export(outfile=pmml_file, level=0)


def any_in(seq_a, seq_b):
    """
    Tells whether the two sequences share at least one element

    Parameters
    ----------
    seq_a : list
        Items to look for

    seq_b : list
        Items to search in

    Returns
    -------
    True as soon as one item of seq_a is found in seq_b, otherwise False

    """
    for candidate in seq_a:
        if candidate in seq_b:
            return True
    return False


def get_PMML_kwargs(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values, model_name):

    """
    Selects the PMML model element matching the scikit-learn model.

    The class names returned by ``get_super_cls_names(model)`` are compared
    against the supported families in a fixed order; the first family that
    matches decides which builder is called.

    Parameters
    ----------
    model : Scikit-learn model object
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the target column .
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    algo_kwargs : Dictionary
        Single-entry dictionary mapping the PMML element name to the value
        produced by the matching builder

    Raises
    ------
    NotImplementedError
        If none of the supported families matches the model.
    """
    super_cls_names = get_super_cls_names(model)
    # Every builder takes exactly the same positional arguments.
    builder_args = (model, derived_col_names, col_names, target_name,
                    mining_imp_val, categoric_values, model_name)

    def belongs_to(*family):
        return any_in(family, super_cls_names)

    if belongs_to('BaseDecisionTree'):
        return {'TreeModel': get_tree_models(*builder_args)}

    if belongs_to('LogisticRegression', 'RidgeClassifier', 'LinearDiscriminantAnalysis',
                  'SGDClassifier', 'LinearSVC'):
        # Two classes go to the regression builder, anything else to the
        # segmented mining model builder.
        if len(model.classes_) == 2:
            return {'RegressionModel': get_regrs_models(*builder_args)}
        return {'MiningModel': get_reg_mining_models(*builder_args)}

    if belongs_to('LinearRegression', 'LinearSVR'):
        return {'RegressionModel': get_regrs_models(*builder_args)}

    if belongs_to('SVC', 'SVR'):
        return {'SupportVectorMachineModel': get_supportVectorMachine_models(*builder_args)}

    if belongs_to('RandomForestRegressor', 'RandomForestClassifier',
                  'GradientBoostingClassifier', 'GradientBoostingRegressor'):
        return {'MiningModel': get_ensemble_models(*builder_args)}

    if belongs_to('MLPClassifier', 'MLPRegressor'):
        return {'NeuralNetwork': get_neural_models(*builder_args)}

    if belongs_to('GaussianNB'):
        return {'NaiveBayesModel': get_naiveBayesModel(*builder_args)}

    if belongs_to('NeighborsBase'):
        return {'NearestNeighborModel': get_nearestNeighbour_model(*builder_args)}

    if belongs_to('OneClassSVM', 'IsolationForest'):
        return {'AnomalyDetectionModel': get_anomalydetection_model(*builder_args)}

    if belongs_to('KMeans'):
        return {'ClusteringModel': get_clustering_model(*builder_args)}

    raise NotImplementedError("{} is not Implemented!".format(model.__class__.__name__))


def get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values):

    """
    Collects the keyword arguments shared by the model builders.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    model_kwargs : Dictionary
        Holds the keys 'functionName', 'MiningSchema' and 'Output'
    """
    return {
        'functionName': get_mining_func(model),
        'MiningSchema': get_mining_schema(model, col_names, target_name, mining_imp_val, categoric_values),
        'Output': get_output(model, target_name),
    }


def get_reg_mining_models(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values, model_name):
    """
    Builds the segmented MiningModel used for multi-class linear models.

    One regression segment is emitted per entry of ``model.classes_``; each
    writes an output field named ``probablity_<index>``. A final
    classification segment consumes those fields and maps them onto the
    class labels. The segments are combined as a model chain.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    mining_model : List
        One-element list holding the pml.MiningModel object

    """
    class_count = len(model.classes_)
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)

    outer_model = pml.MiningModel(modelName=model_name or model.__class__.__name__, **shared_kwargs)
    # The per-class segments see every mining field except the target.
    non_target_fields = [
        field for field in shared_kwargs['MiningSchema'].MiningField
        if field.usageType != FIELD_USAGE_TYPE.TARGET.value
    ]
    chain = pml.Segmentation(multipleModelMethod=MULTIPLE_MODEL_METHOD.MODEL_CHAIN.value)

    for class_idx in range(class_count):
        class_segment = pml.Segment(id=str(class_idx + 1), True_=pml.True_())
        class_regression = pml.RegressionModel(
            functionName=MINING_FUNCTION.REGRESSION.value,
            MiningSchema=pml.MiningSchema(MiningField=non_target_fields),
            Output=pml.Output(
                OutputField=[
                    pml.OutputField(
                        name="probablity_" + str(class_idx),
                        optype=OPTYPE.CONTINUOUS.value,
                        dataType=DATATYPE.DOUBLE.value,
                    )
                ]
            ),
            RegressionTable=get_reg_tab_for_reg_mining_model(model, derived_col_names, class_idx, categoric_values),
        )
        class_segment.RegressionModel = class_regression
        # LinearSVC segments are left without a normalization method.
        if model.__class__.__name__ != 'LinearSVC':
            class_regression.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC.value
        chain.add_Segment(class_segment)

    closing_segment = pml.Segment(id=str(class_count + 1), True_=pml.True_())
    closing_fields = [
        pml.MiningField(name="probablity_" + str(class_idx))
        for class_idx in range(class_count)
    ] + [pml.MiningField(name=target_name, usageType=FIELD_USAGE_TYPE.TARGET.value)]
    closing_schema = pml.MiningSchema(MiningField=closing_fields)
    # One pass-through table per class: intercept 0 and coefficient 1 on
    # the matching per-class output field.
    closing_tables = [
        pml.RegressionTable(
            intercept="0.0",
            targetCategory=str(model.classes_[class_idx]),
            NumericPredictor=[
                pml.NumericPredictor(
                    name="probablity_" + str(class_idx),
                    coefficient="1.0",
                )
            ],
        )
        for class_idx in range(class_count)
    ]

    closing_regression = pml.RegressionModel(
        functionName=MINING_FUNCTION.CLASSIFICATION.value,
        MiningSchema=closing_schema,
        RegressionTable=closing_tables,
    )
    closing_segment.RegressionModel = closing_regression
    if model.__class__.__name__ != 'LinearSVC':
        closing_regression.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SIMPLEMAX.value
    chain.add_Segment(closing_segment)
    outer_model.set_Segmentation(chain)
    return [outer_model]


def get_reg_tab_for_reg_mining_model(model, col_names, index, categorical_values):
    """
    Builds the regression table for one class of a multi-class linear model

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    col_names : List
        Contains list of feature/column names.
    index : int
        Row of ``model.intercept_`` / ``model.coef_`` to export
    categorical_values : tuple
        Contains Categorical attribute names and its values (not used here)

    Returns
    -------
    One-element list holding the pml.RegressionTable object
    """
    table = pml.RegressionTable(intercept=format(model.intercept_[index], ".16f"))
    for position, weight in enumerate(model.coef_[index]):
        predictor = pml.NumericPredictor(
            name=col_names[position],
            coefficient=format(weight, ".16f"),
        )
        table.add_NumericPredictor(predictor)
    return [table]


def get_anomalydetection_model(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values, model_name):
    """
    Builds the AnomalyDetectionModel element for anomaly detection models.

    A model whose class string contains 'OneClassSVM' is wrapped around a
    support vector machine model; any other model is wrapped around an
    ensemble mining model and exported with the isolation forest
    algorithm type.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model


    Returns
    -------
    anomaly_detection_model : List
        One-element list holding the pml.AnomalyDetectionModel object

    """
    if 'OneClassSVM' in str(model.__class__):
        inner_svm = get_supportVectorMachine_models(
            model, derived_col_names, col_names, target_name,
            mining_imp_val, categoric_values, model_name,
        )[0]
        wrapper = pml.AnomalyDetectionModel(
            modelName=model_name or model.__class__.__name__,
            algorithmType=ANOMALY_DETECTION_ALGORITHM.ONE_CLASS_SVM.value,
            functionName=MINING_FUNCTION.REGRESSION.value,
            MiningSchema=get_mining_schema(model, col_names, target_name, mining_imp_val, categoric_values),
            Output=get_anomaly_detection_output(model),
            SupportVectorMachineModel=inner_svm,
        )
        return [wrapper]

    outer_schema = get_mining_schema(model, col_names, target_name, mining_imp_val, categoric_values)
    # The inner ensemble is built with 'avg_path_length' as its target name.
    inner_ensemble = get_ensemble_models(
        model, derived_col_names, col_names, 'avg_path_length',
        mining_imp_val, categoric_values, model_name,
    )[0]
    wrapper = pml.AnomalyDetectionModel(
        modelName=model_name or "IsolationForest",
        algorithmType=ANOMALY_DETECTION_ALGORITHM.ISOLATION_FOREST.value,
        functionName=MINING_FUNCTION.REGRESSION.value,
        MiningSchema=outer_schema,
        Output=get_anomaly_detection_output(model),
        sampleDataSize=str(model.max_samples_),
        MiningModel=inner_ensemble,
    )
    return [wrapper]


def get_anomaly_detection_output(model):
    """
    Generates output for anomaly detection models

    Two output fields are produced: ``anomalyScore`` (the predicted value,
    not a final result) and ``outlier`` (a boolean decision obtained by
    comparing ``anomalyScore`` against a constant).

    The constant is ``-(threshold + offset)`` where ``threshold`` is
    ``model.threshold_`` when that attribute is available (0 otherwise) and
    ``offset`` is ``model.offset_`` for IsolationForest (0 otherwise).
    IsolationForest uses the GREATER_THAN operator, every other model
    LESS_THAN.

    Parameters
    ----------
    model :
        Scikit-learn's model object

    Returns
    -------
    output_fields :
        Returns Nyoka's Output object
    """
    score_field = pml.OutputField(name="anomalyScore",
                                  optype=OPTYPE.CONTINUOUS.value,
                                  dataType=DATATYPE.DOUBLE.value,
                                  feature=RESULT_FEATURE.PREDICTED_VALUE.value,
                                  isFinalResult="false")

    # Only a missing attribute falls back to 0; unrelated errors are no
    # longer silently swallowed as they were with a bare ``except``.
    thresh = getattr(model, "threshold_", 0)

    offset = 0
    operator = SIMPLE_PREDICATE_OPERATOR.LESS_THAN.value
    if model.__class__.__name__ == "IsolationForest":
        operator = SIMPLE_PREDICATE_OPERATOR.GREATER_THAN.value
        offset = model.offset_
    thresh = -1 * (thresh + offset)

    outlier_field = pml.OutputField(
        name="outlier",
        optype=OPTYPE.CATEGORICAL.value,
        dataType=DATATYPE.BOOLEAN.value,
        feature=RESULT_FEATURE.DECISION.value,
        isFinalResult="true",
        Apply=pml.Apply(
            function=operator,
            FieldRef=[pml.FieldRef(field="anomalyScore")],
            Constant=[pml.Constant(
                dataType=DATATYPE.DOUBLE.value,
                # A zero cut-off is written as the literal "0".
                valueOf_="0" if thresh == 0 else "{:.16f}".format(thresh))]))
    return pml.Output(OutputField=[score_field, outlier_field])


def get_clustering_model(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values,model_name):
    """
    Builds the ClusteringModel element for clustering models.

    The distinct values of ``model.labels_`` drive the output fields and
    their occurrence counts are used as cluster sizes.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    clustering_models : List
        One-element list holding the pml.ClusteringModel object

    """
    import numpy as np
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    distinct_labels, label_counts = np.unique(model.labels_, return_counts=True)
    # The generic output is replaced by the clustering-specific one.
    shared_kwargs["Output"] = get_output_for_clustering(distinct_labels)
    clustering_model = pml.ClusteringModel(
        modelClass=CLUSTERING_MODEL_CLASS.CENTER_BASED.value,
        modelName=model_name or model.__class__.__name__,
        numberOfClusters=get_cluster_num(model),
        ComparisonMeasure=get_comp_measure(),
        ClusteringField=get_clustering_flds(derived_col_names),
        Cluster=get_cluster_vals(model, label_counts),
        **shared_kwargs
    )
    return [clustering_model]


def get_output_for_clustering(values):
    """
    Generates output for clustering models

    Parameters
    ----------
    values :
        Iterable of cluster values; one ``affinity(<position>)`` output
        field is created per entry.

    Returns
    -------
    Returns Nyoka's Output object holding the affinity fields followed by
    a single ``cluster`` field
    """
    affinity_fields = [
        pml.OutputField(
            name="affinity(" + str(position) + ")",
            optype=OPTYPE.CONTINUOUS.value,
            dataType=DATATYPE.DOUBLE.value,
            feature=RESULT_FEATURE.ENTITY_AFFINITY.value,
            value=str(cluster_value),
        )
        for position, cluster_value in enumerate(values)
    ]
    predicted_field = pml.OutputField(
        name="cluster",
        optype=OPTYPE.CATEGORICAL.value,
        dataType=DATATYPE.STRING.value,
        feature=RESULT_FEATURE.PREDICTED_VALUE.value,
    )
    return pml.Output(OutputField=affinity_fields + [predicted_field])
        


def get_cluster_vals(model,counts):
    """
    Generates cluster information for clustering models

    Parameters
    ----------
    model :
        An instance of Scikit-learn model; ``model.cluster_centers_`` is
        read as a two-dimensional array.
    counts :
        Indexable giving the size of each cluster, by cluster position.

    Returns
    -------
    cluster_flds : List
        One pml.Cluster object per row of ``model.cluster_centers_``

    """
    centers = model.cluster_centers_
    clusters = []
    for center_idx in range(centers.shape[0]):
        coordinates = pml.ArrayType(type_=ARRAY_TYPE.REAL.value)
        formatted = [
            format(centers[center_idx][dim_idx], ".16f")
            for dim_idx in range(centers.shape[1])
        ]
        # With no coordinates the array content is left untouched.
        if formatted:
            coordinates.content_[0].value = " ".join(formatted)
        clusters.append(
            pml.Cluster(id=str(center_idx), Array=coordinates, size=str(counts[center_idx]))
        )
    return clusters


def get_cluster_num(model):
    """
    Reads the number of clusters from a clustering model

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.

    Returns
    -------
    The value of ``model.n_clusters``

    """
    cluster_count = model.n_clusters
    return cluster_count


def get_comp_measure():
    """
    Generates comparison measure information for clustering models

    Returns
    -------
    Returns Nyoka's ComparisonMeasure object using the euclidean measure
    with kind set to COMPARISON_MEASURE_KIND.DISTANCE

    """
    return pml.ComparisonMeasure(
        euclidean=pml.euclidean(),
        kind=COMPARISON_MEASURE_KIND.DISTANCE.value,
    )


def get_clustering_flds(col_names):
    """
    Generates cluster fields for clustering models

    Parameters
    ----------
    col_names :
        Contains list of feature/column names.

    Returns
    -------
    clustering_flds: List
        One pml.ClusteringField per column name, in the given order

    """
    return [pml.ClusteringField(field=str(column)) for column in col_names]


def get_nearestNeighbour_model(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values,model_name):

    """
    Builds the NearestNeighborModel element for nearest neighbour models

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model


    Returns
    -------
    nearest_neighbour_model :
        One-element list holding the pml.NearestNeighborModel object

    """
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    knn_model = pml.NearestNeighborModel(
        modelName=model_name or model.__class__.__name__,
        continuousScoringMethod=CONTINUOUS_SCORING_METHOD.AVERAGE.value,
        algorithmName="KNN",
        numberOfNeighbors=model.n_neighbors,
        KNNInputs=get_knn_inputs(derived_col_names),
        ComparisonMeasure=get_comparison_measure(model),
        TrainingInstances=get_training_instances(model, derived_col_names, target_name),
        **shared_kwargs
    )
    return [knn_model]


def get_training_instances(model, derived_col_names, target_name):

    """
    Assembles the TrainingInstances element from its two parts.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing
    target_name : String
        Name of the Target column.

    Returns
    -------
    TrainingInstances :
        pml.TrainingInstances built from the instance fields and the
        inline table

    """
    field_definitions = get_instance_fields(derived_col_names, target_name)
    table = get_inline_table(model)
    return pml.TrainingInstances(InstanceFields=field_definitions, InlineTable=table)


def get_inline_table(model):
    """
    It Returns the Inline Table element of the model.

    Each training sample becomes one row whose ``y`` attribute holds the
    target and whose ``x1 .. xN`` attributes hold the feature values. When
    the model has ``classes_``, the stored target is used as an index into
    it; otherwise the stored target is written as is.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model. The training data is read from
        ``model._tree.get_arrays()[0]`` and the targets from ``model._y``.

    Returns
    -------
    InlineTable :
        Returns Nyoka's InlineTable object

    """
    # Fetch the training array once instead of once per use.
    training_data = model._tree.get_arrays()[0]
    feature_rows = training_data.tolist()
    targets = model._y.tolist()

    column_names = ["x" + str(idx + 1) for idx in range(training_data.shape[1])]
    has_classes = hasattr(model, 'classes_')

    rows = []
    for features, target in zip(feature_rows, targets):
        row = pml.row()
        row.elementobjs_ = ['y'] + column_names
        row.y = model.classes_[target] if has_classes else target
        for column, value in zip(column_names, features):
            # setattr avoids exec on generated source, which raised
            # NameError for values whose text is not a literal (nan, inf).
            setattr(row, column, value)
        rows.append(row)
    return pml.InlineTable(row=rows)


def get_instance_fields(derived_col_names, target_name):
    """
    Builds the InstanceFields element.

    The target is mapped to column ``y``; the derived columns are mapped,
    in order, to ``x1``, ``x2``, ...

    Parameters
    ----------

    derived_col_names : List
        Contains column names after preprocessing.
    target_name : String
        Name of the Target column.


    Returns
    -------
    InstanceFields :
        Returns Nyoka's InstanceFields object

    """
    target_field = pml.InstanceField(field=target_name, column="y")
    feature_fields = [
        pml.InstanceField(field=str(column), column="x" + str(position))
        for position, column in enumerate(derived_col_names, start=1)
    ]
    return pml.InstanceFields(InstanceField=[target_field] + feature_fields)


def get_comparison_measure(model):

    """
    Translates ``model.effective_metric_`` into a ComparisonMeasure element.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.

    Returns
    -------
    comp_measure :
        Returns Nyoka's ComparisonMeasure object.

    Raises
    ------
    NotImplementedError
        If the metric is not one of the supported names.

    """
    metric = model.effective_metric_
    # Ordered (accepted names, lazy builder) pairs; a builder only runs
    # for the metric that actually matched.
    builders = (
        (('euclidean',), lambda: pml.ComparisonMeasure(
            euclidean=pml.euclidean(), kind=COMPARISON_MEASURE_KIND.DISTANCE.value)),
        (('minkowski',), lambda: pml.ComparisonMeasure(
            minkowski=pml.minkowski(p_parameter=model.p), kind=COMPARISON_MEASURE_KIND.DISTANCE.value)),
        (('manhattan', 'cityblock'), lambda: pml.ComparisonMeasure(
            cityBlock=pml.cityBlock(), kind=COMPARISON_MEASURE_KIND.DISTANCE.value)),
        (('sqeuclidean',), lambda: pml.ComparisonMeasure(
            squaredEuclidean=pml.squaredEuclidean(), kind=COMPARISON_MEASURE_KIND.DISTANCE.value)),
        (('chebyshev',), lambda: pml.ComparisonMeasure(
            chebychev=pml.chebychev(), kind=COMPARISON_MEASURE_KIND.DISTANCE.value)),
        (('matching',), lambda: pml.ComparisonMeasure(
            simpleMatching=pml.simpleMatching(), kind=COMPARISON_MEASURE_KIND.SIMILARITY.value)),
        (('jaccard',), lambda: pml.ComparisonMeasure(
            jaccard=pml.jaccard(), kind=COMPARISON_MEASURE_KIND.SIMILARITY.value)),
        (('rogerstanimoto',), lambda: pml.ComparisonMeasure(
            tanimoto=pml.tanimoto(), kind=COMPARISON_MEASURE_KIND.SIMILARITY.value)),
    )
    for accepted_names, build in builders:
        if metric in accepted_names:
            return build()
    raise NotImplementedError("{} metric is not implemented for KNN Model!".format(metric))


def get_knn_inputs(col_names):
    """
    Builds the KNNInputs element.

    Parameters
    ----------
    col_names : List
        Contains list of feature/column names.

    Returns
    -------
    KNNInputs :
        pml.KNNInputs holding one pml.KNNInput per column name, in order

    """
    inputs = [pml.KNNInput(field=str(column)) for column in col_names]
    return pml.KNNInputs(KNNInput=inputs)


def get_naiveBayesModel(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values,model_name):
    """
    Assembles the NaiveBayesModel PMML element for a naive bayes estimator.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; the estimator's class name is used when it is
        empty or None.

    Returns
    -------
    naive_bayes_model : List
        A one-element list holding Nyoka's NaiveBayesModel
    """
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    resolved_name = model_name or model.__class__.__name__
    bayes_inputs = get_bayes_inputs(model, derived_col_names)
    bayes_output = get_bayes_output(model, target_name)
    nb_element = pml.NaiveBayesModel(
        modelName=resolved_name,
        BayesInputs=bayes_inputs,
        BayesOutput=bayes_output,
        threshold=get_threshold(),
        **shared_kwargs
    )
    return [nb_element]


def get_threshold():
    """
    Gives the fixed threshold written into Naive Bayes models.

    Returns
    -------
    The threshold as the string '0.001'.

    """
    threshold = '0.001'
    return threshold


def get_bayes_output(model, target_name):
    """
    Builds the BayesOutput element from the per-class sample counts.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model; its `classes_` and
        `class_count_` attributes are read.
    target_name : String
        Name of the Target column.

    Returns
    -------
    BayesOutput :
        Nyoka's BayesOutput object with one TargetValueCount per class;
        both the class label and its count are written as strings.

    """
    per_class_counts = model.class_count_
    value_counts = pml.TargetValueCounts()
    for label, n_samples in zip(model.classes_, per_class_counts):
        value_counts.add_TargetValueCount(
            pml.TargetValueCount(value=str(label), count=str(n_samples))
        )
    return pml.BayesOutput(fieldName=target_name, TargetValueCounts=value_counts)



def get_bayes_inputs(model, derived_col_names):
    """
    It returns the Bayes Input element of the naive bayes model .

    For every derived column a BayesInput is created that holds, per target
    class, a GaussianDistribution with the fitted mean and variance of that
    column.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model. Its `theta_` (per-class means),
        `classes_` and per-class variances are read. The variances are taken
        from `var_` when the model has it and from `sigma_` otherwise.
    derived_col_names : List
        Contains column names after preprocessing.

    Returns
    -------
    bayes_inputs :
        Returns Nyoka's BayesInputs object.

    """
    # scikit-learn renamed GaussianNB.sigma_ to var_ (sigma_ was deprecated in
    # 1.0 and removed in 1.2), so prefer the new name and keep the old one as
    # a fallback for older releases.
    variance_attr = 'var_' if hasattr(model, 'var_') else 'sigma_'
    bayes_inputs = pml.BayesInputs()
    for indx, name in enumerate(derived_col_names):
        means = model.theta_[:, indx]
        variances = getattr(model, variance_attr)[:, indx]
        target_val_stats = pml.TargetValueStats()
        for idx, val in enumerate(model.classes_):
            target_val = pml.TargetValueStat(
                val, GaussianDistribution=pml.GaussianDistribution(
                    mean="{:.16f}".format(means[idx]),
                    variance="{:.16f}".format(variances[idx])))
            target_val_stats.add_TargetValueStat(target_val)
        bayes_inputs.add_BayesInput(pml.BayesInput(fieldName=str(name),
                                                   TargetValueStats=target_val_stats))
    return bayes_inputs


def get_supportVectorMachine_models(model, derived_col_names, col_names, target_names,
                                    mining_imp_val, categoric_values,model_name):
    """
    Assembles the SupportVectorMachineModel PMML element for an SVM estimator.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_names : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; the estimator's class name is used when it is
        empty or None.

    Returns
    -------
    supportVector_models : List
        A one-element list holding Nyoka's SupportVectorMachineModel object

    """
    shared_kwargs = get_model_kwargs(model, col_names, target_names, mining_imp_val, categoric_values)
    kernel_kwargs = get_kernel_type(model)
    resolved_name = model_name or model.__class__.__name__
    classification_method = get_classificationMethod(model)
    vector_dictionary = get_vectorDictionary(model, derived_col_names, categoric_values)
    machines = get_supportVectorMachine(model)
    svm_element = pml.SupportVectorMachineModel(
        modelName=resolved_name,
        classificationMethod=classification_method,
        VectorDictionary=vector_dictionary,
        SupportVectorMachine=machines,
        **kernel_kwargs,
        **shared_kwargs
    )
    return [svm_element]


def get_ensemble_models(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values,model_name):
    """
    Assembles the MiningModel PMML element for an ensemble estimator.

    Parameters
    ----------
    model :
        An instance of Scikit-learn model.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; the estimator's class name is used when it is
        empty or None.

    Returns
    -------
    mining_models : List
        A one-element list holding Nyoka's MiningModel object
    """
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    # Only the gradient boosting regressor gets a Targets element here.
    if model.__class__.__name__ == 'GradientBoostingRegressor':
        shared_kwargs['Targets'] = get_targets(model, target_name)

    resolved_name = model_name or model.__class__.__name__
    segmentation = get_outer_segmentation(model, derived_col_names, col_names, target_name,
                                          mining_imp_val, categoric_values, model_name)
    ensemble = pml.MiningModel(
        modelName=resolved_name,
        Segmentation=segmentation,
        **shared_kwargs
    )
    return [ensemble]


def get_targets(model, target_name):
    """
    Builds the Targets element carrying the rescaling of the prediction.

    For a GradientBoostingRegressor the rescale constant is `model.init_.mean`
    and the rescale factor is `model.learning_rate`; for any other model only
    a rescale constant is written, taken from `model.base_score`.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    target_name : String
        Name of the Target column.

    Returns
    -------
    targets :
        Returns Nyoka's Targets object wrapping a single Target
    """
    target_kwargs = {'field': target_name}
    if model.__class__.__name__ == 'GradientBoostingRegressor':
        target_kwargs['rescaleConstant'] = "{:.16f}".format(model.init_.mean)
        target_kwargs['rescaleFactor'] = "{:.16f}".format(model.learning_rate)
    else:
        target_kwargs['rescaleConstant'] = "{:.16f}".format(model.base_score)
    return pml.Targets(Target=[pml.Target(**target_kwargs)])


def get_multiple_model_method(model):
    """
    Picks the multipleModelMethod of a MiningModel from the estimator's class name.

    Parameters
    ----------
    model :
        A Scikit-learn model instance

    Returns
    -------
    The multiple model method for a MiningModel, or None when the class
    name is not one of the handled ensembles.

    """
    class_name = model.__class__.__name__
    method = None
    if class_name == 'GradientBoostingClassifier':
        method = MULTIPLE_MODEL_METHOD.MODEL_CHAIN.value
    elif class_name == 'GradientBoostingRegressor':
        method = MULTIPLE_MODEL_METHOD.SUM.value
    elif class_name == 'RandomForestClassifier':
        method = MULTIPLE_MODEL_METHOD.MAJORITY_VOTE.value
    elif class_name in ['RandomForestRegressor','IsolationForest']:
        method = MULTIPLE_MODEL_METHOD.AVERAGE.value
    return method


def get_outer_segmentation(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values,model_name):
    """
    Builds the top-level Segmentation element of a MiningModel.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    segmentation :
        Nyoka's Segmentation object

    """
    combine_method = get_multiple_model_method(model)
    segment_list = get_segments(model, derived_col_names, col_names, target_name,
                                mining_imp_val, categoric_values, model_name)
    return pml.Segmentation(multipleModelMethod=combine_method, Segment=segment_list)


def get_segments(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values,model_name):
    """
    Produces the Segment elements of a Segmentation.

    Models whose class representation contains 'GradientBoostingClassifier'
    are delegated to `get_segments_for_gbc`; every other model gets the plain
    tree segments from `get_inner_segments` with estimator index 0.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    segments :
        Nyoka's Segment object

    """
    if 'GradientBoostingClassifier' not in str(model.__class__):
        return get_inner_segments(model, derived_col_names, col_names, 0)
    return get_segments_for_gbc(model, derived_col_names, col_names, target_name,
                                mining_imp_val, categoric_values, model_name)


def get_segments_for_gbc(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values, model_name):
    """
    Builds the chained Segments of a gradient boosting classifier.

    One regression MiningModel segment is emitted per entry of
    `model.estimators_[0]`. Each of them sums its trees into
    `decisionFunction(i)` and exposes `transformedDecisionFunction(i)`,
    computed as prior + learning_rate * decisionFunction(i). A final
    RegressionModel segment consumes the transformed fields and applies
    logistic normalization for two classes, softmax otherwise.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    segments : List
        Nyoka's Segment object

    """
    segments = []
    out_field_names = []
    n_chains = len(model.estimators_[0])
    for estm_idx in range(n_chains):
        idx_label = str(estm_idx)
        schema = pml.MiningSchema(
            MiningField=[pml.MiningField(name=col) for col in col_names]
        )
        raw_score_field = pml.OutputField(
            name='decisionFunction(' + idx_label + ')',
            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
            dataType=DATATYPE.DOUBLE.value,
            isFinalResult=False
        )

        # The two-class case always refers to index 0 and reads a single
        # prior; otherwise the per-index prior and field names are used.
        if len(model.classes_) == 2:
            transformed_name = 'transformedDecisionFunction(0)'
            raw_score_ref = "decisionFunction(0)"
            prior = model.init_.prior
        else:
            transformed_name = 'transformedDecisionFunction(' + idx_label + ')'
            raw_score_ref = "decisionFunction(" + idx_label + ")"
            prior = model.init_.priors[estm_idx]

        prior_constant = pml.Constant(
            dataType=DATATYPE.DOUBLE.value,
            valueOf_="{:.16f}".format(prior)
        )
        scaled_score = pml.Apply(
            function=FUNCTION.MULTIPLICATION.value,
            Constant=[pml.Constant(
                dataType=DATATYPE.DOUBLE.value,
                valueOf_="{:.16f}".format(model.learning_rate)
            )],
            FieldRef=[pml.FieldRef(field=raw_score_ref)]
        )
        transformed_field = pml.OutputField(
            name=transformed_name,
            feature=RESULT_FEATURE.TRANSFORMED_VALUE.value,
            dataType=DATATYPE.DOUBLE.value,
            isFinalResult=True,
            Apply=pml.Apply(
                function=FUNCTION.ADDITION.value,
                Constant=[prior_constant],
                Apply_member=[scaled_score]
            )
        )

        out_field_names.append('transformedDecisionFunction(' + idx_label + ')')
        chain_model = pml.MiningModel(
            functionName=MINING_FUNCTION.REGRESSION.value,
            modelName="MiningModel",
            MiningSchema=schema,
            Output=pml.Output(OutputField=[raw_score_field, transformed_field]),
            Segmentation=pml.Segmentation(
                multipleModelMethod=MULTIPLE_MODEL_METHOD.SUM.value,
                Segment=get_inner_segments(model, derived_col_names,
                                           col_names, estm_idx)
            )
        )
        segments.append(
            pml.Segment(True_=pml.True_(), id=idx_label, MiningModel=chain_model)
        )

    # The closing regression model takes the transformed scores as both its
    # derived and its plain column names and carries no Output of its own.
    reg_model = get_regrs_models(model, out_field_names, out_field_names, target_name,
                                 mining_imp_val, categoric_values, model_name)[0]
    reg_model.Output = None
    reg_model.normalizationMethod = (
        REGRESSION_NORMALIZATION_METHOD.LOGISTIC.value
        if len(model.classes_) == 2
        else REGRESSION_NORMALIZATION_METHOD.SOFTMAX.value
    )
    segments.append(
        pml.Segment(
            id=str(n_chains),
            True_=pml.True_(),
            RegressionModel=reg_model
        )
    )
    return segments


def get_inner_segments(model, derived_col_names, col_names, index):
    """
    It returns the segments of a Segmentation.

    One TreeModel segment is produced for every fitted tree that contains at
    least one split; trees consisting of a single leaf are skipped.

    Parameters
    ----------
    model :
        A Scikit-learn model instance. `estimators_` may be one-dimensional
        (one tree per stage) or two-dimensional (several trees per stage).
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    index : Integer
        The index of the estimator for the model; only used when
        `estimators_` is two-dimensional.

    Returns
    -------
    segments : List
        Nyoka's Segment object

    """
    import numpy as np
    segments = list()
    # The layout of estimators_ does not change between iterations, so it is
    # determined once instead of converting the whole container per tree.
    flat_estimators = np.asanyarray(model.estimators_).ndim == 1
    # Iterate over the trees that were actually fitted: a gradient boosting
    # model stopped early keeps fewer stages than the n_estimators parameter,
    # and indexing up to n_estimators would raise IndexError.
    for estm_idx in range(len(model.estimators_)):
        if flat_estimators:
            estm = model.estimators_[estm_idx]
        else:
            estm = model.estimators_[estm_idx][index]
        # -2 is scikit-learn's marker for a node without a split feature, so
        # a tree with only such entries has nothing to export.
        if not any(feat != -2 for feat in estm.tree_.feature):
            continue
        mining_fields = [pml.MiningField(name=feat) for feat in col_names]
        segments.append(
            pml.Segment(
                True_=pml.True_(),
                id=str(estm_idx),
                TreeModel=pml.TreeModel(
                    modelName=estm.__class__.__name__,
                    functionName=get_mining_func(estm),
                    splitCharacteristic=TREE_SPLIT_CHARACTERISTIC.MULTI.value,
                    MiningSchema=pml.MiningSchema(MiningField=mining_fields),
                    Node=get_node(estm, derived_col_names, model)
                )
            )
        )
    return segments


def get_classificationMethod(model):
    """
    Picks the classification method name written into SVM models.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.

    Returns
    -------
    The one-versus-one method when the class name is 'SVC', the
    one-versus-rest method for every other model.

    """
    is_svc = model.__class__.__name__ == 'SVC'
    return SVM_CLASSIFICATION_METHOD.OVO.value if is_svc else SVM_CLASSIFICATION_METHOD.OVR.value


def get_vectorDictionary(model, derived_col_names, categoric_values):
    """
    Builds the VectorDictionary element holding all support vectors.

    Every support vector becomes a VectorInstance whose id is the matching
    entry of `model.support_` and whose values are written as a sparse array
    with 1-based indices covering every position of the vector.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    VectorDictionary :
        Nyoka's VectorDictionary object

    """
    field_refs = [pml.FieldRef(field=col) for col in derived_col_names]
    vector_fields = pml.VectorFields(FieldRef=field_refs)

    sv_ids = list(model.support_)
    sv_rows = list(model.support_vectors_)
    # Rows of a csr_matrix have to be densified before they can be listed.
    rows_are_sparse = model.support_vectors_.__class__.__name__ == 'csr_matrix'

    instances = []
    for position, row in enumerate(sv_rows):
        if rows_are_sparse:
            entries = row.todense().tolist()[0]
            n_entries = len(entries)
        else:
            entries = row.tolist()
            n_entries = len(row)
        instances.append(pml.VectorInstance(
            id=sv_ids[position],
            REAL_SparseArray=pml.REAL_SparseArray(
                n=len(field_refs),
                Indices=list(range(1, n_entries + 1)),
                REAL_Entries=entries
            )
        ))
    return pml.VectorDictionary(VectorFields=vector_fields, VectorInstance=instances)


def get_kernel_type(model):
    """
    Builds the kernel element matching the kernel of an SVM model.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.

    Returns
    -------
    kernel_kwargs : Dictionary
        A single-entry dictionary mapping the PMML kernel element name to
        the kernel object of the SVM model.

    Raises
    ------
    NotImplementedError
        When the kernel is not one of 'linear', 'poly', 'rbf' or 'sigmoid'.

    """
    kernel = model.kernel
    if kernel == 'linear':
        return {'LinearKernelType': pml.LinearKernelType(description='Linear Kernel Type')}
    if kernel == 'poly':
        return {'PolynomialKernelType': pml.PolynomialKernelType(
            description='Polynomial Kernel type',
            gamma="{:.16f}".format(model._gamma),
            coef0="{:.16f}".format(model.coef0),
            degree=model.degree)}
    if kernel == 'rbf':
        return {'RadialBasisKernelType': pml.RadialBasisKernelType(
            description='Radial Basis Kernel Type',
            gamma="{:.16f}".format(model._gamma))}
    if kernel == 'sigmoid':
        return {'SigmoidKernelType': pml.SigmoidKernelType(
            description='Sigmoid Kernel Type',
            gamma="{:.16f}".format(model._gamma),
            coef0="{:.16f}".format(model.coef0))}
    raise NotImplementedError("{} kernel is not implemented!".format(kernel))


def get_supportVectorMachine(model):
    """
    Builds the SupportVectorMachine elements of an SVM model.

    Models named 'SVR' or 'OneClassSVM' yield a single machine that lists all
    support vectors and dual coefficients. Any other model yields one machine
    per pair of classes, combining the support vectors and dual coefficients
    of the two classes and taking the intercepts in pair order.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.

    Returns
    -------
    support_vector_machines : List
        Nyoka's SupportVectorMachine objects

    """
    machines = []
    if model.__class__.__name__ in ['SVR','OneClassSVM']:
        sv_refs = pml.SupportVectors(
            SupportVector=[pml.SupportVector(vectorId=sv) for sv in model.support_]
        )
        intercept = model.intercept_[0]
        if model.dual_coef_.__class__.__name__ != 'csr_matrix':
            raw_coefs = [num for row in model.dual_coef_ for num in row]
        else:
            # a sparse matrix exposes its stored values through .data
            raw_coefs = model.dual_coef_.data
        coef_elems = [pml.Coefficient(value="{:.16f}".format(num)) for num in raw_coefs]
        machines.append(pml.SupportVectorMachine(
            SupportVectors=sv_refs,
            Coefficients=pml.Coefficients(absoluteValue=intercept, Coefficient=coef_elems)
        ))
        return machines

    import numpy as np
    # offsets[k]:offsets[k + 1] is the slice of support vectors of class k
    offsets = np.cumsum(np.hstack([[0], model.n_support_]))
    n_class = model.dual_coef_.shape[0] + 1
    intercept_pos = 0
    for class1 in range(n_class):
        start1, stop1 = offsets[class1], offsets[class1 + 1]
        sv1 = model.support_[start1:stop1]
        for class2 in range(class1 + 1, n_class):
            start2, stop2 = offsets[class2], offsets[class2 + 1]
            sv2 = model.support_[start2:stop2]
            alpha1 = model.dual_coef_[class2 - 1, start1:stop1]
            alpha2 = model.dual_coef_[class1, start2:stop2]

            sv_elems = [pml.SupportVector(vectorId=sv) for sv in list(sv1) + list(sv2)]
            coef_elems = [
                pml.Coefficient(value="{:.16f}".format(coef))
                for coef in list(alpha1) + list(alpha2)
            ]
            intercept = model.intercept_[intercept_pos]
            intercept_pos += 1

            # Target and alternate category are swapped between the
            # two-class case and the multi-class case.
            if len(model.classes_) == 2:
                target, alternate = model.classes_[class1], model.classes_[class2]
            else:
                target, alternate = model.classes_[class2], model.classes_[class1]
            machines.append(
                pml.SupportVectorMachine(
                    targetCategory=target,
                    alternateTargetCategory=alternate,
                    SupportVectors=pml.SupportVectors(SupportVector=sv_elems),
                    Coefficients=pml.Coefficients(
                        absoluteValue="{:.16f}".format(intercept),
                        Coefficient=coef_elems
                    )
                )
            )
    return machines


def get_tree_models(model, derived_col_names, col_names, target_name, mining_imp_val,categoric_values,model_name):
    """
    Assembles the TreeModel PMML element for a single tree estimator.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; the estimator's class name is used when it is
        empty or None.

    Returns
    -------
    tree_models : List
        A one-element list holding Nyoka's TreeModel object

    """
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    resolved_name = model_name or model.__class__.__name__
    root_node = get_node(model, derived_col_names)
    tree_element = pml.TreeModel(
        modelName=resolved_name,
        Node=root_node,
        **shared_kwargs
    )
    return [tree_element]


def get_neural_models(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values,model_name):
    """
    Assembles the NeuralNetwork PMML element for a neural network estimator.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; the estimator's class name is used when it is
        empty or None.

    Returns
    -------
    neural_model : List
        A one-element list holding Nyoka's NeuralNetwork object

    """
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    layers, outputs = get_neural_layer(model, derived_col_names, target_name)
    resolved_name = model_name or model.__class__.__name__
    activation = get_funct(model)
    inputs = get_neuron_input(derived_col_names)
    network = pml.NeuralNetwork(
        modelName=resolved_name,
        threshold='0',
        altitude='1.0',
        activationFunction=activation,
        NeuralInputs=inputs,
        NeuralLayer=layers,
        NeuralOutputs=outputs,
        **shared_kwargs
    )
    return [network]


def get_funct(sk_model):
    """
    Gives the activation function name of a neural network model.

    Parameters
    ----------
    sk_model :
        A Scikit-learn model instance.

    Returns
    -------
    a_fn : String
        The model's `activation` value, except that 'relu' is replaced by
        the rectifier name from NN_ACTIVATION_FUNCTION.

    """
    activation = sk_model.activation
    return NN_ACTIVATION_FUNCTION.RECTIFIER.value if activation == 'relu' else activation


def get_regrs_models(model, derived_col_names, col_names, target_name, mining_imp_val, categoric_values,model_name):
    """
    Assembles the RegressionModel PMML element for a linear model.

    Every model other than 'LinearRegression' and 'LinearSVR' is given the
    logistic normalization method.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    derived_col_names : List
        Contains column names after preprocessing.
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; the estimator's class name is used when it is
        empty or None.

    Returns
    -------
    regrs_models : List
        A one-element list holding Nyoka's RegressionModel object
    """
    shared_kwargs = get_model_kwargs(model, col_names, target_name, mining_imp_val, categoric_values)
    is_plain_regressor = model.__class__.__name__ in ['LinearRegression','LinearSVR']
    if not is_plain_regressor:
        shared_kwargs['normalizationMethod'] = REGRESSION_NORMALIZATION_METHOD.LOGISTIC.value
    resolved_name = model_name or model.__class__.__name__
    tables = get_regrs_tabl(model, derived_col_names, target_name, categoric_values)
    regression = pml.RegressionModel(
        modelName=resolved_name,
        RegressionTable=tables,
        **shared_kwargs
    )
    return [regression]


def get_regrs_tabl(model, feature_names, target_name, categoric_values):
    """
    Builds the RegressionTable elements of a regression model.

    A model with an `intercept_` gets one table with its intercept and one
    NumericPredictor per feature, plus an empty table for the first class
    when the mining function is not regression. A model without an
    `intercept_` gets pass-through tables (coefficient 1.0, intercept 0.0)
    that map the given features onto the classes.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    feature_names : List
        Contains column names after preprocessing.
    target_name : String
        Name of the Target column.
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    merge : List
        Nyoka's RegressionTable object

    """
    tables = []

    if not hasattr(model, 'intercept_'):
        if len(model.classes_) == 2:
            # Two classes: the first feature feeds the last class and the
            # first class receives an empty reference table.
            tables.append(
                pml.RegressionTable(
                    NumericPredictor=[pml.NumericPredictor(coefficient='1.0', name=feature_names[0])],
                    intercept='0.0',
                    targetCategory=str(model.classes_[-1])
                )
            )
            tables.append(
                pml.RegressionTable(intercept='0.0', targetCategory=str(model.classes_[0]))
            )
        else:
            # Otherwise the feature at each position feeds the class at the
            # same position.
            for position, feature in enumerate(feature_names):
                tables.append(
                    pml.RegressionTable(
                        NumericPredictor=[pml.NumericPredictor(coefficient='1.0', name=feature)],
                        intercept='0.0',
                        targetCategory=str(model.classes_[position])
                    )
                )
        return tables

    import numpy as np
    mining_function = get_mining_func(model)
    intercept = model.intercept_
    coefficients = model.coef_
    scalar_intercept = not hasattr(intercept, '__iter__')
    if scalar_intercept or model.__class__.__name__ in ['LinearRegression','LinearSVR']:
        intercept = np.array([intercept])
        target_classes = [target_name]
        flat = np.ravel(coefficients)
        coefficients = flat.reshape(1, flat.shape[0])
        target_category = None
    else:
        target_classes = model.classes_
        target_category = target_classes[len(target_classes) - 1]

    # Only the first row of a two-dimensional coefficient array is exported.
    if hasattr(coefficients[0], "__len__"):
        coefficients = coefficients[0]
    predictors = [
        pml.NumericPredictor(name=feature, coefficient="{:.16f}".format(coefficients[position]))
        for position, feature in enumerate(feature_names)
    ]
    tables.append(
        pml.RegressionTable(
            intercept="{:.16f}".format(intercept.item()),
            targetCategory=target_category,
            NumericPredictor=predictors
        )
    )
    if mining_function != MINING_FUNCTION.REGRESSION.value:
        tables.append(
            pml.RegressionTable(
                intercept="0.0",
                targetCategory=target_classes[0]
            )
        )
    return tables



def get_node(model, features_names, main_model=None):

    """
    It returns the root Node element of the tree, with every descendant attached.

    Parameters
    ----------
    model :
        An instance of the estimator of the tree object (must expose ``tree_``).
    features_names : List
        Contains the list of feature/column name.
    main_model :
        A Scikit-learn model instance. When it is a RandomForestClassifier,
        its ``classes_`` are used for the leaf labels instead of ``model.classes_``.

    Returns
    -------
    Node :
        Nyoka's Node object for the root, containing all the underlying Nodes.

    """
    tree = model.tree_
    node_samples = tree.n_node_samples
    model_cls_name = model.__class__.__name__
    # ExtraTreeRegressor nodes carry their depth so that leaves can be scored
    # with avgPathLength (presumably the Isolation Forest case - confirm with callers).
    tracks_depth = model_cls_name == "ExtraTreeRegressor"
    if main_model and main_model.__class__.__name__ == 'RandomForestClassifier':
        classes = main_model.classes_
    elif hasattr(model,'classes_'):
        classes = model.classes_
    tree_leaf = -1

    def _getNode(idx, parent=None, cond=None):
        # Build the node at position `idx`; `cond` is the predicate that leads
        # to it (the root has none and gets a True predicate instead).
        node = pml.Node(id=idx, recordCount=float(node_samples[idx]))
        if cond:
            node.SimplePredicate = cond
        else:
            node.True_ = pml.True_()

        if tree.children_left[idx] != tree_leaf:
            fieldName = features_names[tree.feature[idx]]
            prnt = None
            if tracks_depth:
                prnt = parent + 1
            # The threshold is exported unrounded. The previous rounding attempt
            # compared a str with an int inside min(), which always raised and
            # was silently swallowed, so this is the value that was always used.
            thresh = tree.threshold[idx]
            simplePredicate = pml.SimplePredicate(
                field=fieldName,
                operator=SIMPLE_PREDICATE_OPERATOR.LESS_OR_EQUAL.value,
                value=str(thresh))
            left_child = _getNode(tree.children_left[idx], prnt, simplePredicate)
            simplePredicate = pml.SimplePredicate(
                field=fieldName,
                operator=SIMPLE_PREDICATE_OPERATOR.GREATER_THAN.value,
                value=str(thresh))
            right_child = _getNode(tree.children_right[idx], prnt, simplePredicate)
            node.add_Node(left_child)
            node.add_Node(right_child)
        else:
            nodeValue = list(tree.value[idx][0])
            lSum = float(sum(nodeValue))
            if model_cls_name == 'DecisionTreeClassifier':
                # One ScoreDistribution per class; the score is the most probable class.
                probs = [x / lSum for x in nodeValue]
                score_dst = []
                for i, prob in enumerate(probs):
                    score_dst.append(pml.ScoreDistribution(confidence=prob,
                                                           recordCount=float(nodeValue[i]),
                                                           value=classes[i]))
                node.ScoreDistribution = score_dst
                node.score = classes[probs.index(max(probs))]
            elif tracks_depth:
                # Leaf score = depth reached + average path length for the leaf's sample count.
                nd_sam = node_samples[int(idx)]
                node.score = "{:.16f}".format(parent + avgPathLength(nd_sam))
            else:
                node.score = "{:.16f}".format(lSum)
        return node

    if tracks_depth:
        # Depth counting starts at 0 for the root.
        return _getNode(0, 0)
    else:
        return _getNode(0)

def avgPathLength(n):
    """
    Computes the average path length used for Isolation forest models

    Parameters
    ----------
    n : int
        Number of samples

    Returns
    -------
    The average path length; 1.0 whenever `n` is not greater than 1
    """
    # 0.57721566 is the (truncated) Euler-Mascheroni constant.
    euler_gamma = 0.57721566
    if n <= 1.0:
        path_length = 1.0
    else:
        log_term = math.log(n - 1.0) + euler_gamma
        ratio_term = (n - 1.0) / n
        path_length = 2.0 * log_term - 2.0 * ratio_term
    return path_length


def get_output(model, target_name):

    """
    It builds the Output element of the model.

    Models without a target get a single continuous `predicted` field.
    Classifiers get one probability field per class followed by the
    predicted-value field; every other model gets a continuous
    predicted-value field.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    target_name : String
        Name of the Target column.

    Returns
    -------
    Output :
        Nyoka's Output object

    """

    mining_func = get_mining_func(model)

    if not has_target(model):
        untargeted = pml.OutputField(
            name='predicted',
            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
            optype=OPTYPE.CONTINUOUS.value,
            dataType=DATATYPE.DOUBLE.value
        )
        return pml.Output(OutputField=[untargeted])

    alt_target_name = 'predicted_' + target_name

    if mining_func != MINING_FUNCTION.CLASSIFICATION.value:
        continuous_prediction = pml.OutputField(
            name=alt_target_name,
            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
            optype=OPTYPE.CONTINUOUS.value,
            dataType=DATATYPE.DOUBLE.value
        )
        return pml.Output(OutputField=[continuous_prediction])

    # Classification: probabilities first, then the predicted label.
    output_fields = [
        pml.OutputField(
            name='probability_' + str(label),
            feature=RESULT_FEATURE.PROBABILITY.value,
            optype=OPTYPE.CONTINUOUS.value,
            dataType=DATATYPE.DOUBLE.value,
            value=str(label)
        )
        for label in model.classes_
    ]
    output_fields.append(pml.OutputField(
        name=alt_target_name,
        feature=RESULT_FEATURE.PREDICTED_VALUE.value,
        optype=OPTYPE.CATEGORICAL.value,
        dataType=get_dtype(model.classes_[0])
    ))
    return pml.Output(OutputField=output_fields)




def get_mining_func(model):
    """
    It determines the name of the mining function of the model.

    A model whose `classes_` attribute is a numpy array is a classifier.
    A model without `classes_` but with `n_clusters` is a clustering model.
    Everything else is reported as regression.

    Parameters
    ----------
    model :
        A Scikit-learn model instance.

    Returns
    -------
    func_name : String
        Returns the function name of the model

    """
    import numpy as np

    if hasattr(model, 'classes_'):
        if isinstance(model.classes_, np.ndarray):
            return MINING_FUNCTION.CLASSIFICATION.value
        return MINING_FUNCTION.REGRESSION.value

    if hasattr(model, 'n_clusters'):
        return MINING_FUNCTION.CLUSTERING.value
    return MINING_FUNCTION.REGRESSION.value


def get_mining_schema(model, feature_names, target_name, mining_imp_val, categoric_values):

    """
    It builds the Mining Schema of the model.

    Fields are emitted in this order: imputed features, categorical
    attributes, the remaining features, and finally the target (skipped for
    KMeans, IsolationForest and OneClassSVM).

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    feature_names : List
        Contains the list of feature/column name.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    MiningSchema :
        Nyoka's MiningSchema object

    """
    if mining_imp_val:
        mining_attributes, mining_strategy, mining_replacement_val = (
            mining_imp_val[0], mining_imp_val[1], mining_imp_val[2])

    continuous_optype = OPTYPE.CONTINUOUS.value
    active_usage = FIELD_USAGE_TYPE.ACTIVE.value
    target_pmml_utype = FIELD_USAGE_TYPE.TARGET.value

    mining_func = get_mining_func(model)
    if mining_func == MINING_FUNCTION.CLASSIFICATION.value:
        target_pmml_optype = OPTYPE.CATEGORICAL.value
    elif mining_func == MINING_FUNCTION.REGRESSION.value:
        target_pmml_optype = OPTYPE.CONTINUOUS.value

    mining_flds = []
    emitted_names = []

    # Features touched by an imputer carry its strategy and replacement value.
    if mining_imp_val:
        for group_idx, group in enumerate(mining_attributes):
            for feat_name in feature_names:
                if feat_name not in group or feat_name in emitted_names:
                    continue
                position = group.index(feat_name)
                mining_flds.append(pml.MiningField(
                    name=str(feat_name),
                    optype=continuous_optype,
                    missingValueReplacement=mining_replacement_val[group_idx][position],
                    missingValueTreatment=mining_strategy[group_idx],
                    usageType=active_usage))
                emitted_names.append(feat_name)

    # Categorical attributes are always added as active categorical fields.
    if len(categoric_values) > 0:
        for cls_attr in categoric_values[1]:
            mining_flds.append(pml.MiningField(
                name=cls_attr,
                usageType=FIELD_USAGE_TYPE.ACTIVE.value,
                optype=OPTYPE.CATEGORICAL.value
            ))
            emitted_names.append(cls_attr)

    # Whatever has not been emitted yet is a plain continuous active field.
    for feat_name in feature_names:
        if feat_name in emitted_names:
            continue
        mining_flds.append(pml.MiningField(name=str(feat_name),
                                           optype=continuous_optype,
                                           usageType=active_usage))

    if model.__class__.__name__ not in ['KMeans', 'IsolationForest', 'OneClassSVM']:
        mining_flds.append(pml.MiningField(name=target_name,
                                           optype=target_pmml_optype,
                                           usageType=target_pmml_utype))
    return pml.MiningSchema(MiningField=mining_flds)


def get_neuron_input(feature_names):

    """
    It builds the Neural Input element, one NeuralInput per feature.

    Each input is identified by the feature name and wraps a continuous,
    double-typed DerivedField that references the feature.

    Parameters
    ----------
    feature_names : List
        Contains the list of feature/column name.

    Returns
    -------
    neural_input_element :
        Returns Nyoka's NeuralInput object

    """
    inputs = [
        pml.NeuralInput(
            id=str(name),
            DerivedField=pml.DerivedField(
                optype=OPTYPE.CONTINUOUS.value,
                dataType=DATATYPE.DOUBLE.value,
                FieldRef=pml.FieldRef(field=str(name))
            )
        )
        for name in feature_names
    ]
    return pml.NeuralInputs(NeuralInput=inputs, numberOfInputs=str(len(inputs)))


def get_neural_layer(model, feature_names, target_name):

    """
    It returns the Neural Layer and Neural Output element.

    Parameters
    ----------
    model :
        A Scikit-learn model instance (reads `coefs_`, `intercepts_`,
        `hidden_layer_sizes` and, for classifiers, `classes_` and `out_activation_`).
    feature_names : List
        Contains the list of feature/column name.
    target_name : String
        Name of the Target column.

    Returns
    -------
    all_neuron_layer : List
        Nyoka's NeuralLayer object

    neural_output_element :
        Nyoka's NeuralOutput object

    """
    weight = model.coefs_
    bias = model.intercepts_
    last_layer = bias[-1]
    hidden_layer_sizes = model.hidden_layer_sizes
    # scikit-learn also accepts a bare int for a single hidden layer;
    # list() on an int would raise TypeError, so wrap it first.
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layers = list(hidden_layer_sizes)
    hidden_layers.append(len(last_layer))
    all_neuron_layer = list()
    input_features = feature_names

    # One NeuralLayer per entry; each neuron is connected to every output of
    # the previous layer (the raw features for the first layer).
    for count, layer_size in enumerate(hidden_layers):
        neuron = list()
        neuron_id = list()
        for count1 in range(layer_size):
            con = list()
            for count2, source in enumerate(input_features):
                con.append(pml.Con(from_ = source, weight = format(weight[count][count2][count1])))
            # Neuron ids are the layer index followed by the neuron index.
            neuron.append(pml.Neuron(id = str(count)+str(count1), bias = format(bias[count][count1]),Con = con))
            neuron_id.append(str(count)+str(count1))
        all_neuron_layer.append(pml.NeuralLayer(Neuron = neuron))
        input_features = neuron_id

    all_neuron_layer[-1].activationFunction = NN_ACTIVATION_FUNCTION.IDENTITY.value
    if hasattr(model, "classes_"):
        if len(model.classes_) == 2:
            # Binary case: the single output neuron is fed through a logistic
            # layer, then split into two neurons computing 1 - p ('false')
            # and p ('true').
            bias1=[1.0,0.0]
            weight1=[-1.0,1.0]
            linear = ['logistic/1']
            i_d = ['false', 'true']
            con = [pml.Con(from_ = input_features[0], weight = 1.0)]
            neuron = [pml.Neuron(id = linear[0], bias = ('0.0'), Con = con)]
            all_neuron_layer.append(pml.NeuralLayer(activationFunction = NN_ACTIVATION_FUNCTION.LOGISTIC.value, Neuron = neuron))
            neuron = list()
            for num in range(2):
                con = [pml.Con(from_ = linear[0], weight = format(weight1[num]))]
                neuron.append(pml.Neuron(id = i_d[num], bias = format(bias1[num]), Con = con))
            all_neuron_layer.append(pml.NeuralLayer(activationFunction = NN_ACTIVATION_FUNCTION.IDENTITY.value, Neuron = neuron))
            input_features = i_d
        else:
            all_neuron_layer[-1].normalizationMethod = model.out_activation_

        # One NeuralOutput per class, mapped onto the final layer's neurons.
        neural_output = list()
        for count, values in enumerate(model.classes_):
            norm_discrete = pml.NormDiscrete(field = target_name, value = str(values))
            derived_flds = pml.DerivedField(optype = OPTYPE.CATEGORICAL.value, dataType = DATATYPE.DOUBLE.value,
                                    NormDiscrete = norm_discrete)
            if len(input_features)==1:
                class_node = pml.NeuralOutput(outputNeuron = input_features[0], DerivedField = derived_flds)
            else:
                class_node = pml.NeuralOutput(outputNeuron = input_features[count],DerivedField = derived_flds)
            neural_output.append(class_node)
        neural_output_element = pml.NeuralOutputs(numberOfOutputs = None, Extension = None,
                                                    NeuralOutput = neural_output)
    else:
        # No classes_: a single continuous output tied to the first neuron of the last layer.
        neural_output = list()
        fieldRef = pml.FieldRef(field = target_name)
        derived_flds = pml.DerivedField(optype = OPTYPE.CONTINUOUS.value, dataType = DATATYPE.DOUBLE.value, FieldRef = fieldRef)
        class_node = pml.NeuralOutput(outputNeuron = input_features[0], DerivedField = derived_flds)
        neural_output.append(class_node)
        neural_output_element = pml.NeuralOutputs(numberOfOutputs = None, Extension = None, NeuralOutput = neural_output)

    return all_neuron_layer, neural_output_element


def get_super_cls_names(model_inst):
    """
    It collects the names of the model's class and of all its ancestors.

    Parameters
    -------
    model_inst :
        Instance of the scikit-learn model

    Returns
    -------
    parents : Set
        Names of the instance's class and every class reachable through
        `__bases__`.

    """
    parents = set()
    # Walk the inheritance graph with an explicit stack instead of recursion.
    pending = [model_inst.__class__]
    while pending:
        current = pending.pop()
        parents.add(current.__name__)
        pending.extend(current.__bases__)
    return parents


from nyoka import metadata

def get_header(description):

    """
    It builds the Header element of the pmml.

    Parameters
    ----------
    description : String
        Description of the model; "Default Description" is used when it is
        empty or None.

    Returns
    -------
    header :
        Returns Nyoka's Header object.

    """
    if not description:
        description = "Default Description"
    return pml.Header(
        copyright="Copyright (c) 2018 Software AG",
        description=description,
        Timestamp=pml.Timestamp(datetime.now()),
        Application=pml.Application(name="Nyoka", version=metadata.__version__)
    )


def get_dtype(feat_value):
    """
    It maps a value to a PMML data type based on the name of its class.

    The class name is searched for 'float', then 'int', then 'str';
    the first match decides the result.

    Parameters
    ----------
    feat_value :
        The value whose data type is to be determined.

    Returns
    -------
        The matching data type, or None when the class name contains none
        of the three markers.

    """
    type_name = feat_value.__class__.__name__
    if 'float' in type_name:
        pmml_type = DATATYPE.DOUBLE.value
    elif 'int' in type_name:
        pmml_type = DATATYPE.INTEGER.value
    elif 'str' in type_name:
        pmml_type = DATATYPE.STRING.value
    else:
        pmml_type = None
    return pmml_type

def get_data_dictionary(model, feature_names, target_name, categoric_values):

    """
    It builds the Data Dictionary element.

    Categorical attributes come first (with their allowed values), then the
    remaining features as continuous doubles, then the target field (skipped
    for KMeans, IsolationForest and OneClassSVM).

    Parameters
    ----------
    model :
        A Scikit-learn model instance.
    feature_names : List
        Contains the list of feature/column name.
    target_name : String
        Name of the Target column.
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    data_dict :
        Returns Nyoka's DataDictionary object

    """
    categoric_labels = ()
    categoric_feature_name = list()
    if categoric_values:
        categoric_labels = categoric_values[0]
        categoric_feature_name = categoric_values[1]

    target_attr_values = []
    mining_func = get_mining_func(model)
    if mining_func == MINING_FUNCTION.CLASSIFICATION.value:
        target_pmml_optype = OPTYPE.CATEGORICAL.value
        target_pmml_dtype = get_dtype(model.classes_[0])
        target_attr_values = model.classes_.tolist()
    elif mining_func == MINING_FUNCTION.REGRESSION.value:
        target_pmml_optype = OPTYPE.CONTINUOUS.value
        target_pmml_dtype = DATATYPE.DOUBLE.value

    data_fields = []

    # Categorical attributes: the data type is taken from the first label,
    # falling back to string when there are no labels.
    for class_list, attr_for_class in zip(categoric_labels, categoric_feature_name):
        if class_list:
            category_flds = pml.DataField(name=str(attr_for_class),
                                          optype=OPTYPE.CATEGORICAL.value,
                                          dataType=get_dtype(class_list[0]))
            for label in class_list:
                category_flds.add_Value(pml.Value(value=str(label)))
        else:
            category_flds = pml.DataField(name=str(attr_for_class),
                                          optype=OPTYPE.CATEGORICAL.value,
                                          dataType=DATATYPE.STRING.value)
        data_fields.append(category_flds)

    # Every non-categorical feature is a continuous double.
    for feat_name in feature_names:
        if feat_name in categoric_feature_name:
            continue
        data_fields.append(pml.DataField(name=str(feat_name),
                                         optype=OPTYPE.CONTINUOUS.value,
                                         dataType=DATATYPE.DOUBLE.value))

    if model.__class__.__name__ not in ['KMeans', 'IsolationForest', 'OneClassSVM']:
        target_field = pml.DataField(name=str(target_name), optype=target_pmml_optype,
                                     dataType=target_pmml_dtype)
        for class_value in target_attr_values:
            target_field.add_Value(pml.Value(value=str(class_value)))
        data_fields.append(target_field)

    return pml.DataDictionary(numberOfFields=len(data_fields), DataField=data_fields)


def has_target(model):
    """
    Tells whether a given model has a target or not

    Parameters
    ----------
    model :
        Scikit-learn's model object

    Returns
    -------
    Boolean value; False only for OneClassSVM and IsolationForest
    """
    model_cls_name = model.__class__.__name__
    return model_cls_name not in ('OneClassSVM', 'IsolationForest')