
118 distilbert api #132

Merged: 33 commits, Aug 10, 2023

Commits
8556d2b
started basic framework for dockerizing sentiment endpoint
yiwen-h Jul 3, 2023
243c69b
working docker container - cant use Alpine
yiwen-h Jul 3, 2023
3ad8e6b
got docker container to mount data folder, accept filename as argument
yiwen-h Jul 4, 2023
64fe780
json input file now deleted if NOT run locally
yiwen-h Jul 14, 2023
86414f2
Predictions now outputted as json file in data_out folder
yiwen-h Jul 14, 2023
b726388
added label to dockerfile
yiwen-h Jul 18, 2023
6ab83a3
added most tests for docker_run
yiwen-h Jul 18, 2023
da605dc
added larger json file - about 8000 comments
yiwen-h Jul 19, 2023
e853659
wrote get_y_score function
yiwen-h Aug 4, 2023
9f135e6
prediction dfs now include probabilities as well
yiwen-h Aug 4, 2023
439dece
prediction dfs now include probabilities for sklearn multilabel
yiwen-h Aug 4, 2023
78632fc
added macro roc auc score to model summary
yiwen-h Aug 4, 2023
db180e0
write_model_preds now uses probabilities from predict_multilabel df o…
yiwen-h Aug 4, 2023
6eb5259
added model_performance.additional_analysis which calculates confusio…
yiwen-h Aug 4, 2023
42b5ec6
confusion matrix info and roc_auc_score now in model analysis
yiwen-h Aug 7, 2023
368f9d8
Replaced macro roc_aoc score with average_precision_score in perf ana…
yiwen-h Aug 9, 2023
bfb6dca
renamed "support" column to be more userfriendly
yiwen-h Aug 9, 2023
40a3cfd
fixed ruff complaining about == instead of isinstance in tests
yiwen-h Aug 9, 2023
f1256ed
fixed ruff complaining about == instead of isinstance in test_factory…
yiwen-h Aug 9, 2023
301c8b7
some broken dependencies causing test to fail, trying to fix pyprojec…
yiwen-h Aug 9, 2023
c815088
Merge pull request #131 from CDU-data-science-team/126_ROC
yiwen-h Aug 9, 2023
18bf54d
started basic framework for dockerizing sentiment endpoint
yiwen-h Jul 3, 2023
d4f0831
working docker container - cant use Alpine
yiwen-h Jul 3, 2023
784c306
got docker container to mount data folder, accept filename as argument
yiwen-h Jul 4, 2023
98c1846
json input file now deleted if NOT run locally
yiwen-h Jul 14, 2023
72c5a24
Predictions now outputted as json file in data_out folder
yiwen-h Jul 14, 2023
1a30581
added label to dockerfile
yiwen-h Jul 18, 2023
dfe0907
added most tests for docker_run
yiwen-h Jul 18, 2023
f3549b0
added larger json file - about 8000 comments
yiwen-h Jul 19, 2023
12a0f79
added cache removal to dockerfile in bid to reduce size
yiwen-h Jul 19, 2023
eb94f9a
fewer layers, slim-debian, for smaller size
yiwen-h Jul 19, 2023
866154c
updated dockerfile to reduce size
yiwen-h Aug 9, 2023
ca2e483
mocking load_model in test_predict_sentiment
yiwen-h Aug 9, 2023
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -9,6 +9,7 @@ repos:
args: [ "--maxkb=750000" ]
- id: end-of-file-fixer
name: Check for a blank line at the end of scripts (auto-fixes)
exclude: 'json'
- id: trailing-whitespace
name: Check for trailing whitespaces (auto-fixes)
- repo: https://github.com/pycqa/isort
14 changes: 14 additions & 0 deletions Dockerfile
@@ -0,0 +1,14 @@
FROM python:3.10.12-slim-bookworm
VOLUME /data

COPY docker-requirements.txt requirements.txt
RUN pip install --upgrade pip setuptools \
&& pip install -r requirements.txt \
&& rm -rf /root/.cache

COPY api/bert_sentiment bert_sentiment
COPY --chmod=755 docker_run.py docker_run.py

LABEL org.opencontainers.image.source=https://github.com/cdu-data-science-team/pxtextmining

ENTRYPOINT ["python3", "docker_run.py"]
4 changes: 4 additions & 0 deletions docker-requirements.txt
@@ -0,0 +1,4 @@
pandas==1.5.3 ; python_version >= "3.8" and python_version < "3.11"
scikit-learn==1.0.2 ; python_version >= "3.8" and python_version < "3.11"
tensorflow==2.12.0 ; python_version >= "3.8" and python_version < "3.11"
pxtextmining==0.5.4
22 changes: 22 additions & 0 deletions docker_data/data_in/file_01.json
@@ -0,0 +1,22 @@
[
{
"comment_id": "1",
"comment_text": "Nurse was great.",
"question_type": "what_good"
},
{
"comment_id": "2",
"comment_text": "The ward was freezing.",
"question_type": "could_improve"
},
{
"comment_id": "3",
"comment_text": "",
"question_type": "nonspecific"
},
{
"comment_id": "4",
"comment_text": "Thank you so much",
"question_type": "nonspecific"
}
]
1 change: 1 addition & 0 deletions docker_data/data_in/file_02.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docker_data/data_out/file_01.json
@@ -0,0 +1 @@
[{"comment_id": "1", "sentiment": 2.0}, {"comment_id": "2", "sentiment": 4.0}, {"comment_id": "3", "sentiment": "Labelling not possible"}, {"comment_id": "4", "sentiment": 1.0}]
105 changes: 105 additions & 0 deletions docker_run.py
@@ -0,0 +1,105 @@
import argparse
import json
import os

import pandas as pd
from tensorflow.keras.saving import load_model

from pxtextmining.factories.factory_predict_unlabelled_text import (
predict_sentiment_bert,
)


def load_sentiment_model():
model_path = "bert_sentiment"
if not os.path.exists(model_path):
model_path = os.path.join("api", model_path)
loaded_model = load_model(model_path)
return loaded_model


def get_sentiment_predictions(
text_to_predict, loaded_model, preprocess_text, additional_features
):
predictions = predict_sentiment_bert(
text_to_predict,
loaded_model,
preprocess_text=preprocess_text,
additional_features=additional_features,
)
return predictions


def predict_sentiment(items):
"""Accepts comment ids, comment text and question type as JSON in a POST request. Makes predictions using trained Tensorflow Keras model.

Args:
items (List[ItemIn]): JSON list of dictionaries with the following compulsory keys:
- `comment_id` (str)
- `comment_text` (str)
- `question_type` (str)
The 'question_type' must be one of three values: 'nonspecific', 'what_good', and 'could_improve'.
For example, `[{'comment_id': '1', 'comment_text': 'Thank you', 'question_type': 'what_good'},
{'comment_id': '2', 'comment_text': 'Food was cold', 'question_type': 'could_improve'}]`

Returns:
        (list[dict]): List of dictionaries, each with keys `comment_id` and `sentiment`. Comments that could not be labelled have `sentiment` set to "Labelling not possible".
"""

# Process received data
loaded_model = load_sentiment_model()
df = pd.DataFrame([i for i in items], dtype=str)
df_newindex = df.set_index("comment_id")
if df_newindex.index.duplicated().sum() != 0:
raise ValueError("comment_id must all be unique values")
df_newindex.index.rename("Comment ID", inplace=True)
text_to_predict = df_newindex[["comment_text", "question_type"]]
text_to_predict = text_to_predict.rename(
columns={"comment_text": "FFT answer", "question_type": "FFT_q_standardised"}
)
# Make predictions
preds_df = get_sentiment_predictions(
text_to_predict, loaded_model, preprocess_text=False, additional_features=True
)
# Join predicted labels with received data
preds_df["comment_id"] = preds_df.index.astype(str)
merged = pd.merge(df, preds_df, how="left", on="comment_id")
merged["sentiment"] = merged["sentiment"].fillna("Labelling not possible")
return_dict = merged[["comment_id", "sentiment"]].to_dict(orient="records")
return return_dict


def parse_args():
"""Parse command line arguments"""
parser = argparse.ArgumentParser()
parser.add_argument(
"json_file",
nargs=1,
help="Name of the json file",
)
parser.add_argument(
"--local-storage",
"-l",
action="store_true",
help="Use local storage (instead of Azure)",
)
args = parser.parse_args()

return args


def main():
args = parse_args()
json_file = os.path.join("data", "data_in", args.json_file[0])
with open(json_file, "r") as jf:
json_in = json.load(jf)
if not args.local_storage:
os.remove(json_file)
json_out = predict_sentiment(json_in)
out_path = os.path.join("data", "data_out", args.json_file[0])
with open(out_path, "w+") as jf:
json.dump(json_out, jf)


if __name__ == "__main__":
main()
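
A minimal usage sketch for `predict_sentiment` (assuming the `bert_sentiment` model directory and pxtextmining are available locally); inside the container, `main()` reads the same structure from `data/data_in/<file>.json` and writes the result to `data/data_out/<file>.json`. The input and expected output below are taken from `docker_data/data_in/file_01.json` and `docker_data/data_out/file_01.json`; actual sentiment values depend on the trained model:

from docker_run import predict_sentiment

items = [
    {"comment_id": "1", "comment_text": "Nurse was great.", "question_type": "what_good"},
    {"comment_id": "3", "comment_text": "", "question_type": "nonspecific"},
]
preds = predict_sentiment(items)
# e.g. [{"comment_id": "1", "sentiment": 2.0},
#       {"comment_id": "3", "sentiment": "Labelling not possible"}]
print(preds)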
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default.

68 changes: 64 additions & 4 deletions pxtextmining/factories/factory_model_performance.py
@@ -132,7 +132,6 @@ def get_multilabel_metrics(
enhance_with_rules=enhance_with_rules,
already_encoded=already_encoded,
)
y_pred = np.array(y_pred_df)[:, :-1].astype("int64")
elif model_type == "sklearn":
y_pred_df = predict_multilabel_sklearn(
x_test,
@@ -143,17 +142,28 @@
enhance_with_probs=True,
enhance_with_rules=enhance_with_rules,
)
y_pred = np.array(y_pred_df)[:, :-1].astype("int64")
else:
raise ValueError(
'Please select valid model_type. Options are "bert" or "sklearn"'
)
y_pred = np.array(y_pred_df[labels]).astype("int64")
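    # y_pred_df carries one-hot label columns plus the "labels" and
    # 'Probability of ...' columns, so the label columns are selected by name.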
# Calculate various metrics
model_metrics["exact_accuracy"] = metrics.accuracy_score(y_test, y_pred)
model_metrics["hamming_loss"] = metrics.hamming_loss(y_test, y_pred)
model_metrics["macro_jaccard_score"] = metrics.jaccard_score(
y_test, y_pred, average="macro"
)
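    # y_probs holds the 'Probability of "<label>"' columns, one per target
    # label, i.e. an (n_samples, n_labels) array of scores for roc_auc_score
    # and label_ranking_average_precision_score.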
y_probs = y_pred_df.filter(like="Probability", axis=1)
model_metrics["macro_roc_auc"] = metrics.roc_auc_score(
y_test, y_probs, multi_class="ovr"
)
model_metrics[
"Label ranking average precision"
] = metrics.label_ranking_average_precision_score(
y_test,
y_probs,
)
# Model summary
if model_type in ("bert", "tf"):
stringlist = []
model.summary(print_fn=lambda x: stringlist.append(x))
@@ -218,14 +228,64 @@ def parse_metrics_file(metrics_file, labels):
"precision": [],
"recall": [],
"f1_score": [],
"support": [],
"support (label count in test data)": [],
}
for each in lines:
splitted = each.split(" ")
metrics_dict["label"].append(splitted[0].strip())
metrics_dict["precision"].append(splitted[1].strip())
metrics_dict["recall"].append(splitted[2].strip())
metrics_dict["f1_score"].append(splitted[3].strip())
metrics_dict["support"].append(splitted[4].strip())
metrics_dict["support (label count in test data)"].append(splitted[4].strip())
metrics_df = pd.DataFrame.from_dict(metrics_dict)
return metrics_df


def get_y_score(probs):
"""Converts probabilities into format (n_samples, n_classes) so they can be passed into sklearn roc_auc_score function

Args:
probs (np.ndarray): Probability estimates outputted by model

Returns:
np.ndarray: Probability estimates in format (n_samples, n_classes)
"""
if probs.ndim == 3:
score = np.transpose([pred[:, 1] for pred in probs])
elif probs.ndim == 2:
score = probs
return score
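# Illustrative example: sklearn multilabel models can return predict_proba as a
# list of per-label (n_samples, 2) arrays, i.e. shape (n_labels, n_samples, 2).
# get_y_score keeps the positive-class column for each label and transposes:
#
#   probs = np.array([[[0.9, 0.1], [0.2, 0.8]],
#                     [[0.4, 0.6], [0.7, 0.3]]])  # 2 labels, 2 samples
#   get_y_score(probs)
#   # array([[0.1, 0.6],
#   #        [0.8, 0.3]])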


def additional_analysis(preds_df, y_true, labels):
"""For given predictions, returns dataframe containing: macro one-vs-one ROC AUC score, number of True Positives, True Negatives, False Positives, and False Negatives.

Args:
preds_df (pd.DataFrame): Dataframe containing predicted labels in one-hot encoded format
y_true (np.array): One-hot encoded real Y values
labels (List): List of the target labels

Returns:
        pd.DataFrame: Dataframe with one row per label, containing the number of True Negatives, False Negatives, True Positives, and False Positives for that label, plus its average precision score.
"""
# include threshold?? (later)
y_score = np.array(preds_df.filter(like="Probability", axis=1))
cm = metrics.multilabel_confusion_matrix(y_true, np.array(preds_df[labels]))
cm_dict = {}
average_precision = {}
for i, label in enumerate(labels):
cm_meaning = {}
tn, fp = cm[i][0]
fn, tp = cm[i][1]
cm_meaning["True Negative"] = tn
cm_meaning["False Negative"] = fn
cm_meaning["True Positive"] = tp
cm_meaning["False Positive"] = fp
cm_dict[label] = cm_meaning
average_precision[label] = metrics.average_precision_score(
y_true[:, i], y_score[:, i]
)
df = pd.DataFrame.from_dict(cm_dict, orient="index")
average_precision = pd.Series(average_precision)
df["average_precision_score"] = average_precision
return df
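
A brief usage sketch for `additional_analysis`; the label names here are purely illustrative, and a real `preds_df` would come from `predict_multilabel_sklearn` or `predict_multilabel_bert`, which append the probability columns:

import numpy as np
import pandas as pd

from pxtextmining.factories.factory_model_performance import additional_analysis

labels = ["Access", "Staff"]  # illustrative label names
preds_df = pd.DataFrame(
    {
        "Access": [1, 0, 1],
        "Staff": [0, 1, 1],
        'Probability of "Access"': [0.9, 0.2, 0.7],
        'Probability of "Staff"': [0.1, 0.8, 0.6],
    }
)
y_true = np.array([[1, 0], [0, 1], [1, 1]])
analysis_df = additional_analysis(preds_df, y_true, labels)
# One row per label: True Negative, False Negative, True Positive and False
# Positive counts, plus an average_precision_score column.
print(analysis_df)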
8 changes: 8 additions & 0 deletions pxtextmining/factories/factory_predict_unlabelled_text.py
@@ -84,6 +84,11 @@ def predict_multilabel_sklearn(
predictions[row][label_index] = 1
preds_df = pd.DataFrame(predictions, index=processed_text.index, columns=labels)
preds_df["labels"] = preds_df.apply(get_labels, args=(labels,), axis=1)
# add probs to df
if pred_probs.ndim == 3:
pred_probs = np.transpose([pred[:, 1] for pred in pred_probs])
label_list = ['Probability of "' + label + '"' for label in labels]
preds_df[label_list] = pred_probs
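    # e.g. with labels = ["Access", "Staff"] (illustrative), label_list is
    # ['Probability of "Access"', 'Probability of "Staff"'], adding one
    # probability column per label alongside the one-hot predictions.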
return preds_df


@@ -142,6 +147,9 @@ def predict_multilabel_bert(
predictions = y_binary
preds_df = pd.DataFrame(predictions, index=processed_text.index, columns=labels)
preds_df["labels"] = preds_df.apply(get_labels, args=(labels,), axis=1)
# add probs to df
label_list = ['Probability of "' + label + '"' for label in labels]
preds_df[label_list] = y_probs
return preds_df

