{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Titanic data set example\n", "\n", "Note: \n", "The focus of this example is less on finding anomalies but rather to illustrate model explanability in the case of categorical and continuous features." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from sklearn.datasets import fetch_openml\n", "from bhad.model import BHAD" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassnamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
01Allen, Miss. Elisabeth Waltonfemale29.00000024160211.3375B5S2NaNSt Louis, MO
11Allison, Master. Hudson Trevormale0.916712113781151.5500C22 C26S11NaNMontreal, PQ / Chesterville, ON
\n", "
" ], "text/plain": [ " pclass name sex age sibsp parch \\\n", "0 1 Allen, Miss. Elisabeth Walton female 29.0000 0 0 \n", "1 1 Allison, Master. Hudson Trevor male 0.9167 1 2 \n", "\n", " ticket fare cabin embarked boat body \\\n", "0 24160 211.3375 B5 S 2 NaN \n", "1 113781 151.5500 C22 C26 S 11 NaN \n", "\n", " home.dest \n", "0 St Louis, MO \n", "1 Montreal, PQ / Chesterville, ON " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X, y = fetch_openml(\"titanic\", version=1, as_frame=True, return_X_y=True)\n", "\n", "X.head(2)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 684 entries, 0 to 1281\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 pclass 684 non-null int64 \n", " 1 sex 684 non-null category\n", " 2 age 684 non-null float64 \n", " 3 sibsp 684 non-null int64 \n", " 4 parch 684 non-null int64 \n", " 5 fare 684 non-null float64 \n", " 6 embarked 684 non-null category\n", " 7 home.dest 684 non-null object \n", "dtypes: category(2), float64(2), int64(3), object(1)\n", "memory usage: 39.0+ KB\n" ] } ], "source": [ "X_cleaned = X.drop(['body', 'cabin', 'name', 'ticket', 'boat'], axis=1).dropna() # not needed\n", "y_cleaned = y[X_cleaned.index]\n", "\n", "X_cleaned.info(verbose=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Partition dataset:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(458, 8)\n", "(226, 8)\n", "(array(['0', '1'], dtype=object), array([242, 216]))\n", "(array(['0', '1'], dtype=object), array([122, 104]))\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X_cleaned, y_cleaned, test_size=0.33, random_state=42)\n", "\n", "print(X_train.shape)\n", "print(X_test.shape)\n", "\n", "print(np.unique(y_train, return_counts=True))\n", "print(np.unique(y_test, return_counts=True))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train model and create local/global model explanation:\n", "\n", "Retrieve local model explanations. Here: Specify all numeric and categorical columns explicitly" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "num_cols = list(X_train.select_dtypes(include=['float', 'int']).columns) \n", "cat_cols = list(X_train.select_dtypes(include=['object', 'category']).columns)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Score your train set:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training predictions: (array([-1, 1]), array([ 5, 453]))\n" ] } ], "source": [ "model = BHAD(\n", " contamination=0.01,\n", " num_features=num_cols,\n", " cat_features=cat_cols,\n", " nbins=None, \n", " verbose=False\n", ")\n", "\n", "y_pred_train_new = model.fit_predict(X_train)\n", "scores_train_new = model.decision_function(X_train)\n", "\n", "print(\"Training predictions:\", np.unique(y_pred_train_new, return_counts=True))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--- BHAD Model Explainer ---\n", "\n", "Using fitted BHAD and discretizer.\n", "Marginal distributions estimated using train set of shape (458, 8)\n" ] } ], "source": [ "from bhad import explainer\n", "\n", "local_expl = explainer.Explainer(bhad_obj=model, discretize_obj=model._discretizer).fit()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Create local explanations for 458 observations.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "968728c99c00492c90ab2592bf3c6222", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/458 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
avg ranks
embarked0.152058
sex0.257304
parch0.279548
sibsp0.444223
age0.491700
fare0.546813
pclass0.634462
home.dest1.000000
\n", "" ], "text/plain": [ " avg ranks\n", "embarked 0.152058\n", "sex 0.257304\n", "parch 0.279548\n", "sibsp 0.444223\n", "age 0.491700\n", "fare 0.546813\n", "pclass 0.634462\n", "home.dest 1.000000" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "global_feat_imp = local_expl.global_feat_imp # based on X_train\n", "global_feat_imp" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Get global model explanation (in decreasing order):" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlcAAAGwCAYAAACEkkAjAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANBJJREFUeJzt3Ql8jdfa//8rAyFIzFNPTDW25ipF1VgxljrVHlqqhnTgEJ62qJqnHMWhVFu0phqqiqOlhqrUY56VClWk9FQVRVCC5P69rvX89/5nEyGsJJL9eb9e94l9z3slJ/vba611x8dxHEcAAABgha+d0wAAAIBwBQAAYBmVKwAAAIsIVwAAABYRrgAAACwiXAEAAFhEuAIAALDI3+bJcGfx8fHy22+/SY4cOcTHx4cmAwAgHdDHgl68eFEKFy4svr5J16YIV6lMg1VISEhqXxYAAFhw4sQJ+dvf/pbkPoSrVKYVK9c3JygoKLUvDwAA7kFMTIwpjrg+x5NCuEplrq5ADVaEKwAA0pe7GdLDgHYAAACLCFcAAAAWEa4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYJG/zZPh7pUfvEp8AwJpMgAALIqOaC5pjcoVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEWEKwAAAIsIVwAAABYRrgAAANIqXNWrV0/Cw8MlPYqOjhYfHx/Zs2dPWt8KAADIwKhc3aNOnTpJ69at7X43AABAuke4AgAASMtwFR8fL2+//bbkzp1bChYsKEOGDHFvO378uLRq1UqyZ88uQUFB8vzzz8upU6fc23XfypUry6effipFihQx+73xxhsSFxcnY8aMMefLnz+/jBw50uOa58+fl65du0q+fPnMeRs0aCB79+5N8j63bdsmVapUkSxZski1atVk9+7dt+yzf/9+adq0qbmPAgUKSIcOHeTMmTPu7YsWLZIKFSpI1qxZJU+ePNKoUSO5fPmyeR+zZs2S//znP6arUZfIyMhE7yM2NlZiYmI8FgAAkHElO1xpqMiWLZts3brVBKJhw4bJmjVrTOjSYPXnn3/K999/b9YdPXpUXnjhBY/jjxw5It98842sXLlS5s+fL5988ok0b95cfv31V3Pcv/71L3n33XfN+V3atm0rf/zxhzlu586dUrVqVWnYsKG5VmIuXbokLVq0kEceecTsr2HozTffvCWwaUjTALZjxw5zPxoENRCqkydPSrt27aRz584SFRVlwlObNm3EcRxzLt2vSZMmZj9datWqlei9jB49WoKDg91LSEhIcpscAACkI/7JPaBixYoyePBg8+9SpUrJ5MmTZe3ateb1vn375NixY+4AMXv2bHn00Udl+/bt8vjjj5t1GsK0cpUjRw4TfurXry+HDh2SFStWiK+vr5QpU8YErHXr1kmNGjVkw4YNpgql4SogIMCcY+zYsbJ06VJTWQoLC7vlHufNm2euo8FNK1d6DxreXn/9dfc+et8arEaNGuVep/el9/7TTz+ZgHbjxg0TqIoWLWq2axXLRatZWpXSaltS+vfvL3369HG/1soVAQsAgIzrnsJVQoUKFTLBR6s7GhoSBgcNTzlz5jTbXOGqWLFiJli5aHecn5+fCVYJ1+k5lXb/adDRbrmErly5YqpgidHr6X1qsHKpWbOmxz56Xg1w2iV4Mz1v48aNTXVMA1VoaKh5/dxzz0muXLkkOTQQukIhAADI+JIdrjJlyuTxWscbaZXofo5P6pwarDTAJTamSYPbvdLztmzZ0lTJbqbX08CnXZubNm2S1atXy6RJk2TAgAGmu7J48eL3fF0AAJCxJTtc3U65cuXkxIkTZnFVrw4cOGDGNmkF617p+Krff/9d/P39TdXrbu9lzpw5cvXqVXf1asuWLbec98svvzTn1HMnRkNe7dq1zTJo0CDTPbhkyRLTzZc5c2YzEB8AACBFHsWgM+m0C+3FF1+UXbt2mXFSHTt2lLp165rZevdzXu3S02dKaQVJHwaq1SStIulAdKXXKlu2rPz3v/81r9u3b2+CUbdu3UzA0/FcOk4roe7du5sB8TpoXceEaVfgqlWr5JVXXjGhSStUOh5Lr6GzIBcvXiynT582wU1pKPvhhx/MeDGdYXj9+vX7aj8AAJAxWAtXGmb00QQ6Jumpp54yoahEiRLy+eef3/d5NRzpOTX4lC5dWv7xj3/IL7/8YsZmqb/++suEHFfA0XFUX331lRlgr4PWNYjd3P1XuHBh2bhxowlSOp5Kg6E+fV67GnX8lz7yYf369dKsWTNzTZ3BOG7cOPPoBqXBTQffa3DUR0TouQAAAHwcfbYAUo3OFjSPZAhfKL4BgbQ8AAAWRUc0l5T8/L5w4YIpwCSFJ7QDAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAPIh/uBnJs39o6B2f8AoAANIfKlcAAAAWEa4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAi3iIaBopP3iV+AYEptXlAeCuREc0p6WAZKJyBQAAYBHhCgAAwCLCFQAAgEWEKwAAAIsIVwAAABYRrgAAACwiXAEAAFhEuAIAALCIcAUAAGAR4QoAAMAiwhUAAIBFhCsAAADCVfJERkaKj4+PnD9/nh8eAACQoqhcAQAAWES4AgAA8MZwVa9ePenRo4dZgoODJW/evDJw4EBxHMdsj42Nlb59+0pISIgEBARIyZIl5ZNPPkn0XGfPnpV27drJQw89JIGBgVKhQgWZP3++xz6LFi0y67NmzSp58uSRRo0ayeXLl93djNWrV5ds2bJJzpw5pXbt2vLLL78kei29r5iYGI8FAABkXOkmXKlZs2aJv7+/bNu2TSZOnCjjx4+X6dOnm20dO3Y0Aen999+XqKgo+fjjjyV79uyJnufq1avy2GOPyfLly2X//v0SFhYmHTp0MOdVJ0+eNOGrc+fO5lwaptq0aWOC3I0bN6R169ZSt25d+eGHH2Tz5s3meB3TlZjRo0ebMOhaNPwBAICMy8dxlX7SQeXqjz/+kB9//NEdZPr16yfLli2TpUuXSpkyZWTNmjWmwnQzDUf169eXc+fOmUpTYlq0aCFly5aVsWPHyq5du0z4io6OlqJFi3rs9+eff5pKlp5TA9adaOVKFxetXGnACglfKL4BgffQEgCQeqIjmtPcgPzf57cWSS5cuCBBQUEZp3L1xBNPeFSIatasKYcPH5bdu3eLn5/fXYUdFRcXJ8OHDzfdfrlz5zYVrlWrVsnx48fN9kqVKknDhg3N9rZt28q0adNMMFO6f6dOnSQ0NFRatmxpKmha6bod7aLUb0LCBQAAZFzpKlzdTpYsWZK1/3vvvWdCkY7RWrdunezZs8eEpWvXrpntGtS0CvbNN9/II488IpMmTTKVsWPHjpntM2bMMN2BtWrVks8//1xKly4tW7ZsSZH3BgAA0pd0Fa62bt3q8VoDTalSpUylKT4+Xr7//vu7Os/GjRulVatW8tJLL5ljS5QoIT/99JPHPloh04HqQ4cONZWxzJkzy5IlS9zbq1SpIv3795dNmzZJ+fLlZd68eZbeJQAASM/SVbjSbrs+ffrIoUOHzOB1rSj16tVLihUrJi+//LIZgK7jr7TCpGOiFi5cmOh5NJBpZUqDkQ5Yf/XVV+XUqVMeIW7UqFGyY8cOc83FixfL6dOnpVy5cubcGqq0cqUzBFevXm26JnUbAACAf3pqAp0ReOXKFfMYBO2602ClM/XUhx9+KO+884688cYb5lELRYoUMa8T8+6778rRo0dNV6A+ikHPoTMAdZCa0nFR69evlwkTJpgBbDqofdy4cdK0aVMTwg4ePGhmLup1ChUqJN27dzcBDQAAIF3NFqxcubIJPBlhtgGzBQGkB8wWBDL4bEEAAIAHHeEKAADAG8dc6QB1AACABx2VKwAAAIsIVwAAABYRrgAAACwiXAEAAFhEuAIAALCIcAUAAOCNj2LIaPYPDb3jE14BAED6Q+UKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBEPEU0j5QevEt+AwLS6PIAHQHRE87S+BQApgMoVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEWEKwAAAMIVAADAg4nKFQAAgEVeEa4cx5GwsDDJnTu3+Pj4yJ49e9L6lgAAQAblL15g5cqVMnPmTImMjJQSJUpI3rx50/qWAABABuUV4erIkSNSqFAhqVWr1j2f4/r165IpUyar9wUAADKeDN8t2KlTJ/nnP/8px48fN12CxYoVM5WsJ598UnLmzCl58uSRFi1amADmEh0dbfb9/PPPpW7dupIlSxaZO3eu2TZ9+nQpV66cWVe2bFmZMmVKktePjY2VmJgYjwUAAGRcGT5cTZw4UYYNGyZ/+9vf5OTJk7J9+3a5fPmy9OnTR3bs2CFr164VX19fefbZZyU+Pt7j2H79+kmvXr0kKipKQkNDTcAaNGiQjBw50qwbNWqUDBw4UGbNmnXb648ePVqCg4PdS0hISCq8awAAkFYyfLegBpocOXKIn5+fFCxY0Kz7+9//7rHPp59+Kvny5ZMDBw5I+fLl3evDw8OlTZs27teDBw+WcePGudcVL17cHPPxxx/Lyy+/nOj1+/fvb4Kci1auCFgAAGRcGT5cJebw4cOmArV161Y5c+aMu2KlXYcJw1W1atXc/9Zql3YddunSRbp16+Zef+PGDRPgbicgIMAsAADAO3hluGrZsqUULVpUpk2bJoULFzbhSkPVtWvXPPbLli2b+9+XLl0yX/WYGjVqeOynVTEAAACvDFdnz56VQ4cOmZBUp04ds27Dhg13PK5AgQImiB09elRefPHFVLhTAACQHnlduMqVK5eZITh16lTzeAbtCtSB63dj6NCh0rNnT9MN2KRJEzMTUAfFnzt3zmNcFQAA8F4ZfrbgzXRm4IIFC2Tnzp2mK7B3797y3nvv3dWxXbt2NY9imDFjhlSoUME8pkEfTqoD2wEAAJSPo38bBqlGZwuaRzKELxTfgEBaHvBi0RHN0/oWACTz8/vChQsSFBSU5L5eV7kCAABISYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEWEKwAAAIu87g83Pyj2Dw294+PzAQBA+kPlCgAAwCLCFQAAgEWEKwAAAIsIVwAAABYRrgAAACwiXAEAAFhEuAIAALCI51ylkfKDV4lvQGBaXR5AEqIjmtM+AO4ZlSsAAACLCFcAAAAWEa4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAAAA4QoAAODBROUKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWORV4WrlypXy5JNPSs6cOSVPnjzSokULOXLkiHv7pk2bpHLlypIlSxapVq2aLF26VHx8fGTPnj3uffbv3y9NmzaV7NmzS4ECBaRDhw5y5syZ214zNjZWYmJiPBYAAJBxeVW4unz5svTp00d27Ngha9euFV9fX3n22WclPj7ehJ6WLVtKhQoVZNeuXTJ8+HDp27evx/Hnz5+XBg0aSJUqVcw5NKydOnVKnn/++dtec/To0RIcHOxeQkJCUuGdAgCAtOLjOI4jXkorTvny5ZN9+/bJhg0b5N1335Vff/3VVK7U9OnTpVu3brJ7925T0RoxYoT87//+r6xatcp9Dt1fA9OhQ4ekdOnSiVaudHHREKf7h4QvFN+AwFR6pwCSIzqiOQ0GwIN+fmuR5MKFCxIUFCRJ8RcvcvjwYRk0aJBs3brVBCutWKnjx4+bcFSxYkV3sFLVq1f3OH7v3r2ybt060yV4M+1eTCxcBQQEmAUAAHgHrwpX2u1XtGhRmTZtmhQuXNiEq/Lly8u1a9fu6vhLly6Zc/zrX/+6ZVuhQoVS4I4BAEB64zXh6uzZs6Y6pcGqTp06Zp12BbqUKVNGPvvsM9OF56o0bd++3eMcVatWlS+//FKKFSsm/v5e03QAACAZvGZAe65cucwMwalTp8rPP/8s3333nRnc7tK+fXtTyQoLC5OoqCgzrmrs2LFmm84YVN27d5c///xT2rVrZ4KXdgXqfq+88orExcWl2XsDAAAPDq8JVzozcMGCBbJz507TFdi7d29577333Nt1cNpXX31lHrugg9cHDBhgxmcp1zgs7UrcuHGjCVKNGzc2MwvDw8PNox30/AAAAF49W/BO5s6da6pSOjMga9asVmcbMFsQeHAxWxDAzZgteI9mz54tJUqUkIceesjMDNTnXOkzrGwFKwAAkPExKjuB33//3XQF6led/de2bVsZOXJk2n13AABAukO4SuDtt982CwAAwL1iFDYAAIBFhCsAAACLCFcAAAAWEa4AAAAsIlwBAABYRLgCAACwiHAFAABgEc+5SiP7h4aav2cIAAAyFipXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEU85yqNlB+8SnwDAtPq8kC6Fx3RPK1vAQASReUKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAEK4AAAAeTFSuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwKIMHa46deokrVu3dr+uV6+ehIeHp+k9AQCAjM1fMrCJEyeK4zhpfRsAAMCLZOhwFRwcnNa3AAAAvEyG6BZctGiRVKhQQbJmzSp58uSRRo0ayeXLl2/pFlQ3btyQHj16mOCVN29eGThwoEd1a8qUKVKqVCnJkiWLFChQQJ577jmPbkU9NqnjbxYbGysxMTEeCwAAyLjSfbg6efKktGvXTjp37ixRUVESGRkpbdq0uW3gmTVrlvj7+8u2bdtMt+H48eNl+vTpZtuOHTukZ8+eMmzYMDl06JCsXLlSnnrqqbs+PjGjR482Qcy1hISEWG4BAADwIPHPCOFKq1EaqIoWLWrWaRXrdjTc/Pvf/xYfHx8pU6aM7Nu3z7zu1q2bHD9+XLJlyyYtWrSQHDlymPNVqVLlro9PTP/+/aVPnz7u11q5ImABAJBxpfvKVaVKlaRhw4YmULVt21amTZsm586du+3+TzzxhAlGLjVr1pTDhw9LXFycPP300yZQlShRQjp06CBz586Vv/76666PT0xAQIAEBQV5LAAAIONK9+HKz89P1qxZI99884088sgjMmnSJFNROnbsWLLPpdWqXbt2yfz586VQoUIyaNAgE97Onz+fIvcOAAAynnQfrpRWkmrXri1Dhw6V3bt3S+bMmWXJkiWJ7rt161aP11u2bDED2DWkKR1PpQPix4wZIz/88INER0fLd999d9fHAwAA75bux1xp2Fm7dq00btxY8ufPb16fPn1aypUrZ8LRzXRclY6BevXVV02VSitd48aNM9u+/vprOXr0qBnEnitXLlmxYoXEx8ebStjdHA8AAJDuw5WOYVq/fr1MmDDBDBbXMVMadpo2bSqff/75Lft37NhRrly5ItWrVzfVpl69eklYWJjZljNnTlm8eLEMGTJErl69aipS2kX46KOP3tXxAAAAPg6PML9r+pyrypUrmyB3rzQAmkcyhC8U34BAfgKBexQd0Zy2A5BqXJ/fFy5cuOPktAwx5goAAOBBQbgCAACwKN2PuUpN+vR3AACApFC5AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEWEKwAAAIsIVwAAABYRrgAAACziOVdpZP/Q0Ds+Ph8AAKQ/VK4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAi3jOVRopP3iV+AYEptXl8QCJjmie1rcAALCIyhUAAIBFhCsAAACLCFcAAAAWEa4AAAAIVwAAAA8mKlcAAAAWEa4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYSrZIiOjhYfHx/Zs2ePze8BAADIQAhXAAAAFhGu/j+O48iNGzdsti0AAPBC6TZc1atXT3r06GGW4OBgyZs3rwwcONCEJDVnzhypVq2a5MiRQwoWLCjt27eXP/74w318ZGSk6eL75ptv5LHHHpOAgADZsGGDxMfHy5gxY6RkyZJmXZEiRWTkyJEe1z569KjUr19fAgMDpVKlSrJ58+ZUf/8AAODBlG7DlZo1a5b4+/vLtm3bZOLEiTJ+/HiZPn262Xb9+nUZPny47N27V5YuXWrGS3Xq1OmWc/Tr108iIiIkKipKKlasKP379zevNagdOHBA5s2bJwUKFPA4ZsCAAfLmm2+asVelS5eWdu3a3bbqFRsbKzExMR4LAADIuHwcV6knHVautBL1448/mgqUKygtW7bMhKKb7dixQx5//HG5ePGiZM+e3VSutPqkwatVq1ZmH92WL18+mTx5snTt2vWWc2hAK168uAlwXbp0Mev0Wo8++qgJZ2XLlr3lmCFDhsjQoUNvWR8SvlB8AwKttAXSt+iI5ml9CwCAO9DiiPaUXbhwQYKCgjJu5eqJJ55wBytVs2ZNOXz4sMTFxcnOnTulZcuWpltPuwbr1q1r9jl+/LjHObTr0EUDklaaGjZsmOR1tcLlUqhQIfM1YZdjQloJ02+Eazlx4sQ9vlsAAJAe+EsGdPXqVQkNDTXL3LlzTTVKQ5W+vnbtmse+2bJlc/87a9asd3X+TJkyuf/tCnc6VisxOm5LFwAA4B3SdeVq69atHq+3bNkipUqVkoMHD8rZs2fN2Kk6deqY7rrbVZYS0mM1YK1duzYF7xoAAGRk6TpcaTWqT58+cujQIZk/f75MmjRJevXqZboCM2fObF7rzD4dh6WD2+8kS5Ys0rdvX3n77bdl9uzZcuTIERPYPvnkk1R5PwAAIP1L192CHTt2lCtXrkj16tXFz8/PBKuwsDDTVTdz5kx555135P3335eqVavK2LFj5ZlnnrnjOXWWoM5AHDRokPz2229mTNVrr72WKu8HAACkf+l6tmDlypVlwoQJkh5nGzBbEC7MFgSAB5/XzBYEAAB40BCuAAAALEq3Y670IaAAAAAPGipXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEXp9jlX6d3+oaF3fHw+AABIf6hcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEWEKwAAAIsIVwAAABbxnKs0Un7wKvENCEyryyMVREc0p50BwAtRuQIAALCIcAUAAGAR4QoAAMAiwhUAAIBFhCsAAACLCFcAAAAWEa4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALPLacLVo0SKpUKGCZM2aVfLkySONGjWSy5cvm23Tp0+XcuXKSZYsWaRs2bIyZcoU93GdO3eWihUrSmxsrHl97do1qVKlinTs2DHN3gsAAHhweGW4OnnypLRr184EpaioKImMjJQ2bdqI4zgyd+5cGTRokIwcOdJsGzVqlAwcOFBmzZpljn3//fdNCOvXr595PWDAADl//rxMnjw50WtpCIuJifFYAABAxuUvXhqubty4YQJV0aJFzTqtYqnBgwfLuHHjzDZVvHhxOXDggHz88cfy8ssvS/bs2eWzzz6TunXrSo4cOWTChAmybt06CQoKSvRao0ePlqFDh6biuwMAAGnJx9FyjZeJi4uT0NBQ2bZtm/nauHFjee655yRz5swmPGlXoa/v/1/U0yAWHBwsp06dcq975513THDq27evRERE3PZaWrlydSEqrVyFhIRISPhC8Q0ITMF3ibQWHdE8rW8BAGCJfn5rFrhw4cJtCypeXbny8/OTNWvWyKZNm2T16tUyadIk07331Vdfme3Tpk2TGjVq3HKMS3x8vGzcuNGs+/nnn5O8VkBAgFkAAIB38MoxV8rHx0dq165tuux2795tqlYamAoXLixHjx6VkiVLeizaPejy3nvvycGDB+X777+XlStXyowZM9L0vQAAgAeHV1autm7dKmvXrjXdgfnz5zevT58+bWYIatjq2bOnKf01adLEdOnt2LFDzp07J3369DFBTAe862xDDWfjx4+XXr16mTFYJUqUSOu3BgAA0phXhivtK12/fr0ZjK59qDqoXQexN23a1GwPDAw01am33npLsmXLZga7h4eHy9WrV+Wll16STp06ScuWLc2+YWFhsnz5cunQoYM5Z8LuQwAA4H28ckD7gzAgjgHtGR8D2gHAOwe0e+2YKwAAgJRAuAIAALCIcAUAAGAR4QoAAMAiwhUAAIBFhCsAAACLCFcAAAAWEa4AAAAsIlwBAABYRLgCAACwyCv/tuCDYP/Q0Ds+Ph8AAKQ/VK4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAi3jOVRopP3iV+AYEptXlH2jREc3T+hYAALhnVK4AAAAsIlwBAABYRLgCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAkJHDVb169SQ8PDxFzj1kyBCpXLmy1XPOnDlTcubMafWcAAAg/XrgwhUAAEB65i9ewHEciYuLS+vbAAAAXuC+Klfx8fEyevRoKV68uGTNmlUqVaokixYtMtsiIyPFx8dHVq1aJVWqVDHbGzRoIH/88Yd88803Uq5cOQkKCpL27dvLX3/95XHeGzduSI8ePSQ4OFjy5s0rAwcONAHJZc6cOVKtWjXJkSOHFCxY0JxDz+viurZe57HHHpOAgADZsGHDLfd/5MgRKVGihLmWnj82NlbefPNNeeihhyRbtmxSo0YNc66buwGLFCkigYGB8uyzz8rZs2fvpwkBAEAGc1/hSoPV7Nmz5aOPPpIff/xRevfuLS+99JJ8//33HuOcJk+eLJs2bZITJ07I888/LxMmTJB58+bJ8uXLZfXq1TJp0iSP886aNUv8/f1l27ZtMnHiRBk/frxMnz7dvf369esyfPhw2bt3ryxdulSio6OlU6dOt9xfv379JCIiQqKioqRixYoe23744Qd58sknTTDT+9MwpiFr8+bNsmDBArO9bdu20qRJEzl8+LA5ZuvWrdKlSxez3549e6R+/foyYsSIJNtIA1tMTIzHAgAAMi4fJ2FJKBk0NOTOnVu+/fZbqVmzpnt9165dTSUqLCzMhA/d3rBhQ7NNg07//v3dFSP12muvmXC0cuVK94B2rUJpWNPA4wpJy5YtkwMHDiR6Lzt27JDHH39cLl68KNmzZzfVJr22Bq9WrVp5BD1dN2XKFGnRooUMGDBA/ud//sdsO378uLkn/Vq4cGH3MY0aNZLq1avLqFGjTBC7cOGCCYUu//jHP8y9nz9/PtF702sOHTr0lvUh4QvFNyAwma3uHaIjmqf1LQAA4EGLI9qjpjlAe95SpHL1888/mxD19NNPm0DjWrSSpeHJJWHFqECBAqY7zRWsXOsSdumpJ554wh2slIY3rR65xk3t3LlTWrZsabrntGuwbt26Zr0Go4S06/Bmuo/e86BBg9zBSu3bt8+cv3Tp0h7vR6twrvejFTDtKkwoYbBMjIZJ/Ua4Fq3eAQCAjOueB7RfunTJfNUqjo5RSkjHOLkCSaZMmdzrNTAlfO1ap2O37tbly5clNDTULHPnzpV8+fKZwKSvr1275rGvjpu6me6vlan58+dL586d3elT34+fn58Jbvo1IQ1Z90rbQhcAAOAd7jlcPfLIIyY0aLBxVY4SSli9Si4d25TQli1bpFSpUib0HDx40Awi1y7GkJAQd7fg3dKB9V9//bU0a9bMBDId86XVLx10r5UrraLVqVMn0WN1EH5i9wYAAHDf4UoDic6s00HsWnnSweHa7bVx40ZTDSpatOi9ntoEtj59+sirr74qu3btMgPex40bZ7ZpV2DmzJnNOh2vtX//fjO4PTm0oqUVt6ZNm5pFx0xpd+CLL74oHTt2NNfSsHX69GlZu3at6dps3ry59OzZU2rXri1jx441Y7l0JqRrrBgAAMB9zxbUUKOPSdBZg1rV0Zl1Glr00Qz3QwPOlStXzEDy7t27S69evcwAeVe3nj4O4YsvvjDVM61gadhJLu3q00c16Hh+DU7a3ThjxgxzbR2LVaZMGWndurVs377dBDrXWLBp06aZGYz62Amter377rv39V4BAEDGcs+zBXF/sw2YLXh7zBYEAHjlbEEAAADcinAFAABgEeEKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAA8CD84Wbcn/1DQ+/4+HwAAJD+ULkCAACwiHAFAABgEeEKAADAIsIVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcAQAAWES4AgAAsIhwBQAAYBHhCgAAwCLCFQAAgEWEKwAAAIsIVwAAABYRrgAAACzyt3ky3JnjOOZrTEwMzQUAQDrh+tx2fY4nhXCVys6ePWu+hoSEpPalAQDAfbp48aIEBwcnuQ/hKpXlzp3bfD1+/Pgdvzmw/18dGmpPnDghQUFBNG8qod3TDm1Pu3ubmBT8Pa8VKw1WhQsXvuO+hKtU5uv7f8PcNFjxAZ82tN1pe9rdm/AzT7t7m6AU+j1/t0URBrQDAABYRLgCAACwiHCVygICAmTw4MHmK2h7b8DPPG3vbfiZTzsPStv7OHczpxAAAAB3hcoVAACARYQrAAAAiwhXAAAAFhGuAAAALCJcpYAPPvhAihUrJlmyZJEaNWrItm3bktz/iy++kLJly5r9K1SoICtWrEiJ2/IKyWn7adOmSZ06dSRXrlxmadSo0R2/V7j/dk9owYIF4uPjI61bt6ZpU6ntz58/L927d5dChQqZGVWlS5fmd04qtPuECROkTJkykjVrVvME8d69e8vVq1fv5dJea/369dKyZUvzhHT9vbF06dI7HhMZGSlVq1Y1P+slS5aUmTNnpsq96uPcYdGCBQuczJkzO59++qnz448/Ot26dXNy5szpnDp1KtH9N27c6Pj5+TljxoxxDhw44Lz77rtOpkyZnH379vF9SeG2b9++vfPBBx84u3fvdqKiopxOnTo5wcHBzq+//krbp2C7uxw7dsx56KGHnDp16jitWrWizVOh7WNjY51q1ao5zZo1czZs2GC+B5GRkc6ePXto/xRs97lz5zoBAQHmq7b5qlWrnEKFCjm9e/em3ZNhxYoVzoABA5zFixfrUw6cJUuWJLn/0aNHncDAQKdPnz7m83XSpEnm83blypVOSiNcWVa9enWne/fu7tdxcXFO4cKFndGjRye6//PPP+80b97cY12NGjWcV1991fatZXjJbfub3bhxw8mRI4cza9asFLzLjOde2l3bulatWs706dOdl19+mXCVSm3/4YcfOiVKlHCuXbt2r5fEPbS77tugQQOPdfqBX7t2bdrzHt1NuHr77bedRx991GPdCy+84ISGhjopjW5Bi65duyY7d+403UsJ/5agvt68eXOix+j6hPur0NDQ2+4Pe21/s7/++kuuX7/u/uPaSLl2HzZsmOTPn1+6dOlCM6di2y9btkxq1qxpugULFCgg5cuXl1GjRklcXBzfhxRs91q1apljXF2HR48eNV2xzZo1o91TUFp+vvKHmy06c+aM+SWlv7QS0tcHDx5M9Jjff/890f11PVK27W/Wt29f05d/8/8ZYbfdN2zYIJ988ons2bOHpk3lttcP9e+++05efPFF8+H+888/yxtvvGH+o0Kfao2Uaff27dub45588kntLZIbN27Ia6+9Ju+88w5NnoJu9/kaExMjV65cMePfUgqVK0BEIiIizODqJUuWmAGqSBkXL16UDh06mMkEefPmpZlTWXx8vKkYTp06VR577DF54YUXZMCAAfLRRx/xvUhBOqhaK4RTpkyRXbt2yeLFi2X58uUyfPhw2j2DonJlkX5Y+Pn5yalTpzzW6+uCBQsmeoyuT87+sNf2LmPHjjXh6ttvv5WKFSvSxCnY7keOHJHo6Ggz4yfhB77y9/eXQ4cOycMPP8z3IAXaXukMwUyZMpnjXMqVK2f+C1+7uzJnzkzbp0C7Dxw40PxHRdeuXc1rnRV++fJlCQsLM+FWuxVh3+0+X4OCglK0aqX4jlqkv5j0vwbXrl3r8cGhr3WcQ2J0fcL91Zo1a267P+y1vRozZoz5r8eVK1dKtWrVaN4Ubnd95Mi+fftMl6BreeaZZ6R+/frm3zpFHSnT9qp27dqmK9AVaNVPP/1kQhfBKuXaXcdz3hygXAGXP++bctL08zXFh8x74RRdnXI7c+ZMM/UzLCzMTNH9/fffzfYOHTo4/fr183gUg7+/vzN27FjzOIDBgwfzKIZUavuIiAgznXrRokXOyZMn3cvFixfv74fAyyS33W/GbMHUa/vjx4+bGbE9evRwDh065Hz99ddO/vz5nREjRtzHXXif5La7/l7Xdp8/f755PMDq1audhx9+2MwWx93T38366BxdNL6MHz/e/PuXX34x27XNte1vfhTDW2+9ZT5f9dE7PIohHdNnaRQpUsR8cOuU3S1btri31a1b13yYJLRw4UKndOnSZn+dNrp8+fI0uGvva/uiRYua/4PevOgvQqRcu9+McJW6bb9p0ybzuBcNB/pYhpEjR5pHYyDl2v369evOkCFDTKDKkiWLExIS4rzxxhvOuXPnaPZkWLduXaK/s11trV+17W8+pnLlyub7pD/vM2bMcFKDj/5PytfHAAAAvANjrgAAACwiXAEAAFhEuAIAALCIcAUAAGAR4QoAAMAiwhUAAIBFhCsAAACLCFcAAAAWEa4A4Dbq1asn4eHhtA+AZCFcAbgnnTp1Eh8fn1sW/cPANsycOVNy5syZpt+dxYsXmz/s/aCKjIw0bX7+/Pm0vhUACfgnfAEAydGkSROZMWOGx7p8+fI9cI14/fp1yZQpU7KPy507tzyo9D0BeDBRuQJwzwICAqRgwYIei5+fn9n2n//8R6pWrSpZsmSREiVKyNChQ+XGjRvuY8ePHy8VKlSQbNmySUhIiLzxxhty6dIld0XmlVdekQsXLrgrYkOGDDHb9N9Lly71uA+tcGmlS0VHR5t9Pv/8c6lbt665/ty5c8226dOnS7ly5cy6smXLypQpU5LVLVisWDEZMWKEdOzYUbJnzy5FixaVZcuWyenTp6VVq1ZmXcWKFWXHjh23VOD0nkuVKmWuHRoaKidOnPC41ocffigPP/ywZM6cWcqUKSNz5szx2K7vSfd55plnTJt169ZN6tevb7blypXLbNdqolq5cqU8+eST5rp58uSRFi1ayJEjR9zncrWRVub0HIGBgVKpUiXZvHmzxzU3btxo2kC36zX0vs+dO2e2xcfHy+jRo6V48eKSNWtWc/yiRYvcx+p+L774ognbul3f+81BHMiwUuXPQwPIcPQv0Ldq1SrRbevXr3eCgoKcmTNnOkeOHHFWr17tFCtWzBkyZIh7n3//+9/Od9995xw7dsxZu3atU6ZMGef1118322JjY50JEyaYc5w8edIsFy9eNNv019aSJUs8rhccHOz+a/d6Pt1Hr/fll186R48edX777Tfns88+cwoVKuRep19z585t7vF26tat6/Tq1cv9umjRouaYjz76yPnpp5/M/eo9NmnSxFm4cKFz6NAhp3Xr1k65cuWc+Ph4c4zeV6ZMmZxq1ao5mzZtcnbs2OFUr17dqVWrlvu8ixcvNvt88MEH5hzjxo1z/Pz8TPu46HvKnz+/8+mnn5o2jY6ONu9B1+sx2kbnz583+y5atMhsO3z4sLN7926nZcuWToUKFZy4uDiPNipbtqzz9ddfm+Ofe+458/6uX79u9tHjAgICzHvcs2ePs3//fmfSpEnO6dOnzfYRI0aY41euXGnuR9+n7h8ZGWm2d+/e3alcubKzfft2c701a9Y4y5Ytu8ufLiB9I1wBuOdwpQEgW7Zs7kU/oFXDhg2dUaNGeew/Z84cE25u54svvnDy5Mnjfq0f1hqabvmldZfhSsNZQg8//LAzb948j3XDhw93atasmaxw9dJLL7lfa6DRaw0cONC9bvPmzWadbnO9D329ZcsW9z5RUVFm3datW81rDVrdunXzuHbbtm2dZs2aebzv8PBwj33WrVtn1p87d85JigYi3W/fvn0ebTR9+nT3Pj/++KNZp/em2rVr59SuXTvR8129etUJDAw0YTGhLl26mOOUBrpXXnklyfsCMirGXAG4Z9qlpF1VLtpdpfbu3Wu6lEaOHOneFhcXJ1evXpW//vrLdDN9++23plvp4MGDEhMTY7oME26/X9WqVXP/+/Lly6ZbrEuXLqY7zUWvGRwcnKzzarefS4ECBcxX7d68ed0ff/xhukmVv7+/PP744+59tEtSu+yioqKkevXq5mtYWJjHdWrXri0TJ0687XtKyuHDh2XQoEGydetWOXPmjOnCU8ePH5fy5csn+l4KFSrkvm+9vz179kjbtm0TPb9OWtDv09NPP+2x/tq1a1KlShXz79dff13+/ve/y65du6Rx48bSunVrqVWr1l3dP5DeEa4A3DMNUyVLlrxlvY6d0jFWbdq0uWWbjjnSMT86Dkg/gDWA6cDxDRs2mPCjH9BJhSsdK/R/hZykB3e7gp7rftS0adOkRo0aHvu5xojdrYQD4/VebrfOFWhsSviektKyZUszHkzfb+HChc29aKjStk0oqfvWcVK342rP5cuXy0MPPXTLODzVtGlT+eWXX2TFihWyZs0aadiwoXTv3l3Gjh171+8XSK8IVwCs04Hshw4dSjR4qZ07d5oP8XHjxomv7//Nq1m4cKHHPjqwW6tdN9MB0idPnvSo0mgVJSlaTdKQcfToUTPIOrVphUwHuWuVSmnb6OMTdHC90q9a6Xv55Zfdx+jrRx55JMnzahuphO109uxZc34NVnXq1DHrNLgml1a11q5da0LyzfS+NERpJUwnDdyOfq/0Pemi9/LWW28RruAVCFcArNMuKa1MFSlSRJ577jkToLSrcP/+/Wa2nYYurTZNmjTJVFk0SHz00Uce59CZeVoh0Q94nYmm1SxdGjRoIJMnT5aaNWuaUNG3b9+7esyChoSePXuabkB9hERsbKwJPDqrrU+fPin6U6D3989//lPef/9900XYo0cPeeKJJ9xhS0PH888/b7rUGjVqJF999ZWZyaddp0nR6pRWnL7++mtp1qyZqTbprD6dITh16lTT1acBqF+/fsm+5/79+5vuTp3F+dprr5kgt27dOtNVmDdvXnnzzTeld+/eJiTrzESd2anfx6CgIBOm9Gfgsccek0cffdS0td6jK0wCGV5aD/oCkPFmCyqdRaYDtbNmzWpm1OkMualTp7q3jx8/3gxw1+2hoaHO7Nmzbxmc/dprr5lB7rp+8ODBZt1///tfp3HjxmYAfalSpZwVK1YkOqBdZ7vdbO7cuWYGW+bMmZ1cuXI5Tz31lJmpl5wB7TrLMakB9jdf3zUwX2fvlShRwsyoa9SokfPLL794nGfKlClmu84aLF26tGmPpK7jMmzYMKdgwYKOj4+P+Z4onZmnMxb1WhUrVjQz+BIen1gbabvrOh0k76LH6fdQz5MzZ07zfXJ9f3Q2pE4a0Fmees/58uUz27///nv3ZAG9B/3+6gxL/VnRWZqAN/DR/0nrgAcAGZU+50qflcVT1AHvwUNEAQAALCJcAQAAWES3IAAAgEVUrgAAACwiXAEAAFhEuAIAALCIcAUAAGAR4QoAAMAiwhUAAIBFhCsAAACLCFcAAABiz/8DgCorkNbd/iwAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from matplotlib import pyplot as plt\n", "\n", "plt.barh(global_feat_imp.index, global_feat_imp.values.flatten())\n", "plt.xlabel(\"Feature importances\");" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Get local explanations, i.e. feature importances (in decreasing order):" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Obs. 0:\n", " parch (Cumul.perc.: 0.996): 5.0\n", "home.dest (Perc.: 0.011): Sweden Winnipeg, MN\n", "sex (Perc.: 0.4): female\n", "\n", "Obs. 100:\n", " home.dest (Perc.: 0.002): Tofta, Sweden Joliet, IL\n", "fare (Cumul.perc.: 0.07): 7.78\n", "\n", "Obs. 200:\n", " home.dest (Perc.: 0.013): Brooklyn, NY\n", "\n", "Obs. 300:\n", " home.dest (Perc.: 0.007): Bournmouth, England\n", "age (Cumul.perc.: 0.05): 5.0\n", "sex (Perc.: 0.4): female\n", "\n", "Obs. 400:\n", " home.dest (Perc.: 0.002): Taalintehdas, Finland Hoboken, NJ\n" ] } ], "source": [ "for obs, ex in enumerate(df_train.explanation.values):\n", " if (obs % 100) == 0:\n", " print(f'\\nObs. {obs}:\\n', ex)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "y_pred_test = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Create local explanations for 226 observations.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "65da9be295504cdba17d044a1ae50524", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/226 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssexagesibspparchfareembarkedhome.destexplanation
02.0male36.01.02.027.7500SBournmouth, Englandhome.dest (Perc.: 0.007): Bournmouth, England
11.0male49.01.01.0110.8833CHaverford, PAhome.dest (Perc.: 0.007): Haverford, PA\\nfare ...
\n", "" ], "text/plain": [ " pclass sex age sibsp parch fare embarked home.dest \\\n", "0 2.0 male 36.0 1.0 2.0 27.7500 S Bournmouth, England \n", "1 1.0 male 49.0 1.0 1.0 110.8833 C Haverford, PA \n", "\n", " explanation \n", "0 home.dest (Perc.: 0.007): Bournmouth, England \n", "1 home.dest (Perc.: 0.007): Haverford, PA\\nfare ... " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_test = local_expl.get_explanation(nof_feat_expl = 4)\n", "df_test.head(2)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Obs. 0:\n", " home.dest (Perc.: 0.007): Bournmouth, England\n", "\n", "Obs. 50:\n", " home.dest (Perc.: 0.002): Deephaven, MN / Cedar Rapids, IA\n", "fare (Cumul.perc.: 0.91): 106.42\n", "\n", "Obs. 100:\n", " home.dest (Perc.: 0.002): Hudson, NY\n", "sex (Perc.: 0.4): female\n", "\n", "Obs. 150:\n", " home.dest (Perc.: 0.0): ?Havana, Cuba\n", "\n", "Obs. 200:\n", " embarked (Perc.: 0.048): Q\n", "home.dest (Perc.: 0.0): Co Sligo, Ireland Hartford, CT\n", "sex (Perc.: 0.4): female\n", "fare (Cumul.perc.: 0.061): 7.75\n" ] } ], "source": [ "for obs, ex in enumerate(df_test.explanation.values):\n", " if (obs % 50) == 0:\n", " print(f'\\nObs. {obs}:\\n', ex)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
avg ranks
embarked0.157711
parch0.245639
sex0.256804
sibsp0.441731
age0.480112
fare0.575715
pclass0.638521
home.dest1.000000
\n", "
" ], "text/plain": [ " avg ranks\n", "embarked 0.157711\n", "parch 0.245639\n", "sex 0.256804\n", "sibsp 0.441731\n", "age 0.480112\n", "fare 0.575715\n", "pclass 0.638521\n", "home.dest 1.000000" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "local_expl.global_feat_imp # based on X_test" ] } ], "metadata": { "kernelspec": { "display_name": "bayes-anomaly", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.8" } }, "nbformat": 4, "nbformat_minor": 4 }