{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "045e0f5b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_2684932/3289088492.py:1: DeprecationWarning: \n",
      "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
      "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
      "but was not found to be installed on your system.\n",
      "If this would cause problems for you,\n",
      "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
      "        \n",
      "  import pandas as pd\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "from sklearn.model_selection import train_test_split\n",
    "load(\"~/conjecturing/sage/conjecturing.py\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c561be47-a291-4492-baa8-5cc698c879cb",
   "metadata": {},
   "source": [
    "Specify output files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4861be5f-d0ae-4a84-b768-b6903560f130",
   "metadata": {},
   "outputs": [],
   "source": [
    "inv_file = open(\"2022_12_07_inv.txt\", \"w\")\n",
    "prop_file = open(\"2022_12_07_prop.txt\", \"w\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f5db5172-0de7-4bfa-ad8e-6804f0b1798a",
   "metadata": {
    "tags": []
   },
   "source": [
    "Specify the number of examples to use for conjecturing and skips."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cc76b0a8-7d39-4ce4-aecc-7495a82aaafa",
   "metadata": {},
   "outputs": [],
   "source": [
    "num_train = 100\n",
    "my_skips = 0.3"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c229b455-e876-4ce7-a910-c6766f168ecf",
   "metadata": {},
   "source": [
    "Read data.  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e449e88a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(25808, 63)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Year</th>\n",
       "      <th>DSA</th>\n",
       "      <th>Program_Year.x</th>\n",
       "      <th>Facility_Count</th>\n",
       "      <th>HIE_B_Count</th>\n",
       "      <th>Weighted_HIE_DSA</th>\n",
       "      <th>HIE_DSA_Penetration</th>\n",
       "      <th>Time</th>\n",
       "      <th>ZIP</th>\n",
       "      <th>County.Code</th>\n",
       "      <th>...</th>\n",
       "      <th>Tx_R_O1</th>\n",
       "      <th>Tx_R_ST1</th>\n",
       "      <th>Tx_R_ST2</th>\n",
       "      <th>Flagging_ratio</th>\n",
       "      <th>Tx_R_ST_C</th>\n",
       "      <th>M_R_O1</th>\n",
       "      <th>M_R_ST1</th>\n",
       "      <th>HalfYear.y</th>\n",
       "      <th>HIE_DSA_P</th>\n",
       "      <th>P_HIE_Adopt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2012</td>\n",
       "      <td>ALOB</td>\n",
       "      <td>2013</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>35010</td>\n",
       "      <td>1037</td>\n",
       "      <td>...</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.08</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2012</td>\n",
       "      <td>ALOB</td>\n",
       "      <td>2013</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>36033</td>\n",
       "      <td>1013</td>\n",
       "      <td>...</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.08</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2012</td>\n",
       "      <td>ALOB</td>\n",
       "      <td>2013</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>35401</td>\n",
       "      <td>1125</td>\n",
       "      <td>...</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.08</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2012</td>\n",
       "      <td>ALOB</td>\n",
       "      <td>2013</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>36401</td>\n",
       "      <td>1035</td>\n",
       "      <td>...</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.08</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2012</td>\n",
       "      <td>ALOB</td>\n",
       "      <td>2013</td>\n",
       "      <td>102</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>35462</td>\n",
       "      <td>1063</td>\n",
       "      <td>...</td>\n",
       "      <td>0.05</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.08</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 63 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Year   DSA  Program_Year.x  Facility_Count  HIE_B_Count  Weighted_HIE_DSA  \\\n",
       "0  2012  ALOB            2013             102            0               0.0   \n",
       "1  2012  ALOB            2013             102            0               0.0   \n",
       "2  2012  ALOB            2013             102            0               0.0   \n",
       "3  2012  ALOB            2013             102            0               0.0   \n",
       "4  2012  ALOB            2013             102            0               0.0   \n",
       "\n",
       "   HIE_DSA_Penetration  Time    ZIP  County.Code  ... Tx_R_O1 Tx_R_ST1  \\\n",
       "0                    0     1  35010         1037  ...    0.05        1   \n",
       "1                    0     1  36033         1013  ...    0.05        1   \n",
       "2                    0     1  35401         1125  ...    0.05        1   \n",
       "3                    0     1  36401         1035  ...    0.05        1   \n",
       "4                    0     1  35462         1063  ...    0.05        1   \n",
       "\n",
       "   Tx_R_ST2  Flagging_ratio  Tx_R_ST_C M_R_O1  M_R_ST1 HalfYear.y HIE_DSA_P  \\\n",
       "0         1             1.0          1   0.08        1          7         0   \n",
       "1         1             1.0          1   0.08        1          7         0   \n",
       "2         1             1.0          1   0.08        1          7         0   \n",
       "3         1             1.0          1   0.08        1          7         0   \n",
       "4         1             1.0          1   0.08        1          7         0   \n",
       "\n",
       "  P_HIE_Adopt  \n",
       "0           0  \n",
       "1           0  \n",
       "2           0  \n",
       "3           0  \n",
       "4           0  \n",
       "\n",
       "[5 rows x 63 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_data =pd.read_excel(\"essi.xlsx\",\n",
    "                    header=int(0),\n",
    "                    sheet_name = \"Sheet1\"   \n",
    "                    )\n",
    "print(my_data.shape)\n",
    "my_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf280d83-a24c-4458-ab72-66a115e7cf99",
   "metadata": {},
   "source": [
    "Check the data types of the columns.  For categorical data, make sure the type is integer or objects.  Make sure the categories do not contain special characters besides numbers and \"_\"."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "17f4debf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 25808 entries, 0 to 25807\n",
      "Data columns (total 63 columns):\n",
      " #   Column               Non-Null Count  Dtype  \n",
      "---  ------               --------------  -----  \n",
      " 0   Year                 25808 non-null  int64  \n",
      " 1   DSA                  25808 non-null  object \n",
      " 2   Program_Year.x       25808 non-null  int64  \n",
      " 3   Facility_Count       25808 non-null  int64  \n",
      " 4   HIE_B_Count          25808 non-null  int64  \n",
      " 5   Weighted_HIE_DSA     25808 non-null  float64\n",
      " 6   HIE_DSA_Penetration  25808 non-null  int64  \n",
      " 7   Time                 25808 non-null  int64  \n",
      " 8   ZIP                  25808 non-null  int64  \n",
      " 9   County.Code          25808 non-null  int64  \n",
      " 10  State.Name           25808 non-null  object \n",
      " 11  County.Name          25808 non-null  object \n",
      " 12  County.FIPS          25808 non-null  int64  \n",
      " 13  Provider_Number      25808 non-null  int64  \n",
      " 14  Network              25808 non-null  int64  \n",
      " 15  Facility_Name        25808 non-null  object \n",
      " 16  Unnamed: 16          0 non-null      float64\n",
      " 17  City                 25808 non-null  object \n",
      " 18  STATE                25808 non-null  object \n",
      " 19  Late.Shift           25808 non-null  object \n",
      " 20  Chain.Owned          23214 non-null  object \n",
      " 21  Chain.Organization   25808 non-null  object \n",
      " 22  TOTSTAS              25808 non-null  int64  \n",
      " 23  HD                   25808 non-null  int64  \n",
      " 24  PD                   25808 non-null  int64  \n",
      " 25  HOMEHD               25808 non-null  int64  \n",
      " 26  N_DP_HGBD            25454 non-null  float64\n",
      " 27  N_DP_HGBD12          23537 non-null  float64\n",
      " 28  PTSURV_C             25808 non-null  int64  \n",
      " 29  County               25808 non-null  object \n",
      " 30  HDKTVPM12_F          19151 non-null  float64\n",
      " 31  VAVF_F               23931 non-null  float64\n",
      " 32  F_Star               0 non-null      float64\n",
      " 33  PT_HOS_T             25798 non-null  object \n",
      " 34  PT_HOS_T1            25808 non-null  int64  \n",
      " 35  PT_HOS_T2            24936 non-null  float64\n",
      " 36  PT_HOS_C             25808 non-null  int64  \n",
      " 37  N_PT_HOS_S           25418 non-null  float64\n",
      " 38  N_PTSURV_SUM         25416 non-null  float64\n",
      " 39  SHR                  25808 non-null  float64\n",
      " 40  SMR                  25808 non-null  float64\n",
      " 41  PT_TRANS_T           25061 non-null  object \n",
      " 42  PT_TRANS_T1          23396 non-null  float64\n",
      " 43  PT_TRANS_T2          23396 non-null  float64\n",
      " 44  N_PT_TRANS_S         25269 non-null  float64\n",
      " 45  PTSURV_T             25808 non-null  object \n",
      " 46  PTSURV_T1            25808 non-null  int64  \n",
      " 47  PTSURV_T2            24872 non-null  float64\n",
      " 48  R_Year               25808 non-null  int64  \n",
      " 49  HalfYear.x           25808 non-null  int64  \n",
      " 50  N_Tx_Ctr1            25808 non-null  int64  \n",
      " 51  N_R_Ctr1             25808 non-null  int64  \n",
      " 52  N_Center             25808 non-null  int64  \n",
      " 53  Tx_R_O1              25808 non-null  float64\n",
      " 54  Tx_R_ST1             25808 non-null  int64  \n",
      " 55  Tx_R_ST2             25808 non-null  int64  \n",
      " 56  Flagging_ratio       25808 non-null  float64\n",
      " 57  Tx_R_ST_C            25808 non-null  int64  \n",
      " 58  M_R_O1               25808 non-null  float64\n",
      " 59  M_R_ST1              25808 non-null  int64  \n",
      " 60  HalfYear.y           25808 non-null  int64  \n",
      " 61  HIE_DSA_P            25808 non-null  int64  \n",
      " 62  P_HIE_Adopt          25808 non-null  int64  \n",
      "dtypes: float64(19), int64(31), object(13)\n",
      "memory usage: 12.4+ MB\n"
     ]
    }
   ],
   "source": [
    "my_data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "622c9753-c59a-4cb8-bfd4-bb03f9c5434b",
   "metadata": {},
   "source": [
    "Pandas thinks PTSURV_T and PT_HOS_T are integers/numeric, but they are categorical.  Recast them as objects."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "bc565d07",
   "metadata": {},
   "outputs": [],
   "source": [
    "#\"PTSURV_T\", \"PT_HOS_T\"\n",
    "my_data = my_data.astype({\"PTSURV_T\": object, \"PT_HOS_T\": object})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "77108245",
   "metadata": {},
   "outputs": [],
   "source": [
    "my_data.dropna(subset=[\"PTSURV_T\"], inplace=True)\n",
    "#my_data.dropna().reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b5b3c159-cac7-4599-8680-8c39c381da77",
   "metadata": {},
   "source": [
    "Identify invariant and categorical columns and the target column.  The target should be in one of the lists."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "820e9888",
   "metadata": {},
   "outputs": [],
   "source": [
    "#invariant_names=[\"Tx_R_O1\", \"N_Tx_Ctr1\", \"N_R_Ctr1\", \"N_Center\", \"M_R_O1\", \"M_R_ST1\",   \"N_PT_TRANS_S\", \"PT_TRANS_T1\", \"N_PTSURV_SUM\", \"N_PT_HOS_S\", \"VAVF_F\", \"N_DP_HGBD12\", \"N_DP_HGBD\", \"HOMEHD\", \"PD\", \"HD\", \"TOTSTAS\"]\n",
    "invariant_names=[\"Weighted_HIE_DSA\", \"Flagging_ratio\", \"Tx_R_O1\", \"N_Tx_Ctr1\"]\n",
    "\n",
    "categorical_names=[\"PTSURV_T\", \"PT_HOS_T\"]\n",
    "target = \"PTSURV_T\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "88b339af-22c1-415b-8faa-fbfaa3437f71",
   "metadata": {},
   "source": [
    "Generic code starts here.  Rename target column.  Select columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "5ae99a05-9075-4ce7-9bb7-bb7137c3cb44",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Year', 'DSA', 'Program_Year.x', 'Facility_Count', 'HIE_B_Count',\n",
      "       'Weighted_HIE_DSA', 'HIE_DSA_Penetration', 'Time', 'ZIP', 'County.Code',\n",
      "       'State.Name', 'County.Name', 'County.FIPS', 'Provider_Number',\n",
      "       'Network', 'Facility_Name', 'Unnamed: 16', 'City', 'STATE',\n",
      "       'Late.Shift', 'Chain.Owned', 'Chain.Organization', 'TOTSTAS', 'HD',\n",
      "       'PD', 'HOMEHD', 'N_DP_HGBD', 'N_DP_HGBD12', 'PTSURV_C', 'County',\n",
      "       'HDKTVPM12_F', 'VAVF_F', 'F_Star', 'PT_HOS_T', 'PT_HOS_T1', 'PT_HOS_T2',\n",
      "       'PT_HOS_C', 'N_PT_HOS_S', 'N_PTSURV_SUM', 'SHR', 'SMR', 'PT_TRANS_T',\n",
      "       'PT_TRANS_T1', 'PT_TRANS_T2', 'N_PT_TRANS_S', 'TARGET', 'PTSURV_T1',\n",
      "       'PTSURV_T2', 'R_Year', 'HalfYear.x', 'N_Tx_Ctr1', 'N_R_Ctr1',\n",
      "       'N_Center', 'Tx_R_O1', 'Tx_R_ST1', 'Tx_R_ST2', 'Flagging_ratio',\n",
      "       'Tx_R_ST_C', 'M_R_O1', 'M_R_ST1', 'HalfYear.y', 'HIE_DSA_P',\n",
      "       'P_HIE_Adopt'],\n",
      "      dtype='object')\n",
      "['TARGET', 'PT_HOS_T']\n"
     ]
    }
   ],
   "source": [
    "if target in categorical_names:\n",
    "    categorical_names[categorical_names.index(target)] = \"TARGET\"\n",
    "else:\n",
    "    invariant_names[invariant_names.index(target)] = \"TARGET\"\n",
    "\n",
    "my_data = my_data.rename(columns={target: \"TARGET\"})\n",
    "print(my_data.columns)\n",
    "my_data = my_data[invariant_names + categorical_names]\n",
    "print(categorical_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94bcbfd3-514a-4a5b-bbea-9ffdd72425c1",
   "metadata": {},
   "source": [
    "Convert categorical variables to dummies.  One dummy for each binary variable and one dummy for each level for variables with more than two levels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "daa9681d-dcc4-4600-86c2-8926b2ba2bd1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Weighted_HIE_DSA</th>\n",
       "      <th>Flagging_ratio</th>\n",
       "      <th>Tx_R_O1</th>\n",
       "      <th>N_Tx_Ctr1</th>\n",
       "      <th>TARGET_As Expected</th>\n",
       "      <th>TARGET_Better than Expected</th>\n",
       "      <th>TARGET_Not Available</th>\n",
       "      <th>TARGET_Worse than Expected</th>\n",
       "      <th>TARGET_nan</th>\n",
       "      <th>PT_HOS_T_As Expected</th>\n",
       "      <th>PT_HOS_T_Better than Expected</th>\n",
       "      <th>PT_HOS_T_Not Available</th>\n",
       "      <th>PT_HOS_T_Worse than Expected</th>\n",
       "      <th>PT_HOS_T_nan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.05</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.05</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.05</td>\n",
       "      <td>157</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.05</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.05</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Weighted_HIE_DSA  Flagging_ratio  Tx_R_O1  N_Tx_Ctr1  TARGET_As Expected  \\\n",
       "0               0.0             1.0     0.05        157                   1   \n",
       "1               0.0             1.0     0.05        157                   1   \n",
       "2               0.0             1.0     0.05        157                   0   \n",
       "3               0.0             1.0     0.05        157                   1   \n",
       "4               0.0             1.0     0.05        157                   1   \n",
       "\n",
       "   TARGET_Better than Expected  TARGET_Not Available  \\\n",
       "0                            0                     0   \n",
       "1                            0                     0   \n",
       "2                            0                     0   \n",
       "3                            0                     0   \n",
       "4                            0                     0   \n",
       "\n",
       "   TARGET_Worse than Expected  TARGET_nan  PT_HOS_T_As Expected  \\\n",
       "0                           0           0                     1   \n",
       "1                           0           0                     0   \n",
       "2                           1           0                     1   \n",
       "3                           0           0                     1   \n",
       "4                           0           0                     1   \n",
       "\n",
       "   PT_HOS_T_Better than Expected  PT_HOS_T_Not Available  \\\n",
       "0                              0                       0   \n",
       "1                              1                       0   \n",
       "2                              0                       0   \n",
       "3                              0                       0   \n",
       "4                              0                       0   \n",
       "\n",
       "   PT_HOS_T_Worse than Expected  PT_HOS_T_nan  \n",
       "0                             0             0  \n",
       "1                             0             0  \n",
       "2                             0             0  \n",
       "3                             0             0  \n",
       "4                             0             0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "property_names = []\n",
    "for col in categorical_names:\n",
    "    if col != \"TARGET\":\n",
    "        unique_vals=list(my_data[col].unique())  # if nan is a level\n",
    "        #unique_vals=list(my_data[col].dropna().unique())  # if nan is not a level\n",
    "        if len(unique_vals)==2: # just use one level for binary features\n",
    "            property_names.append(col+\"_\"+str(unique_vals[1]))\n",
    "        elif len(unique_vals) > 2: #one property for each level.\n",
    "            for level in unique_vals:\n",
    "                property_names.append(col+\"_\"+str(level))\n",
    "\n",
    "\n",
    "if \"TARGET\" in categorical_names:\n",
    "    target_property_names = []\n",
    "    unique_vals = list(my_data[\"TARGET\"].unique()) # if nan is a level\n",
    "    #unique_vals = list(my_data[\"TARGET\"].dropna().unique()) # if nan is not a level\n",
    "    if len(unique_vals)==2:\n",
    "        target_property_names.append(\"TARGET_\"+str(unique_vals[1]))\n",
    "    elif len(unique_vals) > 2:\n",
    "        for level in unique_vals:\n",
    "            target_property_names.append(\"TARGET_\"+str(level))\n",
    "            \n",
    "my_df = pd.get_dummies(my_data, \n",
    "                       columns=categorical_names,\n",
    "                       dtype=np.uint8,\n",
    "                       dummy_na=True,  # False is the default.  If False, use dropna() above\n",
    "                       drop_first=False) # False is the default\n",
    "\n",
    "my_df = my_df.rename(lambda col: col.replace('.0', ''), axis='columns')\n",
    "my_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2c9c7a94-00ee-49df-8b80-90ece844bee9",
   "metadata": {},
   "source": [
    "Define class, invariants, properties, and target properties (if applicable)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5aaf4118",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['PT_HOS_T_As Expected', 'PT_HOS_T_Better than Expected', 'PT_HOS_T_Worse than Expected', 'PT_HOS_T_Not Available', 'PT_HOS_T_nan']\n"
     ]
    }
   ],
   "source": [
    "class Example():\n",
    "    def __init__(self, name, mydf):\n",
    "        self.name = name\n",
    "        self.mydf = mydf\n",
    "        \n",
    "for i in invariant_names:\n",
    "    inv = build_inv(i)\n",
    "    setattr(Example,inv.__name__,inv )\n",
    "\n",
    "for i in property_names:\n",
    "    prop = build_prop(i)\n",
    "    setattr(Example, prop.__name__,prop)\n",
    "\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for i in target_property_names:\n",
    "        prop = build_prop(i)\n",
    "        setattr(Example, prop.__name__, prop)\n",
    "else:\n",
    "    target_invariant = invariant_names.index(\"TARGET\")\n",
    "print(property_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a474cff-5048-45c0-8cdb-f28ae59e6215",
   "metadata": {},
   "source": [
    "Split into training and testing data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a84753d5-f6ff-44a2-ae83-e084938808ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "if \"TARGET\" in categorical_names:\n",
    "    X_train, X_test = train_test_split(\n",
    "        my_df.index,\n",
    "        stratify=my_data[\"TARGET\"],  # stratify on target levels\n",
    "        train_size=num_train,\n",
    "        random_state=12345\n",
    "    )\n",
    "else:\n",
    "    X_train, X_test = train_test_split(\n",
    "        my_df.index,\n",
    "        train_size=num_train,\n",
    "        random_state=12345\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "41f925f4-f870-46bc-9778-7935914574a7",
   "metadata": {},
   "source": [
    "Create examples for conjecturing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "fee01df8",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_examples = [Example(i, my_df) for i in X_train]\n",
    "test_examples = [Example(i, my_df) for i in X_test]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62bfa521-f831-45f2-86b2-9cd69f2cc4e8",
   "metadata": {},
   "source": [
    "Get lists of invariant and property functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3f252376",
   "metadata": {},
   "outputs": [],
   "source": [
    "invariants =[]\n",
    "for i in invariant_names:\n",
    "    invariants.append(Example.__dict__[i])\n",
    "properties=[]\n",
    "for i in property_names:\n",
    "    properties.append(Example.__dict__[i])\n",
    "target_properties=[]\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for i in target_property_names:\n",
    "        target_properties.append(Example.__dict__[i])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0367a44b-736d-4388-8e63-8307607f1f8b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Weighted_HIE_DSA</th>\n",
       "      <th>Flagging_ratio</th>\n",
       "      <th>Tx_R_O1</th>\n",
       "      <th>N_Tx_Ctr1</th>\n",
       "      <th>TARGET_As Expected</th>\n",
       "      <th>TARGET_Better than Expected</th>\n",
       "      <th>TARGET_Not Available</th>\n",
       "      <th>TARGET_Worse than Expected</th>\n",
       "      <th>TARGET_nan</th>\n",
       "      <th>PT_HOS_T_As Expected</th>\n",
       "      <th>PT_HOS_T_Better than Expected</th>\n",
       "      <th>PT_HOS_T_Not Available</th>\n",
       "      <th>PT_HOS_T_Worse than Expected</th>\n",
       "      <th>PT_HOS_T_nan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>157</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>157</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25803</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.089506</td>\n",
       "      <td>118</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25804</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.089506</td>\n",
       "      <td>118</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25805</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.089506</td>\n",
       "      <td>118</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25806</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.089506</td>\n",
       "      <td>118</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25807</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.089506</td>\n",
       "      <td>118</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>25808 rows × 14 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       Weighted_HIE_DSA  Flagging_ratio   Tx_R_O1  N_Tx_Ctr1  \\\n",
       "0                   0.0             1.0  0.050000        157   \n",
       "1                   0.0             1.0  0.050000        157   \n",
       "2                   0.0             1.0  0.050000        157   \n",
       "3                   0.0             1.0  0.050000        157   \n",
       "4                   0.0             1.0  0.050000        157   \n",
       "...                 ...             ...       ...        ...   \n",
       "25803               0.0             1.0  0.089506        118   \n",
       "25804               0.0             1.0  0.089506        118   \n",
       "25805               0.0             1.0  0.089506        118   \n",
       "25806               0.0             1.0  0.089506        118   \n",
       "25807               0.0             1.0  0.089506        118   \n",
       "\n",
       "       TARGET_As Expected  TARGET_Better than Expected  TARGET_Not Available  \\\n",
       "0                       1                            0                     0   \n",
       "1                       1                            0                     0   \n",
       "2                       0                            0                     0   \n",
       "3                       1                            0                     0   \n",
       "4                       1                            0                     0   \n",
       "...                   ...                          ...                   ...   \n",
       "25803                   0                            1                     0   \n",
       "25804                   1                            0                     0   \n",
       "25805                   1                            0                     0   \n",
       "25806                   1                            0                     0   \n",
       "25807                   1                            0                     0   \n",
       "\n",
       "       TARGET_Worse than Expected  TARGET_nan  PT_HOS_T_As Expected  \\\n",
       "0                               0           0                     1   \n",
       "1                               0           0                     0   \n",
       "2                               1           0                     1   \n",
       "3                               0           0                     1   \n",
       "4                               0           0                     1   \n",
       "...                           ...         ...                   ...   \n",
       "25803                           0           0                     1   \n",
       "25804                           0           0                     1   \n",
       "25805                           0           0                     1   \n",
       "25806                           0           0                     1   \n",
       "25807                           0           0                     1   \n",
       "\n",
       "       PT_HOS_T_Better than Expected  PT_HOS_T_Not Available  \\\n",
       "0                                  0                       0   \n",
       "1                                  1                       0   \n",
       "2                                  0                       0   \n",
       "3                                  0                       0   \n",
       "4                                  0                       0   \n",
       "...                              ...                     ...   \n",
       "25803                              0                       0   \n",
       "25804                              0                       0   \n",
       "25805                              0                       0   \n",
       "25806                              0                       0   \n",
       "25807                              0                       0   \n",
       "\n",
       "       PT_HOS_T_Worse than Expected  PT_HOS_T_nan  \n",
       "0                                 0             0  \n",
       "1                                 0             0  \n",
       "2                                 0             0  \n",
       "3                                 0             0  \n",
       "4                                 0             0  \n",
       "...                             ...           ...  \n",
       "25803                             0             0  \n",
       "25804                             0             0  \n",
       "25805                             0             0  \n",
       "25806                             0             0  \n",
       "25807                             0             0  \n",
       "\n",
       "[25808 rows x 14 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display my_df; pandas abbreviates the repr to the first and last rows.\n",
    "my_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b513bc3-3f57-4c7b-8825-c3a9d19527b9",
   "metadata": {},
   "source": [
    "Invariant conjecturing - upper and lower bounds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "24ae1792",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TARGET_As Expected\n",
      "TARGET_Worse than Expected\n",
      "TARGET_Better than Expected\n",
      "TARGET_Not Available\n",
      "346\n",
      "346\n"
     ]
    }
   ],
   "source": [
    "# Operators the expression-tree search may use when building upper and lower\n",
    "# bounds for each class.\n",
    "use_operators =  { '-1', '+1', '*2', '/2', '^2', '-()', '1/', \n",
    "                  'sqrt', 'ln', 'log10', 'exp', '10^', 'ceil', \n",
    "                  'floor', 'abs', '+', '*', 'max', 'min', '-', '/', '^'}\n",
    "\n",
    "\n",
    "def _bound_conjectures(examples, inv_index):\n",
    "    \"\"\"Return upper- then lower-bound conjectures for invariants[inv_index].\n",
    "\n",
    "    Calls conjecture() twice on `examples` (upperBound=True, then\n",
    "    upperBound=False) with operators=use_operators, time=Integer(5) and\n",
    "    skips=my_skips, passes each result through convert_conjecture_names()\n",
    "    (called for its effect on the list), and returns both results\n",
    "    concatenated in that order.\n",
    "    \"\"\"\n",
    "    found = []\n",
    "    for upper in (True, False):\n",
    "        conjs = conjecture(examples,\n",
    "                           invariants,\n",
    "                           inv_index,\n",
    "                           operators=use_operators,\n",
    "                           upperBound=upper,\n",
    "                           time=Integer(5),\n",
    "                           skips=my_skips)\n",
    "        convert_conjecture_names(conjs)\n",
    "        found += conjs\n",
    "    return found\n",
    "\n",
    "\n",
    "def _bound_conjectures_all(examples):\n",
    "    \"\"\"Return bound conjectures for every invariant, in list order.\n",
    "\n",
    "    Iterates over positions rather than calling invariants.index() so the\n",
    "    lookup is not repeated for every invariant.\n",
    "    \"\"\"\n",
    "    found = []\n",
    "    for inv_index in range(len(invariants)):\n",
    "        sys.stdout.flush()  # push pending prints out before the next search\n",
    "        found += _bound_conjectures(examples, inv_index)\n",
    "    return found\n",
    "\n",
    "\n",
    "inv_conjectures = []\n",
    "\n",
    "if \"TARGET\" in categorical_names:\n",
    "    # One pass per target class, using only the training examples in that class.\n",
    "    for value in target_property_names:\n",
    "        print(value)\n",
    "        target_property = Example.__dict__[value]\n",
    "        my_examples = [example for example in train_examples if target_property(example) == True]\n",
    "        inv_conjectures += _bound_conjectures_all(my_examples)\n",
    "    print(len(inv_conjectures))\n",
    "    if len(target_property_names) == 1:\n",
    "        # Single target property: also bound the invariants over the examples\n",
    "        # for which that property is False.\n",
    "        value = target_property_names[0]\n",
    "        print(value + \" False\")\n",
    "        target_property = Example.__dict__[value]\n",
    "        my_examples = [example for example in train_examples if target_property(example) == False]\n",
    "        inv_conjectures += _bound_conjectures_all(my_examples)\n",
    "else: # target is an invariant\n",
    "    my_examples = list(train_examples)\n",
    "    inv_conjectures += _bound_conjectures(my_examples, target_invariant)\n",
    "print(len(inv_conjectures))\n",
    "\n",
    "# One conjecture per line; the with-block closes inv_file even if a write fails.\n",
    "with inv_file:\n",
    "    for c in inv_conjectures:\n",
    "        inv_file.write(\"%s\\n\" % c)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2fc6d1d3-75e2-43e5-ad29-5e1669954216",
   "metadata": {},
   "source": [
    "Property conjecturing - sufficient conditions for each value of a categorical target.  For a binary target, get sufficient conditions for the positive class and necessary conditions for the negative class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9c8befb8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TARGET_As Expected\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n",
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:177: RuntimeWarning: invalid value encountered in scalar multiply\n",
      "  stack.append(op(left, right))\n",
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:132: RuntimeWarning: overflow encountered in exp\n",
      "  stack.append(op(stack.pop()))\n",
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:177: RuntimeWarning: overflow encountered in scalar power\n",
      "  stack.append(op(left, right))\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: invalid value encountered in log\n",
      "  return ln(args[0], **kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(~(Weighted_HIE_DSA_leq_open_bracket_Flagging_ratio_plus_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket))->(TARGET_AsExpected)\n",
      "(~(N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket))->(TARGET_AsExpected)\n",
      "(~(Tx_R_O1_leq_inverse_of_sqrtopen_bracket_2_times_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket))->(TARGET_AsExpected)\n",
      "(~(N_Tx_Ctr1_geq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_e_to_the_power_open_bracket_Weighted_HIE_DSA_to_the_power_open_bracket_inverse_of_4_close_bracket_close_bracket_close_bracket_close_bracket))->(TARGET_AsExpected)\n",
      "((~(Weighted_HIE_DSA_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_minus_N_Tx_Ctr1_close_bracket_close_bracket))&(Tx_R_O1_geq_open_bracket_inverse_of_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_Flagging_ratio_close_bracket_plus_N_Tx_Ctr1_close_bracket_close_bracket))->(TARGET_AsExpected)\n",
      "((Tx_R_O1_geq_open_bracket_logopen_bracket_flooropen_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_minus_1_close_bracket_squared)^(Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1))->(TARGET_AsExpected)\n",
      "((N_Tx_Ctr1_geq_ceilopen_bracket_10_to_the_power_open_bracket_2_times_sqrtopen_bracket_Tx_R_O1_close_bracket_plus_1_close_bracket_close_bracket)^(N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket))->(TARGET_AsExpected)\n",
      "((N_Tx_Ctr1_geq_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_plus_1_close_bracket_minus_1_close_bracket)^(N_Tx_Ctr1_leq_ceilopen_bracket_e_to_the_power_open_bracket_inverse_of_4_divided_by_Tx_R_O1_squared_close_bracket_close_bracket))->(TARGET_AsExpected)\n",
      "((Tx_R_O1_geq_10_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_minus_1_close_bracket_minus_1_close_bracket_minus_Flagging_ratio)^(Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1))->(TARGET_AsExpected)\n",
      "((Tx_R_O1_leq_ceilopen_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket)^(Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1))->(TARGET_AsExpected)\n",
      "((Tx_R_O1_leq_10_to_the_power_open_bracket_Weighted_HIE_DSA_divided_by_10_to_the_power_logopen_bracket_Flagging_ratio_close_bracket_minus_1_close_bracket)^(Tx_R_O1_leq_open_bracket_logopen_bracket_2_times_sqrtopen_bracket_Weighted_HIE_DSA_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_plus_1_close_bracket_squared))->(TARGET_AsExpected)\n",
      "(~((Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket)|(Weighted_HIE_DSA_geq_open_bracket_inverse_of_2_times_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_inverse_of_2_times_N_Tx_Ctr1_close_bracket)))->(TARGET_AsExpected)\n",
      "TARGET_Worse than Expected\n",
      "(~((Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket)|(N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket)))->(TARGET_WorsethanExpected)\n",
      "((Flagging_ratio_geq__minus_open_bracket_Tx_R_O1_times_Weighted_HIE_DSA_minus_1_close_bracket_to_the_power_N_Tx_Ctr1)&(PT_HOS_T_WorsethanExpected))->(TARGET_WorsethanExpected)\n",
      "TARGET_Better than Expected\n",
      "(~(N_Tx_Ctr1_geq__minus_inverse_of_open_bracket_Tx_R_O1_minus_4_times_Weighted_HIE_DSA_close_bracket))->(TARGET_BetterthanExpected)\n",
      "(~(Tx_R_O1_geq_4_times_open_bracket_Flagging_ratio_minus_1_close_bracket_squared_divided_by_N_Tx_Ctr1))->(TARGET_BetterthanExpected)\n",
      "((Weighted_HIE_DSA_geq_open_bracket_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_logopen_bracket_10_to_the_power_N_Tx_Ctr1_close_bracket_close_bracket)^(N_Tx_Ctr1_leq_10_to_the_power_flooropen_bracket_e_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_close_bracket))->(TARGET_BetterthanExpected)\n",
      "((~(Weighted_HIE_DSA_leq_absopen_bracket_logopen_bracket_logopen_bracket_sqrtopen_bracket_Tx_R_O1_close_bracket_close_bracket_squared_close_bracket_close_bracket))&(PT_HOS_T_WorsethanExpected))->(TARGET_BetterthanExpected)\n",
      "TARGET_Not Available\n",
      "(PT_HOS_T_NotAvailable)->(TARGET_NotAvailable)\n"
     ]
    }
   ],
   "source": [
    "# Slot 0 is a placeholder; it is overwritten with the current target property\n",
    "# before every propertyBasedConjecture call.\n",
    "all_properties = [\"TARGET\"] + properties + inv_conjectures\n",
    "prop_conjs = []\n",
    "conditions = {}\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for value in target_property_names:\n",
    "        print(value)\n",
    "        all_properties[0] = Example.__dict__[value]\n",
    "        these_prop_conjs = propertyBasedConjecture(objects=train_examples,\n",
    "                                                   properties=all_properties,\n",
    "                                                   mainProperty=0,\n",
    "                                                   skips=my_skips)\n",
    "        # Extract each premise once; they are reused when the conjectures are\n",
    "        # applied to the train and test examples.\n",
    "        conditions[value] = [get_premise(c, myprint=False) for c in these_prop_conjs]\n",
    "        prop_conjs += these_prop_conjs\n",
    "    if len(target_property_names) == 1:\n",
    "        # Name the single target property explicitly instead of relying on the\n",
    "        # loop variable left over from the loop above.\n",
    "        value = target_property_names[0]\n",
    "        print(value + \" Necessary\")\n",
    "        all_properties[0] = Example.__dict__[value]\n",
    "        these_prop_conjs = propertyBasedConjecture(objects=train_examples,\n",
    "                                                   properties=all_properties,\n",
    "                                                   mainProperty=0,\n",
    "                                                   sufficient=False,\n",
    "                                                   skips=my_skips)\n",
    "        conditions[\"necessary\"] = [get_conclusion(c, myprint=False) for c in these_prop_conjs]\n",
    "        prop_conjs += these_prop_conjs\n",
    "\n",
    "# One conjecture name per line; the with-block closes prop_file even if a\n",
    "# write fails.\n",
    "with prop_file:\n",
    "    for c in prop_conjs:\n",
    "        prop_file.write(\"%s\\n\" % convert_name_back(c.__name__))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "50788eb0-61a3-4aad-87f0-3ae3508bb38b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Size of the result list from the most recent propertyBasedConjecture call.\n",
    "len(these_prop_conjs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e3b80c5-fa02-4d58-bfe2-b44ff0b81666",
   "metadata": {},
   "source": [
    "Apply conjectures to train and test data if target is categorical."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "f3ab0127-5bef-46ca-9f52-f0a27f35e55a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n",
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'TARGET_As Expected': [~Weighted_HIE_DSA_leq_open_bracket_Flagging_ratio_plus_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket, ~N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket, ~Tx_R_O1_leq_inverse_of_sqrtopen_bracket_2_times_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket, ~N_Tx_Ctr1_geq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_e_to_the_power_open_bracket_Weighted_HIE_DSA_to_the_power_open_bracket_inverse_of_4_close_bracket_close_bracket_close_bracket_close_bracket, ~Weighted_HIE_DSA_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_minus_N_Tx_Ctr1_close_bracket_close_bracket&Tx_R_O1_geq_open_bracket_inverse_of_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_Flagging_ratio_close_bracket_plus_N_Tx_Ctr1_close_bracket_close_bracket, Tx_R_O1_geq_open_bracket_logopen_bracket_flooropen_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_minus_1_close_bracket_squared^Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1, N_Tx_Ctr1_geq_ceilopen_bracket_10_to_the_power_open_bracket_2_times_sqrtopen_bracket_Tx_R_O1_close_bracket_plus_1_close_bracket_close_bracket^N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket, N_Tx_Ctr1_geq_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_plus_1_close_bracket_minus_1_close_bracket^N_Tx_Ctr1_leq_ceilopen_bracket_e_to_the_power_open_bracket_inverse_of_4_divided_by_Tx_R_O1_squared_close_bracket_close_bracket, 
Tx_R_O1_geq_10_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_minus_1_close_bracket_minus_1_close_bracket_minus_Flagging_ratio^Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1, Tx_R_O1_leq_ceilopen_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket^Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1, Tx_R_O1_leq_10_to_the_power_open_bracket_Weighted_HIE_DSA_divided_by_10_to_the_power_logopen_bracket_Flagging_ratio_close_bracket_minus_1_close_bracket^Tx_R_O1_leq_open_bracket_logopen_bracket_2_times_sqrtopen_bracket_Weighted_HIE_DSA_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_plus_1_close_bracket_squared, ~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|Weighted_HIE_DSA_geq_open_bracket_inverse_of_2_times_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_inverse_of_2_times_N_Tx_Ctr1_close_bracket)], 'TARGET_Worse than Expected': [~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket), Flagging_ratio_geq__minus_open_bracket_Tx_R_O1_times_Weighted_HIE_DSA_minus_1_close_bracket_to_the_power_N_Tx_Ctr1&PT_HOS_T_WorsethanExpected], 'TARGET_Better than Expected': [~N_Tx_Ctr1_geq__minus_inverse_of_open_bracket_Tx_R_O1_minus_4_times_Weighted_HIE_DSA_close_bracket, ~Tx_R_O1_geq_4_times_open_bracket_Flagging_ratio_minus_1_close_bracket_squared_divided_by_N_Tx_Ctr1, 
Weighted_HIE_DSA_geq_open_bracket_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_logopen_bracket_10_to_the_power_N_Tx_Ctr1_close_bracket_close_bracket^N_Tx_Ctr1_leq_10_to_the_power_flooropen_bracket_e_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_close_bracket, ~Weighted_HIE_DSA_leq_absopen_bracket_logopen_bracket_logopen_bracket_sqrtopen_bracket_Tx_R_O1_close_bracket_close_bracket_squared_close_bracket_close_bracket&PT_HOS_T_WorsethanExpected], 'TARGET_Not Available': [PT_HOS_T_NotAvailable]}\n"
     ]
    }
   ],
   "source": [
    "X_train_df = my_df.loc[X_train,property_names+invariant_names]  # drop target and one level for each binary variable\n",
    "X_test_df = my_df.loc[X_test,property_names+invariant_names]\n",
    "y_train_df = my_data.loc[X_train,\"TARGET\"] # get original target, even if it is multiple levels\n",
    "y_test_df = my_data.loc[X_test, \"TARGET\"]\n",
    "if \"TARGET\" in categorical_names:\n",
    "    # Collect every condition in a fixed order: the conditions of each target\n",
    "    # value in turn, then (single-property target only) the necessary ones.\n",
    "    all_conditions = []\n",
    "    for value in target_property_names:\n",
    "        all_conditions += conditions[value]\n",
    "    if len(target_property_names) == 1:\n",
    "        all_conditions += conditions[\"necessary\"]\n",
    "\n",
    "    # Give each condition its own column. Numbering runs across all target\n",
    "    # values so a later class no longer overwrites the conj_<i> columns of an\n",
    "    # earlier one, and necessary conditions no longer share a single column.\n",
    "    # NOTE(review): code that looks up 'conj_<i>' by per-class position must\n",
    "    # use this running numbering instead.\n",
    "    for conj_index, condition in enumerate(all_conditions):\n",
    "        X_train_df['conj_' + str(conj_index)] = [condition(example) for example in train_examples]\n",
    "        X_test_df['conj_' + str(conj_index)] = [condition(example) for example in test_examples]\n",
    "\n",
    "    print(conditions)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b410f6ff-3cff-41f6-aaab-7a60f6f6cbe3",
   "metadata": {},
   "source": [
    "Calculate support, precision, and lift."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "498f947a-8e7c-4492-88f7-802df56d1898",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['TARGET_As Expected',\n",
       " 'TARGET_Worse than Expected',\n",
       " 'TARGET_Better than Expected',\n",
       " 'TARGET_Not Available']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_property_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "f9e29f39-e7e2-46bb-8154-ec731a18109c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "value: TARGET_As Expected\n",
      "0 condition: ~Weighted_HIE_DSA_leq_open_bracket_Flagging_ratio_plus_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket\n",
      "1 condition: ~N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket\n",
      "2 condition: ~Tx_R_O1_leq_inverse_of_sqrtopen_bracket_2_times_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket\n",
      "3 condition: ~N_Tx_Ctr1_geq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_e_to_the_power_open_bracket_Weighted_HIE_DSA_to_the_power_open_bracket_inverse_of_4_close_bracket_close_bracket_close_bracket_close_bracket\n",
      "4 condition: ~Weighted_HIE_DSA_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_minus_N_Tx_Ctr1_close_bracket_close_bracket&Tx_R_O1_geq_open_bracket_inverse_of_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_Flagging_ratio_close_bracket_plus_N_Tx_Ctr1_close_bracket_close_bracket\n",
      "5 condition: Tx_R_O1_geq_open_bracket_logopen_bracket_flooropen_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_minus_1_close_bracket_squared^Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1\n",
      "6 condition: N_Tx_Ctr1_geq_ceilopen_bracket_10_to_the_power_open_bracket_2_times_sqrtopen_bracket_Tx_R_O1_close_bracket_plus_1_close_bracket_close_bracket^N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7 condition: N_Tx_Ctr1_geq_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_plus_1_close_bracket_minus_1_close_bracket^N_Tx_Ctr1_leq_ceilopen_bracket_e_to_the_power_open_bracket_inverse_of_4_divided_by_Tx_R_O1_squared_close_bracket_close_bracket\n",
      "8 condition: Tx_R_O1_geq_10_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_minus_1_close_bracket_minus_1_close_bracket_minus_Flagging_ratio^Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1\n",
      "9 condition: Tx_R_O1_leq_ceilopen_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket^Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1\n",
      "10 condition: Tx_R_O1_leq_10_to_the_power_open_bracket_Weighted_HIE_DSA_divided_by_10_to_the_power_logopen_bracket_Flagging_ratio_close_bracket_minus_1_close_bracket^Tx_R_O1_leq_open_bracket_logopen_bracket_2_times_sqrtopen_bracket_Weighted_HIE_DSA_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_plus_1_close_bracket_squared\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11 condition: ~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|Weighted_HIE_DSA_geq_open_bracket_inverse_of_2_times_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_inverse_of_2_times_N_Tx_Ctr1_close_bracket)\n",
      "value: TARGET_Worse than Expected\n",
      "0 condition: ~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket)\n",
      "1 condition: Flagging_ratio_geq__minus_open_bracket_Tx_R_O1_times_Weighted_HIE_DSA_minus_1_close_bracket_to_the_power_N_Tx_Ctr1&PT_HOS_T_WorsethanExpected\n",
      "value: TARGET_Better than Expected\n",
      "0 condition: ~N_Tx_Ctr1_geq__minus_inverse_of_open_bracket_Tx_R_O1_minus_4_times_Weighted_HIE_DSA_close_bracket\n",
      "1 condition: ~Tx_R_O1_geq_4_times_open_bracket_Flagging_ratio_minus_1_close_bracket_squared_divided_by_N_Tx_Ctr1\n",
      "2 condition: Weighted_HIE_DSA_geq_open_bracket_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_logopen_bracket_10_to_the_power_N_Tx_Ctr1_close_bracket_close_bracket^N_Tx_Ctr1_leq_10_to_the_power_flooropen_bracket_e_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_close_bracket\n",
      "3 condition: ~Weighted_HIE_DSA_leq_absopen_bracket_logopen_bracket_logopen_bracket_sqrtopen_bracket_Tx_R_O1_close_bracket_close_bracket_squared_close_bracket_close_bracket&PT_HOS_T_WorsethanExpected\n",
      "value: TARGET_Not Available\n",
      "0 condition: PT_HOS_T_NotAvailable\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>support</th>\n",
       "      <th>precision</th>\n",
       "      <th>lift</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2975</td>\n",
       "      <td>0.851764705882353</td>\n",
       "      <td>1.05229309716101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4622</td>\n",
       "      <td>0.830809173517958</td>\n",
       "      <td>1.02640406712479</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2461</td>\n",
       "      <td>0.852905323039415</td>\n",
       "      <td>1.05370224636923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2644</td>\n",
       "      <td>0.813161875945537</td>\n",
       "      <td>1.00460211960247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2306</td>\n",
       "      <td>0.802688638334779</td>\n",
       "      <td>0.991663199303691</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3534</td>\n",
       "      <td>0.805319750990379</td>\n",
       "      <td>0.994913746862447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2065</td>\n",
       "      <td>0.844552058111380</td>\n",
       "      <td>1.04338239751681</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1709</td>\n",
       "      <td>0.826214160327677</td>\n",
       "      <td>1.02072726386198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4142</td>\n",
       "      <td>0.824239497827137</td>\n",
       "      <td>1.01828771253496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5092</td>\n",
       "      <td>0.830322073841320</td>\n",
       "      <td>1.02580229104295</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>6840</td>\n",
       "      <td>0.813742690058480</td>\n",
       "      <td>1.00531967302722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2341</td>\n",
       "      <td>0.851772746689449</td>\n",
       "      <td>1.05230303099103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>258</td>\n",
       "      <td>0.100775193798450</td>\n",
       "      <td>1.33818630277404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>657</td>\n",
       "      <td>0.168949771689498</td>\n",
       "      <td>2.24347145175290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>112</td>\n",
       "      <td>0.169642857142857</td>\n",
       "      <td>2.14730604206232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>220</td>\n",
       "      <td>0.172727272727273</td>\n",
       "      <td>2.18634797009982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>513</td>\n",
       "      <td>0.105263157894737</td>\n",
       "      <td>1.33240042499158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>0.0555555555555556</td>\n",
       "      <td>0.703211335412222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>858</td>\n",
       "      <td>0.851981351981352</td>\n",
       "      <td>23.5007903398461</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   support           precision               lift\n",
       "0     2975   0.851764705882353   1.05229309716101\n",
       "1     4622   0.830809173517958   1.02640406712479\n",
       "2     2461   0.852905323039415   1.05370224636923\n",
       "3     2644   0.813161875945537   1.00460211960247\n",
       "4     2306   0.802688638334779  0.991663199303691\n",
       "5     3534   0.805319750990379  0.994913746862447\n",
       "6     2065   0.844552058111380   1.04338239751681\n",
       "7     1709   0.826214160327677   1.02072726386198\n",
       "8     4142   0.824239497827137   1.01828771253496\n",
       "9     5092   0.830322073841320   1.02580229104295\n",
       "10    6840   0.813742690058480   1.00531967302722\n",
       "11    2341   0.851772746689449   1.05230303099103\n",
       "12     258   0.100775193798450   1.33818630277404\n",
       "13     657   0.168949771689498   2.24347145175290\n",
       "14     112   0.169642857142857   2.14730604206232\n",
       "15     220   0.172727272727273   2.18634797009982\n",
       "16     513   0.105263157894737   1.33240042499158\n",
       "17      18  0.0555555555555556  0.703211335412222\n",
       "18     858   0.851981351981352   23.5007903398461"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One entry per condition, appended in the order the conditions are iterated below.\n",
    "support = []\n",
    "lift = []\n",
    "precision = []\n",
    "if \"TARGET\" in categorical_names:\n",
    "    # Conditions listed under a target level are scored on the test examples where they are True.\n",
    "    for value in target_property_names:\n",
    "        print(\"value: {}\".format(value))\n",
    "        my_function = getattr(Example, value)\n",
    "        for i, condition in enumerate(conditions[value]):\n",
    "            print(i, \"condition: {}\".format(condition))\n",
    "            num_true = 0  # test examples on which the condition is True (support)\n",
    "            num_in_class = 0  # test examples on which the target property is True\n",
    "            num_hit = 0  # test examples on which both are True\n",
    "            for example in test_examples:\n",
    "                in_class = my_function(example) == True  # evaluate the target once per example\n",
    "                if in_class:\n",
    "                    num_in_class += 1\n",
    "                if condition(example) == True:\n",
    "                    num_true += 1\n",
    "                    if in_class:\n",
    "                        num_hit += 1\n",
    "            support.append(num_true)\n",
    "            if num_hit > 0:  # implies num_true > 0 and num_in_class > 0, so both divisions are safe\n",
    "                precision.append(n(num_hit/num_true))\n",
    "                # lift = precision divided by the share of test examples in the class\n",
    "                lift.append(n(num_hit/num_true)/n(num_in_class/len(test_examples)))\n",
    "            else:\n",
    "                precision.append(0.0)\n",
    "                lift.append(0.0)\n",
    "    if len(target_property_names) == 1:\n",
    "        # With a single target level, conditions[\"necessary\"] are scored where they are False,\n",
    "        # against the examples on which the target property (my_function from the loop above) is False.\n",
    "        for i, condition in enumerate(conditions[\"necessary\"]):\n",
    "            print(i, \"condition: {}\".format(condition))\n",
    "            num_false = 0  # test examples on which the condition is False (support)\n",
    "            num_in_class = 0  # test examples on which the target property is False\n",
    "            num_hit = 0  # test examples on which both are False\n",
    "            for example in test_examples:\n",
    "                not_in_class = my_function(example) == False\n",
    "                if not_in_class:\n",
    "                    num_in_class += 1\n",
    "                if condition(example) == False:\n",
    "                    num_false += 1\n",
    "                    if not_in_class:\n",
    "                        num_hit += 1\n",
    "            support.append(num_false)\n",
    "            if num_hit > 0:  # implies num_false > 0 and num_in_class > 0\n",
    "                precision.append(n(num_hit/num_false))\n",
    "                lift.append(n(num_hit/num_false)/n(num_in_class/len(test_examples)))\n",
    "            else:\n",
    "                precision.append(0.0)\n",
    "                lift.append(0.0)\n",
    "\n",
    "results_df = pd.DataFrame({'support':support, 'precision':precision, 'lift':lift})\n",
    "\n",
    "results_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "e6ebb428-48e9-4aa3-9f45-926488636708",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "value: TARGET_As Expected\n",
      "0 condition: ~Weighted_HIE_DSA_leq_open_bracket_Flagging_ratio_plus_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket\n",
      "1 condition: ~N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket\n",
      "3 condition: ~Tx_R_O1_leq_inverse_of_sqrtopen_bracket_2_times_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket\n",
      "6 condition: ~N_Tx_Ctr1_geq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_e_to_the_power_open_bracket_Weighted_HIE_DSA_to_the_power_open_bracket_inverse_of_4_close_bracket_close_bracket_close_bracket_close_bracket\n",
      "10 condition: ~Weighted_HIE_DSA_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_minus_N_Tx_Ctr1_close_bracket_close_bracket&Tx_R_O1_geq_open_bracket_inverse_of_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_Flagging_ratio_close_bracket_plus_N_Tx_Ctr1_close_bracket_close_bracket\n",
      "15 condition: Tx_R_O1_geq_open_bracket_logopen_bracket_flooropen_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_minus_1_close_bracket_squared^Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1\n",
      "21 condition: N_Tx_Ctr1_geq_ceilopen_bracket_10_to_the_power_open_bracket_2_times_sqrtopen_bracket_Tx_R_O1_close_bracket_plus_1_close_bracket_close_bracket^N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/conjecturing/sage/conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "28 condition: N_Tx_Ctr1_geq_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_plus_1_close_bracket_minus_1_close_bracket^N_Tx_Ctr1_leq_ceilopen_bracket_e_to_the_power_open_bracket_inverse_of_4_divided_by_Tx_R_O1_squared_close_bracket_close_bracket\n",
      "36 condition: Tx_R_O1_geq_10_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_minus_1_close_bracket_minus_1_close_bracket_minus_Flagging_ratio^Weighted_HIE_DSA_leq__minus_inverse_of_open_bracket_sqrtopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket_plus_Tx_R_O1\n",
      "45 condition: Tx_R_O1_leq_ceilopen_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_plus_1_close_bracket^Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1\n",
      "55 condition: Tx_R_O1_leq_10_to_the_power_open_bracket_Weighted_HIE_DSA_divided_by_10_to_the_power_logopen_bracket_Flagging_ratio_close_bracket_minus_1_close_bracket^Tx_R_O1_leq_open_bracket_logopen_bracket_2_times_sqrtopen_bracket_Weighted_HIE_DSA_close_bracket_close_bracket_divided_by_logopen_bracket_10_close_bracket_plus_1_close_bracket_squared\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "66 condition: ~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|Weighted_HIE_DSA_geq_open_bracket_inverse_of_2_times_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_inverse_of_2_times_N_Tx_Ctr1_close_bracket)\n",
      "value: TARGET_Worse than Expected\n",
      "66 condition: ~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket)\n",
      "67 condition: Flagging_ratio_geq__minus_open_bracket_Tx_R_O1_times_Weighted_HIE_DSA_minus_1_close_bracket_to_the_power_N_Tx_Ctr1&PT_HOS_T_WorsethanExpected\n",
      "value: TARGET_Better than Expected\n",
      "67 condition: ~N_Tx_Ctr1_geq__minus_inverse_of_open_bracket_Tx_R_O1_minus_4_times_Weighted_HIE_DSA_close_bracket\n",
      "68 condition: ~Tx_R_O1_geq_4_times_open_bracket_Flagging_ratio_minus_1_close_bracket_squared_divided_by_N_Tx_Ctr1\n",
      "70 condition: Weighted_HIE_DSA_geq_open_bracket_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_logopen_bracket_10_to_the_power_N_Tx_Ctr1_close_bracket_close_bracket^N_Tx_Ctr1_leq_10_to_the_power_flooropen_bracket_e_to_the_power_open_bracket_inverse_of_2_divided_by_Tx_R_O1_close_bracket_close_bracket\n",
      "73 condition: ~Weighted_HIE_DSA_leq_absopen_bracket_logopen_bracket_logopen_bracket_sqrtopen_bracket_Tx_R_O1_close_bracket_close_bracket_squared_close_bracket_close_bracket&PT_HOS_T_WorsethanExpected\n",
      "value: TARGET_Not Available\n",
      "73 condition: PT_HOS_T_NotAvailable\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>support</th>\n",
       "      <th>precision</th>\n",
       "      <th>lift</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2975</td>\n",
       "      <td>0.851764705882353</td>\n",
       "      <td>1.05229309716101</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4622</td>\n",
       "      <td>0.830809173517958</td>\n",
       "      <td>1.02640406712479</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2461</td>\n",
       "      <td>0.852905323039415</td>\n",
       "      <td>1.05370224636923</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2644</td>\n",
       "      <td>0.813161875945537</td>\n",
       "      <td>1.00460211960247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2306</td>\n",
       "      <td>0.802688638334779</td>\n",
       "      <td>0.991663199303691</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>3534</td>\n",
       "      <td>0.805319750990379</td>\n",
       "      <td>0.994913746862447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>2065</td>\n",
       "      <td>0.844552058111380</td>\n",
       "      <td>1.04338239751681</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1709</td>\n",
       "      <td>0.826214160327677</td>\n",
       "      <td>1.02072726386198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>4142</td>\n",
       "      <td>0.824239497827137</td>\n",
       "      <td>1.01828771253496</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>5092</td>\n",
       "      <td>0.830322073841320</td>\n",
       "      <td>1.02580229104295</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>6840</td>\n",
       "      <td>0.813742690058480</td>\n",
       "      <td>1.00531967302722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2341</td>\n",
       "      <td>0.851772746689449</td>\n",
       "      <td>1.05230303099103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>258</td>\n",
       "      <td>0.100775193798450</td>\n",
       "      <td>1.33818630277404</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>657</td>\n",
       "      <td>0.168949771689498</td>\n",
       "      <td>2.24347145175290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>112</td>\n",
       "      <td>0.169642857142857</td>\n",
       "      <td>2.14730604206232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>220</td>\n",
       "      <td>0.172727272727273</td>\n",
       "      <td>2.18634797009982</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>513</td>\n",
       "      <td>0.105263157894737</td>\n",
       "      <td>1.33240042499158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>0.0555555555555556</td>\n",
       "      <td>0.703211335412222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>858</td>\n",
       "      <td>0.851981351981352</td>\n",
       "      <td>23.5007903398461</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   support           precision               lift\n",
       "0     2975   0.851764705882353   1.05229309716101\n",
       "1     4622   0.830809173517958   1.02640406712479\n",
       "2     2461   0.852905323039415   1.05370224636923\n",
       "3     2644   0.813161875945537   1.00460211960247\n",
       "4     2306   0.802688638334779  0.991663199303691\n",
       "5     3534   0.805319750990379  0.994913746862447\n",
       "6     2065   0.844552058111380   1.04338239751681\n",
       "7     1709   0.826214160327677   1.02072726386198\n",
       "8     4142   0.824239497827137   1.01828771253496\n",
       "9     5092   0.830322073841320   1.02580229104295\n",
       "10    6840   0.813742690058480   1.00531967302722\n",
       "11    2341   0.851772746689449   1.05230303099103\n",
       "12     258   0.100775193798450   1.33818630277404\n",
       "13     657   0.168949771689498   2.24347145175290\n",
       "14     112   0.169642857142857   2.14730604206232\n",
       "15     220   0.172727272727273   2.18634797009982\n",
       "16     513   0.105263157894737   1.33240042499158\n",
       "17      18  0.0555555555555556  0.703211335412222\n",
       "18     858   0.851981351981352   23.5007903398461"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One entry per condition, appended in the order the conditions are iterated below.\n",
    "support = []\n",
    "lift = []\n",
    "precision = []\n",
    "# Running number of the condition being scored; it equals the row index that condition gets in\n",
    "# results_df. (Adding the per-level enumerate index to it produced 0, 1, 3, 6, 10, ... instead.)\n",
    "count = 0\n",
    "if \"TARGET\" in categorical_names:\n",
    "    # Conditions listed under a target level are scored on the test examples where they are True.\n",
    "    for value in target_property_names:\n",
    "        print(\"value: {}\".format(value))\n",
    "        my_function = getattr(Example, value)\n",
    "        for condition in conditions[value]:\n",
    "            print(count, \"condition: {}\".format(condition))\n",
    "            count += 1\n",
    "            num_true = 0  # test examples on which the condition is True (support)\n",
    "            num_in_class = 0  # test examples on which the target property is True\n",
    "            num_hit = 0  # test examples on which both are True\n",
    "            for example in test_examples:\n",
    "                in_class = my_function(example) == True  # evaluate the target once per example\n",
    "                if in_class:\n",
    "                    num_in_class += 1\n",
    "                if condition(example) == True:\n",
    "                    num_true += 1\n",
    "                    if in_class:\n",
    "                        num_hit += 1\n",
    "            support.append(num_true)\n",
    "            if num_hit > 0:  # implies num_true > 0 and num_in_class > 0, so both divisions are safe\n",
    "                precision.append(n(num_hit/num_true))\n",
    "                # lift = precision divided by the share of test examples in the class\n",
    "                lift.append(n(num_hit/num_true)/n(num_in_class/len(test_examples)))\n",
    "            else:\n",
    "                precision.append(0.0)\n",
    "                lift.append(0.0)\n",
    "    if len(target_property_names) == 1:\n",
    "        # With a single target level, conditions[\"necessary\"] are scored where they are False,\n",
    "        # against the examples on which the target property (my_function from the loop above) is False.\n",
    "        for condition in conditions[\"necessary\"]:\n",
    "            print(count, \"condition: {}\".format(condition))\n",
    "            count += 1\n",
    "            num_false = 0  # test examples on which the condition is False (support)\n",
    "            num_in_class = 0  # test examples on which the target property is False\n",
    "            num_hit = 0  # test examples on which both are False\n",
    "            for example in test_examples:\n",
    "                not_in_class = my_function(example) == False\n",
    "                if not_in_class:\n",
    "                    num_in_class += 1\n",
    "                if condition(example) == False:\n",
    "                    num_false += 1\n",
    "                    if not_in_class:\n",
    "                        num_hit += 1\n",
    "            support.append(num_false)\n",
    "            if num_hit > 0:  # implies num_false > 0 and num_in_class > 0\n",
    "                precision.append(n(num_hit/num_false))\n",
    "                lift.append(n(num_hit/num_false)/n(num_in_class/len(test_examples)))\n",
    "            else:\n",
    "                precision.append(0.0)\n",
    "                lift.append(0.0)\n",
    "\n",
    "results_df = pd.DataFrame({'support':support, 'precision':precision, 'lift':lift})\n",
    "\n",
    "results_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "SageMath 10.2",
   "language": "sage",
   "name": "sagemath"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}