{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "acf961fd-2b08-491e-93be-e1404aa10029",
   "metadata": {},
   "source": [
    "# Titanic Example\n",
    "\n",
    "Load libraries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "045e0f5b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_2045038/3360833649.py:1: DeprecationWarning: \n",
      "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
      "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
      "but was not found to be installed on your system.\n",
      "If this would cause problems for you,\n",
      "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
      "        \n",
      "  import pandas as pd\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "from sklearn.model_selection import train_test_split\n",
    "load(\"conjecturing.py\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c561be47-a291-4492-baa8-5cc698c879cb",
   "metadata": {},
   "source": [
    "Specify output files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4861be5f-d0ae-4a84-b768-b6903560f130",
   "metadata": {},
   "outputs": [],
   "source": [
    "inv_file = open(\"2022_12_07_inv.txt\", \"w\")\n",
    "prop_file = open(\"2022_12_07_prop.txt\", \"w\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f5db5172-0de7-4bfa-ad8e-6804f0b1798a",
   "metadata": {
    "tags": []
   },
   "source": [
    "Specify the number of examples to use for conjecturing and skips."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cc76b0a8-7d39-4ce4-aecc-7495a82aaafa",
   "metadata": {},
   "outputs": [],
   "source": [
    "num_train = 10\n",
    "my_skips = 0.3"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c229b455-e876-4ce7-a910-c6766f168ecf",
   "metadata": {},
   "source": [
    "Read data.  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e449e88a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(891, 11)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Survived</th>\n",
       "      <th>Pclass</th>\n",
       "      <th>Name</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Age</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>Ticket</th>\n",
       "      <th>Fare</th>\n",
       "      <th>Cabin</th>\n",
       "      <th>Embarked</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PassengerId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Braund, Mr. Owen Harris</td>\n",
       "      <td>male</td>\n",
       "      <td>22.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>A/5 21171</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
       "      <td>female</td>\n",
       "      <td>38.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>PC 17599</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>C85</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>Heikkinen, Miss. Laina</td>\n",
       "      <td>female</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>STON/O2. 3101282</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
       "      <td>female</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>113803</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>C123</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>Allen, Mr. William Henry</td>\n",
       "      <td>male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>373450</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Survived  Pclass  \\\n",
       "PassengerId                     \n",
       "1                   0       3   \n",
       "2                   1       1   \n",
       "3                   1       3   \n",
       "4                   1       1   \n",
       "5                   0       3   \n",
       "\n",
       "                                                          Name     Sex   Age  \\\n",
       "PassengerId                                                                    \n",
       "1                                      Braund, Mr. Owen Harris    male  22.0   \n",
       "2            Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0   \n",
       "3                                       Heikkinen, Miss. Laina  female  26.0   \n",
       "4                 Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0   \n",
       "5                                     Allen, Mr. William Henry    male  35.0   \n",
       "\n",
       "             SibSp  Parch            Ticket     Fare Cabin Embarked  \n",
       "PassengerId                                                          \n",
       "1                1      0         A/5 21171   7.2500   NaN        S  \n",
       "2                1      0          PC 17599  71.2833   C85        C  \n",
       "3                0      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "4                1      0            113803  53.1000  C123        S  \n",
       "5                0      0            373450   8.0500   NaN        S  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_data =pd.read_csv(\"train.csv\",\n",
    "                    index_col=int(0),\n",
    "                    header=int(0)\n",
    "                    )\n",
    "print(my_data.shape)\n",
    "my_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cf280d83-a24c-4458-ab72-66a115e7cf99",
   "metadata": {},
   "source": [
    "Check the data types of the columns.  For categorical data, make sure the type is integer or objects.  Make sure the categories do not contain special characters besides numbers and \"_\"."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "17f4debf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Index: 891 entries, 1 to 891\n",
      "Data columns (total 11 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   Survived  891 non-null    int64  \n",
      " 1   Pclass    891 non-null    int64  \n",
      " 2   Name      891 non-null    object \n",
      " 3   Sex       891 non-null    object \n",
      " 4   Age       714 non-null    float64\n",
      " 5   SibSp     891 non-null    int64  \n",
      " 6   Parch     891 non-null    int64  \n",
      " 7   Ticket    891 non-null    object \n",
      " 8   Fare      891 non-null    float64\n",
      " 9   Cabin     204 non-null    object \n",
      " 10  Embarked  889 non-null    object \n",
      "dtypes: float64(2), int64(4), object(5)\n",
      "memory usage: 83.5+ KB\n"
     ]
    }
   ],
   "source": [
    "my_data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "622c9753-c59a-4cb8-bfd4-bb03f9c5434b",
   "metadata": {},
   "source": [
    "Pandas thinks Survived and Pclass are integers/numeric, but they are categorical.  Recast them as objects."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "bc565d07",
   "metadata": {},
   "outputs": [],
   "source": [
    "my_data = my_data.astype({\"Survived\": object, \"Pclass\": object})"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c50de95-24d6-4bcc-be94-21b42beb3dd2",
   "metadata": {},
   "source": [
    "Create a new feature which is the first letter of the cabin."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "77108245",
   "metadata": {},
   "outputs": [],
   "source": [
    "my_data[\"cabin_letter\"]=my_data[\"Cabin\"].str[:1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b5b3c159-cac7-4599-8680-8c39c381da77",
   "metadata": {},
   "source": [
    "Identify invariant and categorical columns and the target column.  The target should be in one of the lists."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "820e9888",
   "metadata": {},
   "outputs": [],
   "source": [
    "invariant_names=[\"Age\", \"Fare\", \"SibSp\", \"Parch\"] \n",
    "categorical_names=[\"Survived\", \"Sex\",\"Pclass\", \"cabin_letter\", \"Embarked\"]\n",
    "target = \"Survived\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "88b339af-22c1-415b-8faa-fbfaa3437f71",
   "metadata": {},
   "source": [
    "Generic code starts here.  Rename target column.  Select columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "5ae99a05-9075-4ce7-9bb7-bb7137c3cb44",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['TARGET', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket',\n",
      "       'Fare', 'Cabin', 'Embarked', 'cabin_letter'],\n",
      "      dtype='object')\n",
      "['TARGET', 'Sex', 'Pclass', 'cabin_letter', 'Embarked']\n"
     ]
    }
   ],
   "source": [
    "if target in categorical_names:\n",
    "    categorical_names[categorical_names.index(target)] = \"TARGET\"\n",
    "else:\n",
    "    invariant_names[invariant_names.index(target)] = \"TARGET\"\n",
    "\n",
    "my_data = my_data.rename(columns={target: \"TARGET\"})\n",
    "print(my_data.columns)\n",
    "my_data = my_data[invariant_names + categorical_names]\n",
    "print(categorical_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94bcbfd3-514a-4a5b-bbea-9ffdd72425c1",
   "metadata": {},
   "source": [
    "Convert categorical variables to dummies.  One dummy for each binary variable and one dummy for each level for variables with more than two levels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "daa9681d-dcc4-4600-86c2-8926b2ba2bd1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Fare</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>TARGET_0</th>\n",
       "      <th>TARGET_1</th>\n",
       "      <th>TARGET_nan</th>\n",
       "      <th>Sex_female</th>\n",
       "      <th>Sex_male</th>\n",
       "      <th>Sex_nan</th>\n",
       "      <th>...</th>\n",
       "      <th>cabin_letter_D</th>\n",
       "      <th>cabin_letter_E</th>\n",
       "      <th>cabin_letter_F</th>\n",
       "      <th>cabin_letter_G</th>\n",
       "      <th>cabin_letter_T</th>\n",
       "      <th>cabin_letter_nan</th>\n",
       "      <th>Embarked_C</th>\n",
       "      <th>Embarked_Q</th>\n",
       "      <th>Embarked_S</th>\n",
       "      <th>Embarked_nan</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PassengerId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>22.0</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>26.0</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>35.0</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              Age     Fare  SibSp  Parch  TARGET_0  TARGET_1  TARGET_nan  \\\n",
       "PassengerId                                                                \n",
       "1            22.0   7.2500      1      0         1         0           0   \n",
       "2            38.0  71.2833      1      0         0         1           0   \n",
       "3            26.0   7.9250      0      0         0         1           0   \n",
       "4            35.0  53.1000      1      0         0         1           0   \n",
       "5            35.0   8.0500      0      0         1         0           0   \n",
       "\n",
       "             Sex_female  Sex_male  Sex_nan  ...  cabin_letter_D  \\\n",
       "PassengerId                                 ...                   \n",
       "1                     0         1        0  ...               0   \n",
       "2                     1         0        0  ...               0   \n",
       "3                     1         0        0  ...               0   \n",
       "4                     1         0        0  ...               0   \n",
       "5                     0         1        0  ...               0   \n",
       "\n",
       "             cabin_letter_E  cabin_letter_F  cabin_letter_G  cabin_letter_T  \\\n",
       "PassengerId                                                                   \n",
       "1                         0               0               0               0   \n",
       "2                         0               0               0               0   \n",
       "3                         0               0               0               0   \n",
       "4                         0               0               0               0   \n",
       "5                         0               0               0               0   \n",
       "\n",
       "             cabin_letter_nan  Embarked_C  Embarked_Q  Embarked_S  \\\n",
       "PassengerId                                                         \n",
       "1                           1           0           0           1   \n",
       "2                           0           1           0           0   \n",
       "3                           1           0           0           1   \n",
       "4                           0           0           0           1   \n",
       "5                           1           0           0           1   \n",
       "\n",
       "             Embarked_nan  \n",
       "PassengerId                \n",
       "1                       0  \n",
       "2                       0  \n",
       "3                       0  \n",
       "4                       0  \n",
       "5                       0  \n",
       "\n",
       "[5 rows x 27 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "property_names = []\n",
    "for col in categorical_names:\n",
    "    if col != \"TARGET\":\n",
    "        unique_vals=list(my_data[col].unique())  # if nan is a level\n",
    "        #unique_vals=list(my_data[col].dropna().unique())  # if nan is not a level\n",
    "        if len(unique_vals)==2: # just use one level for binary features\n",
    "            property_names.append(col+\"_\"+str(unique_vals[1]))\n",
    "        elif len(unique_vals) > 2: #one property for each level.\n",
    "            for level in unique_vals:\n",
    "                property_names.append(col+\"_\"+str(level))\n",
    "\n",
    "\n",
    "if \"TARGET\" in categorical_names:\n",
    "    target_property_names = []\n",
    "    unique_vals = list(my_data[\"TARGET\"].unique()) # if nan is a level\n",
    "    #unique_vals = list(my_data[\"TARGET\"].dropna().unique()) # if nan is not a level\n",
    "    if len(unique_vals)==2:\n",
    "        target_property_names.append(\"TARGET_\"+str(unique_vals[1]))\n",
    "    elif len(unique_vals) > 2:\n",
    "        for level in unique_vals:\n",
    "            target_property_names.append(\"TARGET_\"+str(level))\n",
    "            \n",
    "my_df = pd.get_dummies(my_data, \n",
    "                       columns=categorical_names,\n",
    "                       dtype=np.uint8,\n",
    "                       dummy_na=True,  # False is the default.  If False, use dropna() above\n",
    "                       drop_first=False) # False is the default\n",
    "\n",
    "my_df = my_df.rename(lambda col: col.replace('.0', ''), axis='columns')\n",
    "my_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2c9c7a94-00ee-49df-8b80-90ece844bee9",
   "metadata": {},
   "source": [
    "Define class, invariants, properties, and target properties (if applicable)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "5aaf4118",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Sex_female', 'Pclass_3', 'Pclass_1', 'Pclass_2', 'cabin_letter_nan', 'cabin_letter_C', 'cabin_letter_E', 'cabin_letter_G', 'cabin_letter_D', 'cabin_letter_A', 'cabin_letter_B', 'cabin_letter_F', 'cabin_letter_T', 'Embarked_S', 'Embarked_C', 'Embarked_Q', 'Embarked_nan']\n"
     ]
    }
   ],
   "source": [
    "class Example():\n",
    "    def __init__(self, name, mydf):\n",
    "        self.name = name\n",
    "        self.mydf = mydf\n",
    "        \n",
    "for i in invariant_names:\n",
    "    inv = build_inv(i)\n",
    "    setattr(Example,inv.__name__,inv )\n",
    "\n",
    "for i in property_names:\n",
    "    prop = build_prop(i)\n",
    "    setattr(Example, prop.__name__,prop)\n",
    "\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for i in target_property_names:\n",
    "        prop = build_prop(i)\n",
    "        setattr(Example, prop.__name__, prop)\n",
    "else:\n",
    "    target_invariant = invariant_names.index(\"TARGET\")\n",
    "print(property_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a474cff-5048-45c0-8cdb-f28ae59e6215",
   "metadata": {},
   "source": [
    "Split into training and testing data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a84753d5-f6ff-44a2-ae83-e084938808ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "if \"TARGET\" in categorical_names:\n",
    "    X_train, X_test = train_test_split(\n",
    "        my_df.index,\n",
    "        stratify=my_data[\"TARGET\"],  # stratify on target levels\n",
    "        train_size=num_train,\n",
    "        random_state=12345\n",
    "    )\n",
    "else:\n",
    "    X_train, X_test = train_test_split(\n",
    "        my_df.index,\n",
    "        train_size=num_train,\n",
    "        random_state=12345\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "41f925f4-f870-46bc-9778-7935914574a7",
   "metadata": {},
   "source": [
    "Create examples for conjecturing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "fee01df8",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_examples = [Example(i, my_df) for i in X_train]\n",
    "test_examples = [Example(i, my_df) for i in X_test]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62bfa521-f831-45f2-86b2-9cd69f2cc4e8",
   "metadata": {},
   "source": [
    "Get lists of invariant and property functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "3f252376",
   "metadata": {},
   "outputs": [],
   "source": [
    "invariants =[]\n",
    "for i in invariant_names:\n",
    "    invariants.append(Example.__dict__[i])\n",
    "properties=[]\n",
    "for i in property_names:\n",
    "    properties.append(Example.__dict__[i])\n",
    "target_properties=[]\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for i in target_property_names:\n",
    "        target_properties.append(Example.__dict__[i])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "0367a44b-736d-4388-8e63-8307607f1f8b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Fare</th>\n",
       "      <th>SibSp</th>\n",
       "      <th>Parch</th>\n",
       "      <th>TARGET_0</th>\n",
       "      <th>TARGET_1</th>\n",
       "      <th>TARGET_nan</th>\n",
       "      <th>Sex_female</th>\n",
       "      <th>Sex_male</th>\n",
       "      <th>Sex_nan</th>\n",
       "      <th>...</th>\n",
       "      <th>cabin_letter_D</th>\n",
       "      <th>cabin_letter_E</th>\n",
       "      <th>cabin_letter_F</th>\n",
       "      <th>cabin_letter_G</th>\n",
       "      <th>cabin_letter_T</th>\n",
       "      <th>cabin_letter_nan</th>\n",
       "      <th>Embarked_C</th>\n",
       "      <th>Embarked_Q</th>\n",
       "      <th>Embarked_S</th>\n",
       "      <th>Embarked_nan</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PassengerId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>22.0</td>\n",
       "      <td>7.2500</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>38.0</td>\n",
       "      <td>71.2833</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>26.0</td>\n",
       "      <td>7.9250</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>35.0</td>\n",
       "      <td>53.1000</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>35.0</td>\n",
       "      <td>8.0500</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>887</th>\n",
       "      <td>27.0</td>\n",
       "      <td>13.0000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>888</th>\n",
       "      <td>19.0</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>889</th>\n",
       "      <td>NaN</td>\n",
       "      <td>23.4500</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>890</th>\n",
       "      <td>26.0</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>891</th>\n",
       "      <td>32.0</td>\n",
       "      <td>7.7500</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>891 rows × 27 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              Age     Fare  SibSp  Parch  TARGET_0  TARGET_1  TARGET_nan  \\\n",
       "PassengerId                                                                \n",
       "1            22.0   7.2500      1      0         1         0           0   \n",
       "2            38.0  71.2833      1      0         0         1           0   \n",
       "3            26.0   7.9250      0      0         0         1           0   \n",
       "4            35.0  53.1000      1      0         0         1           0   \n",
       "5            35.0   8.0500      0      0         1         0           0   \n",
       "...           ...      ...    ...    ...       ...       ...         ...   \n",
       "887          27.0  13.0000      0      0         1         0           0   \n",
       "888          19.0  30.0000      0      0         0         1           0   \n",
       "889           NaN  23.4500      1      2         1         0           0   \n",
       "890          26.0  30.0000      0      0         0         1           0   \n",
       "891          32.0   7.7500      0      0         1         0           0   \n",
       "\n",
       "             Sex_female  Sex_male  Sex_nan  ...  cabin_letter_D  \\\n",
       "PassengerId                                 ...                   \n",
       "1                     0         1        0  ...               0   \n",
       "2                     1         0        0  ...               0   \n",
       "3                     1         0        0  ...               0   \n",
       "4                     1         0        0  ...               0   \n",
       "5                     0         1        0  ...               0   \n",
       "...                 ...       ...      ...  ...             ...   \n",
       "887                   0         1        0  ...               0   \n",
       "888                   1         0        0  ...               0   \n",
       "889                   1         0        0  ...               0   \n",
       "890                   0         1        0  ...               0   \n",
       "891                   0         1        0  ...               0   \n",
       "\n",
       "             cabin_letter_E  cabin_letter_F  cabin_letter_G  cabin_letter_T  \\\n",
       "PassengerId                                                                   \n",
       "1                         0               0               0               0   \n",
       "2                         0               0               0               0   \n",
       "3                         0               0               0               0   \n",
       "4                         0               0               0               0   \n",
       "5                         0               0               0               0   \n",
       "...                     ...             ...             ...             ...   \n",
       "887                       0               0               0               0   \n",
       "888                       0               0               0               0   \n",
       "889                       0               0               0               0   \n",
       "890                       0               0               0               0   \n",
       "891                       0               0               0               0   \n",
       "\n",
       "             cabin_letter_nan  Embarked_C  Embarked_Q  Embarked_S  \\\n",
       "PassengerId                                                         \n",
       "1                           1           0           0           1   \n",
       "2                           0           1           0           0   \n",
       "3                           1           0           0           1   \n",
       "4                           0           0           0           1   \n",
       "5                           1           0           0           1   \n",
       "...                       ...         ...         ...         ...   \n",
       "887                         1           0           0           1   \n",
       "888                         0           0           0           1   \n",
       "889                         1           0           0           1   \n",
       "890                         0           1           0           0   \n",
       "891                         1           0           1           0   \n",
       "\n",
       "             Embarked_nan  \n",
       "PassengerId                \n",
       "1                       0  \n",
       "2                       0  \n",
       "3                       0  \n",
       "4                       0  \n",
       "5                       0  \n",
       "...                   ...  \n",
       "887                     0  \n",
       "888                     0  \n",
       "889                     0  \n",
       "890                     0  \n",
       "891                     0  \n",
       "\n",
       "[891 rows x 27 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "my_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b513bc3-3f57-4c7b-8825-c3a9d19527b9",
   "metadata": {},
   "source": [
    "Invariant conjecturing - upper and lower bounds."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "24ae1792",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TARGET_1\n",
      "28\n",
      "TARGET_1 False\n",
      "41\n"
     ]
    }
   ],
   "source": [
    "# Operators the expression trees may use when building upper and lower bounds for each class.\n",
    "use_operators =  { '-1', '+1', '*2', '/2', '^2', '-()', '1/', \n",
    "                  'sqrt', 'ln', 'log10', 'exp', '10^', 'ceil', \n",
    "                  'floor', 'abs', '+', '*', 'max', 'min', '-', '/', '^'}\n",
    "\n",
    "\n",
    "def conjecture_bounds(examples, inv_of_interest):\n",
    "    \"\"\"Conjecture an upper and then a lower bound for one invariant.\n",
    "\n",
    "    Calls conjecture() twice on examples (upperBound=True, then False) using\n",
    "    the notebook-level invariants, use_operators and my_skips, passes each\n",
    "    batch to convert_conjecture_names(), and returns both batches\n",
    "    concatenated in that order.  inv_of_interest is forwarded unchanged as\n",
    "    the third argument of conjecture().\n",
    "    \"\"\"\n",
    "    found = []\n",
    "    for is_upper in (True, False):\n",
    "        conjs = conjecture(examples, \n",
    "                           invariants, \n",
    "                           inv_of_interest, \n",
    "                           operators=use_operators, \n",
    "                           upperBound=is_upper, \n",
    "                           time=Integer(5),\n",
    "                           skips=my_skips)\n",
    "        # Return value is ignored, so this presumably renames the conjectures in place.\n",
    "        convert_conjecture_names(conjs)\n",
    "        found += conjs\n",
    "    return found\n",
    "\n",
    "\n",
    "def class_bound_conjectures(target_property, outcome):\n",
    "    \"\"\"Conjecture bounds for every invariant over one target class.\n",
    "\n",
    "    The class is the list of train_examples for which\n",
    "    target_property(example) == outcome.  Invariants are visited by position\n",
    "    in the notebook-level invariants list.\n",
    "    \"\"\"\n",
    "    class_examples = [example for example in train_examples if target_property(example) == outcome]\n",
    "    found = []\n",
    "    for inv_of_interest in range(len(invariants)):\n",
    "        sys.stdout.flush()\n",
    "        found += conjecture_bounds(class_examples, inv_of_interest)\n",
    "    return found\n",
    "\n",
    "\n",
    "inv_conjectures = []\n",
    "\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for value in target_property_names:\n",
    "        print(value)\n",
    "        inv_conjectures += class_bound_conjectures(Example.__dict__[value], True)\n",
    "    print(len(inv_conjectures))\n",
    "    if len(target_property_names) == 1:\n",
    "        # Binary target: also bound the invariants over the complementary class.\n",
    "        value = target_property_names[0]\n",
    "        print(value + \" False\")\n",
    "        inv_conjectures += class_bound_conjectures(Example.__dict__[value], False)\n",
    "else: # target is an invariant\n",
    "    inv_conjectures += conjecture_bounds(list(train_examples), target_invariant)\n",
    "print(len(inv_conjectures))\n",
    "\n",
    "# One conjecture per line; close() flushes the buffered writes.\n",
    "for c in inv_conjectures:\n",
    "    inv_file.write(\"%s\\n\" % c)\n",
    "inv_file.close()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2fc6d1d3-75e2-43e5-ad29-5e1669954216",
   "metadata": {},
   "source": [
    "Property conjecturing - sufficient conditions for categorical target values.  For a binary target, get sufficient conditions for the positive class and necessary conditions for the positive class; an example that fails a necessary condition is assigned to the negative class."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9c8befb8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TARGET_1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: invalid value encountered in log\n",
      "  return ln(args[0], **kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(Sex_female)->(TARGET_1)\n",
      "(Pclass_2)->(TARGET_1)\n",
      "(cabin_letter_C)->(TARGET_1)\n",
      "(~(Fare_leq_10_to_the_power_e_to_the_power_open_bracket_e_to_the_power_SibSp_divided_by_10_to_the_power_Parch_close_bracket))->(TARGET_1)\n",
      "TARGET_1 Necessary\n",
      "(TARGET_1)->(Age_leq_2_divided_by_SibSp)\n",
      "(TARGET_1)->((Fare_leq_maximumopen_bracket_logopen_bracket_Age_close_bracket_or_logopen_bracket__minus_SibSp_close_bracket_close_bracket_squared)->(Sex_female))\n"
     ]
    }
   ],
   "source": [
    "# Slot 0 is a placeholder; it is overwritten with the target property before each run.\n",
    "all_properties = [\"TARGET\"] + properties + inv_conjectures\n",
    "prop_conjs = []\n",
    "conditions = {}\n",
    "if \"TARGET\" in categorical_names:\n",
    "    # Sufficient conditions: one run per target level, keeping the premise of each conjecture.\n",
    "    for value in target_property_names:\n",
    "        print(value)\n",
    "        all_properties[0] = Example.__dict__[value]\n",
    "        sufficient_conjs = propertyBasedConjecture(objects=train_examples,\n",
    "                                                   properties=all_properties,\n",
    "                                                   mainProperty=0,\n",
    "                                                   skips=my_skips)\n",
    "        conditions[value] = [get_premise(conj, myprint=False) for conj in sufficient_conjs]\n",
    "        prop_conjs.extend(sufficient_conjs)\n",
    "    if len(target_property_names) == 1:\n",
    "        # Binary target: value still names the single target level from the loop above.\n",
    "        # Necessary conditions keep the conclusion of each conjecture instead of the premise.\n",
    "        print(value + \" Necessary\")\n",
    "        all_properties[0] = Example.__dict__[value]\n",
    "        necessary_conjs = propertyBasedConjecture(objects=train_examples,\n",
    "                                                  properties=all_properties,\n",
    "                                                  mainProperty=0,\n",
    "                                                  sufficient=False,\n",
    "                                                  skips=my_skips)\n",
    "        conditions[\"necessary\"] = [get_conclusion(conj, myprint=False) for conj in necessary_conjs]\n",
    "        prop_conjs.extend(necessary_conjs)\n",
    "\n",
    "# Write every property conjecture, one per line, after passing its name through convert_name_back.\n",
    "for conj in prop_conjs:\n",
    "    prop_file.write(\"%s\\n\" % convert_name_back(conj.__name__))\n",
    "    prop_file.flush()\n",
    "\n",
    "prop_file.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9e3b80c5-fa02-4d58-bfe2-b44ff0b81666",
   "metadata": {},
   "source": [
    "Apply conjectures to train and test data if target is categorical."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f3ab0127-5bef-46ca-9f52-f0a27f35e55a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "./conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: invalid value encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: invalid value encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'TARGET_1': [Sex_female, Pclass_2, cabin_letter_C, ~Fare_leq_10_to_the_power_e_to_the_power_open_bracket_e_to_the_power_SibSp_divided_by_10_to_the_power_Parch_close_bracket], 'necessary': [Age_leq_2_divided_by_SibSp, Fare_leq_maximumopen_bracket_logopen_bracket_Age_close_bracket_or_logopen_bracket__minus_SibSp_close_bracket_close_bracket_squared->Sex_female]}\n"
     ]
    }
   ],
   "source": [
    "# Feature frames: property and invariant columns only (target and one level per binary variable dropped).\n",
    "X_train_df = my_df.loc[X_train,property_names+invariant_names]\n",
    "X_test_df = my_df.loc[X_test,property_names+invariant_names]\n",
    "# Original target column, even if it has multiple levels.\n",
    "y_train_df = my_data.loc[X_train,\"TARGET\"]\n",
    "y_test_df = my_data.loc[X_test, \"TARGET\"]\n",
    "if \"TARGET\" in categorical_names:\n",
    "    # Sufficient conditions first (in target-level order), then the necessary\n",
    "    # conditions for a binary target, so conj_1, conj_2, ... follow that order.\n",
    "    ordered_conditions = [condition\n",
    "                          for value in target_property_names\n",
    "                          for condition in conditions[value]]\n",
    "    if len(target_property_names) == 1:\n",
    "        ordered_conditions += conditions[\"necessary\"]\n",
    "\n",
    "    # Each condition becomes one new column evaluated on every example.\n",
    "    for index, condition in enumerate(ordered_conditions, start=1):\n",
    "        column_name = 'conj_' + str(index)\n",
    "        X_train_df[column_name] = [condition(example) for example in train_examples]\n",
    "        X_test_df[column_name] = [condition(example) for example in test_examples]\n",
    "\n",
    "    print(conditions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "62030470-d62d-4555-80a1-c2b7094c4622",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PassengerId\n",
      "861    0\n",
      "481    0\n",
      "321    0\n",
      "182    0\n",
      "565    0\n",
      "      ..\n",
      "845    0\n",
      "89     1\n",
      "748    1\n",
      "33     1\n",
      "606    0\n",
      "Name: TARGET, Length: 881, dtype: object\n"
     ]
    }
   ],
   "source": [
    "# Scratch check: print the test labels as strings once per target level.\n",
    "for value in target_property_names:\n",
    "    this_value = value.replace(\"TARGET_\", \"\")  # level name without the TARGET_ prefix\n",
    "    print(y_test_df.astype('str'))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b410f6ff-3cff-41f6-aaab-7a60f6f6cbe3",
   "metadata": {},
   "source": [
    "Calculate support, precision, recall, lift, and F1.  The F1 score is only for the class for the sufficient condition it was derived for."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "f9e29f39-e7e2-46bb-8154-ec731a18109c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "value: TARGET_1\n",
      "0 condition: Sex_female\n",
      "1 condition: Pclass_2\n",
      "3 condition: cabin_letter_C\n",
      "6 condition: ~Fare_leq_10_to_the_power_e_to_the_power_open_bracket_e_to_the_power_SibSp_divided_by_10_to_the_power_Parch_close_bracket\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "./conjecturing.py:279: RuntimeWarning: overflow encountered in scalar power\n",
      "  return (lambda x: 10**x), 1\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6 condition: Age_leq_2_divided_by_SibSp\n",
      "7 condition: Fare_leq_maximumopen_bracket_logopen_bracket_Age_close_bracket_or_logopen_bracket__minus_SibSp_close_bracket_close_bracket_squared->Sex_female\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: invalid value encountered in log\n",
      "  return ln(args[0], **kwds)\n",
      "/lustre/home/clarson/anaconda3/envs/sage/lib/python3.11/site-packages/sage/misc/functional.py:1209: RuntimeWarning: divide by zero encountered in log\n",
      "  return ln(args[0], **kwds)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>class</th>\n",
       "      <th>support</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>lift</th>\n",
       "      <th>f1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TARGET_1</td>\n",
       "      <td>313</td>\n",
       "      <td>0.741214057507987</td>\n",
       "      <td>0.686390532544379</td>\n",
       "      <td>1.93198101971756</td>\n",
       "      <td>0.712749615975422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TARGET_1</td>\n",
       "      <td>183</td>\n",
       "      <td>0.469945355191257</td>\n",
       "      <td>0.254437869822485</td>\n",
       "      <td>1.22491673941863</td>\n",
       "      <td>0.330134357005758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>TARGET_1</td>\n",
       "      <td>58</td>\n",
       "      <td>0.586206896551724</td>\n",
       "      <td>0.100591715976331</td>\n",
       "      <td>1.52795347888186</td>\n",
       "      <td>0.171717171717172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>TARGET_1</td>\n",
       "      <td>135</td>\n",
       "      <td>0.600000000000000</td>\n",
       "      <td>0.239644970414201</td>\n",
       "      <td>1.56390532544379</td>\n",
       "      <td>0.342494714587738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>necessary</td>\n",
       "      <td>406</td>\n",
       "      <td>0.613300492610837</td>\n",
       "      <td>0.736686390532544</td>\n",
       "      <td>0.995060283591432</td>\n",
       "      <td>0.734513274336283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>necessary</td>\n",
       "      <td>294</td>\n",
       "      <td>0.877551020408163</td>\n",
       "      <td>0.763313609467456</td>\n",
       "      <td>1.42379824858120</td>\n",
       "      <td>0.761061946902655</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       class support          precision             recall               lift  \\\n",
       "0   TARGET_1     313  0.741214057507987  0.686390532544379   1.93198101971756   \n",
       "1   TARGET_1     183  0.469945355191257  0.254437869822485   1.22491673941863   \n",
       "2   TARGET_1      58  0.586206896551724  0.100591715976331   1.52795347888186   \n",
       "3   TARGET_1     135  0.600000000000000  0.239644970414201   1.56390532544379   \n",
       "4  necessary     406  0.613300492610837  0.736686390532544  0.995060283591432   \n",
       "5  necessary     294  0.877551020408163  0.763313609467456   1.42379824858120   \n",
       "\n",
       "                  f1  \n",
       "0  0.712749615975422  \n",
       "1  0.330134357005758  \n",
       "2  0.171717171717172  \n",
       "3  0.342494714587738  \n",
       "4  0.734513274336283  \n",
       "5  0.761061946902655  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def score_condition(condition, class_function, outcome):\n",
    "    \"\"\"Score one conjectured condition on test_examples.\n",
    "\n",
    "    An example is flagged when condition(example) == outcome and belongs to\n",
    "    the scored class when class_function(example) == outcome.  Sufficient\n",
    "    conditions are scored with outcome=True (condition holds, target holds);\n",
    "    necessary conditions with outcome=False (condition fails, target fails).\n",
    "\n",
    "    Returns the tuple (support, precision, recall, lift, f1):\n",
    "      support   - number of flagged examples\n",
    "      precision - hits / flagged\n",
    "      recall    - hits / examples in the scored class\n",
    "      lift      - precision / (scored-class share of test_examples)\n",
    "      f1        - harmonic mean of precision and recall\n",
    "    where a hit is a flagged example in the scored class.  All four ratios\n",
    "    are 0.0 when there are no hits, which also avoids dividing by zero.\n",
    "    \"\"\"\n",
    "    num_flagged = 0\n",
    "    num_in_class = 0\n",
    "    num_hit = 0\n",
    "    for example in test_examples:\n",
    "        in_class = class_function(example) == outcome\n",
    "        if in_class:\n",
    "            num_in_class += 1\n",
    "        if condition(example) == outcome:\n",
    "            num_flagged += 1\n",
    "            if in_class:\n",
    "                num_hit += 1\n",
    "    if num_hit == 0:\n",
    "        return num_flagged, 0.0, 0.0, 0.0, 0.0\n",
    "    # Precision and recall share one definition for both kinds of condition, so\n",
    "    # f1 is always built from the same numbers that are reported.\n",
    "    my_precision = n(num_hit/num_flagged)\n",
    "    my_recall = n(num_hit/num_in_class)\n",
    "    my_lift = my_precision/n(num_in_class/len(test_examples))\n",
    "    my_f1 = (2*my_precision*my_recall)/(my_precision + my_recall)\n",
    "    return num_flagged, my_precision, my_recall, my_lift, my_f1\n",
    "\n",
    "\n",
    "support = []\n",
    "lift = []\n",
    "precision = []\n",
    "recall = []\n",
    "f1 = []\n",
    "classes = []\n",
    "metric_columns = (support, precision, recall, lift, f1)\n",
    "count = 0  # running condition number; matches the row index of results_df\n",
    "if \"TARGET\" in categorical_names:\n",
    "    for value in target_property_names:\n",
    "        print(\"value: {}\".format(value))\n",
    "        my_function = getattr(Example, value)\n",
    "        for condition in conditions[value]:\n",
    "            print(count, \"condition: {}\".format(condition))\n",
    "            count += 1\n",
    "            classes.append(value)\n",
    "            for column, score in zip(metric_columns, score_condition(condition, my_function, True)):\n",
    "                column.append(score)\n",
    "    if len(target_property_names) == 1:\n",
    "        # Binary target: a failed necessary condition predicts the negative class,\n",
    "        # so these rows are scored against the examples where the target is False.\n",
    "        my_function = getattr(Example, target_property_names[0])\n",
    "        for condition in conditions[\"necessary\"]:\n",
    "            print(count, \"condition: {}\".format(condition))\n",
    "            count += 1\n",
    "            classes.append(\"necessary\")\n",
    "            for column, score in zip(metric_columns, score_condition(condition, my_function, False)):\n",
    "                column.append(score)\n",
    "\n",
    "results_df = pd.DataFrame({\n",
    "    'class': classes,\n",
    "    'support':support, \n",
    "    'precision':precision, \n",
    "    'recall': recall, \n",
    "    'lift':lift, \n",
    "    'f1': f1})\n",
    "\n",
    "results_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eae54efa-df74-4850-a64b-1aff582176ea",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "SageMath 10.2",
   "language": "sage",
   "name": "sagemath"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}