In [1]:
import pandas as pd
import numpy as np
import sys
from sklearn.model_selection import train_test_split
load("~/conjecturing/sage/conjecturing.py")

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Specify output files.

In [2]:
# Output destinations: one text file for the invariant conjectures and one for the
# property conjectures.  Both handles stay open until the cells that write the
# results close them.
inv_file, prop_file = (
    open(path, "w") for path in ("2022_12_07_inv.txt", "2022_12_07_prop.txt")
)

Specify the number of training examples to use for conjecturing and the value of the `skips` parameter passed to the conjecturing routines.

In [3]:
# num_train: how many rows are drawn as training examples for conjecturing.
# my_skips:  value forwarded as `skips=` to the conjecturing calls.
num_train, my_skips = 100, 0.3

Read data.  

In [4]:
# Load worksheet "Sheet1" of essi.xlsx, taking the column names from its first row.
my_data = pd.read_excel(
    "essi.xlsx",
    sheet_name="Sheet1",
    # int(...) keeps the header index a plain Python int
    # (presumably to sidestep Sage's integer literals -- confirm).
    header=int(0),
)
print(my_data.shape)  # (rows, columns) sanity check
my_data.head()

(25808, 63)


Unnamed: 0,Year,DSA,Program_Year.x,Facility_Count,HIE_B_Count,Weighted_HIE_DSA,HIE_DSA_Penetration,Time,ZIP,County.Code,...,Tx_R_O1,Tx_R_ST1,Tx_R_ST2,Flagging_ratio,Tx_R_ST_C,M_R_O1,M_R_ST1,HalfYear.y,HIE_DSA_P,P_HIE_Adopt
0,2012,ALOB,2013,102,0,0.0,0,1,35010,1037,...,0.05,1,1,1.0,1,0.08,1,7,0,0
1,2012,ALOB,2013,102,0,0.0,0,1,36033,1013,...,0.05,1,1,1.0,1,0.08,1,7,0,0
2,2012,ALOB,2013,102,0,0.0,0,1,35401,1125,...,0.05,1,1,1.0,1,0.08,1,7,0,0
3,2012,ALOB,2013,102,0,0.0,0,1,36401,1035,...,0.05,1,1,1.0,1,0.08,1,7,0,0
4,2012,ALOB,2013,102,0,0.0,0,1,35462,1063,...,0.05,1,1,1.0,1,0.08,1,7,0,0


Check the data types of the columns.  For categorical data, make sure the type is integer or object.  Make sure the category labels contain no special characters other than digits and "_".

In [5]:
# Print each column's dtype and non-null count so mis-typed categorical columns can be spotted.
my_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25808 entries, 0 to 25807
Data columns (total 63 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Year                 25808 non-null  int64  
 1   DSA                  25808 non-null  object 
 2   Program_Year.x       25808 non-null  int64  
 3   Facility_Count       25808 non-null  int64  
 4   HIE_B_Count          25808 non-null  int64  
 5   Weighted_HIE_DSA     25808 non-null  float64
 6   HIE_DSA_Penetration  25808 non-null  int64  
 7   Time                 25808 non-null  int64  
 8   ZIP                  25808 non-null  int64  
 9   County.Code          25808 non-null  int64  
 10  State.Name           25808 non-null  object 
 11  County.Name          25808 non-null  object 
 12  County.FIPS          25808 non-null  int64  
 13  Provider_Number      25808 non-null  int64  
 14  Network              25808 non-null  int64  
 15  Facility_Name        25808 non-null 

Pandas thinks PTSURV_T and PT_HOS_T are integers/numeric, but they are categorical.  Recast them as objects.

In [6]:
#"PTSURV_T", "PT_HOS_T"
# Recast both rating columns to dtype object so they are treated as categories rather than numbers.
my_data = my_data.astype(dict.fromkeys(["PTSURV_T", "PT_HOS_T"], object))


In [7]:
# Drop rows whose PTSURV_T value is missing.  The result is re-bound instead of
# mutating the frame in place.  The index is not reset, so row labels still refer
# to the original rows.
my_data = my_data.dropna(subset=["PTSURV_T"])

Identify invariant and categorical columns and the target column.  The target should be in one of the lists.

In [8]:
# Columns that take part in conjecturing.  The target column should appear in one
# of the two lists.
#
# Larger invariant set kept for reference:
# invariant_names = ["Tx_R_O1", "N_Tx_Ctr1", "N_R_Ctr1", "N_Center", "M_R_O1", "M_R_ST1",
#                    "N_PT_TRANS_S", "PT_TRANS_T1", "N_PTSURV_SUM", "N_PT_HOS_S", "VAVF_F",
#                    "N_DP_HGBD12", "N_DP_HGBD", "HOMEHD", "PD", "HD", "TOTSTAS"]
invariant_names = [
    "Weighted_HIE_DSA",
    "Flagging_ratio",
    "Tx_R_O1",
    "N_Tx_Ctr1",
]
categorical_names = [
    "PTSURV_T",
    "PT_HOS_T",
]
target = "PTSURV_T"

Generic code starts here.  Rename target column.  Select columns.

In [9]:
# Swap the target's entry for the generic label "TARGET" in whichever list holds it
# (list.index raises ValueError if it is in neither list).
names_holding_target = categorical_names if target in categorical_names else invariant_names
names_holding_target[names_holding_target.index(target)] = "TARGET"

# Apply the same renaming to the data, then keep only the selected columns.
my_data = my_data.rename(columns={target: "TARGET"})
print(my_data.columns)
selected_columns = invariant_names + categorical_names
my_data = my_data[selected_columns]
print(categorical_names)

Index(['Year', 'DSA', 'Program_Year.x', 'Facility_Count', 'HIE_B_Count',
       'Weighted_HIE_DSA', 'HIE_DSA_Penetration', 'Time', 'ZIP', 'County.Code',
       'State.Name', 'County.Name', 'County.FIPS', 'Provider_Number',
       'Network', 'Facility_Name', 'Unnamed: 16', 'City', 'STATE',
       'Late.Shift', 'Chain.Owned', 'Chain.Organization', 'TOTSTAS', 'HD',
       'PD', 'HOMEHD', 'N_DP_HGBD', 'N_DP_HGBD12', 'PTSURV_C', 'County',
       'HDKTVPM12_F', 'VAVF_F', 'F_Star', 'PT_HOS_T', 'PT_HOS_T1', 'PT_HOS_T2',
       'PT_HOS_C', 'N_PT_HOS_S', 'N_PTSURV_SUM', 'SHR', 'SMR', 'PT_TRANS_T',
       'PT_TRANS_T1', 'PT_TRANS_T2', 'N_PT_TRANS_S', 'TARGET', 'PTSURV_T1',
       'PTSURV_T2', 'R_Year', 'HalfYear.x', 'N_Tx_Ctr1', 'N_R_Ctr1',
       'N_Center', 'Tx_R_O1', 'Tx_R_ST1', 'Tx_R_ST2', 'Flagging_ratio',
       'Tx_R_ST_C', 'M_R_O1', 'M_R_ST1', 'HalfYear.y', 'HIE_DSA_P',
       'P_HIE_Adopt'],
      dtype='object')
['TARGET', 'PT_HOS_T']


Convert categorical variables to dummies.  One dummy for each binary variable and one dummy for each level for variables with more than two levels.

In [10]:
def _dummy_names(column):
    """Return the dummy-variable names used for one categorical column of my_data.

    A column with exactly two levels yields a single name (built from the second
    level encountered); a column with more than two levels yields one name per
    level; a column with fewer than two levels yields nothing.  NaN counts as a
    level here -- use my_data[column].dropna().unique() if it should not.
    """
    levels = list(my_data[column].unique())
    if len(levels) == 2:
        return [column + "_" + str(levels[1])]
    if len(levels) > 2:
        return [column + "_" + str(level) for level in levels]
    return []


# Properties for every categorical column except the target.
property_names = [
    name
    for column in categorical_names
    if column != "TARGET"
    for name in _dummy_names(column)
]

# Properties for the target itself, only when the target is categorical.
if "TARGET" in categorical_names:
    target_property_names = _dummy_names("TARGET")

# One 0/1 column per level (plus a *_nan column because dummy_na=True).
# With dummy_na=False, drop missing values beforehand instead.
my_df = pd.get_dummies(
    my_data,
    columns=categorical_names,
    dtype=np.uint8,
    dummy_na=True,
    drop_first=False,
)

# Strip '.0' from the generated column names.
my_df = my_df.rename(columns=lambda name: name.replace('.0', ''))
my_df.head()

Unnamed: 0,Weighted_HIE_DSA,Flagging_ratio,Tx_R_O1,N_Tx_Ctr1,TARGET_As Expected,TARGET_Better than Expected,TARGET_Not Available,TARGET_Worse than Expected,TARGET_nan,PT_HOS_T_As Expected,PT_HOS_T_Better than Expected,PT_HOS_T_Not Available,PT_HOS_T_Worse than Expected,PT_HOS_T_nan
0,0.0,1.0,0.05,157,1,0,0,0,0,1,0,0,0,0
1,0.0,1.0,0.05,157,1,0,0,0,0,0,1,0,0,0
2,0.0,1.0,0.05,157,0,0,0,1,0,1,0,0,0,0
3,0.0,1.0,0.05,157,1,0,0,0,0,1,0,0,0,0
4,0.0,1.0,0.05,157,1,0,0,0,0,1,0,0,0,0


Define class, invariants, properties, and target properties (if applicable).

In [11]:
class Example():
    """A single data row handed to the conjecturing routines.

    `name` identifies the row and `mydf` is the frame it belongs to.  The
    invariant and property accessors are attached to the class after its
    definition via setattr.
    """

    def __init__(self, name, mydf):
        self.name, self.mydf = name, mydf


# One accessor per invariant column, stored under the accessor's own __name__.
for column in invariant_names:
    accessor = build_inv(column)
    setattr(Example, accessor.__name__, accessor)

# One accessor per property column; the target's properties are included only
# when the target is categorical.
target_is_categorical = "TARGET" in categorical_names
property_columns = list(property_names)
if target_is_categorical:
    property_columns += target_property_names
for column in property_columns:
    accessor = build_prop(column)
    setattr(Example, accessor.__name__, accessor)

# A numeric target is identified by its position among the invariants.
if not target_is_categorical:
    target_invariant = invariant_names.index("TARGET")
print(property_names)

['PT_HOS_T_As Expected', 'PT_HOS_T_Better than Expected', 'PT_HOS_T_Worse than Expected', 'PT_HOS_T_Not Available', 'PT_HOS_T_nan']


Split into training and testing data.

In [12]:
# Draw num_train row labels for conjecturing; the remaining labels form the test set.
# A categorical target additionally stratifies the draw on the original target levels.
split_options = {"train_size": num_train, "random_state": 12345}
if "TARGET" in categorical_names:
    split_options["stratify"] = my_data["TARGET"]
X_train, X_test = train_test_split(my_df.index, **split_options)

Create examples for conjecturing.

In [13]:
def _as_examples(labels):
    """Wrap each row label in an Example bound to the encoded frame my_df."""
    return [Example(label, my_df) for label in labels]


train_examples = _as_examples(X_train)
test_examples = _as_examples(X_test)

Get lists of invariant and property functions.

In [14]:
# Look up the accessor functions attached to Example, in the same order as the name lists.
invariants = [Example.__dict__[name] for name in invariant_names]
properties = [Example.__dict__[name] for name in property_names]
# Target properties exist only for a categorical target; otherwise the list stays empty.
target_properties = (
    [Example.__dict__[name] for name in target_property_names]
    if "TARGET" in categorical_names
    else []
)


In [15]:
# Display the encoded frame: the invariant columns followed by one 0/1 column per categorical level.
my_df

Unnamed: 0,Weighted_HIE_DSA,Flagging_ratio,Tx_R_O1,N_Tx_Ctr1,TARGET_As Expected,TARGET_Better than Expected,TARGET_Not Available,TARGET_Worse than Expected,TARGET_nan,PT_HOS_T_As Expected,PT_HOS_T_Better than Expected,PT_HOS_T_Not Available,PT_HOS_T_Worse than Expected,PT_HOS_T_nan
0,0.0,1.0,0.050000,157,1,0,0,0,0,1,0,0,0,0
1,0.0,1.0,0.050000,157,1,0,0,0,0,0,1,0,0,0
2,0.0,1.0,0.050000,157,0,0,0,1,0,1,0,0,0,0
3,0.0,1.0,0.050000,157,1,0,0,0,0,1,0,0,0,0
4,0.0,1.0,0.050000,157,1,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25803,0.0,1.0,0.089506,118,0,1,0,0,0,1,0,0,0,0
25804,0.0,1.0,0.089506,118,1,0,0,0,0,1,0,0,0,0
25805,0.0,1.0,0.089506,118,1,0,0,0,0,1,0,0,0,0
25806,0.0,1.0,0.089506,118,1,0,0,0,0,1,0,0,0,0


Invariant conjecturing - upper and lower bounds.

In [16]:
# Operators the expression trees may use when building upper and lower bounds for each class.
use_operators =  { '-1', '+1', '*2', '/2', '^2', '-()', '1/', 
                  'sqrt', 'ln', 'log10', 'exp', '10^', 'ceil', 
                  'floor', 'abs', '+', '*', 'max', 'min', '-', '/', '^'}


def _bound_conjectures(examples, inv_of_interest):
    """Conjecture upper and then lower bounds for one invariant.

    examples        -- the Example objects the bounds are conjectured over
    inv_of_interest -- position in `invariants` of the invariant being bounded

    Returns the upper-bound conjectures followed by the lower-bound conjectures,
    each batch passed through convert_conjecture_names first.  Pass debug=True /
    verbose=True to `conjecture` here when troubleshooting.
    """
    found = []
    for is_upper in (True, False):
        conjs = conjecture(examples,
                           invariants,
                           inv_of_interest,
                           operators=use_operators,
                           upperBound=is_upper,
                           time=Integer(5),
                           skips=my_skips)
        convert_conjecture_names(conjs)
        found += conjs
    return found


def _class_conjectures(value, membership):
    """Conjecture bounds on every invariant over one slice of the training examples.

    value      -- name of the target property attached to Example
    membership -- keep the training examples whose property value equals this
                  (True for the class itself, False for its complement)
    """
    target_property = Example.__dict__[value]
    my_examples = [example for example in train_examples
                   if target_property(example) == membership]
    found = []
    # enumerate gives each invariant's position directly, avoiding a list.index
    # search on every iteration.
    for inv_of_interest, _ in enumerate(invariants):
        sys.stdout.flush()
        found += _bound_conjectures(my_examples, inv_of_interest)
    return found


inv_conjectures = []

if "TARGET" in categorical_names:
    for value in target_property_names:
        print(value)
        inv_conjectures += _class_conjectures(value, True)
    print(len(inv_conjectures))
    # A binary target has a single property, so also bound the complementary class.
    if len(target_property_names) == 1:
        value = target_property_names[0]
        print(value + " False")
        inv_conjectures += _class_conjectures(value, False)
else: # target is an invariant
    inv_conjectures += _bound_conjectures(list(train_examples), target_invariant)
print(len(inv_conjectures))

# Write one conjecture per line; the with-block closes the handle even if a write fails.
with inv_file:
    for conj in inv_conjectures:
        inv_file.write("%s\n" % conj)


TARGET_As Expected
TARGET_Worse than Expected
TARGET_Better than Expected
TARGET_Not Available
342
342


Property conjecturing - sufficient conditions for each value of a categorical target.  For a binary target, get sufficient conditions for the positive class and necessary conditions for the negative class.

In [17]:
# Slot 0 of the property list is a placeholder; it is overwritten with the current
# target property before every conjecturing run.
all_properties = ["TARGET"] + properties + inv_conjectures
prop_conjs = []
conditions = {}
if "TARGET" in categorical_names:
    # Sufficient conditions for each target level; the premises are extracted once per run.
    for value in target_property_names:
        print(value)
        all_properties[0] = Example.__dict__[value]
        these_prop_conjs = propertyBasedConjecture(
            objects=train_examples,
            properties=all_properties,
            mainProperty=0,
            skips=my_skips,
        )
        conditions[value] = [get_premise(conj, myprint=False) for conj in these_prop_conjs]
        prop_conjs.extend(these_prop_conjs)
    # Binary target: also collect necessary conditions, stored under the key "necessary".
    if len(target_property_names) == 1:
        print(value + " Necessary")
        all_properties[0] = Example.__dict__[value]
        these_prop_conjs = propertyBasedConjecture(
            objects=train_examples,
            properties=all_properties,
            mainProperty=0,
            sufficient=False,
            skips=my_skips,
        )
        conditions["necessary"] = [get_conclusion(conj, myprint=False) for conj in these_prop_conjs]
        prop_conjs.extend(these_prop_conjs)

# Persist every property conjecture, one readable name per line.
for conj in prop_conjs:
    prop_file.write("%s\n" % convert_name_back(conj.__name__))
    prop_file.flush()

prop_file.close()

TARGET_As Expected


  return ln(args[0], **kwds)
  return (lambda x: 10**x), 1
  stack.append(op(left, right))
  stack.append(op(stack.pop()))
  return ln(args[0], **kwds)
  stack.append(op(left, right))


(~(Weighted_HIE_DSA_leq_maximumopen_bracket_Flagging_ratio_or_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket))->(TARGET_AsExpected)
(~(N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket))->(TARGET_AsExpected)
((Tx_R_O1_leq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_10_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket_divided_by_N_Tx_Ctr1)^(Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1))->(TARGET_AsExpected)
((N_Tx_Ctr1_leq_open_bracket_inverse_of_4_times_Weighted_HIE_DSA_squared_close_bracket_to_the_power_open_bracket_2_times_Tx_R_O1_minus_1_close_bracket)^(Weighted_HIE_DSA_leq_Flagging_ratio_to_the_power_open_bracket_open_bracket__minus_Tx_R_O1_close_bracket_to_the_power_N_Tx_Ctr1_close_bracket))->(TARGET

In [18]:
# Number of conjectures returned by the most recent propertyBasedConjecture call.
len(these_prop_conjs)

1

Apply conjectures to train and test data if target is categorical.

In [19]:
# Feature frames: property and invariant columns only (the target dummies and the
# unused level of each binary variable are left out).  .copy() makes it explicit
# that the conjecture columns added below never touch my_df.
feature_columns = property_names + invariant_names
X_train_df = my_df.loc[X_train, feature_columns].copy()
X_test_df = my_df.loc[X_test, feature_columns].copy()
y_train_df = my_data.loc[X_train,"TARGET"] # get original target, even if it is multiple levels
y_test_df = my_data.loc[X_test, "TARGET"]
if "TARGET" in categorical_names:
    # Gather every condition first so each one gets its own column.  Numbering the
    # columns per target level (or with one fixed index for the necessary
    # conditions) makes later conditions overwrite earlier ones.
    all_conditions = [condition
                      for value in target_property_names
                      for condition in conditions[value]]
    if len(target_property_names) == 1:
        all_conditions += conditions["necessary"]

    # Evaluate each condition on every example; rows line up because the example
    # lists were built in X_train / X_test order.
    for index, condition in enumerate(all_conditions):
        column = 'conj_' + str(index)
        X_train_df[column] = [condition(example) for example in train_examples]
        X_test_df[column] = [condition(example) for example in test_examples]

    print(conditions)

  return ln(args[0], **kwds)
  return ln(args[0], **kwds)
  return (lambda x: 10**x), 1
  return (lambda x: 10**x), 1
  return (lambda x: 10**x), 1


{'TARGET_As Expected': [~Weighted_HIE_DSA_leq_maximumopen_bracket_Flagging_ratio_or_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket, ~N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket, Tx_R_O1_leq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_10_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket_divided_by_N_Tx_Ctr1^Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1, N_Tx_Ctr1_leq_open_bracket_inverse_of_4_times_Weighted_HIE_DSA_squared_close_bracket_to_the_power_open_bracket_2_times_Tx_R_O1_minus_1_close_bracket^Weighted_HIE_DSA_leq_Flagging_ratio_to_the_power_open_bracket_open_bracket__minus_Tx_R_O1_close_bracket_to_the_power_N_Tx_Ctr1_close_bracket, ~Flagging_ratio_leq_Weighted_HIE_DSA_to_the_power_open_bracket_

Calculate support, precision, and lift.

In [20]:
# Show the target property names; the metric loops iterate over them in this order.
target_property_names

['TARGET_As Expected',
 'TARGET_Worse than Expected',
 'TARGET_Better than Expected',
 'TARGET_Not Available']

In [21]:
# Score every conjectured condition on the test examples.
#   support   -- number of test examples the condition selects
#   precision -- fraction of selected examples that belong to the class
#   lift      -- precision divided by the class's base rate in the test set
support = []
lift = []
precision = []
if "TARGET" in categorical_names:
    # Sufficient conditions: condition True should imply class membership.
    for value in target_property_names:
        print("value: {}".format(value))
        my_function = getattr(Example, value)
        for i, condition in enumerate(conditions[value]):
            print(i, "condition: {}".format(condition))
            num_true = 0
            num_in_class = 0
            num_hit = 0
            for example in test_examples:
                if condition(example) == True:
                    num_true += 1
                    if my_function(example) == True:
                        num_hit += 1
                if my_function(example) == True:
                    num_in_class += 1
            support.append(num_true)
            # num_hit > 0 guarantees num_true > 0 and num_in_class > 0, so no division by zero.
            if num_hit > 0: 
                precision.append(n(num_hit/num_true))
                lift.append(n(num_hit/num_true)/n(num_in_class/len(test_examples)))
            else:
                precision.append(0.0)
                lift.append(0.0)
    # Binary target: a necessary condition that is False should imply the negative class.
    if len(target_property_names) == 1:
        # Bind the property explicitly instead of relying on the variable left
        # over from the loop above.
        my_function = getattr(Example, target_property_names[0])
        for i, condition in enumerate(conditions["necessary"]):
            print(i, "condition: {}".format(condition))
            num_false = 0
            num_in_class = 0
            num_hit = 0
            for example in test_examples:
                if condition(example) == False:
                    num_false += 1
                    if my_function(example) == False:
                        num_hit += 1
                if my_function(example) == False:
                    num_in_class += 1
            support.append(num_false)
            if num_hit > 0: 
                precision.append(n(num_hit/num_false))
                lift.append(n(num_hit/num_false)/n(num_in_class/len(test_examples)))
            else:
                precision.append(0.0)
                lift.append(0.0)
            
results_df = pd.DataFrame({'support':support, 'precision':precision, 'lift':lift})
        
results_df

value: TARGET_As Expected
0 condition: ~Weighted_HIE_DSA_leq_maximumopen_bracket_Flagging_ratio_or_Tx_R_O1_close_bracket_divided_by_open_bracket_logopen_bracket_N_Tx_Ctr1_close_bracket_minus_1_close_bracket
1 condition: ~N_Tx_Ctr1_leq_inverse_of_2_times_e_to_the_power_open_bracket_2_times_e_to_the_power_open_bracket_e_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket
2 condition: Tx_R_O1_leq_e_to_the_power_open_bracket_e_to_the_power_open_bracket_10_to_the_power_Weighted_HIE_DSA_close_bracket_plus_1_close_bracket_divided_by_N_Tx_Ctr1^Weighted_HIE_DSA_geq__minus_inverse_of_logopen_bracket_inverse_of_2_times_N_Tx_Ctr1_plus_inverse_of_2_close_bracket_plus_Tx_R_O1
3 condition: N_Tx_Ctr1_leq_open_bracket_inverse_of_4_times_Weighted_HIE_DSA_squared_close_bracket_to_the_power_open_bracket_2_times_Tx_R_O1_minus_1_close_bracket^Weighted_HIE_DSA_leq_Flagging_ratio_to_the_power_open_bracket_open_bracket__minus_Tx_R_O1_close_bracket_to_the_power_N_Tx_Ctr1_close_bracket
4 condition: 

  return ln(args[0], **kwds)


8 condition: N_Tx_Ctr1_geq_ceilopen_bracket_10_to_the_power_open_bracket_2_times_sqrtopen_bracket_Tx_R_O1_close_bracket_plus_1_close_bracket_close_bracket^N_Tx_Ctr1_geq_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_minus_1_close_bracket_close_bracket


  return (lambda x: 10**x), 1


9 condition: N_Tx_Ctr1_geq_open_bracket_10_to_the_power_Tx_R_O1_close_bracket_to_the_power_open_bracket_10_to_the_power_open_bracket_Weighted_HIE_DSA_plus_1_close_bracket_minus_1_close_bracket^N_Tx_Ctr1_leq_ceilopen_bracket_e_to_the_power_open_bracket_inverse_of_4_divided_by_Tx_R_O1_squared_close_bracket_close_bracket
10 condition: ~Tx_R_O1_geq_inverse_of_4_times_open_bracket_Weighted_HIE_DSA_minus_1_close_bracket_to_the_power_open_bracket_N_Tx_Ctr1_plus_1_close_bracket&Tx_R_O1_geq_open_bracket_inverse_of_logopen_bracket_10_to_the_power_open_bracket_10_to_the_power_Flagging_ratio_close_bracket_plus_N_Tx_Ctr1_close_bracket_close_bracket
11 condition: ~(Tx_R_O1_geq_open_bracket_inverse_of_open_bracket_10_to_the_power_e_to_the_power_Flagging_ratio_minus_logopen_bracket_N_Tx_Ctr1_close_bracket_close_bracket_close_bracket|Weighted_HIE_DSA_geq_open_bracket_inverse_of_2_times_Tx_R_O1_minus_1_close_bracket_to_the_power_ceilopen_bracket_inverse_of_2_times_N_Tx_Ctr1_close_bracket)
value: TARGET_

Unnamed: 0,support,precision,lift
0,3095,0.850080775444265,1.05021272406753
1,4622,0.830809173517958,1.02640406712479
2,4872,0.845032840722496,1.04397636932548
3,5512,0.820210449927431,1.01331011806115
4,2162,0.838112858464385,1.03542723655161
5,3850,0.808831168831169,0.999251847196486
6,5092,0.83032207384132,1.02580229104295
7,6840,0.81374269005848,1.00531967302722
8,2065,0.84455205811138,1.04338239751681
9,1709,0.826214160327677,1.02072726386198


In [None]:
# Score every conjectured condition on the test examples.
#   class     -- target property the condition was conjectured for ("necessary" for necessary conditions)
#   support   -- number of test examples the condition selects
#   precision -- fraction of selected examples that belong to the class
#   recall    -- fraction of the class's test examples that the condition selects
#   lift      -- precision divided by the class's base rate in the test set
#   f1        -- harmonic mean of precision and recall
support = []
lift = []
precision = []
recall = []
f1 = []
classes = []
if "TARGET" in categorical_names:
    # Cast the original target labels to strings once rather than once per condition.
    y_test_labels = y_test_df.astype('str')

    # Sufficient conditions: condition True should imply class membership.
    for value in target_property_names:
        this_value = value.replace("TARGET_", "")
        my_function = getattr(Example, value)
        class_size = sum(y_test_labels == this_value)
        for condition in conditions[value]:
            classes.append(value)
            num_true = 0
            num_in_class = 0
            num_hit = 0
            for example in test_examples:
                if condition(example) == True:
                    num_true += 1
                    if my_function(example) == True:
                        num_hit += 1
                if my_function(example) == True:
                    num_in_class += 1
            support.append(num_true)
            # num_hit > 0 guarantees every denominator below is non-zero.
            if num_hit > 0: 
                my_precision = n(num_hit/num_true)
                my_recall = n(num_hit/class_size)
                precision.append(my_precision)
                lift.append(my_precision/n(num_in_class/len(test_examples)))
                recall.append(my_recall)
                f1.append((2*my_precision*my_recall)/(my_precision + my_recall))
            else:
                precision.append(0.0)
                lift.append(0.0)
                recall.append(0.0)
                f1.append(0.0)

    # Binary target: a necessary condition that is False should imply the negative class.
    if len(target_property_names) == 1:
        # Bind these explicitly instead of relying on names left over from the loop above.
        value = target_property_names[0]
        this_value = value.replace("TARGET_", "")
        my_function = getattr(Example, value)
        # The class of interest here is the negative one: labels different from this_value.
        negative_size = sum(y_test_labels != this_value)
        for condition in conditions["necessary"]:
            classes.append("necessary")
            num_false = 0
            num_in_class = 0
            num_hit = 0
            for example in test_examples:
                if condition(example) == False:
                    num_false += 1
                    if my_function(example) == False:
                        num_hit += 1
                if my_function(example) == False:
                    num_in_class += 1
            support.append(num_false)
            if num_hit > 0: 
                # Precision is taken over the examples this condition rejects
                # (num_false), and recall over the negative class; the same two
                # values feed f1 so the reported columns agree with each other.
                my_precision = n(num_hit/num_false)
                my_recall = n(num_hit/negative_size)
                precision.append(my_precision)
                lift.append(my_precision/n(num_in_class/len(test_examples)))
                recall.append(my_recall)
                f1.append((2*my_precision*my_recall)/(my_precision + my_recall))
            else:
                precision.append(0.0)
                lift.append(0.0)
                recall.append(0.0)
                f1.append(0.0)
            
results_df = pd.DataFrame({
    'class': classes,
    'support':support, 
    'precision':precision, 
    'recall': recall, 
    'lift':lift, 
    'f1': f1})
        
results_df

  return ln(args[0], **kwds)
  return (lambda x: 10**x), 1
  return (lambda x: 10**x), 1
  return (lambda x: 10**x), 1
