from datetime import datetime
from random_forest import RandomForest
from decision_tree import DecisionTree

#export
import csv
import numpy as np  # http://www.numpy.org
import ast
from math import log, floor, ceil
import random
import numpy as np


#export
# TODO: Determine the forest size according to your implementation.
# This function will be used by the autograder to set your forest size during testing
# VERY IMPORTANT: Minimum forest_size should be 10
def get_forest_size():
    """Return the number of trees passed to ``RandomForest`` in ``run``."""
    forest_size = 10
    return forest_size


# TODO: Determine random seed to set for reproducibility
# This function will be used by the autograder to set the random seed to obtain the same results you achieve locally
def get_random_seed():
    """Return the seed that ``run`` hands to ``np.random.seed``."""
    random_seed = 0
    return random_seed


def run():
    """Load the diabetes CSV, train a random forest, and print its accuracy.

    Reads ``pima-indians-diabetes.csv`` from the current working directory
    (the first line is skipped as a header), builds a ``RandomForest`` with
    ``get_forest_size()`` trees, fits it on bootstrapped samples, and prints
    the accuracy, its complement (labelled "OOB estimate"), and the elapsed
    wall-clock time.

    Returns:
        None. All results are written to standard output.
    """
    # NOTE: only NumPy's global RNG is seeded here; the stdlib ``random``
    # module is imported by this file but is not seeded.
    np.random.seed(get_random_seed())

    # start time
    start = datetime.now()

    X = []   # feature columns of every row (everything but the last column)
    y = []   # last column of every row (the label)
    XX = []  # Contains data features and data labels

    # indices of numeric attributes (columns)
    numerical_cols = set(range(9))

    # Loading data set
    print("reading the data")
    # newline="" is what the csv module documents for files given to a reader.
    with open("pima-indians-diabetes.csv", newline="") as f:
        next(f, None)  # skip the header line (no error if the file is empty)
        for line in csv.reader(f, delimiter=","):
            # Numeric columns are parsed into Python literals; any other
            # column is kept as the raw string.
            xline = [
                ast.literal_eval(value) if i in numerical_cols else value
                for i, value in enumerate(line)
            ]

            X.append(xline[:-1])
            y.append(xline[-1])
            XX.append(xline[:])

    # Initializing a random forest.
    randomForest = RandomForest(get_forest_size())

    # printing the name
    print("__Name: " + randomForest.user()+"__")

    # Creating the bootstrapping datasets
    print("creating the bootstrap datasets")
    randomForest.bootstrapping(XX)

    # Building trees in the forest
    print("fitting the forest")
    randomForest.fitting()

    # Calculating an unbiased error estimation of the random forest
    # based on out-of-bag (OOB) error estimate.
    # NOTE(review): predictions are requested for every row of X; whether each
    # row is only voted on by trees that did not train on it depends on
    # RandomForest.voting -- confirm against that implementation.
    y_predicted = randomForest.voting(X)

    # Comparing predicted and true labels
    results = [prediction == truth for prediction, truth in zip(y_predicted, y)]

    # Accuracy
    accuracy = float(results.count(True)) / float(len(results))

    print("accuracy: %.4f" % accuracy)
    print("OOB estimate: %.4f" % (1 - accuracy))

    # end time
    print("Execution time: " + str(datetime.now() - start))


# Use equality, not ``in``: ``in`` is a substring test on strings, so a module
# named e.g. "main" would also have triggered run() on import.
if __name__ == "__main__":
    run()