# File-viewer header captured during extraction: 82 lines, 2.4 KiB, Python.
from datetime import datetime
|
|
from random_forest import RandomForest
|
|
from decision_tree import DecisionTree
|
|
#export
|
|
import csv
|
|
import numpy as np # http://www.numpy.org
|
|
import ast
|
|
from math import log, floor, ceil
|
|
import random
|
|
import numpy as np
|
|
#export
|
|
|
|
# TODO: Determine the forest size according to your implementation.
# This function will be used by the autograder to set your forest size during testing.
# VERY IMPORTANT: Minimum forest_size should be 10.
def get_forest_size():
    """Return the forest size handed to the RandomForest constructor.

    Returns:
        int: the configured forest size; per the assignment note above it
        must not be smaller than 10.
    """
    forest_size = 10
    return forest_size
|
|
|
|
# TODO: Determine random seed to set for reproducibility.
# This function will be used by the autograder to set the random seed to obtain
# the same results you achieve locally.
def get_random_seed():
    """Return the seed used to initialise NumPy's random generator.

    Returns:
        int: the seed value (currently 0).
    """
    random_seed = 0
    return random_seed
|
|
|
|
def _load_dataset(path, numerical_cols):
    """Read a comma-separated file and split every row into features and label.

    The first line of the file is skipped (presumably the header row).

    Args:
        path: location of the CSV file to read.
        numerical_cols: collection of column indices whose text is converted
            to a Python literal with ``ast.literal_eval``; every other column
            is kept as the raw string.

    Returns:
        tuple: ``(X, y, XX)`` where ``X`` holds each row without its last
        column, ``y`` holds the last column of each row, and ``XX`` holds the
        complete converted rows.
    """
    X = []
    y = []
    XX = []
    # newline="" is how the csv module asks files to be opened so that
    # quoted fields containing line breaks are parsed correctly.
    with open(path, newline="") as f:
        next(f, None)  # skip the first line without failing on an empty file
        for line in csv.reader(f, delimiter=","):
            if not line:
                # csv.reader yields [] for blank lines; there is no label
                # to extract from such a row, so ignore it.
                continue
            xline = [
                ast.literal_eval(value) if i in numerical_cols else value
                for i, value in enumerate(line)
            ]
            X.append(xline[:-1])
            y.append(xline[-1])
            XX.append(xline)
    return X, y, XX


def run():
    """Train a random forest on the Pima Indians diabetes CSV and report accuracy.

    Seeds NumPy's random generator, loads ``pima-indians-diabetes.csv`` from
    the current working directory, builds a ``RandomForest`` of
    ``get_forest_size()`` trees, and prints the accuracy of its votes, the
    complementary error figure and the elapsed wall-clock time.

    Raises:
        ValueError: if the data file contains no data rows.
    """
    np.random.seed(get_random_seed())

    # start time
    start = datetime.now()

    # indices of numeric attributes (columns)
    numerical_cols = set(range(9))

    # Loading data set
    print("reading the data")
    # X: features only, y: labels only, XX: features and label together
    X, y, XX = _load_dataset("pima-indians-diabetes.csv", numerical_cols)
    if not XX:
        # Fail early with a clear message instead of a ZeroDivisionError
        # when the accuracy is computed below.
        raise ValueError("pima-indians-diabetes.csv contains no data rows")

    # Initializing a random forest.
    randomForest = RandomForest(get_forest_size())

    # printing the name
    print("__Name: " + randomForest.user()+"__")

    # Creating the bootstrapping datasets
    print("creating the bootstrap datasets")
    randomForest.bootstrapping(XX)

    # Building trees in the forest
    print("fitting the forest")
    randomForest.fitting()

    # Predict a label for every sample the forest was built from.
    # NOTE(review): the figure printed below is labelled an out-of-bag (OOB)
    # estimate; whether voting() really restricts each sample to the trees
    # that did not see it is decided inside RandomForest -- confirm there.
    y_predicted = randomForest.voting(X)

    # Comparing predicted and true labels
    results = [prediction == truth for prediction, truth in zip(y_predicted, y)]

    # Accuracy
    accuracy = float(results.count(True)) / float(len(results))

    print("accuracy: %.4f" % accuracy)
    print("OOB estimate: %.4f" % (1 - accuracy))

    # end time
    print("Execution time: " + str(datetime.now() - start))
|
|
|
|
# Run the experiment only when this file is executed as a script.
# The comparison must be an equality test: `in` would perform a substring
# check against "__main__" and also match names such as "main" or "".
if __name__ == "__main__":
    run()