Files
tunmnlu/task_2/others-answer/omsa-main/CS-6242-OAN/hw4/Q2/script.py
louiscklaw 9035c1312b update,
2025-02-01 02:09:32 +08:00

82 lines
2.4 KiB
Python

from datetime import datetime
from random_forest import RandomForest
from decision_tree import DecisionTree
#export
import csv
import numpy as np # http://www.numpy.org
import ast
from math import log, floor, ceil
import random
import numpy as np
#export
# TODO: Determine the forest size according to your implementation.
# This function will be used by the autograder to set your forest size during testing
# VERY IMPORTANT: Minimum forest_size should be 10
def get_forest_size():
    """Return the number of trees to build in the random forest.

    The assignment requires this to be at least 10.
    """
    return 10
# TODO: Determine random seed to set for reproducibility
# This function will be used by the autograder to set the random seed to obtain the same results you achieve locally
def get_random_seed():
    """Return the fixed seed used to make runs reproducible."""
    return 0
def run():
    """Train a random forest on the diabetes data set and print its error.

    Reads ``pima-indians-diabetes.csv`` from the current working directory
    (the first line is skipped as a header), builds the bootstrap datasets,
    fits the forest, and prints the accuracy, ``1 - accuracy`` and the
    elapsed wall-clock time.

    Raises:
        ValueError: if the CSV file contains no data rows.
    """
    # Seed both generators so results are reproducible regardless of which
    # one the forest/tree implementation draws from.
    seed = get_random_seed()
    np.random.seed(seed)
    random.seed(seed)

    # start time
    start = datetime.now()

    X = []   # feature rows (every column except the last)
    y = []   # labels (last column)
    XX = []  # full rows: features followed by the label
    numerical_cols = set(range(9))  # indices of numeric attributes (columns)

    # Loading data set
    print("reading the data")
    # newline="" is what the csv module requires for files it reads.
    with open("pima-indians-diabetes.csv", newline="") as f:
        next(f, None)  # skip the header line
        for line in csv.reader(f, delimiter=","):
            if not line:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            xline = [
                ast.literal_eval(value) if i in numerical_cols else value
                for i, value in enumerate(line)
            ]
            X.append(xline[:-1])
            y.append(xline[-1])
            XX.append(xline[:])

    if not XX:
        # Fail early with a clear message instead of a ZeroDivisionError
        # when computing the accuracy below.
        raise ValueError("pima-indians-diabetes.csv contains no data rows")

    # Initializing a random forest.
    randomForest = RandomForest(get_forest_size())

    # printing the name
    print("__Name: " + randomForest.user()+"__")

    # Creating the bootstrapping datasets
    print("creating the bootstrap datasets")
    randomForest.bootstrapping(XX)

    # Building trees in the forest
    print("fitting the forest")
    randomForest.fitting()

    # Predict a label for every record.
    # NOTE(review): the reported figure is labelled an out-of-bag (OOB)
    # estimate; that holds only if RandomForest.voting() excludes trees that
    # saw the record during training -- confirm against random_forest.py.
    y_predicted = randomForest.voting(X)

    # Comparing predicted and true labels
    results = [prediction == truth for prediction, truth in zip(y_predicted, y)]

    # Accuracy
    accuracy = float(results.count(True)) / float(len(results))
    print("accuracy: %.4f" % accuracy)
    print("OOB estimate: %.4f" % (1 - accuracy))

    # end time
    print("Execution time: " + str(datetime.now() - start))
# Run the experiment only when this file is executed as a script.
# Use equality, not `in`: `__name__ in "__main__"` is a substring test and
# would also fire for modules named e.g. "main" or "in".
if __name__ == "__main__":
    run()