update,
This commit is contained in:
467
tunmnlu/task_1/Q2-src/Q2.py
Normal file
467
tunmnlu/task_1/Q2-src/Q2.py
Normal file
@@ -0,0 +1,467 @@
|
||||
########################### DO NOT MODIFY THIS SECTION ##########################
|
||||
#################################################################################
|
||||
import sqlite3
|
||||
from sqlite3 import Error
|
||||
import csv
|
||||
#################################################################################
|
||||
|
||||
## Change to False to disable Sample
|
||||
SHOW = True
|
||||
|
||||
############### SAMPLE CLASS AND SQL QUERY ###########################
|
||||
######################################################################
|
||||
class Sample():
|
||||
def sample(self):
|
||||
try:
|
||||
connection = sqlite3.connect("sample")
|
||||
connection.text_factory = str
|
||||
except Error as e:
|
||||
print("Error occurred: " + str(e))
|
||||
print('\033[32m' + "Sample: " + '\033[m')
|
||||
|
||||
# Sample Drop table
|
||||
connection.execute("DROP TABLE IF EXISTS sample;")
|
||||
# Sample Create
|
||||
connection.execute("CREATE TABLE sample(id integer, name text);")
|
||||
# Sample Insert
|
||||
connection.execute("INSERT INTO sample VALUES (?,?)",("1","test_name"))
|
||||
connection.commit()
|
||||
# Sample Select
|
||||
cursor = connection.execute("SELECT * FROM sample;")
|
||||
print(cursor.fetchall())
|
||||
|
||||
######################################################################
|
||||
|
||||
class HW2_sql():
|
||||
############### DO NOT MODIFY THIS SECTION ###########################
|
||||
######################################################################
|
||||
def create_connection(self, path):
|
||||
connection = None
|
||||
try:
|
||||
connection = sqlite3.connect(path)
|
||||
connection.text_factory = str
|
||||
except Error as e:
|
||||
print("Error occurred: " + str(e))
|
||||
|
||||
return connection
|
||||
|
||||
def execute_query(self, connection, query):
|
||||
cursor = connection.cursor()
|
||||
try:
|
||||
if query == "":
|
||||
return "Query Blank"
|
||||
else:
|
||||
cursor.execute(query)
|
||||
connection.commit()
|
||||
return "Query executed successfully"
|
||||
except Error as e:
|
||||
return "Error occurred: " + str(e)
|
||||
######################################################################
|
||||
######################################################################
|
||||
|
||||
# GTusername [0 points]
|
||||
def GTusername(self):
|
||||
gt_username = "tlou31"
|
||||
return gt_username
|
||||
|
||||
# Part a.i Create Tables [2 points]
|
||||
def part_ai_1(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_ai_1_sql = """
|
||||
CREATE TABLE IF NOT EXISTS movies (
|
||||
id INTEGER ,
|
||||
title TEXT,
|
||||
score REAL);
|
||||
"""
|
||||
|
||||
######################################################################
|
||||
|
||||
return self.execute_query(connection, part_ai_1_sql)
|
||||
|
||||
def part_ai_2(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_ai_2_sql = """
|
||||
CREATE TABLE IF NOT EXISTS movie_cast (
|
||||
movie_id integer,
|
||||
cast_id integer,
|
||||
cast_name text,
|
||||
birthday text,
|
||||
popularity real);
|
||||
"""
|
||||
|
||||
######################################################################
|
||||
|
||||
return self.execute_query(connection, part_ai_2_sql)
|
||||
|
||||
# Part a.ii Import Data [2 points]
|
||||
def part_aii_1(self,connection,path):
|
||||
############### CREATE IMPORT CODE BELOW ############################
|
||||
|
||||
with open(path, 'r') as movie_data:
|
||||
md = csv.reader(movie_data)
|
||||
md_list = list(md)
|
||||
for i in md_list:
|
||||
connection.execute("INSERT INTO movies VALUES (?, ?, ?)", (i[0], i[1], i[2]))
|
||||
|
||||
######################################################################
|
||||
|
||||
sql = "SELECT COUNT(id) FROM movies;"
|
||||
cursor = connection.execute(sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
def part_aii_2(self,connection, path):
|
||||
############### CREATE IMPORT CODE BELOW ############################
|
||||
|
||||
curs = connection.cursor()
|
||||
with open(path, 'r') as moviecast_data:
|
||||
mcd = csv.reader(moviecast_data)
|
||||
mcd_list = list(mcd)
|
||||
for i in mcd_list:
|
||||
curs.execute("INSERT INTO movie_cast VALUES (?, ?, ?, ?, ?)", (i[0], i[1], i[2], i[3], i[4]))
|
||||
|
||||
######################################################################
|
||||
|
||||
sql = "SELECT COUNT(cast_id) FROM movie_cast;"
|
||||
cursor = connection.execute(sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
# Part a.iii Vertical Database Partitioning [5 points]
|
||||
def part_aiii(self,connection):
|
||||
############### EDIT CREATE TABLE SQL STATEMENT ###################################
|
||||
part_aiii_sql = """
|
||||
CREATE TABLE IF NOT EXISTS cast_bio (
|
||||
cast_id integer,
|
||||
cast_name text,
|
||||
birthday text,
|
||||
popularity real);
|
||||
"""
|
||||
|
||||
###############################################################################
|
||||
|
||||
self.execute_query(connection, part_aiii_sql)
|
||||
|
||||
############### CREATE IMPORT CODE BELOW ############################
|
||||
part_aiii_insert_sql = """
|
||||
INSERT INTO cast_bio
|
||||
SELECT DISTINCT cast_id, cast_name, birthday, popularity
|
||||
FROM movie_cast
|
||||
"""
|
||||
######################################################################
|
||||
|
||||
self.execute_query(connection, part_aiii_insert_sql)
|
||||
|
||||
sql = "SELECT COUNT(cast_id) FROM cast_bio;"
|
||||
cursor = connection.execute(sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
|
||||
# Part b Create Indexes [1 points]
|
||||
def part_b_1(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_b_1_sql = "CREATE INDEX moive_index ON movies (id); "
|
||||
######################################################################
|
||||
return self.execute_query(connection, part_b_1_sql)
|
||||
|
||||
def part_b_2(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_b_2_sql = "CREATE INDEX cast_index ON movie_cast (cast_id); "
|
||||
######################################################################
|
||||
return self.execute_query(connection, part_b_2_sql)
|
||||
|
||||
def part_b_3(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_b_3_sql = "CREATE INDEX cast_bio_index ON cast_bio (cast_id); "
|
||||
######################################################################
|
||||
return self.execute_query(connection, part_b_3_sql)
|
||||
|
||||
# Part c Calculate a Proportion [3 points]
|
||||
def part_c(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_c_sql = """
|
||||
SELECT printf( "%.2f", (
|
||||
(
|
||||
SELECT (count(id) * 1.0)
|
||||
FROM movies
|
||||
WHERE score >=7 and score <=20
|
||||
) / (
|
||||
SELECT count(id)
|
||||
FROM movies
|
||||
)
|
||||
) * 100.0
|
||||
)
|
||||
"""
|
||||
# 0.0732
|
||||
|
||||
######################################################################
|
||||
cursor = connection.execute(part_c_sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
# Part d Find the Most Prolific Actors [4 points]
|
||||
def part_d(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_d_sql = """
|
||||
SELECT
|
||||
cast_name,
|
||||
COUNT(movie_id) as appearance_count
|
||||
FROM movie_cast
|
||||
WHERE popularity > 10
|
||||
GROUP BY cast_name
|
||||
ORDER BY count(movie_id) DESC, cast_name ASC
|
||||
LIMIT 5
|
||||
"""
|
||||
######################################################################
|
||||
cursor = connection.execute(part_d_sql)
|
||||
return cursor.fetchall()
|
||||
|
||||
# Part e Find the Highest Scoring Movies With the Least Amount of Cast [4 points]
|
||||
def part_e(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_e_sql = """
|
||||
SELECT
|
||||
a.title as movie_title,
|
||||
printf('%.2f', a.score) as movie_score,
|
||||
(COUNT(b.cast_id)) as cast_count
|
||||
FROM movies as a
|
||||
INNER JOIN movie_cast as b
|
||||
ON a.id = b.movie_id
|
||||
GROUP By a.title
|
||||
ORDER BY a.score DESC, COUNT(b.cast_id) ASC, a.title ASC
|
||||
LIMIT 5
|
||||
"""
|
||||
######################################################################
|
||||
cursor = connection.execute(part_e_sql)
|
||||
return cursor.fetchall()
|
||||
|
||||
# Part f Get High Scoring Actors [4 points]
|
||||
def part_f(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_f_sql = """
|
||||
SELECT
|
||||
a.cast_id,
|
||||
a.cast_name,
|
||||
printf('%.2f', AVG(b.score)) as average_score
|
||||
FROM movie_cast as a
|
||||
INNER JOIN movies as b
|
||||
ON a.movie_id = b.id
|
||||
WHERE NOT(b.score < 25)
|
||||
GROUP BY a.cast_id, a.cast_name
|
||||
HAVING COUNT(a.movie_id) >= 3
|
||||
ORDER BY AVG(b.score) DESC, a.cast_name ASC
|
||||
LIMIT 10
|
||||
"""
|
||||
######################################################################
|
||||
cursor = connection.execute(part_f_sql)
|
||||
return cursor.fetchall()
|
||||
|
||||
# Part g Creating Views [6 points]
|
||||
def part_g(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_g_sql = '''
|
||||
CREATE VIEW good_collaboration
|
||||
(
|
||||
cast_member_id1,
|
||||
cast_member_id2,
|
||||
movie_count,
|
||||
average_movie_score
|
||||
)
|
||||
AS
|
||||
|
||||
SELECT
|
||||
cast1.cast_id AS cast_member_id1
|
||||
,cast2.cast_id AS cast_member_id2
|
||||
,Count(cast1.movie_id) AS cocasted
|
||||
,AVG(movie.score) AS avg_score
|
||||
FROM movie_cast cast1
|
||||
|
||||
JOIN movie_cast cast2
|
||||
ON cast1.movie_id = cast2.movie_id
|
||||
AND cast1.cast_id < cast2.cast_id
|
||||
|
||||
JOIN movies movie ON cast1.movie_id = movie.id
|
||||
|
||||
GROUP BY cast1.cast_id, cast2.cast_id
|
||||
HAVING cocasted >= 2 AND avg_score >= 40
|
||||
|
||||
'''
|
||||
# ORDER BY avg_score DESC;
|
||||
######################################################################
|
||||
return self.execute_query(connection, part_g_sql)
|
||||
|
||||
def part_gi(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_g_i_sql = """
|
||||
WITH
|
||||
temp1 AS (
|
||||
SELECT
|
||||
cast_member_id1 AS cast_id
|
||||
FROM good_collaboration
|
||||
UNION
|
||||
SELECT
|
||||
cast_member_id2
|
||||
FROM good_collaboration
|
||||
),
|
||||
temp2 AS (
|
||||
SELECT
|
||||
cast_id,
|
||||
AVG(average_movie_score) AS collaboration_score
|
||||
FROM temp1
|
||||
INNER JOIN good_collaboration AS g1
|
||||
ON (
|
||||
temp1.cast_id = g1.cast_member_id1
|
||||
OR
|
||||
temp1.cast_id = g1.cast_member_id2
|
||||
)
|
||||
GROUP BY temp1.cast_id
|
||||
)
|
||||
SELECT
|
||||
DISTINCT temp2.cast_id AS cast_id,
|
||||
cast_name,
|
||||
printf("%.2f", collaboration_score) AS collaboration_score
|
||||
FROM temp2
|
||||
INNER JOIN movie_cast AS mc ON mc.cast_id = temp2.cast_id
|
||||
ORDER BY collaboration_score DESC, cast_name ASC
|
||||
LIMIT 5;
|
||||
"""
|
||||
######################################################################
|
||||
cursor = connection.execute(part_g_i_sql)
|
||||
return cursor.fetchall()
|
||||
|
||||
# Part h FTS [4 points]
|
||||
def part_h(self,connection,path):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_h_sql = """
|
||||
CREATE VIRTUAL TABLE movie_overview USING fts4(id INTEGER, overview TEXT)
|
||||
"""
|
||||
######################################################################
|
||||
connection.execute(part_h_sql)
|
||||
############### CREATE IMPORT CODE BELOW ############################
|
||||
|
||||
with open(path, 'r') as movie_overview_data:
|
||||
mod = csv.reader(movie_overview_data)
|
||||
mod_list = list(mod)
|
||||
for i in mod_list:
|
||||
connection.execute("INSERT INTO movie_overview VALUES (?, ?)", (i[0], i[1]))
|
||||
|
||||
######################################################################
|
||||
sql = "SELECT COUNT(id) FROM movie_overview;"
|
||||
cursor = connection.execute(sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
def part_hi(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_hi_sql = """
|
||||
SELECT
|
||||
count(*) as cnt
|
||||
FROM movie_overview
|
||||
WHERE overview MATCH '"fight"'
|
||||
"""
|
||||
######################################################################
|
||||
cursor = connection.execute(part_hi_sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
def part_hii(self,connection):
|
||||
############### EDIT SQL STATEMENT ###################################
|
||||
part_hii_sql = """
|
||||
SELECT
|
||||
count(*) as cnt
|
||||
FROM movie_overview
|
||||
WHERE overview MATCH '"space" NEAR/5 "program"'
|
||||
"""
|
||||
######################################################################
|
||||
cursor = connection.execute(part_hii_sql)
|
||||
return cursor.fetchall()[0][0]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
########################### DO NOT MODIFY THIS SECTION ##########################
|
||||
#################################################################################
|
||||
if SHOW == True:
|
||||
sample = Sample()
|
||||
sample.sample()
|
||||
|
||||
print('\033[32m' + "Q2 Output: " + '\033[m')
|
||||
db = HW2_sql()
|
||||
try:
|
||||
conn = db.create_connection("Q2")
|
||||
except:
|
||||
print("Database Creation Error")
|
||||
|
||||
try:
|
||||
conn.execute("DROP TABLE IF EXISTS movies;")
|
||||
conn.execute("DROP TABLE IF EXISTS movie_cast;")
|
||||
conn.execute("DROP TABLE IF EXISTS cast_bio;")
|
||||
conn.execute("DROP VIEW IF EXISTS good_collaboration;")
|
||||
conn.execute("DROP TABLE IF EXISTS movie_overview;")
|
||||
except:
|
||||
print("Error in Table Drops")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part ai 1: " + '\033[m' + str(db.part_ai_1(conn)))
|
||||
print('\033[32m' + "part ai 2: " + '\033[m' + str(db.part_ai_2(conn)))
|
||||
except:
|
||||
print("Error in Part a.i")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "Row count for Movies Table: " + '\033[m' + str(db.part_aii_1(conn,"data/movies.csv")))
|
||||
print('\033[32m' + "Row count for Movie Cast Table: " + '\033[m' + str(db.part_aii_2(conn,"data/movie_cast.csv")))
|
||||
except:
|
||||
print("Error in part a.ii")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "Row count for Cast Bio Table: " + '\033[m' + str(db.part_aiii(conn)))
|
||||
except:
|
||||
print("Error in part a.iii")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part b 1: " + '\033[m' + db.part_b_1(conn))
|
||||
print('\033[32m' + "part b 2: " + '\033[m' + db.part_b_2(conn))
|
||||
print('\033[32m' + "part b 3: " + '\033[m' + db.part_b_3(conn))
|
||||
except:
|
||||
print("Error in part b")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part c: " + '\033[m' + str(db.part_c(conn)))
|
||||
except:
|
||||
print("Error in part c")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part d: " + '\033[m')
|
||||
for line in db.part_d(conn):
|
||||
print(line[0],line[1])
|
||||
except:
|
||||
print("Error in part d")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part e: " + '\033[m')
|
||||
for line in db.part_e(conn):
|
||||
print(line[0],line[1],line[2])
|
||||
except:
|
||||
print("Error in part e")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part f: " + '\033[m')
|
||||
for line in db.part_f(conn):
|
||||
print(line[0],line[1],line[2])
|
||||
except:
|
||||
print("Error in part f")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part g: " + '\033[m' + str(db.part_g(conn)))
|
||||
print('\033[32m' + "part g.i: " + '\033[m')
|
||||
for line in db.part_gi(conn):
|
||||
print(line[0],line[1],line[2])
|
||||
except:
|
||||
print("Error in part g")
|
||||
|
||||
try:
|
||||
print('\033[32m' + "part h.i: " + '\033[m'+ str(db.part_h(conn,"data/movie_overview.csv")))
|
||||
print('\033[32m' + "Count h.ii: " + '\033[m' + str(db.part_hi(conn)))
|
||||
print('\033[32m' + "Count h.iii: " + '\033[m' + str(db.part_hii(conn)))
|
||||
except:
|
||||
print("Error in part h")
|
||||
|
||||
conn.close()
|
||||
#################################################################################
|
||||
#################################################################################
|
||||
|
Reference in New Issue
Block a user