This commit is contained in:
louiscklaw
2025-01-31 22:17:25 +08:00
parent cdc3678990
commit 3688f9ee24
100 changed files with 65454 additions and 0 deletions

View File

@@ -0,0 +1,433 @@
import http.client
import json
import csv
import urllib.request
#############################################################################################################################
# cse6242
# All instructions, code comments, etc. contained within this notebook are part of the assignment instructions.
# Portions of this file will be auto-graded in Gradescope using different sets of parameters / data to ensure that values are not
# hard-coded.
#
# Instructions: Implement all methods in this file that have a return
# value of 'NotImplemented'. See the documentation within each method for specific details, including
# the expected return value
#
# Helper Functions:
# You are permitted to write additional helper functions/methods or use additional instance variables within
# the `Graph` class or `TMDbAPIUtils` class so long as the originally included methods work as required.
#
# Use:
# The `Graph` class is used to represent and store the data for the TMDb co-actor network graph. This class must
# also provide some basic analytics, i.e., number of nodes, edges, and nodes with the highest degree.
#
# The `TMDbAPIUtils` class is used to retrieve Actor/Movie data using themoviedb.org API. We have provided a few necessary methods
# to test your code w/ the API, e.g.: get_movie_cast(), get_movie_credits_for_person(). You may add additional
# methods and instance variables as desired (see Helper Functions).
#
# The data that you retrieve from the TMDb API is used to build your graph using the Graph class. After you build your graph using the
# TMDb API data, use the Graph class write_edges_file & write_nodes_file methods to produce the separate nodes and edges
# .csv files for submission to Gradescope.
#
# While building the co-actor graph, you will be required to write code to expand the graph by iterating
# through a portion of the graph nodes and finding similar artists using the TMDb API. We will not grade this code directly
# but will grade the resulting graph data in your nodes and edges .csv files.
#
#############################################################################################################################
class Graph:
    """
    Undirected graph kept as two plain lists.

    self.nodes holds (id, name) tuples and self.edges holds (source_id, target_id)
    tuples. Each undirected edge is stored once, in the orientation in which it was
    first added.
    """

    # Do not modify
    def __init__(self, with_nodes_file=None, with_edges_file=None):
        """
        option 1:  init as an empty graph and add nodes
        option 2: init by specifying a path to nodes & edges files
        """
        # NOTE(review): the two files below are opened without an explicit close or
        # encoding; left untouched because this method is marked "Do not modify".
        self.nodes = []
        self.edges = []
        if with_nodes_file and with_edges_file:
            nodes_CSV = csv.reader(open(with_nodes_file))
            nodes_CSV = list(nodes_CSV)[1:]
            self.nodes = [(n[0], n[1]) for n in nodes_CSV]

            edges_CSV = csv.reader(open(with_edges_file))
            edges_CSV = list(edges_CSV)[1:]
            self.edges = [(e[0], e[1]) for e in edges_CSV]

    def add_node(self, id: str, name: str) -> None:
        """
        Add a tuple (id, name) representing a node to self.nodes if it does not already exist.
        The graph should not contain any duplicate nodes.

        :param id: node identifier
        :param name: display name stored alongside the id
        """
        node = (id, name)
        if node not in self.nodes:
            self.nodes.append(node)

    def add_edge(self, source: str, target: str) -> None:
        """
        Add an edge between two nodes if it does not already exist.
        An edge is represented by a tuple containing two strings: e.g.: ('source', 'target').
        Where 'source' is the id of the source node and 'target' is the id of the target node
        e.g., for two nodes with ids 'a' and 'b' respectively, add the tuple ('a', 'b') to self.edges

        Self-loops (source == target) are ignored, and ('b', 'a') counts as a duplicate
        of ('a', 'b') because the graph is undirected.
        """
        if source == target:
            return
        edge = (source, target)
        reversed_edge = (target, source)
        if edge not in self.edges and reversed_edge not in self.edges:
            self.edges.append(edge)

    def total_nodes(self) -> int:
        """
        Returns an integer value for the total number of nodes in the graph
        """
        return len(self.nodes)

    def total_edges(self) -> int:
        """
        Returns an integer value for the total number of edges in the graph
        """
        return len(self.edges)

    def max_degree_nodes(self) -> dict:
        """
        Return the node(s) with the highest degree
        Return multiple nodes in the event of a tie
        Format is a dict where the key is the node_id and the value is an integer for the node degree
        e.g. {'a': 8}
        or {'a': 22, 'b': 22}

        Degrees are counted from self.edges only, so a graph without edges yields {}.
        """
        degrees = {}
        for edge in self.edges:
            for node_id in edge:
                degrees[node_id] = degrees.get(node_id, 0) + 1

        # max() on an empty sequence raises ValueError, so handle the edgeless graph first.
        if not degrees:
            return {}

        highest = max(degrees.values())
        # Keep every node that reaches the maximum so ties are all reported.
        return {node_id: degree for node_id, degree in degrees.items() if degree == highest}

    def print_nodes(self):
        """
        No further implementation required
        May be used for de-bugging if necessary
        """
        print(self.nodes)

    def print_edges(self):
        """
        No further implementation required
        May be used for de-bugging if necessary
        """
        print(self.edges)

    # Do not modify
    def write_edges_file(self, path="edges.csv")->None:
        """
        write all edges out as .csv
        :param path: string
        :return: None
        """
        edges_path = path
        edges_file = open(edges_path, 'w', encoding='utf-8')

        edges_file.write("source" + "," + "target" + "\n")

        for e in self.edges:
            edges_file.write(e[0] + "," + e[1] + "\n")

        edges_file.close()
        print("finished writing edges to csv")

    # Do not modify
    def write_nodes_file(self, path="nodes.csv")->None:
        """
        write all nodes out as .csv
        :param path: string
        :return: None
        """
        nodes_path = path
        nodes_file = open(nodes_path, 'w', encoding='utf-8')

        nodes_file.write("id,name" + "\n")
        for n in self.nodes:
            nodes_file.write(n[0] + "," + n[1] + "\n")
        nodes_file.close()
        print("finished writing nodes to csv")
class TMDBAPIUtils:
    """
    Small client for the two TMDb v3 endpoints used to build the co-actor graph:
    movie credits (cast of a movie) and person movie credits (movies of an actor).
    """

    # Do not modify
    def __init__(self, api_key:str):
        self.api_key=api_key

    def _fetch_json(self, path: str):
        """
        GET https://api.themoviedb.org/3/<path> with the api key and language=en-US.

        :param path: endpoint path without leading slash, e.g. 'movie/100/credits'
        :return: tuple (status, payload) where payload is the decoded JSON body,
                 or None when the status is not 200
        :raises: whatever urllib.request.urlopen raises (it raises on HTTP error
                 responses rather than returning them)
        """
        url = 'https://api.themoviedb.org/3/' + path + '?api_key=' + self.api_key + '&language=en-US'
        req = urllib.request.Request(url, headers={
            'Accept': 'application/json',
        })
        # The context manager closes the connection even if decoding fails.
        with urllib.request.urlopen(req) as response:
            if response.status != 200:
                return response.status, None
            return response.status, json.load(response)

    @staticmethod
    def _select_cast(cast, limit=None, exclude_ids=None):
        """Apply the exclude/limit rules to raw cast dicts and strip commas from names."""
        if exclude_ids is not None:
            cast = [member for member in cast if member['id'] not in exclude_ids]
        if limit is not None:
            # Filter on the 'order' attribute instead of slicing, so an excluded
            # member is not back-filled by someone outside the limit range.
            cast = [member for member in cast if member['order'] < limit]
        # Commas would add extra columns to nodes.csv; copy each dict rather than mutate the input.
        return [dict(member, name=member['name'].replace(',', '')) for member in cast]

    @staticmethod
    def _select_credits(raw_credits, vote_avg_threshold=None):
        """Project raw credits to id/title/vote_avg dicts, keeping those at or above the threshold."""
        return [
            {
                "id": credit["id"],
                "title": credit["title"],
                "vote_avg": credit["vote_average"],
            }
            for credit in raw_credits
            if vote_avg_threshold is None or credit["vote_average"] >= vote_avg_threshold
        ]

    def get_movie_cast(self, movie_id:str, limit:int=None, exclude_ids:list=None) -> list:
        """
        Get the movie cast for a given movie id, with optional parameters to exclude a cast member
        from being returned and/or to limit the number of returned cast members
        documentation url: https://developers.themoviedb.org/3/movies/get-movie-credits

        :param string movie_id: a movie_id
        :param list exclude_ids: a list of ints containing ids (not cast_ids) of cast members that should be excluded from the returned result
            e.g., if exclude_ids are [353, 455] then exclude these from any result.
        :param integer limit: maximum number of returned cast members by their 'order' attribute
            e.g., limit=5 will attempt to return the 5 cast members having 'order' attribute values between 0-4
            If after excluding, there are fewer cast members than the specified limit, then return the remaining members (excluding the ones whose order values are outside the limit range).
            If cast members with 'order' attribute in the specified limit range have been excluded, do not include more cast members to reach the limit.
            If after excluding, the limit is not specified, then return all remaining cast members.
            e.g., if limit=5 and the actor whose id corresponds to cast member with order=1 is to be excluded,
            return cast members with order values [0, 2, 3, 4], not [0, 2, 3, 4, 5]
        :rtype: list
            return a list of dicts, one dict per cast member with the following structure:
                [{'id': '97909' # the id of the cast member
                'character': 'John Doe' # the name of the character played
                'credit_id': '52fe4249c3a36847f8012927' # id of the credit, ...}, ... ]
                Note that this is an example of the structure of the list and some of the fields returned by the API.
                The result of the API call will include many more fields for each cast member.
            Returns None (after printing the status) when the response status is not 200.
            A response without a 'cast' entry yields an empty list.
        """
        status, payload = self._fetch_json('movie/' + str(movie_id) + '/credits')
        if payload is None:
            print(str(status))
            return None
        return self._select_cast(payload.get('cast', []), limit, exclude_ids)

    def get_movie_credits_for_person(self, person_id:str, vote_avg_threshold:float=None)->list:
        """
        Using the TMDb API, get the movie credits for a person serving in a cast role
        documentation url: https://developers.themoviedb.org/3/people/get-person-movie-credits

        :param string person_id: the id of a person
        :param vote_avg_threshold: optional parameter to return the movie credit if it is >=
            the specified threshold.
            e.g., if the vote_avg_threshold is 5.0, then only return credits with a vote_avg >= 5.0
        :rtype: list
            return a list of dicts, with each dict having 'id', 'title', and 'vote_avg' keys,
            one dict per movie credit with the following structure:
                [{'id': '97909' # the id of the movie
                'title': 'Long, Stock and Two Smoking Barrels' # the title (not original title) of the credit
                'vote_avg': 5.0 # the float value of the vote average value for the credit}, ... ]
            Returns None (after printing the status) when the response status is not 200.
            A response without a 'cast' entry yields an empty list.
        """
        status, payload = self._fetch_json('person/' + str(person_id) + '/movie_credits')
        if payload is None:
            print("ERROR: " + str(status))
            return None
        return self._select_credits(payload.get('cast', []), vote_avg_threshold)
#############################################################################################################################
#
# BUILDING YOUR GRAPH
#
# Working with the API: See use of http.client: https://docs.python.org/3/library/http.client.html#examples
#
# Using TMDb's API, build a co-actor network for the actor's/actress' highest rated movies
# In this graph, each node represents an actor
# An edge between any two nodes indicates that the two actors/actresses acted in a movie together
# i.e., they share a movie credit.
# e.g., An edge between Samuel L. Jackson and Robert Downey Jr. indicates that they have acted in one
# or more movies together.
#
# For this assignment, we are interested in a co-actor network of highly rated movies; specifically,
# we only want the top 3 co-actors in each movie credit of an actor having a vote average >= 8.0.
# Build your co-actor graph on the actor 'Laurence Fishburne' w/ person_id 2975.
#
# You will need to add extra functions or code to accomplish this. We will not directly call or explicitly grade your
# algorithm. We will instead measure the correctness of your output by evaluating the data in your nodes.csv and edges.csv files.
#
# GRAPH SIZE
# With each iteration of your graph build, the number of nodes and edges grows approximately at an exponential rate.
# Our testing indicates growth approximately equal to e^2x.
# Since the TMDB API is a live database, the number of nodes / edges in the final graph will vary slightly depending on when
# you execute your graph building code. We take this into account by rebuilding the solution graph every few days and
# updating the auto-grader. We establish a bound for lowest & highest encountered numbers of nodes and edges with a
# margin of +/- 100 for nodes and +/- 150 for edges. e.g., The allowable range of nodes is set to:
#
# Min allowable nodes = min encountered nodes - 100
# Max allowable nodes = max encountered nodes + 100
#
# e.g., if the minimum encountered nodes = 507 and the max encountered nodes = 526, then the min/max range is 407-626
# The same method is used to calculate the edges with the exception of using the aforementioned edge margin.
# ----------------------------------------------------------------------------------------------------------------------
# BEGIN BUILD CO-ACTOR NETWORK
#
# INITIALIZE GRAPH
# Initialize a Graph object with a single node representing Laurence Fishburne
#
# BEGIN BUILD BASE GRAPH:
# Find all of Laurence Fishburne's movie credits that have a vote average >= 8.0
# FOR each movie credit:
# | get the movie cast members having an 'order' value between 0-2 (these are the co-actors)
# |
# | FOR each movie cast member:
# | | using graph.add_node(), add the movie cast member as a node (keep track of all new nodes added to the graph)
# | | using graph.add_edge(), add an edge between the Laurence Fishburne (actor) node
# | | and each new node (co-actor/co-actress)
# | END FOR
# END FOR
# END BUILD BASE GRAPH
#
#
# BEGIN LOOP - DO 2 TIMES:
# IF first iteration of loop:
# | nodes = The nodes added in the BUILD BASE GRAPH (this excludes the original node of Laurence Fishburne!)
# ELSE
# | nodes = The nodes added in the previous iteration:
# ENDIF
#
# FOR each node in nodes:
# | get the movie credits for the actor that have a vote average >= 8.0
# |
# | FOR each movie credit:
# | | try to get the 3 movie cast members having an 'order' value between 0-2
# | |
# | | FOR each movie cast member:
# | | | IF the node doesn't already exist:
# | | | | add the node to the graph (track all new nodes added to the graph)
# | | | ENDIF
# | | |
# | | | IF the edge does not exist:
# | | | | add an edge between the node (actor) and the new node (co-actor/co-actress)
# | | | ENDIF
# | | END FOR
# | END FOR
# END FOR
# END LOOP
#
# Your graph should not have any duplicate edges or nodes
# Write out your finished graph as a nodes file and an edges file using:
# graph.write_edges_file()
# graph.write_nodes_file()
#
# END BUILD CO-ACTOR NETWORK
# ----------------------------------------------------------------------------------------------------------------------
# Exception handling and best practices
# - You should use the param 'language=en-US' in all API calls to avoid encoding issues when writing data to file.
# - If the actor name has a comma char ',' it should be removed to prevent extra columns from being inserted into the .csv file
# - Some movie_credits do not return cast data. Handle this situation by skipping these instances.
# - While The TMDb API does not have a rate-limiting scheme in place, consider that making hundreds / thousands of calls
# can occasionally result in timeout errors. If you continue to experience 'ConnectionRefusedError : [Errno 61] Connection refused',
# - wait a while and then try again. It may be necessary to insert periodic sleeps when you are building your graph.
def return_name()->str:
    """
    Provide the GT Username as a string (for example: tlou31).
    This is the username, never the 9 digit GTId.
    """
    gt_username = "tlou31"
    return gt_username
# You should modify __main__ as you see fit to build/test your graph using the TMDBAPIUtils & Graph classes.
# Some boilerplate/sample code is provided for demonstration. We will not call __main__ during grading.
if __name__ == "__main__":
    import os

    # The TMDb API key is a credential, so it is read from the environment rather
    # than being committed to source control.
    api_key = os.environ.get('TMDB_API_KEY')
    if not api_key:
        raise SystemExit('Set the TMDB_API_KEY environment variable to your TMDb API key.')

    root_id = '2975'
    graph = Graph()
    graph.add_node(id=root_id, name='Laurence Fishburne')
    tmdb_api_utils = TMDBAPIUtils(api_key=api_key)

    # Ids already present in the graph; used to tell which nodes are new in each pass.
    seen_ids = {root_id}

    def add_co_actors(actor_id):
        """
        Connect actor_id to the cast members with 'order' 0-2 of each of the actor's
        movie credits having a vote average >= 8.0.

        :return: list of (id, name) nodes that were added to the graph by this call
        """
        added = []
        movie_credits = tmdb_api_utils.get_movie_credits_for_person(person_id=actor_id, vote_avg_threshold=8.0)
        # The API helpers return None on a non-200 response; skip those instances.
        for movie in movie_credits or []:
            movie_cast = tmdb_api_utils.get_movie_cast(movie_id=movie['id'], limit=3)
            for cast_member in movie_cast or []:
                member_id = str(cast_member['id'])
                if member_id not in seen_ids:
                    seen_ids.add(member_id)
                    graph.add_node(member_id, cast_member['name'])
                    added.append((member_id, cast_member['name']))
                # add_edge ignores self-loops and edges that already exist.
                graph.add_edge(source=str(actor_id), target=member_id)
        return added

    # build base graph: every qualifying credit and every top-3 cast member of the root actor
    current_nodes = add_co_actors(root_id)

    # expand twice, each time starting only from the nodes added in the previous pass
    for i in range(2):
        print('running loop ' + str(i))
        next_nodes = []
        for node in current_nodes:
            next_nodes.extend(add_co_actors(node[0]))
        current_nodes = next_nodes

    print(f"Edges: {graph.total_edges()}")
    print(f"Nodes: {graph.total_nodes()}")

    graph.write_edges_file()
    graph.write_nodes_file()

    # If you have already built & written out your graph, you could read in your nodes & edges files
    # to perform testing on your graph.
    graph = Graph(with_edges_file="edges.csv", with_nodes_file="nodes.csv")
    print(graph.max_degree_nodes())

BIN
tunmnlu/task_1/Q1/bundle/Q1_problem.jpg (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,123 @@
source,target
2975,6384
6384,530
6384,1357063
6384,40644
6384,1779512
6384,21127
6384,1692944
6384,64
6384,1776
6384,38803
6384,20215
6384,2130
2975,530
2975,8349
2975,8351
2975,8354
2975,1107983
2975,52057
2975,110380
2975,18471
2975,74611
2975,1407498
530,529
530,532
1357063,40644
1357063,3894
1357063,1892
1357063,19498
40644,1779512
40644,2710
40644,4587
40644,8691
40644,72129
40644,189111
40644,23964
1779512,1357063
21127,1692944
21127,9827
21127,143103
21127,21708
21127,72983
21127,124909
21127,4038
21127,18992
21127,2047
21127,4730
21127,9464
21127,14409
64,1776
64,1415341
64,1267653
64,7467
64,1519399
64,1003
64,524
64,38803
64,3894
64,1810
64,3895
64,958722
64,41381
64,10980
64,10989
64,10990
64,20215
64,2130
64,219479
64,1101349
64,1407495
64,3361135
64,1812
64,2037
64,5081
64,1892
1776,2559324
1776,2559325
1776,27888
1776,2630
1776,958722
1776,41381
1776,38803
1776,151007
1776,1769
1776,19772
1776,1
1776,7879
1776,20215
1776,2130
1776,1032
1776,1006721
1776,11483
1776,8349
1776,8351
1776,8354
38803,151007
38803,5953
38803,325
38803,3125
38803,3084
38803,1158
38803,3085
38803,3087
38803,3092
20215,2130
20215,117669
20215,558466
20215,61555
20215,10017
20215,5563
20215,26557
20215,18688
20215,8437
20215,589
2130,16431
2130,2203
2130,3026
2130,32
2130,12132
2130,932719
2130,2127
2130,2128
2130,2675
1 source target
2 2975 6384
3 6384 530
4 6384 1357063
5 6384 40644
6 6384 1779512
7 6384 21127
8 6384 1692944
9 6384 64
10 6384 1776
11 6384 38803
12 6384 20215
13 6384 2130
14 2975 530
15 2975 8349
16 2975 8351
17 2975 8354
18 2975 1107983
19 2975 52057
20 2975 110380
21 2975 18471
22 2975 74611
23 2975 1407498
24 530 529
25 530 532
26 1357063 40644
27 1357063 3894
28 1357063 1892
29 1357063 19498
30 40644 1779512
31 40644 2710
32 40644 4587
33 40644 8691
34 40644 72129
35 40644 189111
36 40644 23964
37 1779512 1357063
38 21127 1692944
39 21127 9827
40 21127 143103
41 21127 21708
42 21127 72983
43 21127 124909
44 21127 4038
45 21127 18992
46 21127 2047
47 21127 4730
48 21127 9464
49 21127 14409
50 64 1776
51 64 1415341
52 64 1267653
53 64 7467
54 64 1519399
55 64 1003
56 64 524
57 64 38803
58 64 3894
59 64 1810
60 64 3895
61 64 958722
62 64 41381
63 64 10980
64 64 10989
65 64 10990
66 64 20215
67 64 2130
68 64 219479
69 64 1101349
70 64 1407495
71 64 3361135
72 64 1812
73 64 2037
74 64 5081
75 64 1892
76 1776 2559324
77 1776 2559325
78 1776 27888
79 1776 2630
80 1776 958722
81 1776 41381
82 1776 38803
83 1776 151007
84 1776 1769
85 1776 19772
86 1776 1
87 1776 7879
88 1776 20215
89 1776 2130
90 1776 1032
91 1776 1006721
92 1776 11483
93 1776 8349
94 1776 8351
95 1776 8354
96 38803 151007
97 38803 5953
98 38803 325
99 38803 3125
100 38803 3084
101 38803 1158
102 38803 3085
103 38803 3087
104 38803 3092
105 20215 2130
106 20215 117669
107 20215 558466
108 20215 61555
109 20215 10017
110 20215 5563
111 20215 26557
112 20215 18688
113 20215 8437
114 20215 589
115 2130 16431
116 2130 2203
117 2130 3026
118 2130 32
119 2130 12132
120 2130 932719
121 2130 2127
122 2130 2128
123 2130 2675

View File

@@ -0,0 +1,103 @@
id,name
2975,Laurence Fishburne
6384,Keanu Reeves
530,Carrie-Anne Moss
1357063,Darrin Prescott
40644,Chad Stahelski
1779512,Jackson Spidell
21127,Bobby Cannavale
1692944,Heidi Schreck
64,Gary Oldman
1776,Francis Ford Coppola
38803,Roman Coppola
20215,Billy Campbell
2130,Cary Elwes
8349,Martin Sheen
8351,Frederic Forrest
8354,Albert Hall
1107983,Martin Luther King Jr.
52057,Obba Babatundé
110380,Colin Powell
18471,Anthony Anderson
74611,Tracee Ellis Ross
1407498,Marsai Martin
529,Guy Pearce
532,Joe Pantoliano
3894,Christian Bale
1892,Matt Damon
19498,Jon Bernthal
2710,James Cameron
4587,Halle Berry
8691,Zoe Saldaña
72129,Jennifer Lawrence
189111,Suzanne Collins
23964,Gary Ross
9827,Rose Byrne
143103,Krew Boylan
21708,Tomas Milian
72983,Manny Pérez
124909,Danny Hoch
4038,Susan Sarandon
18992,Aidan Quinn
2047,Danny Glover
4730,Emmy Rossum
9464,Harry Lennix
14409,David Schwimmer
1415341,Kazuhiro Tsuji
1267653,Chet Zar
7467,David Fincher
1519399,Erik Messerschmidt
1003,Jean Reno
524,Natalie Portman
1810,Heath Ledger
3895,Michael Caine
958722,Eiko Ishioka
41381,Sadie Frost
10980,Daniel Radcliffe
10989,Rupert Grint
10990,Emma Watson
219479,Criss Angel
1101349,Steve Aoki
1407495,Miles Brown
3361135,Peter Kent
1812,Michelle Williams
2037,Cillian Murphy
5081,Emily Blunt
2559324,Beth Lane
2559325,Lea Madda
27888,Raúl Juliá
2630,Nastassja Kinski
151007,Peter Ramsey
1769,Sofia Coppola
19772,Paul Rassam
1,George Lucas
7879,John Lasseter
1032,Martin Scorsese
1006721,Charles Scorsese
11483,Catherine Scorsese
5953,Spike Jonze
325,Eminem
3125,Madonna
3084,Marlon Brando
1158,Al Pacino
3085,James Caan
3087,Robert Duvall
3092,Diane Keaton
117669,Portia Doubleday
558466,Alex Russell
61555,Haley Ramm
10017,Charlton Heston
5563,James Coburn
26557,Ferdy Mayne
18688,Harry Connick Jr.
8437,Teri Garr
589,Daryl Hannah
16431,Sam Elliott
2203,Neal McDonough
3026,Rob Reiner
32,Robin Wright
12132,Michael Rooker
932719,Jeff Gordon
2127,James Wan
2128,Leigh Whannell
2675,Darren Lynn Bousman
1 id name
2 2975 Laurence Fishburne
3 6384 Keanu Reeves
4 530 Carrie-Anne Moss
5 1357063 Darrin Prescott
6 40644 Chad Stahelski
7 1779512 Jackson Spidell
8 21127 Bobby Cannavale
9 1692944 Heidi Schreck
10 64 Gary Oldman
11 1776 Francis Ford Coppola
12 38803 Roman Coppola
13 20215 Billy Campbell
14 2130 Cary Elwes
15 8349 Martin Sheen
16 8351 Frederic Forrest
17 8354 Albert Hall
18 1107983 Martin Luther King Jr.
19 52057 Obba Babatundé
20 110380 Colin Powell
21 18471 Anthony Anderson
22 74611 Tracee Ellis Ross
23 1407498 Marsai Martin
24 529 Guy Pearce
25 532 Joe Pantoliano
26 3894 Christian Bale
27 1892 Matt Damon
28 19498 Jon Bernthal
29 2710 James Cameron
30 4587 Halle Berry
31 8691 Zoe Saldaña
32 72129 Jennifer Lawrence
33 189111 Suzanne Collins
34 23964 Gary Ross
35 9827 Rose Byrne
36 143103 Krew Boylan
37 21708 Tomas Milian
38 72983 Manny Pérez
39 124909 Danny Hoch
40 4038 Susan Sarandon
41 18992 Aidan Quinn
42 2047 Danny Glover
43 4730 Emmy Rossum
44 9464 Harry Lennix
45 14409 David Schwimmer
46 1415341 Kazuhiro Tsuji
47 1267653 Chet Zar
48 7467 David Fincher
49 1519399 Erik Messerschmidt
50 1003 Jean Reno
51 524 Natalie Portman
52 1810 Heath Ledger
53 3895 Michael Caine
54 958722 Eiko Ishioka
55 41381 Sadie Frost
56 10980 Daniel Radcliffe
57 10989 Rupert Grint
58 10990 Emma Watson
59 219479 Criss Angel
60 1101349 Steve Aoki
61 1407495 Miles Brown
62 3361135 Peter Kent
63 1812 Michelle Williams
64 2037 Cillian Murphy
65 5081 Emily Blunt
66 2559324 Beth Lane
67 2559325 Lea Madda
68 27888 Raúl Juliá
69 2630 Nastassja Kinski
70 151007 Peter Ramsey
71 1769 Sofia Coppola
72 19772 Paul Rassam
73 1 George Lucas
74 7879 John Lasseter
75 1032 Martin Scorsese
76 1006721 Charles Scorsese
77 11483 Catherine Scorsese
78 5953 Spike Jonze
79 325 Eminem
80 3125 Madonna
81 3084 Marlon Brando
82 1158 Al Pacino
83 3085 James Caan
84 3087 Robert Duvall
85 3092 Diane Keaton
86 117669 Portia Doubleday
87 558466 Alex Russell
88 61555 Haley Ramm
89 10017 Charlton Heston
90 5563 James Coburn
91 26557 Ferdy Mayne
92 18688 Harry Connick Jr.
93 8437 Teri Garr
94 589 Daryl Hannah
95 16431 Sam Elliott
96 2203 Neal McDonough
97 3026 Rob Reiner
98 32 Robin Wright
99 12132 Michael Rooker
100 932719 Jeff Gordon
101 2127 James Wan
102 2128 Leigh Whannell
103 2675 Darren Lynn Bousman

View File

@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Runs Q1.py from a clean state and reports the line counts of the CSV files
# present afterwards.

# -e: stop at the first failing command; -x: echo each command before running it.
set -e -x

# Remove CSV files left over from an earlier run.
rm -rf *.csv

# Presumably writes edges.csv and nodes.csv into the current directory.
python ./Q1.py

# Optional repeat runs, keeping each run's output under a separate name:
# mv edges.csv edges_try1.csv
# mv nodes.csv nodes_try1.csv
# python ./Q1.py
# mv edges.csv edges_try2.csv
# mv nodes.csv nodes_try2.csv
# python ./Q1.py
# mv edges.csv edges_try3.csv
# mv nodes.csv nodes_try3.csv

# Line count per CSV file (header line included).
wc -l *.csv

View File

@@ -0,0 +1,10 @@
import os,sys
# Sanity check for edges.csv: print the number of edge rows, then the number of
# distinct rows. The two numbers are equal when no row appears twice.
# Rows are compared as text, so a reversed pair ("a,b" vs "b,a") is not detected.
with open('./edges.csv', 'r', encoding='utf-8') as fi:
    # Strip line endings and drop the first line (the header).
    test = [line.strip() for line in fi][1:]

print(len(test))
print(len(set(test)))