This commit is contained in:
louiscklaw
2025-02-01 02:10:52 +08:00
commit faf8366e53
40 changed files with 32528 additions and 0 deletions

31
.gitattributes vendored Normal file
View File

@@ -0,0 +1,31 @@
# Video, archive and image formats: handled by the `lfs` filter/diff/merge
# drivers (Git LFS).
*.mp4 filter=lfs diff=lfs merge=lfs
*.zip filter=lfs diff=lfs merge=lfs
*.7z filter=lfs diff=lfs merge=lfs
*.tar.gz filter=lfs diff=lfs merge=lfs
*.jpg filter=lfs diff=lfs merge=lfs
*.png filter=lfs diff=lfs merge=lfs
*.avif filter=lfs diff=lfs merge=lfs
*.webm filter=lfs diff=lfs merge=lfs
*.mkv filter=lfs diff=lfs merge=lfs
# Documents: diffed with the `astextplain` diff driver. Patterns are
# case-sensitive, so both lower- and upper-case extensions are listed.
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
# More image and audio formats (both cases): handled by the `lfs` drivers.
*.gif filter=lfs diff=lfs merge=lfs
*.GIF filter=lfs diff=lfs merge=lfs
*.bmp filter=lfs diff=lfs merge=lfs
*.BMP filter=lfs diff=lfs merge=lfs
*.tiff filter=lfs diff=lfs merge=lfs
*.TIFF filter=lfs diff=lfs merge=lfs
*.wav filter=lfs diff=lfs merge=lfs
*.WAV filter=lfs diff=lfs merge=lfs
# Log files are also routed through the `lfs` drivers.
*.log filter=lfs diff=lfs merge=lfs

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
**/~*.*

8
gitUpdate.sh Executable file
View File

@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Stage everything under the current directory, commit it with a fixed
# message, and push.
#
# -e: stop at the first failing command; -x: echo each command as it runs.
set -ex

git add .

# `git commit` exits non-zero when nothing is staged, which under `set -e`
# would abort the script before `git push` and leave earlier local commits
# unpushed. Only commit when the index actually holds changes.
if ! git diff --cached --quiet; then
  git commit -m'update,'
fi

git push

5
meta.md Normal file
View File

@@ -0,0 +1,5 @@
---
tags: MSc, Alan Ho
---
https://filetransfer.io/data-package/qrjzX2rx#link

BIN
quotation2/assignment 3.pdf Normal file

Binary file not shown.

32
quotation2/notes.md Normal file
View File

@@ -0,0 +1,32 @@
V:
2bcd 我自己本身做咗,但係覺得唔係好啱
3A就唔識做
最後第四題都唔太識🙂‍↔️
L:
Q1: 你想我做左佢算定係要講埋你知點做?
你可以做咗先,然之後我自己理解吓
Q2: 幾時要?
我下個禮拜四(21/Nov) 2100交
咦,咁 2b,c,d 我洗唔洗做?
點為之 3A?
第4題我做?
Sorry 應該係Q3 part 2: who are the top4 coaches leading the most
number of Athletes ?
Q4 就你做
Q2 bcd就睇價錢再決定🙏🏻🙏🏻
Q2 HKD350
Q3 2 HKD 200
1. Read the 4 csvs to be your pandas data frame into your kernel.
2. In athletes.csv and coaches.xlsx, who are the top 4 coaches leading the most
number of Athletes ?
Q4 HKD 300

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

BIN
task1/archive/caution.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,10 @@
[
{
"id": "1",
"name": "helloworld"
},
{
"id": "2",
"name": "hellohell"
}
]

13310
task1/archive/js_result.json Normal file

File diff suppressed because it is too large Load Diff

13184
task1/archive/js_result_o.json Normal file

File diff suppressed because it is too large Load Diff

1
task1/archive/notes.md Normal file
View File

@@ -0,0 +1 @@
https://colab.research.google.com/drive/1mdhOdWbnqwfk5RalwNqEj6IBnV7_LUia#scrollTo=cSea-HnRSxKV

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

77
task1/archive/spices.json Normal file
View File

@@ -0,0 +1,77 @@
{
"HGV": { "space": 0, "spaceDIS": 0, "spaceEV": 0, "spaceUNL": 0 },
"LGV": { "space": 0, "spaceDIS": 0, "spaceEV": 0, "spaceUNL": 0 },
"address": {
"buildingName": "Kai Tak Cruise Terminal",
"buildingNo": "33",
"dcDistrict": "Kwun Tong District",
"floor": "1",
"region": "KLN",
"streetName": "Shing Fung Road",
"subDistrict": "Kowloon Bay"
},
"carpark_Type": "multi-storey",
"coach": { "space": 0, "spaceDIS": 0, "spaceEV": 0, "spaceUNL": 0 },
"contactNo": "+852 3465 6888, 09:30-18:00 Mon-Fri, except public holiday",
"creationDate": "2016-08-16 10:03:56",
"displayAddress": "1st floor, Kai Tak Cruise Terminal, 33 Shing Fung Road, Kowloon Bay, KLN",
"district": "Kwun Tong District",
"facilities": ["disabilities", "evCharger"],
"gracePeriods": [{ "minutes": 10 }],
"heightLimits": [{ "height": 2 }],
"lang": "en_US",
"latitude": 22.3062049,
"longitude": 114.21309471,
"modifiedDate": "2024-01-03 13:28:25",
"motorCycle": { "space": 0, "spaceDIS": 0, "spaceEV": 0, "spaceUNL": 0 },
"name": "Kai Tak Cruise Terminal Car Park 1",
"nature": "commercial",
"openingHours": [
{
"excludePublicHoliday": "False",
"periodEnd": "23:00",
"periodStart": "07:00",
"weekdays": ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN", "PH"]
}
],
"opening_status": "OPEN",
"park_Id": "10",
"paymentMethods": ["octopus", "visa"],
"privateCar": {
"hourlyCharges": [
{
"covered": "covered",
"excludePublicHoliday": "True",
"periodEnd": "23:00",
"periodStart": "07:00",
"price": 15,
"remark": "",
"type": "hourly",
"usageMinimum": 1,
"weekdays": ["MON", "TUE", "WED", "THU", "FRI"]
},
{
"covered": "covered",
"excludePublicHoliday": "False",
"periodEnd": "23:00",
"periodStart": "07:00",
"price": 20,
"remark": "",
"type": "hourly",
"usageMinimum": 1,
"weekdays": ["SAT", "SUN", "PH"]
}
],
"space": 112,
"spaceDIS": 0,
"spaceEV": 0,
"spaceUNL": 0
},
"publishedDate": "2018-12-12 12:22:47",
"renditionUrls": {
"banner": "https://sps-opendata.pilotsmartke.gov.hk/rest/getRendition/fs-1%3A693265207413252869411532657339312395903827562313.JPG/banner.png",
"square": "https://sps-opendata.pilotsmartke.gov.hk/rest/getRendition/fs-1%3A693265207413252869411532657339312395903827562313.JPG/square.png",
"thumbnail": "https://sps-opendata.pilotsmartke.gov.hk/rest/getRendition/fs-1%3A693265207413252869411532657339312395903827562313.JPG/thumbnail.png"
},
"website": "http://www.kaitakcruiseterminal.com.hk/"
}

36
task1/archive/task1.py Normal file
View File

@@ -0,0 +1,36 @@
# https://api.data.gov.hk/v1/carpark-info-vacancy?data=vacancy&vehicleTypes=privateCar
import requests
from pprint import pprint
import json
# Query the car-park endpoint twice: once for private-car vacancy data and
# once without query parameters. Keep the record from the second response for
# every park_Id that appears in the vacancy response, then save the result.
url = "https://api.data.gov.hk/v1/carpark-info-vacancy"
params = {
    "data": "vacancy",
    "vehicleTypes": "privateCar",
}

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# failure inside .json() or on the missing 'results' key.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
js_result_vacancy = response.json()['results']

response = requests.get(url, timeout=30)
response.raise_for_status()
js_result_all = response.json()['results']

# Index the second response by park_Id once so the join below is linear
# instead of a nested scan. setdefault keeps the first record per id.
first_by_park_id = {}
for jra in js_result_all:
    first_by_park_id.setdefault(jra['park_Id'], jra)

# Result order follows the vacancy response.
output = [
    first_by_park_id[jrv['park_Id']]
    for jrv in js_result_vacancy
    if jrv['park_Id'] in first_by_park_id
]

# An empty join prints None rather than crashing on output[0].
js_row = output[0] if output else None
pprint(js_row)
pprint("length of json: "+str(len(output)))

# Store the joined records into js_result_o.json
with open('js_result_o.json', 'w', encoding='utf-8') as outfile:
    json.dump(output, outfile, indent=2)

task1_json = output

View File

@@ -0,0 +1,20 @@
# https://api.data.gov.hk/v1/carpark-info-vacancy?data=vacancy&vehicleTypes=privateCar
import requests
from pprint import pprint
import json

# Download the car-park endpoint's response (no query parameters), show the
# first record and the record count, and save everything to js_result.json.
url = "https://api.data.gov.hk/v1/carpark-info-vacancy"

# Fail fast on a stalled connection or an HTTP error status.
response = requests.get(url, timeout=30)
response.raise_for_status()
js_result = response.json()['results']

# An empty result list prints None rather than raising IndexError.
js_row = js_result[0] if js_result else None
pprint(js_row)
pprint("length of json: "+str(len(js_result)))

# Store js_result into js_result.json
with open('js_result.json', 'w', encoding='utf-8') as outfile:
    json.dump(js_result, outfile, indent=2)

View File

@@ -0,0 +1,18 @@
# https://api.data.gov.hk/v1/carpark-info-vacancy?data=vacancy&vehicleTypes=privateCar
import requests
from pprint import pprint
# Download private-car vacancy data from the car-park endpoint and show the
# first record together with the record count.
url = "https://api.data.gov.hk/v1/carpark-info-vacancy"
params = {
    "data": "vacancy",
    "vehicleTypes": "privateCar",
}

# Fail fast on a stalled connection or an HTTP error status.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
js_result = response.json()['results']

# An empty result list prints None rather than raising IndexError.
js_row = js_result[0] if js_result else None
pprint(js_row)
pprint("length of json: "+str(len(js_result)))

17
task1/archive/task2A.py Normal file
View File

@@ -0,0 +1,17 @@
import requests
from pprint import pprint
# Download the car-park endpoint's response (no query parameters) and show
# the first record together with the record count.
url = "https://api.data.gov.hk/v1/carpark-info-vacancy"

# Fail fast on a stalled connection or an HTTP error status.
response = requests.get(url, timeout=30)
response.raise_for_status()
js_result = response.json()['results']

# An empty result list yields an empty record rather than raising
# IndexError, so later dict-style lookups on js_row still work.
js_row = js_result[0] if js_result else {}
pprint(js_row)
pprint("length of json: "+str(len(js_result)))
def get_url_image(js_row):
    """Return the "square" rendition URL of a car-park record, or None.

    Args:
        js_row: Car-park record (dict). Its optional 'renditionUrls' entry is
            expected to be a dict that may contain a 'square' key.

    Returns:
        The value stored under js_row['renditionUrls']['square'], or None
        when either level is missing or 'renditionUrls' is not a dict
        (for example a JSON null).
    """
    renditions = js_row.get('renditionUrls')
    # A present-but-null 'renditionUrls' would otherwise fail on .get().
    if not isinstance(renditions, dict):
        return None
    return renditions.get('square')
# Show the square rendition URL (or None) of js_row, the first fetched record.
pprint(get_url_image(js_row))

View File

@@ -0,0 +1,17 @@
# %pip install beautifulsoup4
from pprint import pprint
import requests
from bs4 import BeautifulSoup
def scrape_h1(url='http://example.com'):
    """Return the text of the first <h1> element of the page at *url*.

    Args:
        url: Address of the page to download. Defaults to
            'http://example.com'.

    Returns:
        The text of the first <h1> element, or None when the page has none.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status.
    """
    # Fail fast on a stalled connection or an HTTP error status.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    heading = soup.find('h1')
    # find() returns None when nothing matches; avoid AttributeError on .text.
    if heading is None:
        return None
    return heading.text
# NOTE(review): despite the name, ph_dates holds whatever scrape_h1() returns
# (the page's <h1> text), not a list of dates.
ph_dates = scrape_h1()
pprint(ph_dates)

31
task1/archive/task2B.py Normal file
View File

@@ -0,0 +1,31 @@
# %pip install beautifulsoup4
from pprint import pprint
import re
import requests
from bs4 import BeautifulSoup
# English month names in calendar order; list position + 1 is the month number.
txt_month = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]


def formatNumber(number):
    """Return *number* as text, left-padded with zeros to at least two characters.

    Accepts a string or any value str() can render (such as an int), so
    callers do not need to convert first.
    """
    return str(number).zfill(2)


def txtMonthToIntMonth(txtMonth):
    """Convert an English month name to its two-digit month number.

    Args:
        txtMonth: Month name spelled exactly as in txt_month, e.g. "March".

    Returns:
        The month number as a zero-padded string, "01" through "12".

    Raises:
        ValueError: if txtMonth is not one of the names in txt_month.
    """
    return formatNumber(txt_month.index(txtMonth) + 1)
def scrape_holiday(year=2024):
    """Scrape the gov.hk holiday page for *year* and return its dates.

    Args:
        year: Calendar year to fetch. Defaults to 2024.
            NOTE(review): assumes pages for other years use the same URL
            pattern and table markup as the 2024 page; confirm before
            relying on it.

    Returns:
        List of 'YYYY-MM-DD' strings, one per date cell, in page order.

    Raises:
        requests.RequestException: on connection problems, timeouts, or an
            HTTP error status.
        ValueError: if a cell's second word is not an English month name.
    """
    url = 'https://www.gov.hk/en/about/abouthk/holiday/{}.htm'.format(year)
    # Fail fast on a stalled connection or an HTTP error status.
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    date_cells = soup.select('tr>td.date')

    output = []
    # The first matching cell is skipped; presumably it is not a holiday
    # date on this page (TODO confirm against the page markup).
    for cell in date_cells[1:]:
        # split() with no argument ignores leading and repeated whitespace,
        # so the cell text is expected to read "<day> <month name>".
        parts = cell.text.split()
        if len(parts) < 2:
            continue
        day, month_name = parts[0], parts[1]
        output.append(
            '{}-{}-{}'.format(year, txtMonthToIntMonth(month_name), formatNumber(day))
        )
    pprint(output)
    return output
# Collect the scraped holiday dates and display them.
ph_dates = scrape_holiday()
pprint(ph_dates)

52
task1/archive/task2c.py Normal file
View File

@@ -0,0 +1,52 @@
# get_todayinfo(js_row, ph_dates)
# get_todayinfo(js_row, ph_dates, '2024-12-25')
import datetime
import json
from pprint import pprint
# Sample record read from spices.json; starts as an empty dict until loaded.
js_row = {}
with open('spices.json') as sample_file:
    js_row = json.load(sample_file)

# Dates (YYYY-MM-DD) to be treated as public holidays.
ph_dates = ['2024-10-18']
def get_todayinfo(js_row, ph_dates, today_date_str=None):
    """Summarise a car park's private-car charge period for one date.

    Args:
        js_row: Car-park record (dict). When present,
            js_row['privateCar']['space'] and
            js_row['privateCar']['hourlyCharges'] are read.
        ph_dates: Collection of 'YYYY-MM-DD' strings treated as public
            holidays.
        today_date_str: Date to evaluate, formatted 'YYYY-MM-DD'. Defaults to
            the current local date at the time of the call.

    Returns:
        dict with keys 'periodStart', 'periodEnd', 'price', 'space', 'today'
        and 'today_weekday'. The period and price come from the first hourly
        charge whose 'weekdays' list contains the date's weekday code ("PH"
        for a date in ph_dates). They stay '' / '' / 0 when none matches.

    Raises:
        ValueError: if today_date_str does not parse as '%Y-%m-%d'.
    """
    # Resolve the default on every call: a default expression written in the
    # signature is evaluated once, when the function is defined, and would
    # go stale in a long-lived session.
    if today_date_str is None:
        today_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    given_date = datetime.datetime.strptime(today_date_str, '%Y-%m-%d')

    # Same order as datetime.weekday(): Monday is 0.
    weekdays = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
    # A listed public holiday takes precedence over the calendar weekday.
    txt_weekday = "PH" if today_date_str in ph_dates else weekdays[given_date.weekday()]

    output = {
        'periodStart': '',
        'periodEnd': '',
        'price': 0,
        'space': 0,
        'today': today_date_str,
        'today_weekday': txt_weekday,
    }

    private_car = js_row.get('privateCar')
    # Some records carry 'privateCar' as a list rather than a dict; those
    # have no space or charge information to read here.
    if not isinstance(private_car, dict):
        return output

    if private_car.get('space') is not None:
        output['space'] = private_car['space']

    for charge in private_car.get('hourlyCharges') or []:
        if not isinstance(charge, dict):
            continue
        if txt_weekday in (charge.get('weekdays') or []):
            output['periodStart'] = charge.get('periodStart', '')
            output['periodEnd'] = charge.get('periodEnd', '')
            output['price'] = charge.get('price', 0)
            break
    return output
# Try get_todayinfo() with the default date, then with an explicit date that
# is not in ph_dates, then with the date that ph_dates lists.
pprint(get_todayinfo(js_row, ph_dates))
pprint(get_todayinfo(js_row, ph_dates,'2024-01-01'))
pprint(get_todayinfo(js_row, ph_dates,'2024-10-18'))

View File

@@ -0,0 +1,90 @@
import pandas as pd
import json
import datetime
from pprint import pprint
# Records read from js_result.json; an empty list until the file is parsed.
test_json = []
with open('js_result.json', 'r') as result_file:
    test_json = json.load(result_file)

# Dates (YYYY-MM-DD) to be treated as public holidays.
ph_dates = ['2024-10-18']

# Records read from task1.json; an empty list until the file is parsed.
task1_json = []
with open('task1.json', 'r') as task1_file:
    task1_json = json.load(task1_file)
def get_url_image(js_row):
    """Return the "square" rendition URL of a car-park record, or None.

    Args:
        js_row: Car-park record (dict). Its optional 'renditionUrls' entry is
            expected to be a dict that may contain a 'square' key.

    Returns:
        The value stored under js_row['renditionUrls']['square'], or None
        when either level is missing or 'renditionUrls' is not a dict
        (for example a JSON null).
    """
    renditions = js_row.get('renditionUrls')
    # A present-but-null 'renditionUrls' would otherwise fail on .get().
    if not isinstance(renditions, dict):
        return None
    return renditions.get('square')
def get_todayinfo(js_row, ph_dates, today_date_str=None):
    """Summarise a car park's private-car charge period for one date.

    Args:
        js_row: Car-park record (dict). When present,
            js_row['privateCar']['space'] and
            js_row['privateCar']['hourlyCharges'] are read.
        ph_dates: Collection of 'YYYY-MM-DD' strings treated as public
            holidays.
        today_date_str: Date to evaluate, formatted 'YYYY-MM-DD'. Defaults to
            the current local date at the time of the call.

    Returns:
        dict with keys 'periodStart', 'periodEnd', 'price', 'space', 'today'
        and 'today_weekday'. The period and price come from the first hourly
        charge whose 'weekdays' list contains the date's weekday code ("PH"
        for a date in ph_dates). They stay '' / '' / 0 when none matches.

    Raises:
        ValueError: if today_date_str does not parse as '%Y-%m-%d'.
    """
    # Resolve the default on every call: a default expression written in the
    # signature is evaluated once, when the function is defined, and would
    # go stale in a long-lived session.
    if today_date_str is None:
        today_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    given_date = datetime.datetime.strptime(today_date_str, '%Y-%m-%d')

    # Same order as datetime.weekday(): Monday is 0.
    weekdays = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
    # A listed public holiday takes precedence over the calendar weekday.
    txt_weekday = "PH" if today_date_str in ph_dates else weekdays[given_date.weekday()]

    output = {
        'periodStart': '',
        'periodEnd': '',
        'price': 0,
        'space': 0,
        'today': today_date_str,
        'today_weekday': txt_weekday,
    }

    private_car = js_row.get('privateCar')
    # Some records carry 'privateCar' as a list rather than a dict; those
    # have no space or charge information to read here.
    if not isinstance(private_car, dict):
        return output

    if private_car.get('space') is not None:
        output['space'] = private_car['space']

    for charge in private_car.get('hourlyCharges') or []:
        if not isinstance(charge, dict):
            continue
        if txt_weekday in (charge.get('weekdays') or []):
            output['periodStart'] = charge.get('periodStart', '')
            output['periodEnd'] = charge.get('periodEnd', '')
            output['price'] = charge.get('price', 0)
            break
    return output
# Merge each record with its get_todayinfo() summary (default date) and add
# the image URL returned by get_url_image().
hodgepodge = [
    {**record, **get_todayinfo(record, ph_dates), "url_image": get_url_image(record)}
    for record in test_json
]

# Fields kept in the flattened rows, in output order; a missing field
# becomes an empty string.
selected_keys = (
    "park_Id",
    "name",
    "displayAddress",
    "district",
    "latitude",
    "longitude",
    "opening_status",
    "facilities",
    "paymentMethods",
    "modifiedDate",
    'periodStart',
    "url_image",
    'periodEnd',
    'price',
    'space',
    'today',
    'today_weekday',
)
output = [
    {key: merged.get(key, "") for key in selected_keys}
    for merged in hodgepodge
]

pprint(output[0])

# Build a pandas DataFrame from the records loaded into test_json.
test_df = pd.DataFrame(test_json)

print("done")

94
task1/archive/task2d.py Normal file
View File

@@ -0,0 +1,94 @@
import pandas as pd
import json
import datetime
from pprint import pprint
# Records read from js_result.json; an empty list until the file is parsed.
test_json = []
with open('js_result.json', 'r') as result_file:
    test_json = json.load(result_file)

# Dates (YYYY-MM-DD) to be treated as public holidays.
ph_dates = ['2024-10-18']

# Records read from task1.json; an empty list until the file is parsed.
task1_json = []
with open('task1.json', 'r') as task1_file:
    task1_json = json.load(task1_file)
def get_url_image(js_row):
    """Return the "square" rendition URL of a car-park record, or None.

    Args:
        js_row: Car-park record (dict). Its optional 'renditionUrls' entry is
            expected to be a dict that may contain a 'square' key.

    Returns:
        The value stored under js_row['renditionUrls']['square'], or None
        when either level is missing or 'renditionUrls' is not a dict
        (for example a JSON null).
    """
    renditions = js_row.get('renditionUrls')
    # A present-but-null 'renditionUrls' would otherwise fail on .get().
    if not isinstance(renditions, dict):
        return None
    return renditions.get('square')
# DataFrame view of the records loaded from task1.json.
task1_df = pd.DataFrame(task1_json)
def get_todayinfo(js_row, ph_dates, today_date_str=None):
    """Summarise a car park's private-car charge period for one date.

    Args:
        js_row: Car-park record (dict). When present,
            js_row['privateCar']['space'] and
            js_row['privateCar']['hourlyCharges'] are read.
        ph_dates: Collection of 'YYYY-MM-DD' strings treated as public
            holidays.
        today_date_str: Date to evaluate, formatted 'YYYY-MM-DD'. Defaults to
            the current local date at the time of the call.

    Returns:
        dict with keys 'periodStart', 'periodEnd', 'price', 'space', 'today'
        and 'today_weekday'. The period and price come from the first hourly
        charge whose 'weekdays' list contains the date's weekday code ("PH"
        for a date in ph_dates). They stay '' / '' / 0 when none matches.

    Raises:
        ValueError: if today_date_str does not parse as '%Y-%m-%d'.
    """
    # Resolve the default on every call: a default expression written in the
    # signature is evaluated once, when the function is defined, and would
    # go stale in a long-lived session.
    if today_date_str is None:
        today_date_str = datetime.datetime.today().strftime('%Y-%m-%d')
    given_date = datetime.datetime.strptime(today_date_str, '%Y-%m-%d')

    # Same order as datetime.weekday(): Monday is 0.
    weekdays = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
    # A listed public holiday takes precedence over the calendar weekday.
    txt_weekday = "PH" if today_date_str in ph_dates else weekdays[given_date.weekday()]

    output = {
        'periodStart': '',
        'periodEnd': '',
        'price': 0,
        'space': 0,
        'today': today_date_str,
        'today_weekday': txt_weekday,
    }

    private_car = js_row.get('privateCar')
    # Some records carry 'privateCar' as a list rather than a dict; those
    # have no space or charge information to read here.
    if not isinstance(private_car, dict):
        return output

    if private_car.get('space') is not None:
        output['space'] = private_car['space']

    for charge in private_car.get('hourlyCharges') or []:
        if not isinstance(charge, dict):
            continue
        if txt_weekday in (charge.get('weekdays') or []):
            output['periodStart'] = charge.get('periodStart', '')
            output['periodEnd'] = charge.get('periodEnd', '')
            output['price'] = charge.get('price', 0)
            break
    return output
# Merge each task1.json record with its get_todayinfo() summary (default
# date) and add the image URL returned by get_url_image().
hodgepodge = [
    {**record, **get_todayinfo(record, ph_dates), "url_image": get_url_image(record)}
    for record in task1_json
]

# Fields kept in the flattened rows, in output order; a missing field
# becomes an empty string.
selected_keys = (
    "park_Id",
    "name",
    "displayAddress",
    "district",
    "latitude",
    "longitude",
    "opening_status",
    "facilities",
    "paymentMethods",
    "modifiedDate",
    'periodStart',
    "url_image",
    'periodEnd',
    'price',
    'space',
    'today',
    'today_weekday',
)
output = [
    {key: merged.get(key, "") for key in selected_keys}
    for merged in hodgepodge
]

pprint(output[0])

# Build a pandas DataFrame from the records loaded into test_json.
test_df = pd.DataFrame(test_json)
# Column used as the record key.
key_col = 'park_Id'
# NOTE(review): 'col2' and 'col3' look like placeholder names; selecting
# labels that are not columns of test_df raises KeyError. Confirm the
# intended columns.
new_df = test_df[[key_col, 'col2', 'col3']]
print("done")

48
task1/archive/task3.py Normal file
View File

@@ -0,0 +1,48 @@
# https://api.data.gov.hk/v1/carpark-info-vacancy?data=vacancy&vehicleTypes=privateCar
import requests
from pprint import pprint
# Download private-car vacancy data and keep the records whose first
# 'privateCar' entry has vacancy_type "A".
url = "https://api.data.gov.hk/v1/carpark-info-vacancy"
params = {
    "data": "vacancy",
    "vehicleTypes": "privateCar",
}

# Fail fast on a stalled connection or an HTTP error status.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
js_result = response.json()['results']

output = []
for js_row in js_result:
    private_car = js_row.get('privateCar')
    # Only a non-empty list of entries can be inspected.
    if not isinstance(private_car, list) or not private_car:
        continue
    first_entry = private_car[0]
    # Guard against a non-dict entry before calling .get() on it.
    if isinstance(first_entry, dict) and first_entry.get("vacancy_type", '-') == "A":
        output.append(js_row)
import pandas as pd

# Hand-written sample shaped like one vacancy record: a car-park id plus a
# list of private-car vacancy entries.
test_json = [
    {
        'park_Id': '10',
        'privateCar': [
            {
                'vacancy_type': 'A',
                'vacancy': 29,
                'lastupdate': '2024-10-18 16:04:59',
            },
        ],
    },
]

# One row per 'privateCar' entry, with the owning park_Id carried alongside.
df = pd.json_normalize(test_json, record_path=['privateCar'], meta='park_Id')

# Keep only the car-park id and its vacancy value.
vacancy_df = df.loc[:, ['park_Id', 'vacancy']]
print(vacancy_df)
import json

# Show the first kept record (None when nothing was kept, instead of an
# IndexError) and the number of kept records.
pprint(output[0] if output else None)
pprint("length of json: "+str(len(output)))

# Store output into task3_output.json
with open('task3_output.json', 'w', encoding='utf-8') as outfile:
    json.dump(output, outfile, indent=2)

File diff suppressed because it is too large Load Diff

20
task1/archive/task4.py Normal file
View File

@@ -0,0 +1,20 @@
import json
# Load js_result.json
with open('js_result.json', 'r', encoding='utf-8') as file:
    js_result = json.load(file)

# Load task3_output.json
with open('task3_output.json', 'r', encoding='utf-8') as file:
    task3_output = json.load(file)

# Index the task-3 records by park_Id once so the join below is linear
# instead of a nested scan. setdefault keeps the first record per id.
t3_by_park_id = {}
for t3 in task3_output:
    t3_by_park_id.setdefault(t3['park_Id'], t3)

# For every js_result record with a matching task-3 record, emit the record
# extended with that task-3 record's park_Id and privateCar entries.
# Records without a match are dropped.
output = []
for js_row in js_result:
    t3 = t3_by_park_id.get(js_row['park_Id'])
    if t3 is None:
        continue
    output.append({
        **js_row,
        "t3_park_Id": t3['park_Id'],
        "t3_privateCar": t3['privateCar'],
    })

BIN
task1/caution.png (Stored with Git LFS) Normal file

Binary file not shown.

5
task1/notes.md Normal file
View File

@@ -0,0 +1,5 @@
quote HKD600
HKD 200 received, Thanks.
https://colab.research.google.com/drive/1gBOX9iMPVpqEGbkT3RSeH4jTQBzUykdF#scrollTo=LYIZvKU37xxR

BIN
task1/notes/Week 1.pdf Normal file

Binary file not shown.

BIN
task1/notes/Week 2.pdf Normal file

Binary file not shown.

BIN
task1/notes/Week 3.pdf Normal file

Binary file not shown.

BIN
task1/notes/Week 4.pdf Normal file

Binary file not shown.

BIN
task1/notes/Week 5.pdf Normal file

Binary file not shown.

BIN
task1/notes/Week 6.pdf Normal file

Binary file not shown.