This commit is contained in:
louiscklaw
2025-01-31 21:57:33 +08:00
parent 52904f5284
commit 6c95c5e3ea
39 changed files with 32542 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
# %pip install beautifulsoup4
from pprint import pprint
import re
import requests
from bs4 import BeautifulSoup
# English month names in calendar order; a name's position + 1 is its month number.
txt_month = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
def formatNumber(number):
    """Return *number* as text, left-padded with zeros to at least two characters.

    Args:
        number: The value to pad. Strings are padded as-is; any other value
            (e.g. an int) is first converted with ``str()`` so callers do not
            have to stringify it themselves.

    Returns:
        The zero-padded string, e.g. ``"7"`` -> ``"07"``. Values that are
        already two or more characters long are returned unchanged.
    """
    # str() is a no-op for strings, so existing string callers are unaffected.
    return str(number).zfill(2)
def txtMonthToIntMonth(txtMonth):
    """Convert an English month name into its two-digit month number string.

    Args:
        txtMonth: A month name exactly as it appears in ``txt_month``
            (e.g. ``"March"``).

    Returns:
        The 1-based month number as a zero-padded string (e.g. ``"03"``).

    Raises:
        ValueError: If ``txtMonth`` is not an entry of ``txt_month``.
    """
    # list.index is 0-based; calendar months are 1-based.
    month_number = txt_month.index(txtMonth) + 1
    month_text = str(month_number)
    return formatNumber(month_text)
def scrape_holiday(year=2024, timeout=10):
    """Scrape the general-holiday dates for one year from the gov.hk holiday page.

    Args:
        year: Year whose page is fetched and which prefixes every returned
            date. Defaults to 2024, the value that used to be hard-coded.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A list of ``"YYYY-MM-DD"`` strings, one per scraped date cell, in
        page order. The list is also pretty-printed before being returned.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        IndexError: If a date cell holds fewer than two whitespace-separated
            tokens.
        ValueError: If a cell's second token is not a known month name.
    """
    url = 'https://www.gov.hk/en/about/abouthk/holiday/{}.htm'.format(year)
    # Without a timeout requests may block forever on an unresponsive server.
    res = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    date_cells = soup.select('tr>td.date')

    output = []
    # The first matching cell is skipped, as before; presumably it is not a
    # concrete "<day> <month>" entry -- verify against the live page layout.
    for cell in date_cells[1:]:
        # split() with no argument drops leading/trailing whitespace and
        # collapses runs of it, so the day and month stay at indices 0 and 1
        # (splitting on every single whitespace character yields empty tokens).
        parts = cell.text.split()
        day, month_name = parts[0], parts[1]
        output.append(
            '{}-{}-{}'.format(year, txtMonthToIntMonth(month_name), formatNumber(day))
        )

    # Kept so the function's console output is unchanged for existing users.
    pprint(output)
    return output
# Module-level driver: run the scraper and pretty-print the list it returns.
# Note that this executes (and performs the HTTP request) whenever the file runs.
ph_dates = scrape_holiday()
pprint(ph_dates)