Files
louiscklaw 6c95c5e3ea update,
2025-01-31 21:57:33 +08:00

31 lines
897 B
Python

# %pip install beautifulsoup4
from pprint import pprint
import re
import requests
from bs4 import BeautifulSoup
# English month names in calendar order: position in this list + 1 is the
# month number (January -> 1, ..., December -> 12).
txt_month = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
def formatNumber(number):
    """Return *number* left-padded with zeros to at least two characters.

    Args:
        number: The value to pad. A string such as ``"7"`` is padded as-is;
            any other value (for example the integer ``7``) is converted
            with ``str()`` first, so callers no longer have to pre-convert.

    Returns:
        The padded string, e.g. ``"07"``. Values that are already two or
        more characters long are returned unchanged.
    """
    # str() is a no-op for strings, so existing string callers see the same
    # result; it only adds support for ints, which have no .zfill method.
    return str(number).zfill(2)
def txtMonthToIntMonth(txtMonth):
    """Convert an English month name into its zero-padded month number.

    The name is looked up in the module-level ``txt_month`` list, so it must
    match one of those entries exactly; an unknown name raises ``ValueError``
    from ``list.index``.

    Args:
        txtMonth: Month name, e.g. ``"March"``.

    Returns:
        The month number as a two-character string, e.g. ``"03"``.
    """
    # list.index is zero-based, calendar months are one-based.
    month_number = txt_month.index(txtMonth) + 1
    return formatNumber(str(month_number))
def scrape_holiday(year=2024, timeout=10):
    """Scrape holiday dates from the gov.hk general-holidays page.

    Downloads ``https://www.gov.hk/en/about/abouthk/holiday/<year>.htm``,
    takes every ``td.date`` table cell except the first one, and turns each
    "<day> <month name>" cell into a ``YYYY-MM-DD`` string.

    Args:
        year: Year whose page is fetched and which prefixes every returned
            date. Defaults to 2024, the value that used to be hard-coded.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``"YYYY-MM-DD"`` strings in page order. The list is also
        pretty-printed to stdout before it is returned.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If a cell's second token is not a known month name.
        IndexError: If a cell contains fewer than two tokens.
    """
    output = []
    url = 'https://www.gov.hk/en/about/abouthk/holiday/{}.htm'.format(year)
    # Without a timeout requests can block forever on a stalled connection.
    res = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into nothing.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    date_cells = soup.select('tr>td.date')
    # The first td.date cell is skipped, as before.
    # NOTE(review): presumably it is a non-date row such as "Every Sunday";
    # confirm against the live page.
    for cell in date_cells[1:]:
        # str.split() with no argument drops leading/trailing whitespace and
        # collapses runs, so stray newlines or double spaces in the cell
        # cannot shift the day/month positions.
        tokens = cell.text.split()
        iso_date = '{}-{}-{}'.format(
            year,
            txtMonthToIntMonth(tokens[1]),
            formatNumber(tokens[0]),
        )
        output.append(iso_date)
    pprint(output)
    return output
# Run the scraper when this file is executed and keep the resulting list of
# date strings in the module-level name ``ph_dates``.
ph_dates = scrape_holiday()
# Display the collected dates. scrape_holiday() already pretty-prints the
# same list before returning, so the output appears twice.
pprint(ph_dates)