31 lines
897 B
Python
31 lines
897 B
Python
# %pip install beautifulsoup4
|
|
|
|
from pprint import pprint
|
|
import re
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
# English month names in calendar order: a name's list position + 1 is its
# month number (used by txtMonthToIntMonth).
txt_month = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
|
|
|
|
def formatNumber(number):
    """Return *number* as a string left-padded with zeros to at least two characters.

    Args:
        number: The value to pad. A string (e.g. ``"7"``) behaves exactly as
            before; any other value (e.g. the int ``7``) is converted with
            ``str()`` first instead of failing on the missing ``zfill`` method.

    Returns:
        The zero-padded string, e.g. ``"7"`` -> ``"07"``. Values that are
        already two or more characters long are returned unchanged.
    """
    # str() is a no-op for strings, so existing string callers are unaffected.
    return str(number).zfill(2)
|
|
|
|
def txtMonthToIntMonth(txtMonth):
    """Convert an English month name to its zero-padded month number.

    The name is looked up in the module-level ``txt_month`` list. Surrounding
    whitespace and letter case are ignored, so text scraped from HTML such as
    ``" march "`` resolves the same way as ``"March"``.

    Args:
        txtMonth: Month name, e.g. ``"March"``.

    Returns:
        The month number as a two-character string, e.g. ``"03"``.

    Raises:
        ValueError: If the name does not match any entry of ``txt_month``.
    """
    # str() keeps the "unknown value -> ValueError" contract for non-string input.
    wanted = str(txtMonth).strip().casefold()
    for month_number, month_name in enumerate(txt_month, start=1):
        if month_name.casefold() == wanted:
            return formatNumber(str(month_number))
    raise ValueError(f"Unknown month name: {txtMonth!r}")
|
|
|
|
def scrape_holiday(year=2024, timeout=10):
    """Scrape holiday dates from the gov.hk holiday page for *year*.

    Downloads ``https://www.gov.hk/en/about/abouthk/holiday/<year>.htm``,
    selects every ``td.date`` cell that is a direct child of a ``tr``, skips
    the first match, and converts each remaining cell's
    ``"<day> <month name>"`` text into a ``"YYYY-MM-DD"`` string.

    Args:
        year: Year used both in the page URL and as the prefix of every
            returned date. Defaults to 2024, the previously hard-coded value.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``"YYYY-MM-DD"`` strings in page order. The list is also
        pretty-printed to stdout before being returned.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        IndexError: If a cell contains fewer than two whitespace-separated
            tokens.
        ValueError: If a cell's month token is not a known month name.
    """
    url = f'https://www.gov.hk/en/about/abouthk/holiday/{year}.htm'
    # Without a timeout requests can block forever on an unresponsive server.
    res = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error page into an empty result.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    date_cells = soup.select('tr>td.date')

    output = []
    # The first matching cell is skipped, as in the original code; presumably
    # it is a non-date row on the page -- TODO confirm against the live HTML.
    for cell in date_cells[1:]:
        # str.split() with no argument collapses runs of whitespace and drops
        # leading/trailing whitespace, so stray spaces or newlines in the cell
        # cannot shift the day/month positions.
        tokens = cell.text.split()
        day, month = tokens[0], tokens[1]
        output.append(f'{year}-{txtMonthToIntMonth(month)}-{formatNumber(day)}')

    # Kept from the original: callers may rely on this output being printed.
    pprint(output)
    return output
|
|
|
|
# Module-level driver: runs the scrape as soon as this file is executed or
# imported and keeps the resulting list of date strings in ``ph_dates``.
ph_dates = scrape_holiday()

# Pretty-print the stored result (scrape_holiday() also prints its own output,
# so the list appears on stdout twice).
pprint(ph_dates)