update,
This commit is contained in:
31
vihuwach/task1/archive/task2B.py
Normal file
31
vihuwach/task1/archive/task2B.py
Normal file
@@ -0,0 +1,31 @@
|
||||
# %pip install beautifulsoup4
|
||||
|
||||
from pprint import pprint
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# English month names in calendar order; a name's list index + 1 is its
# month number (used by txtMonthToIntMonth).
txt_month = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
|
||||
|
||||
def formatNumber(number):
    """Return *number* as text, left-padded with zeros to at least two characters.

    Args:
        number: A string (typically digits such as ``"7"``), or any other
            value whose ``str()`` form should be padded, e.g. an ``int``.

    Returns:
        The zero-padded string, e.g. ``"7"`` -> ``"07"``. Values that are
        already two or more characters long are returned unchanged.
    """
    # str() lets callers pass an int directly; for a str argument it is a
    # no-op, so existing string callers get exactly the same result.
    return str(number).zfill(2)
|
||||
|
||||
def txtMonthToIntMonth(txtMonth):
    """Convert an English month name to its two-digit month number.

    The name is looked up (exact match) in the module-level ``txt_month``
    list and its 1-based position is returned as a zero-padded string,
    e.g. ``"March"`` -> ``"03"``.

    Raises:
        ValueError: If *txtMonth* is not an entry of ``txt_month``
            (propagated from ``list.index``).
    """
    # list.index is 0-based; calendar months start at 1.
    month_number = txt_month.index(txtMonth) + 1
    return formatNumber(str(month_number))
|
||||
|
||||
def scrape_holiday(year=2024):
    """Scrape Hong Kong general holiday dates for *year* from www.gov.hk.

    Downloads the holiday page for the given year, selects every
    ``tr > td.date`` cell and converts cell text of the form
    ``"<day> <month name>"`` (e.g. ``"1 January"``) into a
    ``"YYYY-MM-DD"`` string.

    Args:
        year: Calendar year to fetch; defaults to 2024. Other years assume
            the site serves a page with the same URL pattern and table
            markup -- TODO confirm before relying on them.

    Returns:
        A list of ``"YYYY-MM-DD"`` strings in the order the cells appear on
        the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If a date cell holds fewer than two whitespace-separated
            tokens.
        ValueError: If a cell's month token is not a known month name.
    """
    url = 'https://www.gov.hk/en/about/abouthk/holiday/{}.htm'.format(year)

    # Without an explicit timeout, requests may block forever on a stalled
    # connection.
    res = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # the holiday table.
    res.raise_for_status()

    soup = BeautifulSoup(res.text, 'html.parser')
    date_cells = soup.select('tr>td.date')

    output = []
    # The first matched cell is skipped; presumably it belongs to a row that
    # is not a single calendar date -- TODO confirm against the live page.
    for cell in date_cells[1:]:
        # split() with no argument ignores leading/trailing whitespace and
        # collapses runs of whitespace, so the day and month tokens keep
        # their positions even when the markup is padded.
        parts = cell.text.split()
        day, month = parts[0], parts[1]
        output.append(
            str(year) + '-' + txtMonthToIntMonth(month) + '-' + formatNumber(day)
        )
    return output
|
||||
|
||||
# Module-level driver: fetch the holiday dates once, keep them in the
# module-level name ``ph_dates``, and pretty-print the list.
ph_dates = scrape_holiday()
pprint(ph_dates)
|
Reference in New Issue
Block a user