diff --git a/yo006yo/gitUpdate.bat b/yo006yo/gitUpdate.bat new file mode 100644 index 00000000..5375d9a8 --- /dev/null +++ b/yo006yo/gitUpdate.bat @@ -0,0 +1,7 @@ +git status . + +@pause + +git add . +git commit -m"update yo006yo," +start git push \ No newline at end of file diff --git a/yo006yo/meta.md b/yo006yo/meta.md new file mode 100644 index 00000000..f37eeb5f --- /dev/null +++ b/yo006yo/meta.md @@ -0,0 +1,7 @@ +--- +tags: [HKDI, pending, kaggle] +--- + +# yo006yo + +### Collect Open / Real-Time Data, Visualization and Analysis diff --git a/yo006yo/task1/ProjectBrief_EA_DigitalData_v1d.pdf b/yo006yo/task1/ProjectBrief_EA_DigitalData_v1d.pdf new file mode 100644 index 00000000..10737a4a Binary files /dev/null and b/yo006yo/task1/ProjectBrief_EA_DigitalData_v1d.pdf differ diff --git a/yo006yo/task1/package.json b/yo006yo/task1/package.json new file mode 100644 index 00000000..0becb97a --- /dev/null +++ b/yo006yo/task1/package.json @@ -0,0 +1,13 @@ +{ + "name": "yo006yo", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1", + "gitUpdate": "git add . && git commit -m'update,'&& git push" + }, + "keywords": [], + "author": "", + "license": "ISC" +} diff --git a/yo006yo/task1/quotation.md b/yo006yo/task1/quotation.md new file mode 100644 index 00000000..c59790a9 --- /dev/null +++ b/yo006yo/task1/quotation.md @@ -0,0 +1,42 @@ +--- +tags: [pending, kaggle] +--- + +# task1 + +## Brief + +### You are required to collect open data and real-time data. + +### Part 1: Jupyter notebook Data Analysis and suggestion of actionable items + +- Download the Top 200 common passwords by country 2021 database from www.kaggle.com +- Manipulate and rearrange the data if necessary +- Visualize the data with 8 or more charts, using Python in a Jupyter notebook. +- The sunburst chart, heat map, and pair-plot must be used. +- 1 or more 3D charts are essential. 
+- 1 or more maps, such as a choropleth map in Plotly, should be displayed. +- Analyze the charts (and data) to reveal some facts. +- Provide insights and suggest actionable items. +- (You may add other related data set(s) to enrich your insights and suggestions.) + +### Part 2: Real-time data processing and visualization, in Jupyter notebook. + +- Collect and store real-time data using the API of HK Accident and Emergency waiting time (of Hospitals) in a NoSQL database (e.g., MongoDB). + - remarks: https://data.gov.hk/en-data/dataset/hospital-hadata-ae-waiting-time/resource/164c3478-1791-4f9e-94d9-70d2374a48e2 +- The data collection duration should be 3 or more days, within November and/or December. +- Data should be collected at intervals of 15 minutes or less. +- Create a Jupyter Notebook to read the data into a pandas DataFrame. +- (You may export the data, using MongoDB Compass, to a JSON file first.) +- Process and visualize the data. +- Produce 3 or more charts. +- You are encouraged to use Python 3D visualization techniques too. +- Analyze the charts (and data) to reveal some facts. +- Provide insights / comments / suggestions. + +### Items should include: + +- Exported collection(s) of the open data / samples of real-time data, from MongoDB +- Jupyter Notebooks that visualize and analyze the data sets, with summary, conclusions and so on in Markdown. +- Demonstrate the data collection process and present all results / insights in a video. +- Upload everything to Moodle 1 week after the last lesson.