This commit is contained in:
louiscklaw
2025-02-01 01:58:19 +08:00
commit e44aead3d5
344 changed files with 201112 additions and 0 deletions

1
jupyter/jupyter-helloworld/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
**/.ipynb_checkpoints

View File

@@ -0,0 +1,101 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import pytz"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Imports: \n",
"* datetime: parse epoch time from listings, compare to current time\n",
"* pytz: specify timezone for datetime operations"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class listing:\n",
"    \"\"\"A single marketplace listing built from one parsed search-result dict.\n",
"\n",
"    Every field defaults to None at class level; ``location``, ``url`` and\n",
"    ``likes`` are not filled in by ``__init__`` and keep that default.\n",
"    \"\"\"\n",
"\n",
"    # location requires visiting the listing URL; listtype is e.g. bump, new user etc\n",
"    ID = title = desc = time = price = condition = location = None\n",
"    image = url = seller = likes = diffTime = listtype = None\n",
"\n",
"    def __init__(self, parsedDict):\n",
"        \"\"\"Populate the listing from ``parsedDict``.\n",
"\n",
"        Reads the keys 'id', 'seller' -> 'username', 'photoUrls', 'title',\n",
"        'price', 'belowFold' and 'aboveFold'. A missing key or a too-short list\n",
"        raises KeyError / IndexError; a non-numeric timestamp field in the image\n",
"        URL raises ValueError.\n",
"        \"\"\"\n",
"        self.ID = parsedDict['id']\n",
"        self.seller = parsedDict['seller']['username']\n",
"\n",
"        # Full-size image URL: the first photo URL with its '_thumbnail' marker removed.\n",
"        firstPhoto = parsedDict['photoUrls'][0]\n",
"        self.image = firstPhoto.replace('_thumbnail', '')\n",
"\n",
"        # The second-to-last '_'-separated field of the image URL is read as the\n",
"        # listing's epoch timestamp (seconds).\n",
"        urlFields = self.image.split('_')\n",
"        postedEpoch = urlFields[-2]\n",
"        singapore = pytz.timezone('Asia/Singapore')\n",
"        now = datetime.now(tz=singapore)\n",
"        self.time = datetime.fromtimestamp(int(postedEpoch), tz=singapore)\n",
"        self.diffTime = now - self.time  # age of the listing as a timedelta\n",
"\n",
"        self.title = parsedDict['title']\n",
"        self.price = parsedDict['price']\n",
"\n",
"        # NOTE(review): positions 2 and 3 of 'belowFold' are assumed to hold the\n",
"        # description and the condition - confirm against a real response.\n",
"        belowFold = parsedDict['belowFold']\n",
"        self.desc = belowFold[2]['stringContent']\n",
"        self.condition = belowFold[3]['stringContent']\n",
"        self.listtype = parsedDict['aboveFold'][0]['component']\n",
"        # self.likes stays None: parsedDict['likesCount'] is invalid if no likes yet\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"metadata": {
"interpreter": {
"hash": "22f70f503a14852dda6f3675b02ae61e38835edc56b87f63457d6e30f459ac44"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,21 @@
# Package index that pipenv resolves and downloads from.
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
# Runtime dependencies. "*" places no version constraint on a package here.
[packages]
jupyter = "*"
notebook = "*"
pandas = "*"
quandl = "*"
seaborn = "*"
# NOTE(review): "sklearn" is the deprecated PyPI alias for scikit-learn, which is
# already listed on the next line - consider dropping it and re-locking.
sklearn = "*"
scikit-learn = "*"
pydot = "*"
bokeh = "*"
jupyter-bokeh = "*"
# No development-only dependencies are declared.
[dev-packages]
# Any Python 3 interpreter is accepted; no minor version is fixed here.
[requires]
python_version = "3"

1403
jupyter/jupyter-helloworld/Pipfile.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
```bash
./start_docker.sh
# inside docker
./dev.sh
```

View File

@@ -0,0 +1,156 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"import requests\n",
"import pprint"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"searchURL = 'https://www.carousell.sg/api-service/filter/cf/4.0/search/'\n",
"searchContent = {\n",
" 'bestMatchEnabled': False,\n",
" 'canChangeKeyword': False,\n",
" 'count': 40, # maximum appears to be 40 results\n",
" 'countryCode': 'SG',\n",
" 'countryId': '1880251',\n",
" #'ccid': None, # filter by category (must specify collection ID under filters too)\n",
" 'filters': [\n",
" # {'fieldName': 'price', 'rangedFloat': {'end': {'value': None}, 'start': {'value': None}}} # filter by price\n",
" # {'fieldName': 'collections', 'idsOrKeywords': {'value': None}} # filter by category (must specify correct ccid too)\n",
" ], \n",
" 'includeSuggestions': False,\n",
" 'locale': 'en',\n",
" 'prefill': [], # Seems to be used when adding filters to an existing search (not used by us)\n",
" 'query': None,\n",
" 'sortParam': { # used to specify: sorting of returned results (currently set to return most recent listings)\n",
" 'fieldName': 'time_created', \n",
" 'ascending': {\n",
" 'value': False\n",
" }\n",
" }\n",
"}\n",
"\n",
"\n",
"# additional search content for various searches\n",
"# for filtering by price\n",
" # {'rangedFloat': {'end': {'value': 'max'}, 'start': {'value': 'min'}}, 'fieldName': 'price'}\n",
"# for filtering by category\n",
" # ccid: (2196 = Desktops)\n",
" # filters\n",
" # {'fieldName': 'collections', 'idsOrKeywords': {'value': '1794'}} - for Desktops\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def query(keyword = None, category = None, minPrice = None, maxPrice = None):\n",
"    \"\"\"Build and pretty-print the POST payload for a listing search.\n",
"\n",
"    The payload starts as a deep copy of the module-level ``searchContent``\n",
"    template, so the template itself is never modified.\n",
"\n",
"    Args:\n",
"        keyword: search text stored under 'query'; left untouched when None.\n",
"        category: currently only echoed to stdout (see NOTE below).\n",
"        minPrice: lower price bound; sent as ``str(minPrice)``.\n",
"        maxPrice: upper price bound; sent as ``str(maxPrice)``.\n",
"\n",
"    Returns:\n",
"        None. The search URL and the payload are printed; the actual HTTP\n",
"        request is still commented out at the end of this cell.\n",
"    \"\"\"\n",
"    # construct POST content\n",
"    content = copy.deepcopy(searchContent)\n",
"    if keyword is not None:\n",
"        print(f'Keyword: {keyword}')\n",
"        content['query'] = keyword\n",
"    if category is not None:\n",
"        # NOTE(review): no 'ccid' / 'collections' filter is added for the\n",
"        # category yet, so it does not affect the payload.\n",
"        print(f'Category: {category}')\n",
"    if minPrice is not None or maxPrice is not None:\n",
"        # Keep a handle on the new price range instead of assuming the price\n",
"        # filter sits at index 0: the template may already carry other filters.\n",
"        priceRange = {}\n",
"        content['filters'].append({'fieldName': 'price', 'rangedFloat': priceRange})\n",
"        if minPrice is not None:\n",
"            minPriceStr = str(minPrice)\n",
"            print('Minimum price: ' + minPriceStr)\n",
"            priceRange['start'] = {'value': minPriceStr}\n",
"        if maxPrice is not None:\n",
"            maxPriceStr = str(maxPrice)\n",
"            print('Maximum price: ' + maxPriceStr)\n",
"            priceRange['end'] = {'value': maxPriceStr}\n",
"\n",
"    pprint.pprint({\"searchURL\":searchURL})\n",
"    pprint.pprint(content)\n",
"    # perform POST search, retrieve JSON response\n",
" \n",
" \n",
"# searchResp = requests.post(url=searchURL, json=content)\n",
"# searchRespData = searchResp.json()\n",
"# print(searchRespData)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Keyword: gtx1060\n",
"Minimum price: 300\n",
"Maximum price: 400\n",
"{'searchURL': 'https://www.carousell.sg/api-service/filter/cf/4.0/search/'}\n",
"{'bestMatchEnabled': False,\n",
" 'canChangeKeyword': False,\n",
" 'count': 40,\n",
" 'countryCode': 'SG',\n",
" 'countryId': '1880251',\n",
" 'filters': [{'fieldName': 'price',\n",
" 'rangedFloat': {'end': {'value': '400'},\n",
" 'start': {'value': '300'}}}],\n",
" 'includeSuggestions': False,\n",
" 'locale': 'en',\n",
" 'prefill': [],\n",
" 'query': 'gtx1060',\n",
" 'sortParam': {'ascending': {'value': False}, 'fieldName': 'time_created'}}\n"
]
}
],
"source": [
"query(keyword=\"gtx1060\", minPrice=300, maxPrice=400)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"metadata": {
"interpreter": {
"hash": "22f70f503a14852dda6f3675b02ae61e38835edc56b87f63457d6e30f459ac44"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Install pipenv, sync the project's pipenv environment, then start Jupyter Notebook.
# Stop at the first failing command and echo every command as it runs.
set -o errexit -o xtrace

# Bring pip up to date before using it to install pipenv.
python -m pip install --upgrade pip
python -m pip install pipenv

# Install the project's dependencies into the pipenv environment.
pipenv sync

# --allow-root lets the server run as root; --ip=0.0.0.0 makes it listen on
# all interfaces.
pipenv run jupyter-notebook --allow-root --ip=0.0.0.0

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,15 @@
#!/usr/bin/env bash
# One-time bootstrap: add the notebook stack to the project's pipenv environment.
# Stop at the first failing command and echo every command as it runs.
set -ex

# Install everything in a single pipenv invocation so dependencies are resolved
# and locked once rather than once per package (jupyter used to be installed twice).
pipenv install \
  jupyter \
  notebook \
  pandas \
  quandl \
  seaborn \
  scikit-learn
# jupyter-notebook

View File

@@ -0,0 +1,27 @@
### to spin up dev environment
```
./start_docker.sh
# inside docker
./dev.sh
# then, on the host, open the URL that dev.sh prints in a browser.
# The token is generated anew each time the server starts, e.g.:
# http://127.0.0.1:8888/?token=<token printed by dev.sh>
```
### to develop
To set up the environment from a fresh Python Docker image:
```
./start_docker.sh
./init.sh
```

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Start an interactive, throw-away (--rm) python:3.10 container for this project:
#   - the current directory is mounted at /app and used as the working directory
#   - the host Docker socket is passed through
#   - the host SSH key and known_hosts file are mounted read-only
#   - container port 8888 is published on host port 8888
# Stop at the first failing command and echo every command as it runs.
set -ex

# NOTE(review): the SSH files are mounted under /home/node, but python:3.10 runs
# as root by default (home directory /root) - confirm the intended target path.
#
# "$PWD" is quoted so a checkout path containing spaces still works, and the
# host file is ~/.ssh/known_hosts (the old "known_host" path does not normally
# exist, and docker creates a missing bind-mount source as an empty directory).
docker run -it \
  -v "$PWD":/app \
  -w /app \
  -v /var/run/docker.sock:/var/run/docker.sock \
  -v ~/.ssh/id_rsa:/home/node/.ssh/id_rsa:ro \
  -v ~/.ssh/known_hosts:/home/node/.ssh/known_hosts:ro \
  -p 8888:8888 \
  --rm \
  python:3.10 \
  bash
# -u 1000:1000 \
# -e XDG_CACHE_HOME=/app/.cache \