373 lines
13 KiB
Plaintext
373 lines
13 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "83041d33",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%rm -rf google_output.txt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "9b24b4df",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os,sys, csv"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"id": "c2b90953",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
# Column indexes of one row read from the CSV file.
# COL_DATE: the day of trading
# COL_OPEN: the stock price at the beginning of the trading day
# COL_HIGH: the highest price the stock achieved on the trading day
# COL_LOW: the lowest price the stock achieved on the trading day
# COL_CLOSE: the stock price at the end of the trading day
# COL_ADJ_CLOSE: the adjusted closing price of the trading day (reflecting the
#     stock's value after accounting for any corporate actions like dividends,
#     stock splits and new stock offerings)
# COL_VOLUME: the total number of shares that were traded on the trading day
COL_DATE = 0
COL_OPEN = 1
COL_HIGH = 2
COL_LOW = 3
COL_CLOSE = 4
COL_ADJ_CLOSE = 5
COL_VOLUME = 6

# Indexes for per-row derived values (placed after the CSV columns when used):
# the total sale of the day (volume x adjusted close) and the 'YYYY-MM' month key.
COL_TOTAL_SALE_OF_DAY = 7
COL_MONTH_ONLY = 8

# Column indexes of one monthly_averages_list row: [YYYY-MM, average price, EMA].
# NOTE: COL_EMA was previously also assigned 9 in the per-row group above and
# then silently overwritten here; only the effective value (2) is kept.
COL_MONTHLY_AVERAGE_PRICE = 1
COL_EMA = 2
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"id": "09a9417f",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
def get_data_list(csv_file_name):
    '''Read a CSV file into a list of rows.

    Args:
        csv_file_name: path of the CSV file to open and read.

    Returns:
        A list of lists of strings, one inner list per CSV row, in file
        order. Nothing is skipped, so a header line (if the file has one) is
        the first element.

    Raises:
        SystemExit: if anything goes wrong while opening or parsing the file.
            The underlying error is printed first and the exit status is 1.
    '''
    try:
        # newline='' lets the csv module handle line endings inside quoted fields.
        with open(csv_file_name, newline='') as csv_file:
            return list(csv.reader(csv_file, delimiter=',', quotechar='"'))
    except Exception as error:
        # Kept broad on purpose: callers rely on any read failure ending the
        # run, but the cause is now reported instead of being discarded.
        print(f'error during reading csv file {csv_file_name!r}: {error}')
        print('exitting...')
        sys.exit(1)
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "cd616e6e",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
def get_monthly_averages(data_list):
    '''Calculate the volume-weighted average adjusted closing price per month.

    For trading days 1..n of a month with volumes V1..Vn and adjusted closing
    prices C1..Cn, the monthly average is

        (V1 x C1 + V2 x C2 + ... + Vn x Cn) / (V1 + V2 + ... + Vn)

    Args:
        data_list: rows as returned by get_data_list(). The first row is
            skipped as the header. Every other non-empty row must hold a
            date starting with 'YYYY-MM' at COL_DATE and numeric text at
            COL_ADJ_CLOSE and COL_VOLUME.

    Returns:
        A list of [YYYY-MM, average price] lists, sorted by month. Lists
        (not tuples) are returned so later stages can extend each entry.

    Raises:
        ZeroDivisionError: if the total volume of a month is zero.
        ValueError: if a price or volume field is not numeric.

    The rows of data_list are only read, never modified.
    '''
    daily_sales_by_month = {}
    daily_volumes_by_month = {}

    for row in data_list[1:]:
        if not row:
            # csv.reader yields an empty list for a blank line; nothing to add.
            continue
        volume = float(row[COL_VOLUME])
        month = row[COL_DATE][0:7]  # 'YYYY-MM-DD' -> 'YYYY-MM'
        daily_sales_by_month.setdefault(month, []).append(volume * float(row[COL_ADJ_CLOSE]))
        daily_volumes_by_month.setdefault(month, []).append(volume)

    # total sale / total volume for each month, in chronological order
    return [
        [month, sum(daily_sales_by_month[month]) / sum(daily_volumes_by_month[month])]
        for month in sorted(daily_sales_by_month)
    ]
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "dfe29847",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
def get_moving_averages(monthly_averages_list, periods=5):
    '''Add an exponential moving average (EMA) to every monthly average.

    The EMA of a month is

        EMA = (monthly average price - previous month's EMA) x smoothing constant
              + previous month's EMA

    with

        smoothing constant = 2 / (periods + 1)

    The first `periods` months have no earlier EMA to build on, so each of
    them is given the simple average of those first `periods` monthly prices.

    Args:
        monthly_averages_list: rows of [YYYY-MM, monthly average price], in
            chronological order, as produced by get_monthly_averages().
        periods: number of months in the EMA window (default 5).

    Returns:
        A new list of [YYYY-MM, monthly average price, EMA] rows. The input
        list and its rows are left unchanged, so the call can be repeated.

    Raises:
        ValueError: if periods is smaller than 1.
        IndexError: if fewer than `periods` months are supplied.
    '''
    if periods < 1:
        raise ValueError(f'periods must be at least 1, got {periods}')
    if len(monthly_averages_list) < periods:
        raise IndexError(
            f'need at least {periods} monthly averages to seed the EMA, '
            f'got {len(monthly_averages_list)}'
        )

    # seed value: simple moving average of the first `periods` months
    seed_ema = sum(row[1] for row in monthly_averages_list[:periods]) / periods
    smoothing_constant = 2 / (periods + 1)

    rows_with_ema = []
    previous_ema = seed_ema
    for position, row in enumerate(monthly_averages_list):
        if position < periods:
            ema = seed_ema
        else:
            ema = (row[1] - previous_ema) * smoothing_constant + previous_ema
        rows_with_ema.append([*row, ema])
        previous_ema = ema

    return rows_with_ema
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"id": "c89cbae8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
def format_date_string(yyyy_mm):
    '''Turn a 'YYYY-MM' date string into 'MM-YYYY'.

    The text must contain exactly one '-'; any other shape makes the
    two-way unpacking below raise ValueError.
    '''
    year, month = yyyy_mm.split('-')
    return f'{month}-{year}'
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"id": "8d646beb",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
def write_output_file(filename_to_write, monthly_averages_list_w_ema, report_name):
    '''Write the best and worst EMA months to a text report.

    Args:
        filename_to_write: path of the text file to create or overwrite.
        monthly_averages_list_w_ema: rows of [YYYY-MM, monthly average, EMA]
            in chronological order.
        report_name: name shown in the report headings.

    Raises:
        ValueError: if there are no rows left after the first five months.
            The check runs before the file is opened, so nothing is written.

    The first five rows are left out of the ranking. When several months
    share the best (or worst) EMA, all of them are listed, separated by ','.
    '''
    seed_months = 5  # rows excluded from the best/worst ranking
    ranked_rows = monthly_averages_list_w_ema[seed_months:]
    if not ranked_rows:
        raise ValueError(
            f'need more than {seed_months} months to rank EMAs, '
            f'got {len(monthly_averages_list_w_ema)}'
        )

    best_ema = max(row[2] for row in ranked_rows)
    worst_ema = min(row[2] for row in ranked_rows)
    best_months = ','.join(format_date_string(row[0]) for row in ranked_rows if row[2] == best_ema)
    worst_months = ','.join(format_date_string(row[0]) for row in ranked_rows if row[2] == worst_ema)

    # assemble the report text; there is no trailing newline
    report = (
        f'# The best month for {report_name}:\n'
        f'# {best_months}, {best_ema:.2f}\n'
        '\n'
        f'# The worst month for {report_name}:\n'
        f'# {worst_months}, {worst_ema:.2f}'
    )

    # mode 'w' already truncates an existing file
    with open(filename_to_write, 'w') as output_file:
        output_file.write(report)
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "1917aaef",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
def main():
    '''Ask for a CSV file, compute monthly averages and EMAs, write the report.

    The report is written next to the input file as '<name>_output.txt',
    where <name> is the input file name without its last extension. The
    name is derived with os.path.splitext so it can never equal the input
    path; the previous str.replace('.csv', ...) left the name unchanged for
    inputs without a lowercase '.csv', and the report then overwrote the data.
    '''
    print('start')

    # gather csv file with path from user
    input_filename = input("Please input a csv filename: ").strip()
    csv_path, csv_filename = os.path.split(input_filename)

    # derive the output path: <dir>/<stem>_output.txt ('.' when no dir was given)
    stem, _extension = os.path.splitext(csv_filename)
    txt_filename = os.path.join(csv_path or '.', stem + '_output.txt')

    # take the corp name from the file name: google.csv => google
    corp_name = csv_filename.split('.')[0]

    # read the csv file, then derive monthly averages and their EMA
    print(f'processing {csv_filename}')
    csv_list = get_data_list(input_filename)
    monthly_averages_list = get_monthly_averages(csv_list)
    monthly_averages_list_w_EMA = get_moving_averages(monthly_averages_list)

    # write output file
    write_output_file(txt_filename, monthly_averages_list_w_EMA, corp_name)
    print('wrote to {file} done'.format(file=txt_filename))
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "b7d3e814",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"start\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"if __name__ == \"__main__\":\n",
|
||
" main()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "325de646",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"%ls"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "de467460",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"%cat google_output.txt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"id": "41f834e6",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.9"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|