{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-title",
   "metadata": {},
   "source": [
    "# Stock report: best and worst month by exponential moving average\n",
    "\n",
    "This notebook reads a daily stock-price CSV file (columns in this order: date, open, high, low, close, adjusted close, volume), computes the volume-weighted average price of every month, smooths the monthly averages with a 5-month exponential moving average (EMA) and writes the months with the highest and the lowest EMA to `<name>_output.txt` next to the input file.\n",
    "\n",
    "The CSV file name is asked for interactively when `main()` runs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83041d33",
   "metadata": {},
   "outputs": [],
   "source": [
    "%rm -rf google_output.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-setup",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b24b4df",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2b90953",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column indexes of each row read from the CSV file\n",
    "# COL_DATE: the day of trading\n",
    "# COL_OPEN: the stock price at the beginning of the trading day\n",
    "# COL_HIGH: the highest price the stock achieved on the trading day\n",
    "# COL_LOW: the lowest price the stock achieved on the trading day\n",
    "# COL_CLOSE: the stock price at the end of the trading day\n",
    "# COL_ADJ_CLOSE: the adjusted closing price of the trading day (reflecting the stock's value\n",
    "#                after accounting for any corporate actions like dividends, stock splits and\n",
    "#                new stock offerings)\n",
    "# COL_VOLUME: the total number of shares that were traded on the trading day\n",
    "COL_DATE = 0\n",
    "COL_OPEN = 1\n",
    "COL_HIGH = 2\n",
    "COL_LOW = 3\n",
    "COL_CLOSE = 4\n",
    "COL_ADJ_CLOSE = 5\n",
    "COL_VOLUME = 6\n",
    "\n",
    "# Column indexes of each monthly_averages_list entry: [YYYY-MM, monthly average price, EMA]\n",
    "COL_MONTH = 0\n",
    "COL_MONTHLY_AVERAGE_PRICE = 1\n",
    "COL_EMA = 2\n",
    "\n",
    "# Number of months covered by the exponential moving average (EMA)\n",
    "EMA_PERIODS = 5"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-load",
   "metadata": {},
   "source": [
    "## Load the CSV file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09a9417f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data_list(csv_file_name):\n",
    "    '''Read a CSV file into a list of rows.\n",
    "\n",
    "    input:\n",
    "        csv_file_name: path of the CSV file to open and read\n",
    "\n",
    "    output:\n",
    "        data_list, a list of lists: one list of strings per CSV row, in file\n",
    "        order, the first row of the file included. Blank lines are dropped.\n",
    "\n",
    "    If the file cannot be opened or parsed, the reason is printed and\n",
    "    SystemExit is raised.\n",
    "    '''\n",
    "    try:\n",
    "        with open(csv_file_name, newline='') as csvfile:\n",
    "            reader = csv.reader(csvfile, delimiter=',', quotechar='\"')\n",
    "            # csv.reader yields an empty list for a blank line; such rows\n",
    "            # would break the column lookups done later, so drop them here\n",
    "            return [row for row in reader if row]\n",
    "    except (OSError, UnicodeError, csv.Error) as error:\n",
    "        print(f'error during reading csv file: {error}')\n",
    "        print('exitting...')\n",
    "        sys.exit(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-monthly",
   "metadata": {},
   "source": [
    "## Monthly average price\n",
    "\n",
    "Suppose the volume and adjusted closing price of a trading day are `V1` and `C1`. The total sale of that day equals `V1 x C1`. The average price over a month with `n` trading days is the sum of the total sales divided by the sum of the volumes:\n",
    "\n",
    "`average price = (V1 x C1 + V2 x C2 + ... + Vn x Cn) / (V1 + V2 + ... + Vn)`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd616e6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_monthly_averages(data_list):\n",
    "    '''Calculate the volume-weighted monthly average prices of the stock.\n",
    "\n",
    "    input:\n",
    "        data_list: rows as returned by get_data_list(); the first row is\n",
    "        treated as the header and ignored. The rows are not modified.\n",
    "\n",
    "    output:\n",
    "        monthly_averages_list, sorted by month:\n",
    "        [ [YYYY-MM, monthly average price],\n",
    "          [YYYY-MM, monthly average price],\n",
    "          ...]\n",
    "\n",
    "    A month whose total volume is zero has no defined average price and is\n",
    "    left out of the result.\n",
    "    '''\n",
    "    total_sale_by_month = {}\n",
    "    total_volume_by_month = {}\n",
    "\n",
    "    # one pass over the data rows, accumulating both sums per YYYY-MM key\n",
    "    for row in data_list[1:]:\n",
    "        month = row[COL_DATE][0:7]\n",
    "        volume = float(row[COL_VOLUME])\n",
    "        adj_close = float(row[COL_ADJ_CLOSE])\n",
    "\n",
    "        total_sale_by_month[month] = total_sale_by_month.get(month, 0.0) + volume * adj_close\n",
    "        total_volume_by_month[month] = total_volume_by_month.get(month, 0.0) + volume\n",
    "\n",
    "    monthly_averages_list = []\n",
    "    for month in sorted(total_sale_by_month):\n",
    "        total_volume = total_volume_by_month[month]\n",
    "        if total_volume == 0:\n",
    "            # nothing was traded during the whole month: avoid dividing by zero\n",
    "            continue\n",
    "        monthly_averages_list.append([month, total_sale_by_month[month] / total_volume])\n",
    "\n",
    "    return monthly_averages_list"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-ema",
   "metadata": {},
   "source": [
    "## Exponential moving average\n",
    "\n",
    "`EMA = (monthly average price - previous month's EMA) x smoothing constant + previous month's EMA`\n",
    "\n",
    "where `smoothing constant = 2 / (number of time periods in months + 1)`.\n",
    "\n",
    "The first months have no previous EMA, so they are given the simple moving average (SMA) of the first `EMA_PERIODS` monthly averages."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfe29847",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_moving_averages(monthly_averages_list, periods=EMA_PERIODS):\n",
    "    '''Calculate the exponential moving average (EMA) of the monthly prices.\n",
    "\n",
    "    input:\n",
    "        monthly_averages_list, as returned by get_monthly_averages():\n",
    "        [ [YYYY-MM, monthly average price],\n",
    "          [YYYY-MM, monthly average price],\n",
    "          ...]\n",
    "        periods: number of months covered by the EMA (default EMA_PERIODS)\n",
    "\n",
    "    output: a new list, the input entries are not modified\n",
    "        [ [YYYY-MM, monthly average price, EMA],\n",
    "          [YYYY-MM, monthly average price, EMA],\n",
    "          ...]\n",
    "\n",
    "    The first `periods` entries all carry the simple moving average of the\n",
    "    first `periods` monthly prices; every later entry carries the EMA.\n",
    "\n",
    "    raises:\n",
    "        ValueError: periods is smaller than 1, or the list holds fewer than\n",
    "        `periods` months so the seed value cannot be computed\n",
    "    '''\n",
    "    if periods < 1:\n",
    "        raise ValueError('periods must be at least 1')\n",
    "    if len(monthly_averages_list) < periods:\n",
    "        raise ValueError(\n",
    "            f'need at least {periods} months of data to calculate the EMA, '\n",
    "            f'got {len(monthly_averages_list)}'\n",
    "        )\n",
    "\n",
    "    # the EMA needs a previous value to start from: seed it with the SMA\n",
    "    seed_sma = sum(entry[COL_MONTHLY_AVERAGE_PRICE] for entry in monthly_averages_list[:periods]) / periods\n",
    "\n",
    "    # smoothing constant = 2 / (number of time periods in months + 1)\n",
    "    smoothing_constant = 2 / (periods + 1)\n",
    "\n",
    "    monthly_averages_list_w_ema = []\n",
    "    previous_month_ema = seed_sma\n",
    "    for index, entry in enumerate(monthly_averages_list):\n",
    "        month = entry[COL_MONTH]\n",
    "        monthly_average_price = entry[COL_MONTHLY_AVERAGE_PRICE]\n",
    "\n",
    "        if index < periods:\n",
    "            ema = seed_sma\n",
    "        else:\n",
    "            ema = (monthly_average_price - previous_month_ema) * smoothing_constant + previous_month_ema\n",
    "\n",
    "        monthly_averages_list_w_ema.append([month, monthly_average_price, ema])\n",
    "        previous_month_ema = ema\n",
    "\n",
    "    return monthly_averages_list_w_ema"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-report",
   "metadata": {},
   "source": [
    "## Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c89cbae8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_date_string(yyyy_mm):\n",
    "    '''Rearrange a month string from the CSV order to the report order: YYYY-MM => MM-YYYY.'''\n",
    "    yyyy, mm = yyyy_mm.split('-')\n",
    "    return f'{mm}-{yyyy}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d646beb",
   "metadata": {},
   "outputs": [],
   "source": [
    "def write_output_file(filename_to_write, monthly_averages_list_w_ema, report_name, periods=EMA_PERIODS):\n",
    "    '''Write the best and the worst month (by EMA) to a text file.\n",
    "\n",
    "    input:\n",
    "        filename_to_write: txt file name with path to be written to; an\n",
    "            existing file is overwritten\n",
    "        monthly_averages_list_w_ema: list returned by get_moving_averages()\n",
    "        report_name: name printed in the report\n",
    "        periods: number of leading entries to leave out of the ranking,\n",
    "            because they carry the seed value instead of a real EMA\n",
    "            (default EMA_PERIODS)\n",
    "\n",
    "    Months sharing the best (or worst) EMA are all listed, comma separated.\n",
    "\n",
    "    raises:\n",
    "        ValueError: no entry is left to rank after skipping `periods` entries\n",
    "    '''\n",
    "\n",
    "    RESULT_TEMPLATE = '''\n",
    "# The best month for {report_name}:\n",
    "# {best_month}, {best_EMA:.2f}\n",
    "\n",
    "# The worst month for {report_name}:\n",
    "# {worst_month}, {worst_EMA:.2f}\n",
    "    '''.strip()\n",
    "\n",
    "    ranked_entries = monthly_averages_list_w_ema[periods:]\n",
    "    if not ranked_entries:\n",
    "        raise ValueError(\n",
    "            f'need more than {periods} months of data to rank the EMA, '\n",
    "            f'got {len(monthly_averages_list_w_ema)}'\n",
    "        )\n",
    "\n",
    "    # highest and lowest EMA, then every month that reaches that value\n",
    "    best_EMA = max(entry[COL_EMA] for entry in ranked_entries)\n",
    "    worst_EMA = min(entry[COL_EMA] for entry in ranked_entries)\n",
    "    best_months = [format_date_string(entry[COL_MONTH]) for entry in ranked_entries if entry[COL_EMA] == best_EMA]\n",
    "    worst_months = [format_date_string(entry[COL_MONTH]) for entry in ranked_entries if entry[COL_EMA] == worst_EMA]\n",
    "\n",
    "    # assemble the output string\n",
    "    result_string = RESULT_TEMPLATE.format(\n",
    "        report_name=report_name,\n",
    "        best_month=','.join(best_months),\n",
    "        best_EMA=best_EMA,\n",
    "        worst_month=','.join(worst_months),\n",
    "        worst_EMA=worst_EMA,\n",
    "    )\n",
    "\n",
    "    # mode 'w' already truncates an existing file\n",
    "    with open(filename_to_write, 'w', encoding='utf-8') as file_write:\n",
    "        file_write.write(result_string)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-run",
   "metadata": {},
   "source": [
    "## Run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1917aaef",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _build_output_path(input_filename):\n",
    "    '''Return the report path for a CSV path: dir/google.csv => dir/google_output.txt.'''\n",
    "    csv_path, csv_filename = os.path.split(input_filename)\n",
    "    # splitext removes only the final extension, so the report can never end\n",
    "    # up with the same name as the input file, whatever its extension is\n",
    "    stem, _extension = os.path.splitext(csv_filename)\n",
    "    return os.path.join(csv_path or '.', stem + '_output.txt')\n",
    "\n",
    "\n",
    "def main():\n",
    "    '''Ask for a CSV file, calculate the monthly EMA and write the report next to it.'''\n",
    "\n",
    "    print('start')\n",
    "\n",
    "    # gather csv file with path from user\n",
    "    input_filename = input('Please input a csv filename: ').strip()\n",
    "\n",
    "    csv_filename = os.path.basename(input_filename)\n",
    "\n",
    "    # transform to the output file path by csv file name got\n",
    "    txt_filename = _build_output_path(input_filename)\n",
    "\n",
    "    # grep the corp_name from the filename google.csv => google\n",
    "    corp_name = csv_filename.split('.')[0]\n",
    "\n",
    "    # process the data_list by csv file as stated in assignment\n",
    "    print(f'processing {csv_filename}')\n",
    "    csv_list = get_data_list(input_filename)\n",
    "    try:\n",
    "        monthly_averages_list = get_monthly_averages(csv_list)\n",
    "        monthly_averages_list_w_EMA = get_moving_averages(monthly_averages_list)\n",
    "\n",
    "        # write output file\n",
    "        write_output_file(txt_filename, monthly_averages_list_w_EMA, corp_name)\n",
    "    except ValueError as error:\n",
    "        # non-numeric price/volume values or too few months of data\n",
    "        print(f'cannot build the report: {error}')\n",
    "        print('exitting...')\n",
    "        sys.exit(1)\n",
    "\n",
    "    print('wrote to {file} done'.format(file = txt_filename))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7d3e814",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "325de646",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de467460",
   "metadata": {},
   "outputs": [],
   "source": [
    "%cat google_output.txt"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}