import numpy as np
import pandas as pd
from pandas_datareader import data
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from fbprophet import Prophet
from sklearn.metrics import mean_squared_error
# Use Costco as the example stock for the analysis
# Data from 2015-10-01 to 2018-03-31 is used for training
# Data from 2018-04-01 to 2018-10-01 is used for testing the projected return
# Download daily prices for the example ticker over the full study window.
ticker = "COST"
start_date = '2015-10-01'
end_date = '2018-10-01'
stock_data = data.DataReader(ticker, 'iex', start_date, end_date)
# print() with a single argument produces the same output on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(stock_data.shape)
stock_data.head()  # notebook-style preview; no visible effect when run as a script

# Plot the closing price over the whole window.
stock_data['close'].plot(figsize=(16,8),color='#002699',alpha=0.8)
plt.xlabel("Date",fontsize=12,fontweight='bold',color='gray')
plt.ylabel('Price',fontsize=12,fontweight='bold',color='gray')
plt.title("Stock price for Costco",fontsize=18)
plt.show()
def cycle_analysis(data,split_date,cycle,mode='additive',forecast_plot = False,print_ind=False):
    """Fit Prophet with a single custom seasonality and score it out of sample.

    Parameters
    ----------
    data : pandas.Series
        Price series to model. Forecast dates are formatted as
        ``'YYYY-MM-DD'`` strings before being aligned with the hold-out rows,
        so the index is expected to hold dates in that string form.
    split_date : str
        First date of the testing window. Rows strictly before it are used
        for training; rows from it onwards are used for testing.
    cycle : int or float
        Period, in days, of the custom ``self_define_cycle`` seasonality.
    mode : str
        Seasonality mode forwarded to ``Prophet.add_seasonality``.
    forecast_plot : bool
        When true, plot the forecast with the hold-out observations on top.
    print_ind : bool
        When true, print the two returned metrics.

    Returns
    -------
    list
        ``[ret, mse]`` where ``ret`` is the max-minus-min of the fitted
        ``self_define_cycle`` component and ``mse`` is the mean squared error
        between the hold-out values and the forecast on the dates both share.
    """
    # Select training rows with a boolean mask. The previous
    # ``data[:split_date].iloc[:-1]`` also discarded the last genuine
    # training row whenever split_date itself was absent from the index
    # (for example when it falls on a weekend).
    training = data[data.index < split_date]
    testing = data[split_date:]
    # Forecast horizon in calendar days, from split_date to the last index entry.
    predict_period = len(pd.date_range(split_date,max(data.index)))

    df = training.reset_index()
    df.columns = ['ds','y']  # column names Prophet expects

    # Disable the built-in seasonalities so only the custom cycle is fitted.
    m = Prophet(weekly_seasonality=False,yearly_seasonality=False,daily_seasonality=False)
    m.add_seasonality('self_define_cycle',period=cycle,fourier_order=8,mode=mode)
    m.fit(df)
    future = m.make_future_dataframe(periods=predict_period)
    forecast = m.predict(future)

    if forecast_plot:
        m.plot(forecast)
        # Overlay the hold-out observations on the forecast figure.
        plt.plot(testing.index,testing.values,'.',color='#ff3333',alpha=0.6)
        plt.xlabel('Date',fontsize=12,fontweight='bold',color='gray')
        plt.ylabel('Price',fontsize=12,fontweight='bold',color='gray')
        plt.show()

    # Amplitude of the fitted cycle component.
    cycle_component = forecast['self_define_cycle']
    ret = cycle_component.max() - cycle_component.min()

    # Build a fresh Series keyed by date strings instead of re-indexing the
    # column taken from ``forecast`` in place.
    date_keys = forecast['ds'].map(lambda x:x.strftime("%Y-%m-%d")).values
    model_tb = pd.Series(forecast['yhat'].values,index=date_keys,name='yhat')

    # Keep only the dates that have both an observed and a predicted value.
    out_tb = pd.concat([testing,model_tb],axis=1).dropna()
    mse = mean_squared_error(out_tb.iloc[:,0],out_tb.iloc[:,1])

    rep = [ret,mse]
    if print_ind:
        # Single-argument print() works on both Python 2 and Python 3.
        print("Projected return per cycle: {}".format(round(rep[0],2)))
        print("MSE: {}".format(round(rep[1],4)))
    return rep
# Inspect two arbitrary cycle lengths with plots and printed metrics.
cycle_analysis(stock_data['close'],'2018-04-01',30,forecast_plot=True,print_ind=True)
cycle_analysis(stock_data['close'],'2018-04-01',300,forecast_plot=True,print_ind=True)

# Evaluate every cycle length from 10 to 300 days and record both metrics.
# list(...) keeps testing_box a real list on Python 3 as it already is on Python 2.
testing_box = list(range(10,301))
return_box = []
mse_box = []
for cycle_len in testing_box:
    projected_return, out_sample_mse = cycle_analysis(stock_data['close'],'2018-04-01',cycle_len)
    return_box.append(projected_return)
    mse_box.append(out_sample_mse)

# Plot both metrics against the cycle length.
f = plt.figure(figsize=(16,18))
ax = f.add_subplot(211)
ax2 = f.add_subplot(212)
ax.plot(testing_box,return_box,color='#002699',alpha=0.8)
ax2.plot(testing_box,mse_box,color='#002699',alpha=0.8)
ax.set_xlabel("Length of Cycle",fontsize=12,color='gray')
ax2.set_xlabel("Length of Cycle",fontsize=12,color='gray')
ax.set_ylabel("Projected Return per Cycle",fontsize=12,color='gray')
ax2.set_ylabel("Out-Sample Mean Squared Error",fontsize=12,color='gray')
ax.set_title("Projected Return per Cycle",fontsize=18,fontweight='bold',color='#000033')
ax2.set_title("Out-Sample Mean Squared Error",fontsize=18,fontweight='bold',color='#000033')
plt.show()

# Among cycles whose projected return exceeds 10, pick the one with the
# smallest out-of-sample MSE.
report = pd.DataFrame({'cycle':testing_box,'return':return_box,'mse':mse_box})
possible_choice = report[report['return'] >10]
if possible_choice.empty:
    # Fail with an explicit message rather than an opaque error from min()
    # on an empty sequence.
    raise ValueError("No cycle length has a projected return greater than 10")
best_choice = possible_choice[possible_choice['mse']==possible_choice['mse'].min()]
best_choice  # notebook-style preview; no visible effect when run as a script
# Use a plain int so the value is accepted anywhere a built-in integer is
# required (e.g. datetime.timedelta on Python 3 rejects numpy integers).
c = int(best_choice['cycle'].values[0])
cycle_analysis(stock_data['close'],'2018-04-01',c,forecast_plot=True,print_ind=True)
def _project_cycle_dates(first_dt, trading_days, step):
    """Repeat ``first_dt`` every ``step`` and snap each repeat to a trading day."""
    last_day = trading_days[-1]
    dates = [first_dt]
    current = first_dt
    # Keep adding cycles until the snapped date reaches the last trading day.
    while current < last_day:
        target = current + step
        # Distance in whole days from every trading day to the target date;
        # argmin returns the first of any equally close days.
        gaps = np.abs(np.asarray((trading_days - target).days))
        snapped = trading_days[gaps.argmin()]
        if snapped <= current:
            # A short step across a market closure can snap back onto the
            # current date, which would loop forever. Move to the next
            # trading day instead; one exists because current < last_day.
            snapped = trading_days[trading_days > current][0]
        current = snapped
        dates.append(current)
    return dates


def Return_Dates(forecast,stock_data,cycle,cycle_name = 'self_define_cycle',time_name = 'ds'):
    """Derive buy and sell dates by repeating the first cycle's low and high.

    The highest and lowest points of ``cycle_name`` are located within the
    first ``cycle`` rows of ``forecast`` only, because the extreme values
    differ slightly from one cycle to the next. Each of the two dates is then
    advanced by ``cycle`` days at a time and snapped to the closest date in
    ``stock_data.index`` so that non-trading days are avoided.

    Parameters
    ----------
    forecast : pandas.DataFrame
        Must contain the columns ``cycle_name`` and ``time_name``.
    stock_data : pandas.DataFrame or pandas.Series
        Only its index is used. It may hold ``'YYYY-MM-DD'`` strings or
        datetime-like values.
    cycle : int
        Cycle length in days. NumPy integers are accepted.
    cycle_name : str
        Column of ``forecast`` holding the seasonal component.
    time_name : str
        Column of ``forecast`` holding the dates.

    Returns
    -------
    list
        ``[buy_dt, sell_dt]``, two lists of ``pandas.Timestamp`` (a
        ``datetime`` subclass). The leading sell date is removed when it
        precedes the first buy date, and the last entry of each list is
        removed.

    Raises
    ------
    ValueError
        If ``cycle`` is smaller than 1.
    """
    # datetime.timedelta rejects numpy integers on Python 3, and callers
    # obtain the cycle from ``.values[0]``, so coerce to a built-in int.
    cycle = int(cycle)
    if cycle < 1:
        raise ValueError("cycle must be at least 1 day")

    # Locate the first highest and first lowest point inside the first cycle.
    first_cycle = forecast.iloc[:cycle]
    component = first_cycle[cycle_name].values
    times = first_cycle[time_name].values
    high = pd.Timestamp(times[component.argmax()]).normalize()
    low = pd.Timestamp(times[component.argmin()]).normalize()

    trading_days = pd.to_datetime(stock_data.index)
    step = timedelta(days=cycle)

    sell_dt = _project_cycle_dates(high, trading_days, step)
    buy_dt = _project_cycle_dates(low, trading_days, step)

    # A sell that comes before the first buy has nothing to sell.
    if buy_dt[0] > sell_dt[0]:
        sell_dt = sell_dt[1:]
    # Drop the final entry of each list, which was appended as the loop ended
    # at the last trading day.
    buy_dt = buy_dt[:-1]
    sell_dt = sell_dt[:-1]
    return [buy_dt,sell_dt]
# Refit Prophet on the training window with the selected cycle length ``c``.
# Training rows are those strictly before the split date. The earlier
# ``stock_data[:'2018-04-01'].iloc[:-1]`` also dropped the last real training
# row because 2018-04-01 is not present in the index.
df = stock_data[stock_data.index < '2018-04-01']['close'].reset_index()
df.columns = ['ds','y']  # column names Prophet expects
predict_period = len(pd.date_range('2018-04-01','2018-10-01'))
m = Prophet(weekly_seasonality=False,yearly_seasonality=False,daily_seasonality=False)
m.add_seasonality('self_define_cycle',period=c,fourier_order=8,mode='additive')
m.fit(df)
future = m.make_future_dataframe(periods=predict_period)
forecast = m.predict(future)

# Turn the fitted cycle into buy/sell dates and look up the closing prices.
dt_list = Return_Dates(forecast,stock_data,c)
# Build real lists of date strings: on Python 3 ``map`` is lazy and cannot be
# passed to ``.loc`` as a list of labels.
buy_labels = [d.strftime("%Y-%m-%d") for d in dt_list[0]]
sell_labels = [d.strftime("%Y-%m-%d") for d in dt_list[1]]
buy_price = stock_data.loc[buy_labels]['close']
sell_price = stock_data.loc[sell_labels]['close']
buy_price   # notebook-style preview; no visible effect when run as a script
sell_price  # notebook-style preview; no visible effect when run as a script
# Settings for the multi-stock analysis
Analysis_ticks = ['COST','AAPL','MSFT','HD','NKE']
start_date = '2015-10-01'
end_date = '2018-10-01'
# One entry per ticker is appended to each list, in the order of Analysis_ticks.
opt_cycle = []
prot_return = []
MSE = []
buy_times = []
sell_times = []
avg_buy_price = []
avg_sell_price = []
# Loop over each stock
for ticker in Analysis_ticks:
    stock_data = data.DataReader(ticker, 'iex', start_date, end_date)

    # Evaluate every cycle length from 50 to 300 days for this stock.
    testing_box = list(range(50,301))
    return_box = []
    mse_box = []
    for cc in testing_box:
        f = cycle_analysis(stock_data['close'],'2018-04-01',cc)
        return_box.append(f[0])
        mse_box.append(f[1])
    report = pd.DataFrame({'cycle':testing_box,'return':return_box,'mse':mse_box})
    possible_choice = report[report['return'] >10]

    # If we cannot find a cycle with return greater than 10, give 0
    if possible_choice.shape[0]>0:
        # Select the lowest-MSE candidate once and read all three values from it.
        best_choice = possible_choice[possible_choice['mse']==possible_choice['mse'].min()]
        # Plain int: datetime.timedelta on Python 3 rejects numpy integers.
        c = int(best_choice['cycle'].values[0])
        rp = best_choice['return'].values[0]
        mse = best_choice['mse'].values[0]

        # Refit on rows strictly before the split date. The earlier
        # ``[:'2018-04-01'].iloc[:-1]`` also dropped the last real training
        # row because 2018-04-01 is not present in the index.
        df = stock_data[stock_data.index < '2018-04-01']['close'].reset_index()
        df.columns = ['ds','y']
        predict_period = len(pd.date_range('2018-04-01','2018-10-01'))
        m = Prophet(weekly_seasonality=False,yearly_seasonality=False,daily_seasonality=False)
        m.add_seasonality('self_define_cycle',period=c,fourier_order=8,mode='additive')
        m.fit(df)
        future = m.make_future_dataframe(periods=predict_period)
        forecast = m.predict(future)

        dt_list = Return_Dates(forecast,stock_data,c)
        # Real lists of labels: on Python 3 ``map`` is lazy and cannot be
        # passed to ``.loc`` as a list of labels.
        buy_labels = [d.strftime("%Y-%m-%d") for d in dt_list[0]]
        sell_labels = [d.strftime("%Y-%m-%d") for d in dt_list[1]]
        buy_price = stock_data.loc[buy_labels]['close']
        sell_price = stock_data.loc[sell_labels]['close']
        bt = buy_price.shape[0]
        st = sell_price.shape[0]
        bp = np.mean(buy_price)
        sp = np.mean(sell_price)
    else:
        c = rp = mse = bt = st = bp = sp = 0

    opt_cycle.append(c)
    prot_return.append(rp)
    MSE.append(mse)
    buy_times.append(bt)
    sell_times.append(st)
    avg_buy_price.append(bp)
    avg_sell_price.append(sp)
    # Single-argument print() works on both Python 2 and Python 3.
    print("{} Finished".format(ticker))

# Collect the per-stock results into one table and save it.
stock_report = pd.DataFrame({'Stock':Analysis_ticks,'Cycle':opt_cycle,'Projected_Return_per_Cycle':prot_return,
                            'MSE':MSE,'Num_of_Buy':buy_times,'Num_of_Sell':sell_times,
                            'Average_Buy_Price':avg_buy_price,'Average_Sell_Price':avg_sell_price})
stock_report  # notebook-style preview; no visible effect when run as a script
stock_report.to_csv('Stock_Report.csv',index=False)