4 years ago · c72451bb93
--- a/code/wilshire_5000/nn.py
+++ b/code/wilshire_5000/nn.py
@@ -6,8 +6,10 @@ import wilshire
 import tensorflow_addons as tfa
 from statsmodels.tsa.arima.model import ARIMA


 ### Activation functions ###
 def snake(x):
    # Snake-type periodic activation of the form x + sin(a*x)**2 / a.
    # NOTE(review): two return statements are present (a=20, then a=30); as
    # written only the first one is reachable. This looks like the before/after
    # pair of a diff -- confirm which value of `a` is intended and drop the other.
    return(x+(tf.math.sin(20*x)**2)/20)
    return(x+(tf.math.sin(30*x)**2)/30)
 def sinus(x):
    """Sine activation: return ``tf.math.sin(x)``."""
    activated = tf.math.sin(x)
    return activated
 def sinus_cosinus(x):
@@ -16,11 +18,14 @@ def swish(x):
    return(x*tf.math.sigmoid(x))


 # Full list of candidate activation functions, kept for reference:
 #activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]
 # Only the snake activation is currently enabled in this module-level list.
 activations = [snake]


 def prepare_data(filename="WILL5000INDFC.csv"):
    """
    Prepare data by preprocessing, normalizing and splitting it into train and test sets
    Return x and y train and test sets, as well as the maximum for later plots and the index separating both sets

    """
    df_train,df_test,index = wilshire.preprocess(filename)
    x_train = np.arange(df_train.shape[0])
    maximum = np.max(x_train)
@@ -35,6 +40,10 @@ def prepare_data(filename="WILL5000INDFC.csv"):
    return x_train,x_test,y_train,y_test,maximum,index

 def arima_pred(y_train,y_test,orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]],n=5):
    """
    Computes the ARIMA errors (mse) for several orders to compare with the article

    """
    mse=[]
    for order in orders :
        
@@ -55,6 +64,10 @@ def arima_pred(y_train,y_test,orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]],n=5):


 def create_model(activation):
    """
    Create the neural network with the requested activation function

    """
    model =  tf.keras.Sequential()

    model.add(tf.keras.layers.Dense(1,input_shape=[1,],activation=activation))
@@ -68,8 +81,12 @@ def create_model(activation):
    model.summary()
    return model

 def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]):
    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
 def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake],epochs = 50):
    """
    Trains models and computes means and std of test errors on n tries for each activation function requested.

    """
    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
    models = []
    errors_train,errors_test = [],[]
    mean_y_train,mean_y_test,std_y_test=[],[],[]
@@ -81,7 +98,7 @@ def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_co
        for k in range(n):

            model = create_model(activation)
            model.fit(x_train,y_train, batch_size=1, epochs=50)
            model.fit(x_train,y_train, batch_size=1, epochs=epochs)

            y_pred_test = model.predict(x_test)
            y_pred_train = model.predict(x_train)
@@ -101,25 +118,28 @@ def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_co
    return models,errors_train,errors_test


 def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"]):
    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
    x = np.arange(9000)
 def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"],orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]):
    """
    Prints the results for comparison with the table in the article and reproduces the article's plot
    """
    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
    x = np.arange(9000) ## 9000 data points bring us to ~2031 to try and predict future data
    x_n = x / maximum
    future_preds = models[-1].predict(x_n)  ## Calculated with a website the number of working days between 01-06-2020 and 01-01-2024
    future_preds = models[-1].predict(x_n) 

    #x=np.arange(df_train.shape[0]+df_test.shape[0]+908)
    y_true = np.concatenate((y_train,y_test))
    x_cut = np.arange(x_train.shape[0]+x_test.shape[0])

    print("----- ARIMA Test MSE -----")
    orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]
    # for k in range(len(orders_ARIMA)):
    #     print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
    for k in range(len(orders_ARIMA)):
        print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
    
    print("----- DNN Test MSE -----")
    
    for k in range(len(activations)):
        print("DNN "+activations[k]+" : "+str(errors_test[k]))


    ### PLOT ###
    plt.figure()
    plt.plot(x_cut,y_true,label="True data")
    plt.plot(x,future_preds,label="Predictions")
@@ -131,22 +151,16 @@ def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus C
    plt.show()


 # Script section: this runs when the module is imported/executed.
 # It loads the default dataset through prepare_data(); the commented-out lines
 # below are an experiment scaffold (ARIMA baseline, training and saving a Snake
 # model, reloading a saved model, then plotting with final_plot).
 x_train,x_test,y_train,y_test,maximum,index = prepare_data()
 #mse = arima_pred(y_train,y_test)
 # mse=[]
 # # models,errors_train,errors_test = training_testing(n=1,activations=[snake])
 # # models[0].save("Snake20a")
 # models=[]
 # errors_test=[]
 # models.append(tf.keras.models.load_model("Snake30a"))
 # print(mse,errors_test)
 # final_plot(models,errors_test,mse,activations=[])


 def plot_all_a(a=["1","10","20","30","100"]):
    """
    Plots the varying a values plot by loading pre-existing models (they are uploaded on GitHub)
    """
    models=[]
    for param in a :
        models.append(tf.keras.models.load_model("Snake"+param+"a"))
    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
    x = np.arange(9000)
    x_n = x / maximum
    y_true = np.concatenate((y_train,y_test))
--- a/code/wilshire_5000/notebook_wilshire.ipynb
+++ b/code/wilshire_5000/notebook_wilshire.ipynb
--- a/code/wilshire_5000/wilshire.py
+++ b/code/wilshire_5000/wilshire.py
@@ -6,9 +6,8 @@ import matplotlib.pyplot as plt

 def parser(path):
    """
    Read the CSV at `path` and return it without incomplete rows.

    Cells equal to '.' are parsed as missing values; every row that contains a
    missing value is dropped and the index is renumbered from 0.
    """
    raw = pd.read_csv(path, na_values='.')
    # Alternative to dropping rows (e.g. for bank holidays): raw.interpolate()
    cleaned = raw.dropna()
    reindexed = cleaned.reset_index(drop=True)
    # To obtain the same graph as in the article, the first 1825 rows could be
    # dropped here: reindexed.drop(labels=np.arange(1825))
    return reindexed

 def preprocess(path):
@@ -17,13 +16,9 @@ def preprocess(path):
    df_normalized = df[:]
    df_normalized["WILL5000INDFC"]=df_normalized["WILL5000INDFC"]/np.max(df_normalized["WILL5000INDFC"])
    index_train = int(df_normalized[df_normalized["DATE"]=="2020-01-31"].index.array[0])
    # df.plot()
    # plt.show()

    df_train = df_normalized[:index_train]
    df_test = df_normalized[index_train+1:index_train+85]
    df_test = df_normalized[index_train+1:index_train+85] #Between 02-01 and 05-31

    # df_train.plot()
    # df_test.plot()
    # plt.show()
    return(df_train,df_test,index_train)