Browse Source

Added notebook

Doriand
Doriand Petit 4 years ago
parent
commit
c72451bb93
3 changed files with 219 additions and 34 deletions
  1. 40
    26
      code/wilshire_5000/nn.py
  2. 176
    0
      code/wilshire_5000/notebook_wilshire.ipynb
  3. 3
    8
      code/wilshire_5000/wilshire.py

+ 40
- 26
code/wilshire_5000/nn.py View File

@@ -6,8 +6,10 @@ import wilshire
import tensorflow_addons as tfa
from statsmodels.tsa.arima.model import ARIMA


### Activation functions ###
def snake(x):
return(x+(tf.math.sin(20*x)**2)/20)
return(x+(tf.math.sin(30*x)**2)/30)
def sinus(x):
return(tf.math.sin(x))
def sinus_cosinus(x):
@@ -16,11 +18,14 @@ def swish(x):
return(x*tf.math.sigmoid(x))


#activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]
activations = [snake]


def prepare_data(filename="WILL5000INDFC.csv"):
"""
Prepare the data by preprocessing, normalizing, and splitting it into train and test sets.
Returns the x and y train and test sets, as well as the maximum (for later plots) and the index separating the two sets.

"""
df_train,df_test,index = wilshire.preprocess(filename)
x_train = np.arange(df_train.shape[0])
maximum = np.max(x_train)
@@ -35,6 +40,10 @@ def prepare_data(filename="WILL5000INDFC.csv"):
return x_train,x_test,y_train,y_test,maximum,index

def arima_pred(y_train,y_test,orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]],n=5):
"""
Computes the ARIMA errors (mse) for several orders to compare with the article

"""
mse=[]
for order in orders :
@@ -55,6 +64,10 @@ def arima_pred(y_train,y_test,orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]],n=5):


def create_model(activation):
"""
Create the neural network with the requested activation function

"""
model = tf.keras.Sequential()

model.add(tf.keras.layers.Dense(1,input_shape=[1,],activation=activation))
@@ -68,8 +81,12 @@ def create_model(activation):
model.summary()
return model

def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]):
x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake],epochs = 50):
"""
Trains models and computes the mean and standard deviation of the test errors over n tries for each requested activation function.

"""
x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
models = []
errors_train,errors_test = [],[]
mean_y_train,mean_y_test,std_y_test=[],[],[]
@@ -81,7 +98,7 @@ def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_co
for k in range(n):

model = create_model(activation)
model.fit(x_train,y_train, batch_size=1, epochs=50)
model.fit(x_train,y_train, batch_size=1, epochs=epochs)

y_pred_test = model.predict(x_test)
y_pred_train = model.predict(x_train)
@@ -101,25 +118,28 @@ def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_co
return models,errors_train,errors_test


def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"]):
x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
x = np.arange(9000)
def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"],orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]):
"""
Prints the results for comparison with the table in the article and draws the same plot as the article.
"""
x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
x = np.arange(9000) ## 9000 data points bring us to ~2031 to try and predict future data
x_n = x / maximum
future_preds = models[-1].predict(x_n) ## Calculated with a website the number of working days between 01-06-2020 and 01-01-2024
future_preds = models[-1].predict(x_n)

#x=np.arange(df_train.shape[0]+df_test.shape[0]+908)
y_true = np.concatenate((y_train,y_test))
x_cut = np.arange(x_train.shape[0]+x_test.shape[0])

print("----- ARIMA Test MSE -----")
orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]
# for k in range(len(orders_ARIMA)):
# print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
for k in range(len(orders_ARIMA)):
print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
print("----- DNN Test MSE -----")
for k in range(len(activations)):
print("DNN "+activations[k]+" : "+str(errors_test[k]))


### PLOT ###
plt.figure()
plt.plot(x_cut,y_true,label="True data")
plt.plot(x,future_preds,label="Predictions")
@@ -131,22 +151,16 @@ def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus C
plt.show()


x_train,x_test,y_train,y_test,maximum,index = prepare_data()
#mse = arima_pred(y_train,y_test)
# mse=[]
# # models,errors_train,errors_test = training_testing(n=1,activations=[snake])
# # models[0].save("Snake20a")
# models=[]
# errors_test=[]
# models.append(tf.keras.models.load_model("Snake30a"))
# print(mse,errors_test)
# final_plot(models,errors_test,mse,activations=[])


def plot_all_a(a=["1","10","20","30","100"]):
"""
Plots the figure for varying values of a by loading pre-existing models (they are uploaded on GitHub).
"""
models=[]
for param in a :
models.append(tf.keras.models.load_model("Snake"+param+"a"))
x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
x = np.arange(9000)
x_n = x / maximum
y_true = np.concatenate((y_train,y_test))

+ 176
- 0
code/wilshire_5000/notebook_wilshire.ipynb
File diff suppressed because it is too large
View File


+ 3
- 8
code/wilshire_5000/wilshire.py View File

@@ -6,9 +6,8 @@ import matplotlib.pyplot as plt

def parser(path):
df = pd.read_csv(path,na_values='.')
#df = df.interpolate()
#df = df.interpolate() ### Interpolate or dropna for bank holidays
df = df.dropna().reset_index(drop=True)
#df = df.drop(labels=np.arange(1825)) ### To obtain the same graph as in the article
return(df)

def preprocess(path):
@@ -17,13 +16,9 @@ def preprocess(path):
df_normalized = df[:]
df_normalized["WILL5000INDFC"]=df_normalized["WILL5000INDFC"]/np.max(df_normalized["WILL5000INDFC"])
index_train = int(df_normalized[df_normalized["DATE"]=="2020-01-31"].index.array[0])
# df.plot()
# plt.show()

df_train = df_normalized[:index_train]
df_test = df_normalized[index_train+1:index_train+85]
df_test = df_normalized[index_train+1:index_train+85] #Between 02-01 and 05-31

# df_train.plot()
# df_test.plot()
# plt.show()
return(df_train,df_test,index_train)


Loading…
Cancel
Save