Browse source

Added notebook

Doriand Petit, 4 years ago
parent commit c72451bb93
3 changed files with 219 additions and 34 deletions
  1. code/wilshire_5000/nn.py (+40, -26)
  2. code/wilshire_5000/notebook_wilshire.ipynb (+176, -0)
  3. code/wilshire_5000/wilshire.py (+3, -8)

code/wilshire_5000/nn.py (+40, -26)

import tensorflow_addons as tfa
from statsmodels.tsa.arima.model import ARIMA


### Activation functions ###
def snake(x):
-    return(x+(tf.math.sin(20*x)**2)/20)
+    return(x+(tf.math.sin(30*x)**2)/30)

def sinus(x):
    return(tf.math.sin(x))

def sinus_cosinus(x):
    return(x*tf.math.sigmoid(x))




#activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]
activations = [snake]
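The hunk above changes the snake frequency from 20 to 30. As a side note, a parameterized variant of the same activation, x + sin²(a·x)/a, can be built as a closure and passed straight to a Keras layer; this is only an illustrative sketch (the helper make_snake is hypothetical, not part of the repo):

import tensorflow as tf

def make_snake(a=30.0):
    """Snake activation x + sin(a*x)**2 / a with a tunable frequency a (illustrative, not the repo's code)."""
    def snake_a(x):
        return x + (tf.math.sin(a * x) ** 2) / a
    return snake_a

# usage: tf.keras.layers.Dense(1, activation=make_snake(30.0))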




def prepare_data(filename="WILL5000INDFC.csv"):
    """
    Prepares the data by preprocessing, normalizing and splitting it into train and test sets.
    Returns the x and y train and test sets, as well as the maximum (for later plots) and the index separating the two sets.
    """
    df_train,df_test,index = wilshire.preprocess(filename)
    x_train = np.arange(df_train.shape[0])
    maximum = np.max(x_train)
    return x_train,x_test,y_train,y_test,maximum,index


def arima_pred(y_train,y_test,orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]],n=5):
    """
    Computes the ARIMA errors (mse) for several orders to compare with the article.
    """
    mse=[]
    for order in orders :
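The loop body of arima_pred is elided from this hunk. A minimal sketch of what fitting each order and collecting the test MSE could look like, assuming one fit per order and a forecast over the test horizon (an assumption, not the committed code):

import numpy as np
from statsmodels.tsa.arima.model import ARIMA

def arima_test_mse(y_train, y_test, orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]]):
    mse = []
    for order in orders:
        fitted = ARIMA(y_train, order=tuple(order)).fit()   # fit on the training series
        forecast = fitted.forecast(steps=len(y_test))        # predict the test horizon
        mse.append(np.mean((np.asarray(y_test) - np.asarray(forecast)) ** 2))
    return mse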




def create_model(activation):
    """
    Create the neural network with the requested activation function.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(1,input_shape=[1,],activation=activation))
    model.summary()
    return model
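create_model returns an uncompiled model, and the compile call does not appear in this hunk. Before the model.fit call in training_testing can run, something along these lines is presumably needed (the optimizer and loss are assumptions, reusing the file's snake activation):

# Assumed compile step (optimizer/loss are guesses; the diff does not show them):
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1, input_shape=[1], activation=snake),
])
model.compile(optimizer="adam", loss="mse")   # required before model.fit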


-def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]):
-    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
+def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake],epochs = 50):
+    """
+    Trains models and computes the mean and std of the test errors over n tries for each requested activation function.
+    """
+    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
    models = []
    errors_train,errors_test = [],[]
    mean_y_train,mean_y_test,std_y_test=[],[],[]
    for k in range(n):

        model = create_model(activation)
-        model.fit(x_train,y_train, batch_size=1, epochs=50)
+        model.fit(x_train,y_train, batch_size=1, epochs=epochs)

        y_pred_test = model.predict(x_test)
        y_pred_train = model.predict(x_train)
    return models,errors_train,errors_test
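The bookkeeping between the predict calls and the return statement is elided from this hunk. A sketch of how the per-try train/test MSE could be accumulated, consistent with the lists initialized above (an assumption, not the committed lines):

# Hypothetical error bookkeeping inside the loop (assumed; elided from the hunk above):
errors_train.append(np.mean((y_train - y_pred_train.flatten()) ** 2))
errors_test.append(np.mean((y_test - y_pred_test.flatten()) ** 2))
models.append(model)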




-def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"]):
-    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
-    x = np.arange(9000)
+def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"],orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]):
+    """
+    Prints the results, to compare with the table in the article, and reproduces the article's plot.
+    """
+    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
+    x = np.arange(9000) ## 9000 data points bring us to ~2031, to try and predict future data
    x_n = x / maximum
-    future_preds = models[-1].predict(x_n) ## Number of working days between 01-06-2020 and 01-01-2024, calculated with a website
+    future_preds = models[-1].predict(x_n)

    #x=np.arange(df_train.shape[0]+df_test.shape[0]+908)
    y_true = np.concatenate((y_train,y_test))
    x_cut = np.arange(x_train.shape[0]+x_test.shape[0])


print("----- ARIMA Test MSE -----") print("----- ARIMA Test MSE -----")
orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]
# for k in range(len(orders_ARIMA)):
# print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
for k in range(len(orders_ARIMA)):
print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
print("----- DNN Test MSE -----") print("----- DNN Test MSE -----")
for k in range(len(activations)): for k in range(len(activations)):
print("DNN "+activations[k]+" : "+str(errors_test[k])) print("DNN "+activations[k]+" : "+str(errors_test[k]))


    ### PLOT ###
    plt.figure()
    plt.plot(x_cut,y_true,label="True data")
    plt.plot(x,future_preds,label="Predictions")
    plt.show()




x_train,x_test,y_train,y_test,maximum,index = prepare_data()
#mse = arima_pred(y_train,y_test)
# mse=[]
# # models,errors_train,errors_test = training_testing(n=1,activations=[snake])
# # models[0].save("Snake20a")
# models=[]
# errors_test=[]
# models.append(tf.keras.models.load_model("Snake30a"))
# print(mse,errors_test)
# final_plot(models,errors_test,mse,activations=[])
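The commented-out driver above hints at the intended end-to-end run. Assembled and uncommented, it might look roughly like this (still a sketch built only from the functions in this file, with the save name taken from the comments):

# Possible end-to-end run, based on the commented-out driver above (not committed as-is):
x_train, x_test, y_train, y_test, maximum, index = prepare_data()
mse = arima_pred(y_train, y_test)                                    # ARIMA baselines
models, errors_train, errors_test = training_testing(n=1, activations=[snake])
models[0].save("Snake30a")                                           # keep the trained snake model
final_plot(models, errors_test, mse, activations=["Snake"])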



def plot_all_a(a=["1","10","20","30","100"]):
    """
    Plots the predictions for the different values of the parameter a, by loading pre-existing models (they are uploaded on GitHub).
    """
    models=[]
    for param in a :
        models.append(tf.keras.models.load_model("Snake"+param+"a"))
-    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC2.csv")
+    x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
    x = np.arange(9000)
    x_n = x / maximum
    y_true = np.concatenate((y_train,y_test))
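The plotting part of plot_all_a is elided from this hunk. One way the loaded models might be overlaid on the true data (an assumption, using the in-scope names a, models, x, x_n and y_true):

# Hypothetical plotting loop (assumed; elided from the hunk above):
plt.figure()
plt.plot(np.arange(y_true.shape[0]), y_true, label="True data")
for param, model in zip(a, models):
    plt.plot(x, model.predict(x_n), label="Snake, a=" + param)
plt.legend()
plt.show()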

code/wilshire_5000/notebook_wilshire.ipynb (+176, -0)
The file diff has been suppressed because it is too large.


code/wilshire_5000/wilshire.py (+3, -8)



def parser(path):
    df = pd.read_csv(path,na_values='.')
-    #df = df.interpolate()
+    #df = df.interpolate() ### Interpolate or dropna for bank holidays
    df = df.dropna().reset_index(drop=True)
    #df = df.drop(labels=np.arange(1825)) ### To obtain the same graph as in the article
    return(df)


def preprocess(path):
    df_normalized = df[:]
    df_normalized["WILL5000INDFC"]=df_normalized["WILL5000INDFC"]/np.max(df_normalized["WILL5000INDFC"])
    index_train = int(df_normalized[df_normalized["DATE"]=="2020-01-31"].index.array[0])
-    # df.plot()
-    # plt.show()

    df_train = df_normalized[:index_train]
-    df_test = df_normalized[index_train+1:index_train+85]
+    df_test = df_normalized[index_train+1:index_train+85] # Between 02-01 and 05-31

-    # df_train.plot()
-    # df_test.plot()
-    # plt.show()
    return(df_train,df_test,index_train)
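preprocess uses a dataframe df that is not created in the lines shown; presumably the elided part of the function calls parser(path). A usage sketch under that assumption:

# Assumed usage (the parser(path) call inside preprocess is elided from this hunk):
df_train, df_test, index_train = preprocess("WILL5000INDFC.csv")
print(df_train.shape, df_test.shape, index_train)   # train up to 2020-01-31, 84 test rows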


