Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

nn.py 6.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. import numpy as np
  2. import tensorflow as tf
  3. import pandas as pd
  4. import matplotlib.pyplot as plt
  5. import wilshire
  6. import tensorflow_addons as tfa
  7. from statsmodels.tsa.arima.model import ARIMA
  8. ### Fonctions d'activations ###
  9. def snake(x):
  10. return(x+(tf.math.sin(30*x)**2)/30)
  11. def sinus(x):
  12. return(tf.math.sin(x))
  13. def sinus_cosinus(x):
  14. return(tf.math.sin(x)+tf.math.cos(x))
  15. def swish(x):
  16. return(x*tf.math.sigmoid(x))
  17. def prepare_data(filename="WILL5000INDFC.csv"):
  18. """
  19. Prepare data by preprocessing, normalizing and cutting it in train and test sets
  20. Return x and y train and test sets, as well as the maximum for later plots and the index separating both sets
  21. """
  22. df_train,df_test,index = wilshire.preprocess(filename)
  23. x_train = np.arange(df_train.shape[0])
  24. maximum = np.max(x_train)
  25. x_train = x_train / maximum
  26. y_train=df_train["WILL5000INDFC"]
  27. y_train.to_numpy()
  28. x_test = np.arange(df_train.shape[0]+1,df_train.shape[0]+df_test.shape[0]+1)
  29. y_test = df_test["WILL5000INDFC"]
  30. y_test.to_numpy()
  31. x_test=x_test / maximum
  32. return x_train,x_test,y_train,y_test,maximum,index
  33. def arima_pred(y_train,y_test,orders=[[2,1,1],[2,2,1],[3,1,1],[2,1,2]],n=5):
  34. """
  35. Computes the ARIMA errors (mse) for several orders to compare with the article
  36. """
  37. mse=[]
  38. for order in orders :
  39. mean_err= []
  40. for k in range(n):
  41. train = y_train
  42. preds = []
  43. for test in range(len(y_test)):
  44. model = ARIMA(train, order=(order[0],order[1],order[2]))
  45. model = model.fit()
  46. output = model.forecast()
  47. #print(output)
  48. preds.append(output)
  49. #train.append(y_test[te
  50. mean_err.append((np.square(np.array(preds) - np.array(y_test))).mean())
  51. mse.append([np.array(mean_err).mean(),np.array(mean_err).std()])
  52. return(mse)
  53. def create_model(activation):
  54. """
  55. Create the neural network with the requested activation function
  56. """
  57. model = tf.keras.Sequential()
  58. model.add(tf.keras.layers.Dense(1,input_shape=[1,],activation=activation))
  59. model.add(tf.keras.layers.Dense(64,activation=activation))
  60. model.add(tf.keras.layers.Dense(64,activation=activation))
  61. model.add(tf.keras.layers.Dense(1))
  62. opt = tf.keras.optimizers.SGD(learning_rate=0.01,momentum=0.8)
  63. model.compile(optimizer=opt, loss='mse')
  64. model.build()
  65. model.summary()
  66. return model
  67. def training_testing(n=5,activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake],epochs = 50):
  68. """
  69. Trains models and computes means and std of test errors on n tries for each activation function requested.
  70. """
  71. x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
  72. models = []
  73. errors_train,errors_test = [],[]
  74. mean_y_train,mean_y_test,std_y_test=[],[],[]
  75. for activation in activations :
  76. y_train_5=[]
  77. y_test_5=[]
  78. errors_train_5=[]
  79. errors_test_5=[]
  80. for k in range(n):
  81. model = create_model(activation)
  82. model.fit(x_train,y_train, batch_size=1, epochs=epochs)
  83. y_pred_test = model.predict(x_test)
  84. y_pred_train = model.predict(x_train)
  85. y_train_5.append(y_pred_train)
  86. y_test_5.append(y_pred_test)
  87. errors_test_5.append(model.evaluate(x_test,y_test))
  88. errors_train_5.append(model.evaluate(x_train,y_train))
  89. models.append(model)
  90. mean_y_train.append(np.mean(y_train_5,axis=0))
  91. mean_y_test.append(np.mean(y_test_5,axis=0))
  92. std_y_test.append(np.std(y_test_5,axis=0))
  93. errors_train.append([np.mean(errors_train_5),np.std(errors_train_5)])
  94. errors_test.append([np.mean(errors_test_5),np.std(errors_test_5)])
  95. # y_preds_train.append(y_pred_train)
  96. # y_preds_test.append(y_pred_test)
  97. return models,errors_train,errors_test
  98. def final_plot(models,errors_test,arima_err,activations=["ReLU","Swish","Sinus Cosinus","Sinus","Snake"],orders_ARIMA = ["[2,1,1]","[2,2,1]","[3,1,1]","[2,1,2]"]):
  99. """
  100. Prints the results to compare with the table of the article and plot the same plot as the article
  101. """
  102. x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
  103. x = np.arange(9000) ## 9000 data points bring us to ~2031 to try and predict future data
  104. x_n = x / maximum
  105. future_preds = models[-1].predict(x_n)
  106. y_true = np.concatenate((y_train,y_test))
  107. x_cut = np.arange(x_train.shape[0]+x_test.shape[0])
  108. print("----- ARIMA Test MSE -----")
  109. for k in range(len(orders_ARIMA)):
  110. print("ARIMA"+orders_ARIMA[k]+" : "+str(arima_err[k]))
  111. print("----- DNN Test MSE -----")
  112. for k in range(len(activations)):
  113. print("DNN "+activations[k]+" : "+str(errors_test[k]))
  114. ### PLOT ###
  115. plt.figure()
  116. plt.plot(x_cut,y_true,label="True data")
  117. plt.plot(x,future_preds,label="Predictions")
  118. plt.xticks(range(0, 9000, 500), range(1995, 2031, 2))
  119. plt.xlabel("Années")
  120. plt.ylabel("Index Willshire5000 normalisé")
  121. plt.vlines([index,index+85],ymin=0,ymax=1,colors="r",label="Test Samples")
  122. plt.legend()
  123. plt.show()
  124. def plot_all_a(a=["1","10","20","30","100"]):
  125. """
  126. Plots the varying a values plot by loading pre-existing models (they are uploaded on GitHub)
  127. """
  128. models=[]
  129. for param in a :
  130. models.append(tf.keras.models.load_model("Snake"+param+"a"))
  131. x_train,x_test,y_train,y_test,maximum,index = prepare_data(filename="WILL5000INDFC.csv")
  132. x = np.arange(9000)
  133. x_n = x / maximum
  134. y_true = np.concatenate((y_train,y_test))
  135. x_cut = np.arange(x_train.shape[0]+x_test.shape[0])
  136. future_preds=[]
  137. for k in range(len(models)):
  138. future_preds.append(models[k].predict(x_n) )
  139. plt.figure()
  140. plt.plot(x_cut,y_true,label="True data")
  141. for k in range(len(models)):
  142. plt.plot(x,future_preds[k],label="a = "+a[k])
  143. plt.xticks(range(0, 9000, 500), range(1995, 2031, 2))
  144. plt.xlabel("Années")
  145. plt.ylabel("Index Willshire5000 normalisé")
  146. plt.legend()
  147. plt.show()
  148. # plot_all_a()