Pārlūkot izejas kodu

Cleaned code into functions and modified ARIMA

Doriand
Doriand Petit pirms 4 gadiem
vecāks
revīzija
5e65114de5
1 mainītis faili ar 92 papildinājumiem un 97 dzēšanām
  1. 92
    97
      code/wilshire_5000/nn.py

+ 92
- 97
code/wilshire_5000/nn.py Parādīt failu

@@ -15,112 +15,107 @@ def sinus_cosinus(x):
def swish(x):
    """Swish activation: the input multiplied by its own sigmoid."""
    gate = tf.math.sigmoid(x)
    return x * gate

def arima_pred(y_train, y_test, order=(2, 1, 1)):
    """Walk-forward one-step ARIMA forecast over ``y_test``.

    For every test observation an ``ARIMA`` model is fitted on the history
    seen so far, its one-step forecast is recorded, and the true observation
    is then appended to the history before the next step.

    Args:
        y_train: Sequence of training observations. It is copied, so the
            caller's object is never modified.
        y_test: Sequence of held-out observations, in order.
        order: Indexable ``(p, d, q)``; its first three entries are passed
            to ``ARIMA``.

    Returns:
        Tuple ``(mse, preds)``: the mean squared error between the forecasts
        and ``y_test``, and the list of one-step forecasts.
    """
    # Work on a private copy: aliasing y_train would grow the caller's list
    # by len(y_test) entries on every call.
    history = list(y_train)
    pdq = (order[0], order[1], order[2])

    preds = []
    for actual in y_test:
        fitted = ARIMA(history, order=pdq).fit()
        # forecast() yields a length-1 result; take its single value by
        # position regardless of the container type.
        preds.append(np.asarray(fitted.forecast())[0])
        history.append(actual)

    mse = np.square(np.array(preds) - np.array(y_test)).mean()
    return mse, preds
def arima_pred(y_train, y_test,
               orders=((2, 1, 1), (2, 2, 1), (3, 1, 1), (2, 1, 2)), n=5):
    """Score several ARIMA orders by mean squared error on ``y_test``.

    For each order the evaluation is repeated ``n`` times; each repetition
    fits ``ARIMA`` on ``y_train`` once per test observation, records the
    one-step forecast, and computes the MSE against ``y_test``. The ``n``
    MSE values are then averaged.

    Args:
        y_train: Sequence of training observations (not modified).
        y_test: Sequence of held-out observations.
        orders: Iterable of indexable ``(p, d, q)`` triples to evaluate.
        n: Number of repetitions per order; must be at least 1.

    Returns:
        List with one averaged MSE per entry of ``orders``, in order.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    targets = np.array(y_test)
    mse = []
    for order in orders:
        pdq = (order[0], order[1], order[2])
        # A plain list: ndarrays have no append() and np.array() needs an
        # argument, so the accumulator cannot be an empty ndarray.
        run_errors = []
        for _ in range(n):
            preds = []
            for _ in y_test:
                fitted = ARIMA(y_train, order=pdq).fit()
                preds.append(np.asarray(fitted.forecast())[0])
                # NOTE(review): the training data is not extended with the
                # observed test value here, so every step forecasts from the
                # same history. Confirm whether a walk-forward update (on a
                # copy of y_train) was intended.
            run_errors.append(np.square(np.array(preds) - targets).mean())
        mse.append(np.mean(run_errors))
    return mse


# Top-level experiment script: load the data, report an ARIMA baseline, then
# train and evaluate one dense network per activation function.
# NOTE(review): indentation is not visible in this view, so the loop nesting
# described in the comments below is inferred — confirm against the real file.

# Activation functions to benchmark; only `snake` is currently enabled.
#activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]
activations = [snake]
models = []
# Per-activation [mean, std] of the train / test losses.
errors_train,errors_test = [],[]
# Per-activation mean / std of the predictions across repetitions.
mean_y_train,mean_y_test,std_y_test=[],[],[]

df_train,df_test,index = wilshire.preprocess('WILL5000INDFC2.csv')
# Inputs are integer positions 0 .. len(df_train)-1.
x_train = np.arange(df_train.shape[0])


# Scale positions by the largest training position; the same factor is
# reused for the test and future positions below.
maximum = np.max(x_train)
x_train = x_train / maximum


y_train=df_train["WILL5000INDFC"]
# The result of to_numpy() is not assigned, so y_train itself is unchanged.
y_train.to_numpy()

# NOTE(review): test positions start at len(df_train)+1 while training
# positions end at len(df_train)-1, so position len(df_train) is skipped —
# confirm this gap is intended.
x_test = np.arange(df_train.shape[0]+1,df_train.shape[0]+df_test.shape[0]+1)
y_test = df_test["WILL5000INDFC"]
# As above, the converted array is discarded.
y_test.to_numpy()
print("----")
print(y_test)
x_test=x_test / maximum

# ARIMA baseline on plain Python lists.
print(arima_pred(list(y_train),list(y_test)))


for activation in activations :
# Per-repetition predictions and losses for the current activation.
y_train_5=[]
y_test_5=[]
errors_train_5=[]
errors_test_5=[]
# A single repetition is run per activation.
for k in range(1):

model = tf.keras.Sequential()

# Dense stack 1 -> 64 -> 64 -> 1; the output layer has no activation.
model.add(tf.keras.layers.Dense(1,input_shape=[1,],activation=activation))
model.add(tf.keras.layers.Dense(64,activation=activation))
model.add(tf.keras.layers.Dense(64,activation=activation))
model.add(tf.keras.layers.Dense(1))
opt = tf.keras.optimizers.SGD(learning_rate=0.01,momentum=0.9)
model.compile(optimizer=opt, loss='mse')
model.build()
model.summary()
# One sample per batch, a single pass over the training data.
model.fit(x_train,y_train, batch_size=1, epochs=1)

y_pred_test = model.predict(x_test)
y_pred_train = model.predict(x_train)
y_train_5.append(y_pred_train)
y_test_5.append(y_pred_test)
# evaluate() results for the model compiled with loss='mse' above.
errors_test_5.append(model.evaluate(x_test,y_test))
errors_train_5.append(model.evaluate(x_train,y_train))


# Aggregate across repetitions (axis 0 is the repetition axis of the lists).
mean_y_train.append(np.mean(y_train_5,axis=0))
mean_y_test.append(np.mean(y_test_5,axis=0))
std_y_test.append(np.std(y_test_5,axis=0))
errors_train.append([np.mean(errors_train_5),np.std(errors_train_5)])
errors_test.append([np.mean(errors_test_5),np.std(errors_test_5)])
# y_preds_train.append(y_pred_train)
# y_preds_test.append(y_pred_test)


# Predict over 9000 positions, scaled with the same factor as the training
# inputs; `model` is whichever model was assigned last above.
x = np.arange(9000)
x_n = x / maximum
future_preds = model.predict(x_n) ## Calculated with a website the number of working days between 01-06-2020 and 01-01-2024


# NOTE(review): only the header and the first statement of `plot_total` are
# visible here; the next line starts another `def`, so the remainder of this
# body cannot be confirmed from this view.
def plot_total(x_train,y_train,y_pred_train,x_test,y_test,y_pred_test):
# Join the train and test x-positions into a single array.
x = np.concatenate((x_train,x_test))
def prepare_data(filename="WILL5000INDFC2.csv"):
    """Load the index data and build normalised position inputs.

    Calls ``wilshire.preprocess(filename)`` and uses integer positions as
    model inputs, scaled by the largest training position.

    Args:
        filename: Path handed to ``wilshire.preprocess``.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``. The ``x`` values are
        NumPy arrays of scaled positions; the ``y`` values are the
        ``"WILL5000INDFC"`` columns of the train and test frames, returned
        as selected (no NumPy conversion is applied).

    Raises:
        ValueError: If the training frame has fewer than two rows, since the
            scaling factor would then be zero or undefined.
    """
    # The third value returned by preprocess is not needed here.
    df_train, df_test, _ = wilshire.preprocess(filename)
    n_train = df_train.shape[0]
    n_test = df_test.shape[0]
    if n_train < 2:
        raise ValueError("at least two training rows are required")

    positions = np.arange(n_train)
    maximum = np.max(positions)
    x_train = positions / maximum

    # NOTE(review): test positions start at n_train + 1 while training
    # positions end at n_train - 1, so position n_train is skipped. Kept as
    # is; confirm whether the gap is intended.
    x_test = np.arange(n_train + 1, n_train + n_test + 1) / maximum

    y_train = df_train["WILL5000INDFC"]
    y_test = df_test["WILL5000INDFC"]
    return x_train, x_test, y_train, y_test




def create_model(activation):
    """Build, compile and summarise the dense regression network.

    The network maps one input feature through Dense layers of 1, 64 and 64
    units (all using ``activation``) to a single linear output unit. It is
    compiled with SGD (learning rate 0.01, momentum 0.9) and an MSE loss,
    and its summary is printed before it is returned.
    """
    dense = tf.keras.layers.Dense
    stack = (
        dense(1, input_shape=[1,], activation=activation),
        dense(64, activation=activation),
        dense(64, activation=activation),
        dense(1),
    )

    network = tf.keras.Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9),
        loss='mse',
    )
    network.build()
    network.summary()
    return network

def training_testing(n, activations):
    """Train and evaluate one network per activation, ``n`` times each.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``; they must be defined before this function is called.

    Args:
        n: Number of independent training repetitions per activation; must
            be at least 1.
        activations: Iterable of activation callables (or Keras activation
            names) passed one at a time to ``create_model``.

    Returns:
        Tuple ``(models, errors_train, errors_test)``:
        ``models`` holds, per activation, the model from the last
        repetition; ``errors_train`` and ``errors_test`` hold, per
        activation, ``[mean, std]`` of the ``evaluate`` results over the
        ``n`` repetitions.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    models = []
    errors_train, errors_test = [], []
    for activation in activations:
        run_errors_train = []
        run_errors_test = []
        for _ in range(n):
            # Build the network for the current activation only; passing
            # the whole `activations` list would hand Keras a list where a
            # single activation is expected.
            model = create_model(activation)
            model.fit(x_train, y_train, batch_size=1, epochs=1)

            run_errors_test.append(model.evaluate(x_test, y_test))
            run_errors_train.append(model.evaluate(x_train, y_train))

        models.append(model)
        errors_train.append([np.mean(run_errors_train), np.std(run_errors_train)])
        errors_test.append([np.mean(run_errors_test), np.std(run_errors_test)])
    return models, errors_train, errors_test


def final_plot():
    """Plot the true series against the model's predictions over 9000 steps.

    Reads the module-level ``maximum``, ``model``, ``y_train``, ``y_test``,
    ``df_train``, ``df_test`` and ``index``; they must be defined before this
    function is called. Shows the figure and returns ``None``.
    """
    # Predict over 9000 positions, scaled with the same factor as the
    # training inputs.
    x = np.arange(9000)
    x_n = x / maximum
    future_preds = model.predict(x_n)

    # The true series only covers the train + test span, so it gets its own
    # (shorter) x-axis; plotting it against the 9000-point axis would fail
    # on mismatched lengths.
    y_true = np.concatenate((y_train, y_test))
    x_cut = np.arange(df_train.shape[0] + df_test.shape[0])

    plt.figure()
    plt.plot(x_cut, y_true, label="True data")
    plt.plot(x, future_preds, label="Predictions")
    # One tick every 250 positions, labelled with consecutive years.
    plt.xticks(range(0, 9000, 250), range(1995, 2031, 1))
    plt.xlabel("Années")
    plt.ylabel("Index Willshire5000 normalisé")
    # vlines requires explicit ymin/ymax; the two red lines are drawn at
    # `index` and `index + 85`.
    plt.vlines([index, index + 85], ymin=0, ymax=1, colors="r", label="Test Samples")
    plt.legend()
    plt.show()

# Earlier call to `plot_total`, currently disabled.
#plot_total(x_train,y_train,y_pred_train,x_test,y_test,y_pred_test)

# Report the collected per-activation test errors.
print(errors_test)

#x=np.arange(df_train.shape[0]+df_test.shape[0]+908)
# True series over the train + test span, on its own integer x-axis.
y_true = np.concatenate((y_train,y_test))
x_cut = np.arange(df_train.shape[0]+df_test.shape[0])
plt.figure()
plt.plot(x_cut,y_true,label="True data")
# `x` and `future_preds` are module-level names assigned earlier in the script.
plt.plot(x,future_preds,label="Predictions")
# One tick every 250 positions, labelled with consecutive years.
plt.xticks(range(0, 9000, 250), range(1995, 2031, 1))
plt.xlabel("Années")
plt.ylabel("Index Willshire5000 normalisé")
# Two red vertical lines at `index` and `index + 85`.
plt.vlines([index,index+85],ymin=0,ymax=1,colors="r",label="Test Samples")
plt.legend()
plt.show()



Notiek ielāde…
Atcelt
Saglabāt