Explorar el Código

Commit des fichiers traitement données préprocess et réseau de neurones, plus nouvelles data

Doriand
Doriand Petit hace 4 años
padre
commit
7a1c626e5d
Se han modificado 3 ficheros con 6921 adiciones y 0 borrados
  1. 6785
    0
      code/wilshire_5000/WILL5000INDFC2.csv
  2. 104
    0
      code/wilshire_5000/nn.py
  3. 32
    0
      code/wilshire_5000/wilshire.py

+ 6785
- 0
code/wilshire_5000/WILL5000INDFC2.csv
La diferencia del archivo ha sido suprimida porque es demasiado grande
Ver fichero


+ 104
- 0
code/wilshire_5000/nn.py Ver fichero

@@ -0,0 +1,104 @@
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import wilshire
import tensorflow_addons as tfa

def snake(x, frequency=50):
    """Snake activation: ``x + sin(frequency * x) ** 2 / frequency``.

    Args:
        x: Value accepted by ``tf.math.sin`` (typically a tensor).
        frequency: Scale applied inside the sine and used to divide the
            squared-sine term. Defaults to 50, the value that used to be
            hard-coded, so ``snake(x)`` gives the same result as before.

    Returns:
        ``x`` plus the element-wise periodic term.
    """
    periodic = tf.math.sin(frequency * x) ** 2
    return x + periodic / frequency
def sinus(x):
    """Sine activation: apply ``tf.math.sin`` to ``x``."""
    activated = tf.math.sin(x)
    return activated
def sinus_cosinus(x):
    """Activation returning ``sin(x) + cos(x)``."""
    sine_part = tf.math.sin(x)
    cosine_part = tf.math.cos(x)
    return sine_part + cosine_part
def swish(x):
    """Swish activation: ``x`` multiplied by ``sigmoid(x)``."""
    gate = tf.math.sigmoid(x)
    return x * gate

# Activation functions to compare; only ``snake`` is currently enabled.
# activations = [tf.keras.activations.relu, swish, sinus_cosinus, sinus, snake]
activations = [snake]
models = []
errors_train, errors_test = [], []
mean_y_train, mean_y_test, std_y_test = [], [], []

df_train, df_test = wilshire.preprocess('WILL5000INDFC2.csv')

# Inputs are row positions divided by the largest training position, so the
# training inputs span [0, 1] and the test inputs continue just past 1.
x_train = np.arange(df_train.shape[0])
maximum = np.max(x_train)
x_train = x_train / maximum

# ``to_numpy()`` returns a new array instead of converting in place, so its
# result must be kept; calling it as a bare statement has no effect.
y_train = df_train["WILL5000INDFC"].to_numpy()

# Test positions start one past ``df_train.shape[0]``, mirroring the one-row
# gap that ``wilshire.preprocess`` leaves between its train and test slices.
x_test = np.arange(df_train.shape[0] + 1,
                   df_train.shape[0] + df_test.shape[0] + 1)
x_test = x_test / maximum
y_test = df_test["WILL5000INDFC"].to_numpy()

# Train one network per activation function and collect prediction and
# error statistics across the repeated runs of that activation.
for activation in activations:
    # Per-activation accumulators: one entry per training run.
    run_preds_train, run_preds_test = [], []
    run_errors_train, run_errors_test = [], []

    for k in range(1):
        # 1 -> 64 -> 64 -> 1 fully connected network; the last layer is linear.
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(1, input_shape=[1,], activation=activation),
            tf.keras.layers.Dense(64, activation=activation),
            tf.keras.layers.Dense(64, activation=activation),
            tf.keras.layers.Dense(1),
        ])
        opt = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
        model.compile(optimizer=opt, loss='mse')
        model.build()
        model.summary()
        model.fit(x_train, y_train, batch_size=2, epochs=20)

        y_pred_test = model.predict(x_test)
        y_pred_train = model.predict(x_train)
        run_preds_train.append(y_pred_train)
        run_preds_test.append(y_pred_test)
        run_errors_test.append(model.evaluate(x_test, y_test))
        run_errors_train.append(model.evaluate(x_train, y_train))

    # Aggregate over runs: mean/std of predictions, then [mean, std] of loss.
    mean_y_train.append(np.mean(run_preds_train, axis=0))
    mean_y_test.append(np.mean(run_preds_test, axis=0))
    std_y_test.append(np.std(run_preds_test, axis=0))
    errors_train.append(
        [np.mean(run_errors_train), np.std(run_errors_train)])
    errors_test.append(
        [np.mean(run_errors_test), np.std(run_errors_test)])


# Extend the input range by 908 steps: the number of working days between
# 01-06-2020 and 01-01-2024, worked out with an online calculator.
n_future_steps = df_train.shape[0] + df_test.shape[0] + 908
x = np.arange(n_future_steps) / maximum
future_preds = model.predict(x)

def plot_total(x_train, y_train, y_pred_train, x_test, y_test, y_pred_test):
    """Plot true values and predictions over the train and test ranges.

    Each train/test pair is concatenated (train first) and the two resulting
    curves are drawn against the concatenated x values in a new figure, which
    is then displayed with ``plt.show()``.

    Args:
        x_train: X values of the training samples.
        y_train: True targets of the training samples.
        y_pred_train: Predictions for the training samples.
        x_test: X values of the test samples.
        y_test: True targets of the test samples.
        y_pred_test: Predictions for the test samples.
    """
    x = np.concatenate((x_train, x_test))
    y_true = np.concatenate((y_train, y_test))
    y_pred = np.concatenate((y_pred_train, y_pred_test))

    plt.figure()
    plt.plot(x, y_true, label="True data")
    plt.plot(x, y_pred, label="Predictions")
    # ``vlines`` requires ymin and ymax; omitting them raises TypeError.
    # The 0..1 span is the one used by the marker call in the final plot of
    # this script.
    # NOTE(review): 5240/5326 are raw positions; if callers pass x values
    # already divided by ``maximum`` these markers fall far outside the data
    # range -- confirm the intended positions.
    plt.vlines([5240, 5326], ymin=0, ymax=1)
    plt.legend()
    plt.show()

# plot_total(x_train, y_train, y_pred_train, x_test, y_test, y_pred_test)

print(errors_test)

# Draw the observed series against raw row positions, with the model output
# over the observed range plus the 908-step extension.
n_observed = df_train.shape[0] + df_test.shape[0]
x_cut = np.arange(n_observed)
x = np.arange(n_observed + 908)
y_true = np.concatenate((y_train, y_test))

plt.figure()
plt.plot(x_cut, y_true, label="True data")
plt.plot(x, future_preds, label="Predictions")
plt.vlines([5240, 5326], ymin=0, ymax=1)
plt.legend()
plt.show()

+ 32
- 0
code/wilshire_5000/wilshire.py Ver fichero

@@ -0,0 +1,32 @@
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt


def parser(path):
    """Read a CSV file and fill missing numeric values by interpolation.

    Cells containing ``'.'`` are read as missing. Only numeric columns are
    interpolated: calling ``DataFrame.interpolate`` on a frame that still
    holds non-numeric columns is deprecated or rejected by recent pandas
    versions, and such columns were never modified by it anyway.

    Args:
        path: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        The loaded ``DataFrame`` with gaps in numeric columns interpolated.
    """
    df = pd.read_csv(path, na_values='.')
    print(df.shape)

    numeric_columns = df.select_dtypes(include="number").columns
    # Skip the call when there is nothing numeric: interpolating an empty
    # column selection can raise instead of being a no-op.
    if len(numeric_columns) > 0:
        df[numeric_columns] = df[numeric_columns].interpolate()
    # df = df.dropna()
    print(df.shape)
    # Dropping the first 1825 rows reproduces the same graph as in the article:
    # df = df.drop(labels=np.arange(1825))
    print(df.shape)
    return df

def preprocess(path, column="WILL5000INDFC", train_end=6544, test_start=6545,
               test_end=6629):
    """Load the series, scale it by its maximum and split it in two.

    Args:
        path: CSV file handed to ``parser``.
        column: Name of the column to normalise. Defaults to the column
            that used to be hard-coded.
        train_end: Exclusive end position of the training slice.
        test_start: Start position of the test slice.
        test_end: Exclusive end position of the test slice.

    Returns:
        A ``(df_train, df_test)`` tuple of row slices of the normalised frame.
    """
    df = parser(path)
    print(df)

    # ``df[:]`` is not an independent copy, so assigning a column on it can
    # trigger chained-assignment warnings or alias the parsed frame. Work on
    # an explicit copy instead.
    df_normalized = df.copy()
    df_normalized[column] = df_normalized[column] / np.max(df_normalized[column])

    # NOTE(review): with the default bounds the row at position ``train_end``
    # belongs to neither slice; the consumer script offsets its test inputs
    # by one to match, so the gap is kept as is -- confirm it is intended.
    df_train = df_normalized.iloc[:train_end]
    df_test = df_normalized.iloc[test_start:test_end]
    return df_train, df_test


Cargando…
Cancelar
Guardar