Przeglądaj źródła

Added an ARIMA baseline (still to be debugged) and modified the parser and related code to align more closely with the paper

Doriand
Doriand Petit 4 lat temu
rodzic
commit
ed599a96f0

+ 1
- 1
code/wilshire_5000/WILL5000INDFC2.csv Wyświetl plik

2020-12-29,185.51 2020-12-29,185.51
2020-12-30,186.01 2020-12-30,186.01
2020-12-31,186.76 2020-12-31,186.76
2021-01-01,.
2021-01-01,.

+ 27
- 8
code/wilshire_5000/nn.py Wyświetl plik

import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import wilshire import wilshire
import tensorflow_addons as tfa import tensorflow_addons as tfa
from statsmodels.tsa.arima.model import ARIMA


def snake(x):
    """Snake activation: x + sin^2(50*x)/50 (periodic component on top of identity)."""
    periodic = tf.math.sin(50 * x) ** 2
    return x + periodic / 50
def swish(x):
    """Swish activation: x * sigmoid(x)."""
    gate = tf.math.sigmoid(x)
    return x * gate


def arima_pred(x_train, y_test):
    """Walk-forward ARIMA(2,1,1) evaluation; returns the MSE over y_test.

    For each test point, a fresh ARIMA model is fit on the history seen so
    far, a one-step forecast is recorded, and the true value is then
    appended to the history (expanding-window / walk-forward validation).

    Parameters:
        x_train: sequence of training observations (the initial history).
        y_test: sequence of held-out observations to forecast one by one.

    Returns:
        float: mean squared error of the one-step forecasts against y_test.
    """
    # Copy so the caller's x_train list is not mutated by the appends below.
    history = list(x_train)
    preds = []
    for actual in y_test:
        model = ARIMA(history, order=(2, 1, 1))
        fitted = model.fit()
        # forecast() with no args yields a single one-step-ahead prediction.
        preds.append(fitted.forecast()[0])
        history.append(actual)
    # Bug fix: the original computed np.square(preds - test), comparing the
    # predictions against the loop variable instead of the test series
    # (and list-minus-int would raise a TypeError anyway).
    return float(np.mean(np.square(np.asarray(preds) - np.asarray(y_test))))


activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake] activations = [tf.keras.activations.relu,swish,sinus_cosinus,sinus,snake]
#activations = [snake] #activations = [snake]
models = [] models = []
errors_train,errors_test = [],[] errors_train,errors_test = [],[]
mean_y_train,mean_y_test,std_y_test=[],[],[] mean_y_train,mean_y_test,std_y_test=[],[],[]


df_train,df_test = wilshire.preprocess('WILL5000INDFC2.csv')
df_train,df_test,index = wilshire.preprocess('WILL5000INDFC2.csv')
x_train = np.arange(df_train.shape[0]) x_train = np.arange(df_train.shape[0])




x_test = np.arange(df_train.shape[0]+1,df_train.shape[0]+df_test.shape[0]+1) x_test = np.arange(df_train.shape[0]+1,df_train.shape[0]+df_test.shape[0]+1)
y_test = df_test["WILL5000INDFC"] y_test = df_test["WILL5000INDFC"]
y_test.to_numpy() y_test.to_numpy()

print("----")
print(y_test)
x_test=x_test / maximum x_test=x_test / maximum


#print(arima_pred(list(x_train),list(y_test)))


for activation in activations : for activation in activations :
y_train_5=[] y_train_5=[]
y_test_5=[] y_test_5=[]
errors_train_5=[] errors_train_5=[]
errors_test_5=[] errors_test_5=[]
for k in range(2):
for k in range(1):


model = tf.keras.Sequential() model = tf.keras.Sequential()


model.compile(optimizer=opt, loss='mse') model.compile(optimizer=opt, loss='mse')
model.build() model.build()
model.summary() model.summary()
model.fit(x_train,y_train, batch_size=2, epochs=20)
model.fit(x_train,y_train, batch_size=1, epochs=2)


y_pred_test = model.predict(x_test) y_pred_test = model.predict(x_test)
y_pred_train = model.predict(x_train) y_pred_train = model.predict(x_train)
plt.figure() plt.figure()
plt.plot(x,y_true,label="True data") plt.plot(x,y_true,label="True data")
plt.plot(x,y_pred,label="Predictions") plt.plot(x,y_pred,label="Predictions")
plt.vlines([6545,6629])
plt.vlines([index,index+85])
plt.legend() plt.legend()
plt.show() plt.show()




print(errors_test) print(errors_test)


x=np.arange(df_train.shape[0]+df_test.shape[0]+908)
#x=np.arange(df_train.shape[0]+df_test.shape[0]+908)
y_true = np.concatenate((y_train,y_test)) y_true = np.concatenate((y_train,y_test))
x_cut = np.arange(df_train.shape[0]+df_test.shape[0]) x_cut = np.arange(df_train.shape[0]+df_test.shape[0])
plt.figure() plt.figure()
plt.xticks(range(0, 9000, 250), range(1995, 2030, 1)) plt.xticks(range(0, 9000, 250), range(1995, 2030, 1))
plt.xlabel("Années") plt.xlabel("Années")
plt.ylabel("Index Willshire5000 normalisé") plt.ylabel("Index Willshire5000 normalisé")
plt.vlines([6545,6629],ymin=0,ymax=1)
plt.vlines([index,index+85],ymin=0,ymax=1,colors="r",label="Test Samples")
plt.legend() plt.legend()
plt.show()
plt.show()



+ 10
- 7
code/wilshire_5000/wilshire.py Wyświetl plik

def parser(path): def parser(path):
df = pd.read_csv(path,na_values='.') df = pd.read_csv(path,na_values='.')
print(df.shape) print(df.shape)
df = df.interpolate()
#df = df.dropna()
#df = df.interpolate()
df = df.dropna().reset_index(drop=True)
print(df.shape) print(df.shape)
#df = df.drop(labels=np.arange(1825)) ### To obtain the same graph than in the article #df = df.drop(labels=np.arange(1825)) ### To obtain the same graph than in the article
print(df.shape) print(df.shape)


def preprocess(path):
    """Load the Wilshire 5000 CSV, normalize the index column, and split train/test.

    Parameters:
        path: CSV file path passed through to parser().

    Returns:
        (df_train, df_test, index_train) where index_train is the row index
        of the 2020-01-31 sample — the boundary between train and test.
    """
    df = parser(path)
    df_normalized = df[:]
    # Scale prices into [0, 1] so the model trains on a bounded target.
    df_normalized["WILL5000INDFC"] = (
        df_normalized["WILL5000INDFC"] / np.max(df_normalized["WILL5000INDFC"])
    )

    # Split on a fixed calendar date rather than a hard-coded row number, so
    # the split survives changes in how much data parser() keeps.
    # NOTE(review): raises IndexError if 2020-01-31 is absent — confirm the
    # date always exists in the parsed data.
    index_train = int(df_normalized[df_normalized["DATE"] == "2020-01-31"].index.array[0])

    # df.plot()
    # plt.show()
    df_train = df_normalized[:index_train]
    # 84 test samples, matching the evaluation window used in the paper.
    df_test = df_normalized[index_train + 1:index_train + 85]

    # df_train.plot()
    # df_test.plot()
    # plt.show()
    return (df_train, df_test, index_train)



Ładowanie…
Anuluj
Zapisz