| 123456789101112131415161718192021222324 |
- import numpy as np
- import tensorflow as tf
- import pandas as pd
- import matplotlib.pyplot as plt
-
-
def parser(path):
    """Load the CSV at *path* and discard incomplete rows.

    A cell containing only ``.`` is read as a missing value. Any row with a
    missing value is dropped (not interpolated), and the index is renumbered
    from 0 so that positions and labels coincide.
    """
    raw = pd.read_csv(path, na_values='.')
    # Gaps such as bank holidays are removed rather than interpolated.
    complete = raw.dropna()
    return complete.reset_index(drop=True)
-
def preprocess(path, column="WILL5000INDFC", split_date="2020-01-31",
               test_size=84, date_column="DATE"):
    """Load, scale and split a time series into train and test frames.

    The CSV is read through ``parser``; ``column`` is divided by its maximum
    so the values are scaled relative to the largest observation. Note that
    the maximum is taken over the whole column, i.e. over rows that end up in
    both the train and the test split.

    Args:
        path: Location of the CSV file, passed straight to ``parser``.
        column: Name of the value column to normalize.
        split_date: Value of ``date_column`` marking the end of the training
            period. It is compared with ``==`` against the column as loaded.
        test_size: Maximum number of rows in the test frame. The default of
            84 reproduces the original ``index_train+1:index_train+85`` slice
            (annotated in the original as covering 02-01 to 05-31).
        date_column: Name of the column holding the dates.

    Returns:
        A tuple ``(df_train, df_test, index_train)`` where ``index_train`` is
        the integer index of the first row whose date equals ``split_date``,
        ``df_train`` holds the rows before that index and ``df_test`` holds up
        to ``test_size`` rows after it.

    Raises:
        IndexError: If no row has ``date_column == split_date`` (for example
            because that row was dropped for containing a missing value).
    """
    # Work on an explicit copy: assigning a column on a ``df[:]`` slice is
    # what triggers pandas' SettingWithCopyWarning.
    df_normalized = parser(path).copy()
    df_normalized[column] = df_normalized[column] / df_normalized[column].max()

    matches = df_normalized.index[df_normalized[date_column] == split_date]
    if len(matches) == 0:
        raise IndexError(
            f"split date {split_date!r} not found in column {date_column!r}"
        )
    index_train = int(matches[0])

    # NOTE(review): the row at ``index_train`` itself is in neither split;
    # this matches the original slicing and is kept for compatibility.
    df_train = df_normalized[:index_train]
    test_start = index_train + 1
    df_test = df_normalized[test_start:test_start + test_size]

    return df_train, df_test, index_train
|