https://www.kaggle.com/datasets/hmavrodiev/london-bike-sharing-dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
for filename in filenames:
print(os.path.join(dirname, filename))
df = pd.read_csv('/kaggle/input/london-bike-sharing-dataset/london_merged.csv', parse_dates = ['timestamp'])
df.head()
df['timestamp']
df.shape
train = df.iloc[:17000, 1:2]
test = df.iloc[17000:17414, 1:2]
print(train.shape)
print(test.shape)
df['cnt'][:17000].plot(figsize = (15, 4), legend = True)
df['cnt'][17000:].plot(figsize = (15, 4), legend = True)
plt.legend(['train', 'test'])
plt.title('bike share demand')
plt.show()
from statsmodels.tsa.stattools import pacf
pacf = pacf(df['cnt'], nlags = 20, method = 'ols')
print(pacf)
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(pacf, lags = 8, method = 'ols', title = 'pa').show
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
train_scaled = sc.fit_transform(train)
train_scaled
X_train = []
y_train = []
for i in range(1, 17000):
X_train.append(train_scaled[i-1:i, 0])
y_train.append(train_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train.shape
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_train.shape
'Programming Language > Python' 카테고리의 다른 글
[Kaggle] London Bike, RNN and LSTM (0) | 2022.05.18 |
---|---|
[Kaggle] 런던 자전거 데이터 세트, 시계열 딥러닝 전 데이터 전처리 정리 (0) | 2022.05.13 |
[Kaggle] 런던자전거 - 머신러닝, 딥러닝 모델 비교해보기 (0) | 2022.05.11 |
[Kaggle] London Bike Sharing Dataset 딥러닝 모델 만들어보기 (0) | 2022.05.08 |
[Kaggle] London Bike Sharing Dataset 데이터 전처리 해보기 (0) | 2022.05.08 |