#!/usr/bin/env python
# coding: utf-8
# In[1]:
#0. Load the packages we will use
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM  # not used below
from tensorflow import keras
import tensorflow as tf
import seaborn as sns
# In[2]:
#1. Prepare the data
df = pd.read_csv('price.csv')
df.isna().sum()
# In[3]:
df.pop('date')      # drop the date column
df.pop('in')        # drop the 'in' column
df = df.dropna()    # dropna() is not in-place; reassign to actually drop rows with missing values
df.head(3)
|   | mean(°C) | high(°C) | low(°C) | prep(mm) | humidity(%) | whole_price | retail_price |
|---|---|---|---|---|---|---|---|
| 0 | 1.2 | -3.3 | 4.0 | 0.0 | 73.0 | 26000 | 4125 |
| 1 | 5.7 | 1.0 | 9.5 | 0.0 | 76.9 | 26000 | 4125 |
| 2 | 6.5 | 5.1 | 9.4 | 0.0 | 80.6 | 26000 | 4125 |
# In[4]:
#sns.pairplot(df[["mean(°C)","high(°C)","low(°C)","prep(mm)","humidity(%)","in","whole_price","retail_price"]], diag_kind="kde")
# In[5]:
train_dataset = df.sample(frac=0.8,random_state=0)
test_dataset = df.drop(train_dataset.index)
train_stats = df.describe()
train_stats.pop("retail_price")
train_stats = train_stats.transpose()
train_stats
|             | count  | mean | std | min | 25% | 50% | 75% | max |
|---|---|---|---|---|---|---|---|---|
| mean(°C)    | 1428.0 | 13.590126 | 10.888134 | -14.8 | 4.375 | 15.0 | 23.000 | 33.7 |
| high(°C)    | 1428.0 | 9.391737 | 11.028528 | -18.0 | 0.200 | 10.2 | 18.925 | 30.3 |
| low(°C)     | 1427.0 | 18.614085 | 11.000782 | -10.7 | 9.350 | 20.6 | 27.850 | 39.6 |
| prep(mm)    | 1428.0 | 3.065616 | 11.846932 | 0.0 | 0.000 | 0.0 | 0.200 | 144.5 |
| humidity(%) | 1428.0 | 57.811975 | 14.807831 | 21.8 | 46.800 | 57.8 | 67.650 | 97.0 |
| whole_price | 1428.0 | 26810.224090 | 8658.011133 | 10000.0 | 20000.000 | 26000.0 | 32250.000 | 55000.0 |
# In[6]:
train_dataset = df.sample(frac=0.8,random_state=0)
test_dataset = df.drop(train_dataset.index)
train_labels = train_dataset.pop("retail_price")
test_labels = test_dataset.pop("retail_price")
# In[7]:
# z-score normalization using the mean/std from train_stats above
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
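# A minimal sketch (not part of the original post) of what norm() does: it is a
# per-column z-score, so row 0's mean(°C) value of 1.2 ends up at
# (1.2 - 13.590126) / 10.888134 ≈ -1.14 standard deviations below the column mean.
example_z = (1.2 - train_stats.loc['mean(°C)', 'mean']) / train_stats.loc['mean(°C)', 'std']
print(example_z)  # ≈ -1.14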
# In[8]:
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
#2. Create the datasets
# y = train_labels.values   # earlier version: take the label column as y
# x = normed_train_data     # earlier version: take the feature columns as x
x_train = normed_train_data.values
y_train = train_labels.values
x_test = normed_test_data.values
y_test = test_labels.values
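# Optional sanity check (not in the original post): the arrays should reflect
# the ~80/20 row split over the 6 remaining feature columns.
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)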
# In[9]:
# Print a dot (.) at the end of every epoch to show training progress
class PrintDot(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 100 == 0: print('')
        print('.', end='')
# In[10]:
#3. Build the model
act_relu = False
if act_relu:
    model = keras.Sequential([
        keras.layers.Dense(32, activation='relu', input_shape=[len(train_dataset.keys())]),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.Dense(1)
    ])
else:
    model = keras.Sequential([
        keras.layers.Dense(32, activation=tf.nn.relu, input_shape=[len(train_dataset.keys())]),
        keras.layers.Dense(32, activation=tf.nn.relu),
        keras.layers.Dense(1)
    ])
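# Optional (not in the original post): print the layer stack to verify the
# 32 -> 32 -> 1 fully-connected architecture before compiling.
model.summary()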
# In[11]:
#4. Configure the training process
adam_opt = False
if adam_opt:
    # the plain 'adam' compile is superseded immediately by the tuned optimizer below
    # model.compile(loss='mse', optimizer='adam', metrics=['mse', 'binary_crossentropy'])
    adam = keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(loss='mse', optimizer=adam, metrics=['mse', 'binary_crossentropy'])
else:
    optimizer = tf.keras.optimizers.RMSprop(0.001)
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])
#5. Train the model
#hist = model.fit(x_train, y_train, epochs=50, batch_size=50)
#hist = model.fit(x_train, y_train, epochs=1000, batch_size=1000, validation_split=0.2, verbose=0, callbacks=[PrintDot()])
history = model.fit(normed_train_data, train_labels, epochs=100, validation_split=0.2, verbose=0, callbacks=[PrintDot()])
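# Optional (not in the original post): collect the per-epoch metrics into a
# DataFrame so the last few epochs can be read numerically, not just plotted.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())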
#6. Evaluate the model
scores = model.evaluate(normed_test_data, y_test)
# In[12]:
print(scores)
[678038.5297202797, 578.2003, 678038.5]
# In[13]:
fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'], 'y', label='train_loss')
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('loss')
loss_ax.legend(loc='upper left')
plt.show()
# In[14]:
def show_diff(y_predict, y_label, y_test, y_test_label, samples, y_value):
    fig, predict_ax = plt.subplots()
    predict_ax.plot(y_predict, 'y', label=y_label)
    predict_ax.plot(y_test, 'r', label=y_test_label)
    predict_ax.set_xlabel(samples)
    predict_ax.set_ylabel(y_value)
    predict_ax.legend(loc='upper left')
    plt.show()
# In[15]:
#7. Predict with the model
pre_test_y = model.predict(x_train)           # predictions on the training set
show_diff(pre_test_y, 'pred y', y_train, 'train y', 'samples', 'Y value')
y_predict = model.predict(x_test).flatten()   # predictions on the test set
show_diff(y_predict, 'y_predict', y_test, 'y_test', 'samples', 'y_value')
# In[16]:
#8. Save the predictions
df = pd.DataFrame(y_predict)
df.insert(1, 'y_test', y_test)
df.to_csv('predict_price.csv')
# In[17]:
# compute the difference between the *predicted* retail prices and the
# *actual* retail prices, then compute the percentage difference and
# the absolute percentage difference
diff = y_predict - y_test
percentDiff = (diff / y_test) * 100
absPercentDiff = np.abs(percentDiff)
# compute the mean and standard deviation of the absolute percentage
# difference
mean = np.mean(absPercentDiff)
std = np.std(absPercentDiff)
# finally, show some statistics on our model
#locale.setlocale(locale.LC_ALL, "en_US.UTF-8")
print("[INFO] mean: {:.2f}%, std: {:.2f}%".format(mean, std))
[INFO] mean: 13.37%, std: 11.71%