Skip to content

Commit

Permalink
first-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
zzpr committed Feb 22, 2023
1 parent 7afc93f commit 7fc282d
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 34 deletions.
Binary file modified HousePrice/model.pth
Binary file not shown.
119 changes: 113 additions & 6 deletions HousePrice/src/DataPreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import numpy as np

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
Expand Down Expand Up @@ -40,17 +44,120 @@
# 处理非数值型数据
# print(all_data.shape) # (2919, 79)
all_data = pd.get_dummies(all_data, dummy_na=True)
# print(all_data.shape) # (2919, 331)

# 将处理好的数据分成训练集和测试集,以及标签
train_feature = torch.tensor(all_data.iloc[:len(train_data)].values, dtype=torch.float64)
test_feature = torch.tensor(all_data.iloc[len(train_data):].values, dtype=torch.float64)
train_label = torch.tensor(train_data.iloc[:, -1].values.reshape(-1, 1), dtype=torch.float64)
train_feature = all_data.iloc[:len(train_data)]
test_feature = all_data.iloc[len(train_data):]
train_label = train_data.iloc[:, -1]

class myDataset(Dataset):
    """Positional train/validation split over a feature table and its labels.

    The first ``int(len(dataset) * (1 - valid_ration))`` rows form the
    training split and the remaining rows form the validation split, so the
    two splits never share a sample and together cover every row.

    Args:
        dataset: Feature table; must support ``len()`` and positional
            ``.iloc`` slicing (e.g. a pandas DataFrame).
        labels: Targets aligned row-for-row with ``dataset``; must support
            positional ``.iloc`` slicing (e.g. a pandas Series).
        mode: ``'train'`` exposes the leading rows, ``'valid'`` exposes the
            trailing rows.
        valid_ration: Fraction of rows reserved for the validation split.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'valid'``.
    """

    def __init__(self, dataset, labels, mode='train', valid_ration=0.25):
        # Fail early with a clear message instead of an AttributeError on
        # ``self.datas`` further down.
        if mode not in ('train', 'valid'):
            raise ValueError(
                f"mode must be 'train' or 'valid', got {mode!r}"
            )

        # Which split this instance exposes.
        self.mode = mode
        # Full (unsplit) features and labels.
        self.data = dataset
        self.label = labels
        # Total number of rows.
        self.data_len = len(self.data)
        # Number of rows assigned to the training split.
        self.train_len = int(self.data_len * (1 - valid_ration))

        # Slice by position. ``.iloc`` is end-exclusive, whereas label-based
        # ``.loc[:n]`` includes label ``n`` and would put that row in both
        # splits; ``.iloc`` also does not depend on the index being 0..n-1.
        if self.mode == 'train':
            self.train_data = np.asarray(self.data.iloc[:self.train_len])
            self.train_label = np.asarray(self.label.iloc[:self.train_len])
            self.datas = self.train_data
            self.labels = self.train_label
        else:
            self.valid_data = np.asarray(self.data.iloc[self.train_len:])
            self.valid_label = np.asarray(self.label.iloc[self.train_len:])
            self.datas = self.valid_data
            self.labels = self.valid_label

        self.datas_len = len(self.datas)
        print(f'Finished reading {self.mode} dataset. {self.datas_len} number samples found.')

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        data = self.datas[index]
        label = self.labels[index]
        return data, label

    def __len__(self):
        """Return the number of samples in the selected split."""
        return self.datas_len

# Build the training and validation datasets from the same feature/label
# tables; the ``mode`` argument selects which portion each one exposes.
train_dataset = myDataset(train_feature, train_label, mode='train')
valid_dataset = myDataset(train_feature, train_label, mode='valid')
# print(train_dataset, valid_dataset)

# Mini-batch size shared by both loaders.
BATCH_SIZE = 16
# Wrap each dataset in a DataLoader; both are created with shuffle=True.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True)
# print(len(train_loader), len(valid_loader))




# class Network(object):
#
# def __init__(self, num_of_weights): # 初始化权重值
# # 随机产生w的初始值,为了保持程序每次运行结果的一致性,设置固定的随机数种子
# np.random.seed(0)
# self.w = np.random.randn(num_of_weights, 1) #初始参数一般符合标准正态分布
# self.b = 0.
#
# def forward(self, x):
# z = np.dot(x, self.w) + self.b
# return z
#
# def loss(self, z, y):
# error = z - y
# cost = error * error
# cost = np.mean(cost)
# return cost
#
# def gradient(self, x, y, z):
# gradient_w = (z - y) * x
# gradient_w = np.mean(gradient_w, axis=0)
# gradient_w = gradient_w[:, np.newaxis]
# gradient_b = (z - y)
# gradient_b = np.mean(gradient_b)
# return gradient_w, gradient_b
#
# def update(self, gradient_w, gradient_b, eta=0.01):
# self.w = self.w - eta * gradient_w
# self.b = self.b - eta * gradient_b
#
# def train(self, train_data, train_label, valid_data, valid_label, epoch_num=100, batch_size=10, lr=0.01):
# n = len(train_data)
# # losses = []
# step = 0
# for epoch in range(1, epoch_num+1):
# print(f'=============第{epoch}轮=============')
# # 打乱数据集并分批
# np.random.shuffle(train_data)
# for k in range(0, n, batch_size):
# mini_batches = [train_data[k:k+batch_size]]
# for iter_id, mini_batch in enumerate(mini_batches):
# x = mini_batch[:, :-1]
# y = mini_batch[:, -1:]
# z = self.forward(x)
# L = self.loss(z, y)
# gradient_w, gradient_b = self.gradient(x, y, z)
# self.update(gradient_w, gradient_b, lr)
# # losses.append(L)
# step += 1
#
# if step % 100 == 0:
# print(f'Loss:{L.item()}')
# model = Network(331)
#
# x_train, x_valid, y_train, y_valid = train_test_split(train[0], train[1], test_size=.20)
# print(x_train.shape, x_valid.shape) # torch.Size([1168, 331]) torch.Size([292, 331])


train = [train_feature, train_label]

if __name__ == '__main__':
print(type(train))
pass



Expand Down
6 changes: 4 additions & 2 deletions HousePrice/src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,14 @@ class haonet(nn.Module):
def __init__(self):
super(haonet, self).__init__()
self.module = nn.Sequential(
nn.Linear(331, 1)
nn.Linear(331, 128),
nn.Linear(128, 1)
)

def forward(self, input):
    """Run ``input`` through ``self.module`` and return the result."""
    return self.module(input)

if __name__ == '__main__':
pass
model = haonet()
print(model)
47 changes: 21 additions & 26 deletions HousePrice/src/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,28 @@
from DataPreprocess import *
from model import *
import torch
import numpy as np
import torchvision
from torch import optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler

batch_size = 64
EPOCHS = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

x_train, x_test, y_train, y_test = train_test_split(train[0], train[1], test_size=.20)
# print(x_train.shape)
# train_loader = DataLoader(train, batch_size=batch_size, sampler=)
# test_loader = DataLoader(test_feature, batch_size=batch_size)

# model = haonet()
def get_model():
    """Build the baseline regressor: one linear layer mapping 331 inputs to 1 output."""
    return nn.Sequential(nn.Linear(331, 1))
model = get_model()
model = haonet()
model.to(device)

lossfc = nn.MSELoss()
lossfc.to(device)
# lossfc = nn.L1Loss()
# lossfc.to(device)

def _element_count(shape):
    """Return the number of elements implied by a shape tuple."""
    count = 1
    for dim in shape:
        count *= dim
    return count


def loss(z, y):
    """Return the mean squared error between predictions and targets.

    Args:
        z: Predictions (tensor or array).
        y: Targets. If ``y`` holds the same number of elements as ``z`` but
            has a different shape (e.g. ``(B,)`` against ``(B, 1)``), it is
            reshaped to ``z``'s shape before subtracting.

    Returns:
        The mean of the element-wise squared differences.
    """
    z_shape = tuple(getattr(z, "shape", ()))
    y_shape = tuple(getattr(y, "shape", ()))
    # Without this alignment, (B, 1) - (B,) broadcasts to a (B, B) matrix and
    # the mean is taken over every prediction/target pair instead of over
    # matching pairs.
    if (
        z_shape != y_shape
        and hasattr(y, "reshape")
        and _element_count(z_shape) == _element_count(y_shape)
    ):
        y = y.reshape(z_shape)
    error = z - y
    return (error * error).mean()


learning_rate = 1e-3
optim = optim.Adam(model.parameters(), lr=learning_rate)
Expand All @@ -42,30 +40,27 @@ def get_model():
print(f'================第{epoch}轮================')

model.train()

for x, y in zip(x_train, y_train):
x = torch.tensor(x, dtype=float).to(torch.float32)
y = torch.tensor(y).to(torch.float32)
for x, y in train_loader:
x, y = x.to(torch.float32), y.to(torch.float32)
output = model(x)
loss = lossfc(output, y)
L = loss(output, y)
optim.zero_grad()
loss.backward()
L.backward()
optim.step()
step += 1

if step % 100 == 0:
print(f'step:{step}, loss:{loss.item()}')
print(f'step:{step}, loss:{L.item()}')

model.eval()
with torch.no_grad():
for x, y in zip(x_test, y_test):
x = torch.tensor(x, dtype=float).to(torch.float32)
y = torch.tensor(y).to(torch.float32)
for x, y in valid_loader:
x, y = x.to(torch.float32), y.to(torch.float32)
output = model(x)
# print(output)
right_num += (output.item() == y).sum()
# right_num += (output.item() == y).sum()

accuracy = right_num / len(x_test)
print(f'accuracy:{accuracy}')
# accuracy = right_num / len(valid_dataset)
# print(f'accuracy:{accuracy}')

torch.save(model.state_dict(), '../model.pth')

0 comments on commit 7fc282d

Please sign in to comment.