Skip to content

Commit

Permalink
first-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
zzpr committed Feb 22, 2023
1 parent 5425da8 commit 7afc93f
Show file tree
Hide file tree
Showing 10 changed files with 4,747 additions and 0 deletions.
1,460 changes: 1,460 additions & 0 deletions HousePrice/data/sample_submission.csv

Large diffs are not rendered by default.

1,460 changes: 1,460 additions & 0 deletions HousePrice/data/test.csv

Large diffs are not rendered by default.

1,461 changes: 1,461 additions & 0 deletions HousePrice/data/train.csv

Large diffs are not rendered by default.

Binary file added HousePrice/model.pth
Binary file not shown.
60 changes: 60 additions & 0 deletions HousePrice/src/DataPreprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
@Auth : zhang-zhang
@Time : 2023/1/28 10:26
@IDE : PyCharm
"""

import pandas as pd
import torch
from torch import nn

# Widen pandas' console output so wide frames print without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)


train_data = pd.read_csv('../data/train.csv')
test_data = pd.read_csv('../data/test.csv')
# print(train_data.shape, test_data.shape)  (1460, 81) (1459, 80)

# Peek at a few columns: `Id` is dropped below and `SalePrice` is the
# prediction target.
five = train_data.iloc[:2, [0, 1, 2, 3, -3, -2, -1]]
#    Id  MSSubClass MSZoning  LotFrontage SaleType SaleCondition  SalePrice
# 0   1          60       RL         65.0       WD        Normal     208500
# 1   2          20       RL         80.0       WD        Normal     181500

# Stack train and test features (without `Id`, and without the train-only
# `SalePrice` column) so both go through identical preprocessing.
all_data = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))
# print(all_data.shape)  # (2919, 79)

# --- Numeric columns ---
# Select by "is numeric" rather than "is not object": the latter would
# wrongly pick up text columns if pandas stores them with a dedicated string
# dtype instead of `object`.
numberic_feature = all_data.select_dtypes(include='number').columns
# 1. Mean-normalise: centre each column on its mean and divide by its range
#    (max - min). Note this is a range scaling, not a z-score.
all_data[numberic_feature] = all_data[numberic_feature].apply(
    lambda x: (x - x.mean()) / (x.max() - x.min())
)
# 2. After centring, a missing value is replaced by 0, i.e. the column mean.
all_data[numberic_feature] = all_data[numberic_feature].fillna(0)

# --- Categorical columns ---
# One-hot encode, with an extra indicator column per feature for NaN.
# print(all_data.shape)  # (2919, 79)
all_data = pd.get_dummies(all_data, dummy_na=True)
# print(all_data.shape)  # (2919, 331)

# Split back into train/test features and build the label column.
# `to_numpy(dtype='float64')` forces a homogeneous float array: recent pandas
# versions emit boolean dummy columns, and a mixed float/bool frame would
# otherwise become an object array that torch.tensor cannot convert.
n_train = len(train_data)
train_feature = torch.tensor(all_data.iloc[:n_train].to_numpy(dtype='float64'), dtype=torch.float64)
test_feature = torch.tensor(all_data.iloc[n_train:].to_numpy(dtype='float64'), dtype=torch.float64)
train_label = torch.tensor(train_data.iloc[:, -1].values.reshape(-1, 1), dtype=torch.float64)

# [features, labels] pair consumed by the training script.
train = [train_feature, train_label]

if __name__ == '__main__':
    print(type(train))







20 changes: 20 additions & 0 deletions HousePrice/src/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
@Auth : zhang-zhang
@Time : 2023/1/28 10:32
@IDE : PyCharm
"""
from torch import nn

class haonet(nn.Module):
    """Single linear layer that maps a feature vector to one output value.

    Args:
        in_features: Width of the input feature vector. Defaults to 331, the
            value that used to be hard-coded here (it matches the
            ``(2919, 331)`` shape noted in DataPreprocess.py after one-hot
            encoding).
    """

    def __init__(self, in_features=331):
        super().__init__()
        # Keep the `module` attribute and the Sequential wrapper so the
        # state_dict keys (`module.0.weight`, `module.0.bias`) are unchanged.
        self.module = nn.Sequential(
            nn.Linear(in_features, 1)
        )

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        return self.module(input)


if __name__ == '__main__':
    pass
71 changes: 71 additions & 0 deletions HousePrice/src/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
@Auth : zhang-zhang
@Time : 2023/1/28 11:25
@IDE : PyCharm
"""

from DataPreprocess import *
from model import *
import torch
import torchvision
from torch import optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler

# Run configuration.
EPOCHS = 100
batch_size = 64

# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# `train` is the [features, labels] pair from DataPreprocess; hold out 20%
# of the labelled rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(*train, test_size=0.2)

# model = haonet()
def get_model(in_features=331):
    """Build the linear regression model.

    Args:
        in_features: Number of input features per sample. Defaults to 331,
            the width that was previously hard-coded here.

    Returns:
        An ``nn.Sequential`` wrapping a single ``nn.Linear(in_features, 1)``.
    """
    return nn.Sequential(nn.Linear(in_features, 1))
model = get_model()
model.to(device)

lossfc = nn.MSELoss()
lossfc.to(device)

learning_rate = 1e-3
# Give the optimizer instance its own name: rebinding `optim` would shadow
# the imported `torch.optim` module for the rest of the script.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Convert each split to float32 on the model's device once, up front.
# Doing it per sample re-wrapped existing tensors on every step and never
# moved them to `device`, which fails with a device mismatch on CUDA.
x_train = torch.as_tensor(x_train, dtype=torch.float32).to(device)
y_train = torch.as_tensor(y_train, dtype=torch.float32).to(device)
x_test = torch.as_tensor(x_test, dtype=torch.float32).to(device)
y_test = torch.as_tensor(y_test, dtype=torch.float32).to(device)

step = 0
for epoch in range(1, EPOCHS+1):

    print(f'================第{epoch}轮================')

    model.train()

    # One optimisation step per sample: iterating the tensors yields a single
    # feature row and its one-element label at a time.
    for x, y in zip(x_train, y_train):
        output = model(x)
        loss = lossfc(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1

        if step % 100 == 0:
            print(f'step:{step}, loss:{loss.item()}')

    # Evaluate on the held-out split. This is a regression task, so report
    # the held-out MSE; counting exact float equality between prediction and
    # target (and never resetting that counter between epochs) gave a
    # meaningless "accuracy".
    model.eval()
    with torch.no_grad():
        val_loss = lossfc(model(x_test), y_test).item()
    print(f'val_loss:{val_loss}')

torch.save(model.state_dict(), '../model.pth')
178 changes: 178 additions & 0 deletions LeafClassifier/src/precess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
"""
@Auth : zhang-zhang
@Time : 2023/2/1 11:57
@IDE : PyCharm
"""

import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torch
import torchvision
from torch import nn
from train import *
import warnings
warnings.filterwarnings("ignore")

# 处理图像和csv数据

# Load the training CSV; its `label` column defines the set of classes.
train = pd.read_csv('../classify-leaves/train.csv')
# class name -> integer index, numbered in order of first appearance
class2num = {name: idx for idx, name in enumerate(train['label'].unique())}
# integer index -> class name
num2class = {idx: name for name, idx in class2num.items()}
# print(len(num2class)) # 176

class myDataset(Dataset):
    """Image dataset driven by a CSV with `image` (and `label`) columns.

    Args:
        csv_path: Path of the CSV file to read.
        file_path: Directory that the CSV's `image` entries are joined onto.
        mode: ``'train'`` keeps the first ``1 - valid_ratio`` share of rows,
            ``'valid'`` keeps the remaining rows, ``'test'`` keeps every row
            and yields images only.
        valid_ratio: Fraction of rows reserved for validation.
        resize: ``(height, width)`` pair. It is stored on the instance but
            not applied by the transform pipeline, which uses fixed sizes.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values.
    """

    _MODES = ('train', 'valid', 'test')

    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.25, resize=(256, 256)):
        # Fail fast with a clear message; an unknown mode would otherwise only
        # show up later as a missing `images` attribute.
        if mode not in self._MODES:
            raise ValueError(f'mode must be one of {self._MODES}, got {mode!r}')

        self.resize_height = resize[0]
        self.resize_width = resize[1]
        self.file_path = file_path
        self.mode = mode
        self.data = pd.read_csv(csv_path)
        self.data_len = len(self.data.index)
        self.train_len = int(self.data_len * (1 - valid_ratio))
        # Built on first item access and then reused, instead of being
        # reconstructed for every sample.
        self._transform = None

        if mode == 'train':
            # `.loc` slicing is end-inclusive, hence train_len - 1.
            self.train_image = np.asarray(self.data.loc[:self.train_len-1, 'image'])
            self.train_label = np.asarray(self.data.loc[:self.train_len-1, 'label'])
            self.images = self.train_image
            self.labels = self.train_label
        elif mode == 'valid':
            self.valid_image = np.asarray(self.data.loc[self.train_len:, 'image'])
            self.valid_label = np.asarray(self.data.loc[self.train_len:, 'label'])
            self.images = self.valid_image
            self.labels = self.valid_label
        else:  # 'test': no label column is read
            self.test_image = np.asarray(self.data.loc[:, 'image'])
            self.images = self.test_image

        self.images_len = len(self.images)

        print('Finished reading %s dataset. %d number samples found.' % (mode, self.images_len))

    def _build_transform(self):
        """Create the preprocessing pipeline that matches ``self.mode``."""
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        if self.mode == 'train':
            # Random crop and flip augmentation for training only.
            return transforms.Compose([transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       normalize])
        # Deterministic resize and centre crop for validation and test.
        return transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   normalize])

    def __getitem__(self, index):
        """Return the transformed image, plus its class index unless in test mode."""
        if self._transform is None:
            self._transform = self._build_transform()

        image_path = self.images[index]
        # The context manager closes the file handle; convert('RGB') makes
        # sure the 3-channel Normalize also works for grayscale/RGBA files.
        with Image.open(os.path.join(self.file_path, image_path)) as raw:
            image = self._transform(raw.convert('RGB'))

        if self.mode == 'test':
            return image

        label = self.labels[index]
        # Class names are mapped to indices through the module-level class2num.
        label_num = class2num[label]
        return image, label_num

    def __len__(self):
        """Return the number of samples selected for this mode."""
        return self.images_len

train_path = '../classify-leaves/train.csv'
test_path = '../classify-leaves/test.csv'
image_path = '../classify-leaves/'

# Train/validation preprocessing pipelines, kept as a module-level name.
# myDataset builds its own equivalent pipelines internally.
data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    "val": transforms.Compose([transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

train_dataset = myDataset(train_path, image_path, mode='train')
valid_dataset = myDataset(train_path, image_path, mode='valid')
test_dataset = myDataset(test_path, image_path, mode='test')

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Number of batches in each loader.
print(len(train_loader), len(valid_loader), len(test_loader))

# To expose only the first two GPUs:
# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
# NOTE: the name `decive` (sic) is kept because the train_model call later in
# this file refers to it.
decive = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = torchvision.models.resnet34(pretrained=True, progress=True)
# With more than one GPU, the model could be wrapped for data parallelism:
# if torch.cuda.device_count() > 1:
#     model = nn.DataParallel(model)
# Replace the pretrained classification head with one sized for this task.
# Without this the network keeps its original output width while the
# optimizer below trains exactly that head. The input width is read from the
# existing layer rather than hard-coded.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=len(class2num), bias=True)
model.to(decive)

criterion = nn.CrossEntropyLoss().to(decive)

LR = 1e-3
# Only the new head's parameters are handed to the optimizer.
optim = torch.optim.Adam(model.fc.parameters(), lr=LR)
Epochs = 50

def train_model(Epochs, device, model, criterion, optim, DataLoaders, ValidLoaders, ValidLen):
    """Train ``model`` for ``Epochs`` epochs, validating and checkpointing each epoch.

    Args:
        Epochs: Number of epochs to run.
        device: Device that each batch is moved to before the forward pass.
        model: Network to train; modified in place.
        criterion: Loss called as ``criterion(pred, label)``.
        optim: Optimizer already bound to the parameters to update.
        DataLoaders: Iterable of ``(img, label)`` training batches.
        ValidLoaders: Iterable of ``(img, label)`` validation batches.
        ValidLen: Total number of validation samples (accuracy denominator).

    Side effects:
        Writes ``./checkpoint{epoch}.cp`` after every epoch and, at the end,
        ``./model{accuracy:.3f}.pth`` named after the last epoch's accuracy.
    """
    glo_step = 0
    # Bound up front so the final save still works when Epochs is 0.
    accuracy = 0.0
    for epoch in range(1, Epochs+1):
        tik = time()
        print(f'=====================第{epoch}轮=====================')

        model.train()
        tik_train = time()
        for img, label in DataLoaders:
            img, label = img.to(device), label.to(device)
            pred = model(img)
            loss = criterion(pred, label)
            optim.zero_grad()
            loss.backward()
            optim.step()

            # Log every 100th batch. The counter has to advance for this to
            # hold; left at 0 it logged on every single batch.
            if glo_step % 100 == 0:
                print(f'Loss:{loss.item():.2f}, Time:{time()- tik_train}')
                tik_train = time()
            glo_step += 1

        # Save a resumable checkpoint at the end of every epoch.
        checkpoint = {'model_state_dict': model.state_dict(),
                      'optim_state_dict': optim.state_dict()}
        torch.save(checkpoint, f'./checkpoint{epoch}.cp')

        model.eval()
        right_num = 0
        with torch.no_grad():
            for img, label in ValidLoaders:
                img, label = img.to(device), label.to(device)
                pred = model(img)
                # Count predictions that match the label; summing the argmax
                # indices themselves would add up class ids, not hits.
                right_num += (pred.argmax(1) == label).sum().item()

        accuracy = right_num / ValidLen if ValidLen else 0.0
        print(f'Accuracy:{accuracy}, Time:{time() - tik}')

    # NOTE(review): assumed to run once after the last epoch; the original
    # nesting of this statement is not recoverable from the source as shown.
    torch.save(model.state_dict(), f'./model{accuracy:.3f}.pth')
# Start training with the module-level configuration, then print a marker
# once it returns.
train_model(
    Epochs=Epochs,
    device=decive,
    model=model,
    criterion=criterion,
    optim=optim,
    DataLoaders=train_loader,
    ValidLoaders=valid_loader,
    ValidLen=len(valid_dataset),
)
print('================END================')






21 changes: 21 additions & 0 deletions LeafClassifier/src/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
@Auth : zhang-zhang
@Time : 2023/2/17 10:59
"""

import torch
from time import time
import torchvision
from torch import nn
from torchvision import transforms











16 changes: 16 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# This is a sample Python script.

# Press ⌃R to execute it or replace it with your code.
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.


def print_hi(name):
    """Print the greeting ``Hi, <name>`` to standard output."""
    greeting = 'Hi, {}'.format(name)
    print(greeting)


# Run the sample greeting only when this file is executed as a script.
if __name__ == '__main__':
    print_hi('PyCharm')

# See PyCharm help at https://www.jetbrains.com/help/pycharm/

0 comments on commit 7afc93f

Please sign in to comment.