-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
4,747 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
""" | ||
@Auth : zhang-zhang | ||
@Time : 2023/1/28 10:26 | ||
@IDE : PyCharm | ||
""" | ||
|
||
import pandas as pd | ||
import torch | ||
from torch import nn | ||
|
||
# Preprocessing script for the house-price data: loads the train/test CSVs,
# scales the numeric columns, one-hot encodes the categorical ones and exposes
# the result as torch tensors (train_feature, test_feature, train_label, train).

# Widen pandas' console output so wide frames print without being truncated.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

train_data = pd.read_csv('../data/train.csv')
test_data = pd.read_csv('../data/test.csv')
# print(train_data.shape, test_data.shape)  # author recorded (1460, 81) (1459, 80)

# Peek at a few columns: Id is not a feature, SalePrice is the value to predict.
five = train_data.iloc[:2, [0, 1, 2, 3, -3, -2, -1]]
#    Id  MSSubClass MSZoning  LotFrontage SaleType SaleCondition  SalePrice
# 0   1          60       RL         65.0       WD        Normal     208500
# 1   2          20       RL         80.0       WD        Normal     181500

# Stack train and test features so both get identical scaling and dummy columns.
# The first column (Id) is dropped from both; the last train column (SalePrice)
# is the label and is dropped as well.
all_data = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))
# print(all_data.shape)  # author recorded (2919, 79)

# ---- numeric columns ----
numberic_feature = all_data.dtypes[all_data.dtypes != 'object'].index
# 1. Mean-normalise every numeric column: (x - mean) / (max - min).
#    This centres and rescales the values; it does not make them normally
#    distributed. A constant column yields 0/0 = NaN here, which step 2
#    turns into 0.
all_data[numberic_feature] = all_data[numberic_feature].apply(
    lambda x: (x - x.mean()) / (x.max() - x.min())
)
# 2. After centring, the column mean is 0, so filling NaN with 0 imputes the mean.
all_data[numberic_feature] = all_data[numberic_feature].fillna(0)

# ---- non-numeric columns ----
# One-hot encode, with an extra indicator column for missing values.
# dtype=float is required: recent pandas emits bool dummies by default, which
# makes `.values` below an object array that torch.tensor() cannot convert.
all_data = pd.get_dummies(all_data, dummy_na=True, dtype=float)
# print(all_data.shape)  # author recorded (2919, 331)

# Split the processed frame back into train/test features and build the labels.
n_train = len(train_data)
train_feature = torch.tensor(all_data.iloc[:n_train].values, dtype=torch.float64)
test_feature = torch.tensor(all_data.iloc[n_train:].values, dtype=torch.float64)
# Labels are reshaped to a column vector: one SalePrice per row.
train_label = torch.tensor(train_data.iloc[:, -1].values.reshape(-1, 1), dtype=torch.float64)

# [features, labels] pair consumed by the training script.
train = [train_feature, train_label]

if __name__ == '__main__':
    print(type(train))
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
""" | ||
@Auth : zhang-zhang | ||
@Time : 2023/1/28 10:32 | ||
@IDE : PyCharm | ||
""" | ||
from torch import nn | ||
|
||
class haonet(nn.Module):
    """Single linear layer mapping a feature vector to the regression output.

    Args:
        in_features: width of each input row. Defaults to 331, the value the
            original code hard-coded.
        out_features: number of predicted values per row. Defaults to 1.
    """

    def __init__(self, in_features=331, out_features=1):
        super().__init__()
        # Kept inside nn.Sequential under the name `module` so state-dict keys
        # ("module.0.weight", "module.0.bias") stay loadable.
        self.module = nn.Sequential(
            nn.Linear(in_features, out_features)
        )

    def forward(self, input):
        """Apply the linear layer to `input` and return the result."""
        output = self.module(input)
        return output


if __name__ == '__main__':
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
""" | ||
@Auth : zhang-zhang | ||
@Time : 2023/1/28 11:25 | ||
@IDE : PyCharm | ||
""" | ||
|
||
from DataPreprocess import * | ||
from model import * | ||
import torch | ||
import torchvision | ||
from torch import optim | ||
from torch.utils.data import DataLoader | ||
from sklearn.model_selection import train_test_split | ||
from torch.utils.data.sampler import SubsetRandomSampler | ||
|
||
# Training script for the house-price regressor: splits the preprocessed data,
# fits a linear model with Adam on mini-batches and reports the validation MSE
# after every epoch.

batch_size = 64
EPOCHS = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hold out 20% of the labelled rows for validation.
x_train, x_test, y_train, y_test = train_test_split(train[0], train[1], test_size=.20)


def _as_float32(data):
    """Return `data` as a float32 tensor (the model's parameters are float32)."""
    return torch.as_tensor(data).to(torch.float32)


x_train, y_train = _as_float32(x_train), _as_float32(y_train)
x_test, y_test = _as_float32(x_test), _as_float32(y_test)

# Mini-batch loaders; only the training data is reshuffled each epoch.
train_loader = DataLoader(torch.utils.data.TensorDataset(x_train, y_train),
                          batch_size=batch_size, shuffle=True)
test_loader = DataLoader(torch.utils.data.TensorDataset(x_test, y_test),
                         batch_size=batch_size)


def get_model(in_features=331):
    """Build the regressor: one linear layer from `in_features` inputs to 1 output."""
    model = nn.Sequential(nn.Linear(in_features, 1))
    return model


# Size the input layer from the data instead of relying on the 331 default.
model = get_model(x_train.shape[1])
model.to(device)

lossfc = nn.MSELoss()
lossfc.to(device)

learning_rate = 1e-3
# Named `optimizer` so the imported `optim` module is not shadowed.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

step = 0
for epoch in range(1, EPOCHS+1):
    print(f'================第{epoch}轮================')

    model.train()
    for x, y in train_loader:
        # Batches must live on the same device as the model.
        x, y = x.to(device), y.to(device)
        output = model(x)
        loss = lossfc(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1

        if step % 100 == 0:
            print(f'step:{step}, loss:{loss.item()}')

    # Validation: this is a regression task, so exact-match "accuracy" is
    # meaningless; report the mean squared error instead. The accumulator is
    # reset every epoch so epochs do not contaminate each other.
    model.eval()
    squared_error = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            squared_error += ((model(x) - y) ** 2).sum().item()

    valid_mse = squared_error / len(x_test)
    print(f'valid_mse:{valid_mse}')

torch.save(model.state_dict(), '../model.pth')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
""" | ||
@Auth : zhang-zhang | ||
@Time : 2023/2/1 11:57 | ||
@IDE : PyCharm | ||
""" | ||
|
||
import pandas as pd | ||
import numpy as np | ||
from PIL import Image | ||
import matplotlib.pyplot as plt | ||
import os | ||
from torch.utils.data import Dataset | ||
from torch.utils.data import DataLoader | ||
from torchvision import transforms | ||
import torch | ||
import torchvision | ||
from torch import nn | ||
from train import * | ||
import warnings | ||
warnings.filterwarnings("ignore") | ||
|
||
# 处理图像和csv数据 | ||
|
||
# Load the training labels and build the two label lookup tables.
train = pd.read_csv('../classify-leaves/train.csv')
_unique_labels = train['label'].unique()
# class name -> index, numbered in order of first appearance in the CSV
class2num = {name: idx for idx, name in enumerate(_unique_labels)}
# index -> class name (inverse of class2num)
num2class = {idx: name for name, idx in class2num.items()}
# print(len(num2class)) # 176 | ||
|
||
class myDataset(Dataset):
    """Image dataset backed by a CSV with an 'image' column (and 'label' for train/valid).

    Args:
        csv_path: CSV file listing the image paths (and labels).
        file_path: directory the 'image' paths are relative to.
        mode: 'train' uses the first (1 - valid_ratio) share of the rows,
            'valid' uses the remaining rows, 'test' uses every row and
            yields images only.
        valid_ratio: share of rows held out for validation.
        resize: (height, width); stored on the instance but not used by the
            transforms, which crop to 224.

    Raises:
        ValueError: if `mode` is not 'train', 'valid' or 'test'.
    """

    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.25, resize=(256, 256)):
        # Fail early with a clear message instead of an AttributeError later on.
        if mode not in ('train', 'valid', 'test'):
            raise ValueError("mode must be 'train', 'valid' or 'test', got %r" % (mode,))

        self.resize_height = resize[0]
        self.resize_width = resize[1]
        self.file_path = file_path
        self.mode = mode
        self.data = pd.read_csv(csv_path)
        self.data_len = len(self.data.index)
        self.train_len = int(self.data_len * (1 - valid_ratio))

        # .loc slices are label-based and inclusive, hence the "- 1" for train.
        if mode == 'train':
            self.train_image = np.asarray(self.data.loc[:self.train_len-1, 'image'])
            self.train_label = np.asarray(self.data.loc[:self.train_len-1, 'label'])
            self.images = self.train_image
            self.labels = self.train_label
        elif mode == 'valid':
            self.valid_image = np.asarray(self.data.loc[self.train_len:, 'image'])
            self.valid_label = np.asarray(self.data.loc[self.train_len:, 'label'])
            self.images = self.valid_image
            self.labels = self.valid_label
        else:
            self.test_image = np.asarray(self.data.loc[:, 'image'])
            self.images = self.test_image

        self.images_len = len(self.images)

        # Build the transform pipeline once rather than on every __getitem__.
        # The mean/std values are the ones commonly paired with torchvision's
        # pretrained models.
        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        if mode == 'train':
            # Random crop + flip as augmentation for training only.
            self.transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                                 transforms.RandomHorizontalFlip(),
                                                 transforms.ToTensor(),
                                                 normalize])
        else:
            # Deterministic resize + centre crop for validation and test.
            self.transform = transforms.Compose([transforms.Resize(256),
                                                 transforms.CenterCrop(224),
                                                 transforms.ToTensor(),
                                                 normalize])

        print('Finished reading %s dataset. %d number samples found.' % (mode, self.images_len))

    def __getitem__(self, index):
        """Return the transformed image, plus its class index unless mode is 'test'."""
        image_path = self.images[index]
        # The context manager closes the file handle; convert('RGB') guarantees
        # the three channels the 3-value Normalize expects.
        with Image.open(os.path.join(self.file_path, image_path)) as raw:
            image = self.transform(raw.convert('RGB'))

        if self.mode == 'test':
            return image

        label = self.labels[index]
        label_num = class2num[label]
        return image, label_num

    def __len__(self):
        """Number of samples in the selected split."""
        return self.images_len
|
||
# Setup for the leaf-classification run: datasets, loaders, model, loss, optimizer.

train_path = '../classify-leaves/train.csv'
test_path = '../classify-leaves/test.csv'
image_path = '../classify-leaves/'

# Reference transform pipelines; myDataset applies equivalent ones internally.
data_transform = {
    "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ToTensor(),
                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
    "val": transforms.Compose([transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

train_dataset = myDataset(train_path, image_path, mode='train')
valid_dataset = myDataset(train_path, image_path, mode='valid')
test_dataset = myDataset(test_path, image_path, mode='test')

# Only the training data needs reshuffling; evaluation order does not affect accuracy.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Number of batches in each loader.
print(len(train_loader), len(valid_loader), len(test_loader))

# To restrict the run to the first two GPUs:
# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
# NOTE: the name `decive` (sic) is kept because the training call below uses it.
decive = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = torchvision.models.resnet34(pretrained=True, progress=True)
# Only the classifier head is optimised below, so freeze the backbone to avoid
# computing gradients that are never applied.
for param in model.parameters():
    param.requires_grad = False
# Replace the 1000-way ImageNet head with one sized for this dataset. The input
# width is read from the existing layer rather than hard-coded.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=len(class2num), bias=True)
# With more than one GPU the model could be wrapped in nn.DataParallel:
# if torch.cuda.device_count() > 1:
#     model = nn.DataParallel(model)
model.to(decive)

criterion = nn.CrossEntropyLoss().to(decive)

LR = 1e-3
optim = torch.optim.Adam(model.fc.parameters(), lr=LR)
Epochs = 50
|
||
def train_model(Epochs, device, model, criterion, optim, DataLoaders, ValidLoaders, ValidLen):
    """Train `model`, validating and checkpointing after every epoch.

    Args:
        Epochs: number of passes over `DataLoaders`.
        device: device each batch is moved to before the forward pass.
        model: network to train; updated in place.
        criterion: loss called as criterion(pred, label).
        optim: optimizer stepping the trainable parameters.
        DataLoaders: iterable of (img, label) training batches.
        ValidLoaders: iterable of (img, label) validation batches.
        ValidLen: total number of validation samples (accuracy denominator).

    Side effects:
        Writes ./checkpoint{epoch}.cp after every epoch and, once training is
        finished, ./model{accuracy:.3f}.pth with the final weights.
    """
    glo_step = 0
    accuracy = 0.0
    for epoch in range(1, Epochs+1):
        tik = time()
        print(f'=====================第{epoch}轮=====================')

        model.train()
        tik_train = time()
        for data in DataLoaders:
            img, label = data
            img, label = img.to(device), label.to(device)
            pred = model(img)
            loss = criterion(pred, label)
            optim.zero_grad()
            loss.backward()
            optim.step()
            # Count optimisation steps so the log below fires every 100 batches
            # instead of on every batch.
            glo_step += 1

            if glo_step % 100 == 0:
                print(f'Loss:{loss.item():.2f}, Time:{time()- tik_train}')
                tik_train = time()

        # Save a resumable checkpoint (weights + optimizer state) every epoch.
        checkpoint = {'model_state_dict': model.state_dict(),
                      'optim_state_dict': optim.state_dict()}
        torch.save(checkpoint, f'./checkpoint{epoch}.cp')

        model.eval()
        right_num = 0
        with torch.no_grad():
            for data in ValidLoaders:
                img, label = data
                img, label = img.to(device), label.to(device)
                pred = model(img)
                # Count predictions that match the label; summing argmax alone
                # would add up class indices, not correct answers.
                right_num += (pred.argmax(1) == label).sum().item()

        accuracy = right_num / ValidLen if ValidLen else 0.0
        print(f'Accuracy:{accuracy}, Time:{time() - tik}')

    # Final weights; the file name carries the last validation accuracy.
    torch.save(model.state_dict(), f'./model{accuracy:.3f}.pth')
# Kick off training with the objects configured above, then mark the end of the run.
train_model(
    Epochs=Epochs,
    device=decive,
    model=model,
    criterion=criterion,
    optim=optim,
    DataLoaders=train_loader,
    ValidLoaders=valid_loader,
    ValidLen=len(valid_dataset),
)
print('================END================')
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
""" | ||
@Auth : zhang-zhang | ||
@Time : 2023/2/17 10:59 | ||
""" | ||
|
||
import torch | ||
from time import time | ||
import torchvision | ||
from torch import nn | ||
from torchvision import transforms | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# This is a sample Python script. | ||
|
||
# Press ⌃R to execute it or replace it with your code. | ||
# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings. | ||
|
||
|
||
def print_hi(name):
    """Print a one-line greeting addressed to `name`."""
    greeting = 'Hi, {}'.format(name)
    print(greeting)


# Greet PyCharm when the file is executed as a script.
if __name__ == '__main__':
    print_hi('PyCharm')
|
||
# See PyCharm help at https://www.jetbrains.com/help/pycharm/ |