first-commit

zzpr · Feb 22, 2023 · 7afc93f · 7afc93f
1 parent 5425da8
commit 7afc93f
Show file tree

Hide file tree

Showing 10 changed files with 4,747 additions and 0 deletions.
diff --git a/HousePrice/data/sample_submission.csv b/HousePrice/data/sample_submission.csv
diff --git a/HousePrice/data/test.csv b/HousePrice/data/test.csv
diff --git a/HousePrice/data/train.csv b/HousePrice/data/train.csv
diff --git a/HousePrice/model.pth b/HousePrice/model.pth
diff --git a/HousePrice/src/DataPreprocess.py b/HousePrice/src/DataPreprocess.py
@@ -0,0 +1,60 @@
+"""
+@Auth ： zhang-zhang
+@Time ： 2023/1/28 10:26
+@IDE  ： PyCharm
+"""
+
+import pandas as pd
+import torch
+from torch import nn
+
+# Widen pandas' console output so wide frames print without truncation.
+pd.set_option('display.max_rows', 500)
+pd.set_option('display.max_columns', 100)
+pd.set_option('display.width', 1000)
+
+
+train_data = pd.read_csv('../data/train.csv')
+test_data = pd.read_csv('../data/test.csv')
+# Shapes noted by the original author: (1460, 81) and (1459, 80).
+
+# Peek at a few columns: Id is not a feature, SalePrice is the prediction target.
+five = train_data.iloc[:2, [0, 1, 2, 3, -3, -2, -1]]
+#    Id  MSSubClass MSZoning  LotFrontage SaleType SaleCondition  SalePrice
+# 0   1          60       RL         65.0       WD        Normal     208500
+# 1   2          20       RL         80.0       WD        Normal     181500
+
+# Stack the train and test features (first column dropped from both, last
+# column dropped from train) so both receive identical preprocessing.
+all_data = pd.concat((train_data.iloc[:, 1:-1], test_data.iloc[:, 1:]))
+# Shape noted by the original author: (2919, 79).
+
+# --- Numeric columns: every column whose dtype is not 'object'. ---
+numberic_feature = all_data.dtypes[all_data.dtypes != 'object'].index
+# 1. Mean-normalise: centre each column on 0 and divide by its range. This is
+#    not a z-score, so the result is neither unit-variance nor normally
+#    distributed. A constant column yields 0/0 = NaN, which step 2 turns into 0.
+all_data[numberic_feature] = all_data[numberic_feature].apply(
+    lambda x: (x - x.mean()) / (x.max() - x.min())
+)
+# 2. Columns are centred, so filling NaN with 0 imputes the column mean.
+all_data[numberic_feature] = all_data[numberic_feature].fillna(0)
+
+# --- Non-numeric columns: one-hot encode, with an extra indicator for NaN. ---
+# dtype=float keeps the whole frame numeric. Recent pandas versions emit bool
+# dummy columns by default, which would make `.values` an object array that
+# torch.tensor() cannot convert.
+all_data = pd.get_dummies(all_data, dummy_na=True, dtype=float)
+# Shape noted by the original author: (2919, 331).
+
+# Split back into train / test features and pull the labels from the last
+# column of the training frame, as an (n, 1) column.
+train_feature = torch.tensor(all_data.iloc[:len(train_data)].values, dtype=torch.float64)
+test_feature = torch.tensor(all_data.iloc[len(train_data):].values, dtype=torch.float64)
+train_label = torch.tensor(train_data.iloc[:, -1].values.reshape(-1, 1), dtype=torch.float64)
+
+# [features, labels] pair consumed by the training script.
+train = [train_feature, train_label]
+
+if __name__ == '__main__':
+    print(type(train))
+
+
+
+
+
+
+
diff --git a/HousePrice/src/model.py b/HousePrice/src/model.py
@@ -0,0 +1,20 @@
+"""
+@Auth ： zhang-zhang
+@Time ： 2023/1/28 10:32
+@IDE  ： PyCharm
+"""
+from torch import nn
+
+class haonet(nn.Module):
+    """Single-layer linear regressor mapping a feature vector to one value.
+
+    Args:
+        in_features: width of the input feature vector. Defaults to 331, the
+            value that was previously hard-coded, so existing ``haonet()``
+            calls behave as before.
+    """
+
+    def __init__(self, in_features=331):
+        super().__init__()
+        self.module = nn.Sequential(
+            nn.Linear(in_features, 1)
+        )
+
+    def forward(self, input):
+        """Apply the linear layer; the last dimension of the result has size 1."""
+        return self.module(input)
+
+# Placeholder entry point: running this module directly does nothing.
+if __name__ == '__main__':
+    pass
diff --git a/HousePrice/src/train.py b/HousePrice/src/train.py
@@ -0,0 +1,71 @@
+"""
+@Auth ： zhang-zhang
+@Time ： 2023/1/28 11:25
+@IDE  ： PyCharm
+"""
+
+from DataPreprocess import *
+from model import *
+import torch
+import torchvision
+from torch import optim
+from torch.utils.data import DataLoader
+from sklearn.model_selection import train_test_split
+from torch.utils.data.sampler import SubsetRandomSampler
+
+batch_size = 64  # currently unused: the loop below trains one sample at a time
+EPOCHS = 100
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Hold out 20% of the labelled rows for validation, then cast every split to
+# float32 once and place it on the model's device. as_tensor accepts tensors
+# and arrays alike, so this works whichever type train_test_split hands back,
+# and it avoids re-wrapping each sample with torch.tensor() inside the loop.
+x_train, x_test, y_train, y_test = (
+    torch.as_tensor(part, dtype=torch.float32).to(device)
+    for part in train_test_split(train[0], train[1], test_size=.20)
+)
+
+
+def get_model():
+    """Return a one-layer linear regressor sized to the preprocessed features."""
+    # Take the input width from the data instead of hard-coding 331, so a
+    # change in the number of one-hot columns cannot cause a shape mismatch.
+    model = nn.Sequential(nn.Linear(train[0].shape[1], 1))
+    return model
+
+
+model = get_model()
+model.to(device)
+
+lossfc = nn.MSELoss()
+lossfc.to(device)
+
+learning_rate = 1e-3
+# NOTE: this rebinds the name `optim` from the torch.optim module to the
+# optimiser instance; the name is kept as it was in the original script.
+optim = optim.Adam(model.parameters(), lr=learning_rate)
+
+step = 0
+for epoch in range(1, EPOCHS+1):
+
+    print(f'================第{epoch}轮================')
+
+    model.train()
+
+    # One optimiser update per training sample.
+    for x, y in zip(x_train, y_train):
+        output = model(x)
+        loss = lossfc(output, y)
+        optim.zero_grad()
+        loss.backward()
+        optim.step()
+        step += 1
+
+        if step % 100 == 0:
+            print(f'step:{step}, loss:{loss.item()}')
+
+    # Validation: this is a regression task, so report the mean squared error
+    # on the held-out rows. The previous exact-equality "accuracy" between a
+    # float prediction and the price was effectively always zero, and its
+    # counter was never reset between epochs.
+    model.eval()
+    with torch.no_grad():
+        val_loss = lossfc(model(x_test), y_test).item()
+    print(f'val_mse:{val_loss}')
+
+torch.save(model.state_dict(), '../model.pth')
diff --git a/LeafClassifier/src/precess.py b/LeafClassifier/src/precess.py
@@ -0,0 +1,178 @@
+"""
+@Auth ： zhang-zhang
+@Time ： 2023/2/1 11:57
+@IDE  ： PyCharm
+"""
+
+import pandas as pd
+import numpy as np
+from PIL import Image
+import matplotlib.pyplot as plt
+import os
+from torch.utils.data import Dataset
+from torch.utils.data import DataLoader
+from torchvision import transforms
+import torch
+import torchvision
+from torch import nn
+from train import *
+import warnings
+warnings.filterwarnings("ignore")
+
+# Load the training CSV and derive the label vocabulary from it.
+
+train = pd.read_csv('../classify-leaves/train.csv')
+# class name -> index, numbered in order of first appearance in the CSV
+class2num = {name: idx for idx, name in enumerate(train['label'].unique())}
+# index -> class name
+num2class = dict(zip(class2num.values(), class2num.keys()))
+# len(num2class) was 176 when the original author ran this
+
+class myDataset(Dataset):
+    """Image dataset driven by a CSV with an 'image' (and 'label') column.
+
+    Args:
+        csv_path: CSV file listing image paths relative to ``file_path``.
+        file_path: directory that the CSV's image paths are joined onto.
+        mode: 'train' uses the first ``1 - valid_ratio`` share of the rows,
+            'valid' uses the remaining rows, 'test' uses every row and
+            yields images without labels.
+        valid_ratio: fraction of rows reserved for validation.
+        resize: (height, width); stored on the instance but not used by the
+            transform pipelines, which crop to 224.
+
+    Raises:
+        ValueError: if ``mode`` is not one of 'train', 'valid', 'test'.
+    """
+
+    def __init__(self, csv_path, file_path, mode='train', valid_ratio=0.25, resize=(256, 256)):
+        # Fail early with a clear message; previously an unknown mode only
+        # surfaced later as a missing `self.images` attribute.
+        if mode not in ('train', 'valid', 'test'):
+            raise ValueError(f"mode must be 'train', 'valid' or 'test', got {mode!r}")
+
+        self.resize_height = resize[0]
+        self.resize_width = resize[1]
+        self.file_path = file_path
+        self.mode = mode
+        self.data = pd.read_csv(csv_path)
+        self.data_len = len(self.data.index)
+        self.train_len = int(self.data_len * (1 - valid_ratio))
+
+        # .loc slices by label and includes its end point, hence the -1.
+        if mode == 'train':
+            self.train_image = np.asarray(self.data.loc[:self.train_len-1, 'image'])
+            self.train_label = np.asarray(self.data.loc[:self.train_len-1, 'label'])
+            self.images = self.train_image
+            self.labels = self.train_label
+        elif mode == 'valid':
+            self.valid_image = np.asarray(self.data.loc[self.train_len:, 'image'])
+            self.valid_label = np.asarray(self.data.loc[self.train_len:, 'label'])
+            self.images = self.valid_image
+            self.labels = self.valid_label
+        else:  # 'test': no labels available
+            self.test_image = np.asarray(self.data.loc[:, 'image'])
+            self.images = self.test_image
+
+        self.images_len = len(self.images)
+
+        # Build the transform pipeline once instead of on every __getitem__.
+        # Training uses random crop + flip; validation and test use a
+        # deterministic resize + centre crop.
+        normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+        if mode == 'train':
+            self.transform = transforms.Compose([transforms.RandomResizedCrop(224),
+                                                 transforms.RandomHorizontalFlip(),
+                                                 transforms.ToTensor(),
+                                                 normalize])
+        else:
+            self.transform = transforms.Compose([transforms.Resize(256),
+                                                 transforms.CenterCrop(224),
+                                                 transforms.ToTensor(),
+                                                 normalize])
+
+        print('Finished reading %s dataset. %d number samples found.' % (mode, self.images_len))
+
+    def __getitem__(self, index):
+        """Return the transformed image, plus its class index unless in test mode."""
+        image_path = self.images[index]
+        # The context manager closes the file handle. convert('RGB') guarantees
+        # three channels, which the 3-value Normalize statistics require.
+        with Image.open(os.path.join(self.file_path, image_path)) as raw:
+            image = self.transform(raw.convert('RGB'))
+
+        if self.mode == 'test':
+            return image
+
+        label = self.labels[index]
+        label_num = class2num[label]
+        return image, label_num
+
+    def __len__(self):
+        """Number of samples in the selected split."""
+        return self.images_len
+
+train_path = '../classify-leaves/train.csv'
+test_path = '../classify-leaves/test.csv'
+image_path = '../classify-leaves/'
+
+# Train / eval preprocessing pipelines. The visible code never reads this
+# dict (myDataset applies its own transforms); it is kept so the
+# module-level name stays available.
+data_transform = {
+    "train": transforms.Compose([transforms.RandomResizedCrop(224),
+                                 transforms.RandomHorizontalFlip(),
+                                 transforms.ToTensor(),
+                                 transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
+    "val": transforms.Compose([transforms.Resize(256),
+                               transforms.CenterCrop(224),
+                               transforms.ToTensor(),
+                               transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}
+
+train_dataset = myDataset(train_path, image_path, mode='train')
+valid_dataset = myDataset(train_path, image_path, mode='valid')
+test_dataset = myDataset(test_path, image_path, mode='test')
+
+# Only the training data needs shuffling; evaluation order has no effect on
+# the accuracy, so the validation loader is left in file order.
+train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
+valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
+test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
+
+# Number of batches per loader. The original author noted "1377 459 880";
+# those counts may have been recorded with a different batch size (TODO confirm).
+print(len(train_loader), len(valid_loader), len(test_loader))
+
+# To restrict training to the first two GPUs:
+# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
+# NOTE: the misspelt name `decive` is kept because later code refers to it.
+decive = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+model = torchvision.models.resnet34(pretrained=True, progress=True)
+# Only the classifier head is optimised below, so freeze the backbone to skip
+# computing gradients that would never be applied.
+for param in model.parameters():
+    param.requires_grad = False
+# Swap the pretrained classification layer for one with an output per leaf
+# class. Reading in_features from the existing layer avoids hard-coding a
+# width that may not match this architecture.
+model.fc = nn.Linear(in_features=model.fc.in_features, out_features=len(class2num), bias=True)
+model.to(decive)
+
+criterion = nn.CrossEntropyLoss().to(decive)
+
+LR = 1e-3
+optim = torch.optim.Adam(model.fc.parameters(), lr=LR)
+Epochs = 50
+
+def train_model(Epochs, device, model, criterion, optim, DataLoaders, ValidLoaders, ValidLen):
+    """Train ``model`` for ``Epochs`` epochs, validating and saving after each.
+
+    Args:
+        Epochs: number of passes over ``DataLoaders``.
+        device: device that every batch is moved to before the forward pass.
+        model: network to train; updated in place.
+        criterion: loss called as ``criterion(pred, label)``.
+        optim: optimiser stepped once per training batch.
+        DataLoaders: iterable of ``(img, label)`` training batches.
+        ValidLoaders: iterable of ``(img, label)`` validation batches.
+        ValidLen: number of validation samples, used as the accuracy denominator.
+
+    Side effects:
+        After each epoch writes ``./checkpoint{epoch}.cp`` (model and
+        optimiser state) and ``./model{accuracy:.3f}.pth`` (model state) to
+        the current working directory, and prints progress to stdout.
+    """
+    glo_step = 0
+    for epoch in range(1, Epochs+1):
+        tik = time()
+        print(f'=====================第{epoch}轮=====================')
+
+        model.train()
+        tik_train = time()
+        for img, label in DataLoaders:
+            img, label = img.to(device), label.to(device)
+            pred = model(img)
+            loss = criterion(pred, label)
+            optim.zero_grad()
+            loss.backward()
+            optim.step()
+            # Count the step so the "% 100" gate below fires every 100
+            # batches; the counter was previously never advanced.
+            glo_step += 1
+
+            if glo_step % 100 == 0:
+                # Time reported is the time elapsed since the previous log line.
+                print(f'Loss:{loss.item():.2f}, Time:{time()- tik_train}')
+                tik_train = time()
+
+        # Save a resumable checkpoint after every epoch.
+        checkpoint = {'model_state_dict': model.state_dict(),
+                      'optim_state_dict': optim.state_dict()}
+        torch.save(checkpoint, f'./checkpoint{epoch}.cp')
+
+        model.eval()
+        right_num = 0
+        with torch.no_grad():
+            for img, label in ValidLoaders:
+                img, label = img.to(device), label.to(device)
+                pred = model(img)
+                # Count predictions that match the label. The old code summed
+                # the predicted class indices themselves.
+                right_num += (pred.argmax(1) == label).sum().item()
+
+        # Plain float accuracy; guard against an empty validation set.
+        accuracy = right_num / ValidLen if ValidLen else 0.0
+        print(f'Accuracy:{accuracy}, Time:{time() - tik}')
+
+        torch.save(model.state_dict(), f'./model{accuracy:.3f}.pth')
+# Start training as soon as this module is executed or imported (there is no
+# __main__ guard), then print a banner once all epochs have finished.
+train_model(Epochs, decive, model, criterion, optim, train_loader, valid_loader, len(valid_dataset))
+print('================END================')
+
+
+
+
+
+
diff --git a/LeafClassifier/src/train.py b/LeafClassifier/src/train.py
@@ -0,0 +1,21 @@
+"""
+@Auth ： zhang-zhang
+@Time ： 2023/2/17 10:59
+"""
+
+import torch
+from time import time
+import torchvision
+from torch import nn
+from torchvision import transforms
+
+
+
+
+
+
+
+
+
+
+
diff --git a/main.py b/main.py
@@ -0,0 +1,16 @@
+# This is a sample Python script.
+
+# Press ⌃R to execute it or replace it with your code.
+# Press Double ⇧ to search everywhere for classes, files, tool windows, actions, and settings.
+
+
+def print_hi(name):
+    """Print a one-line greeting addressed to *name*."""
+    greeting = f'Hi, {name}'
+    print(greeting)
+
+
+# Print the demo greeting only when run as a script, not when imported.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# See PyCharm help at https://www.jetbrains.com/help/pycharm/