diff --git a/ReadMe.md b/ReadMe.md
index d1d29c8..bda5fc3 100755
--- a/ReadMe.md
+++ b/ReadMe.md
@@ -48,7 +48,9 @@ Right now there are 8908 images in the [files_trainable](https://github.com/comm
 It seems to perform ok after >20 epochs, but the fine detail seems to struggle.
 Training started at 4:53pm on March 13, 2022 and reached epoch 33 at 8:55pm (7 minutes per epoch) on a 1080Ti card.
 It would be interesting to perform evaluation only on "confident" network returns.
-Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs
+Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs.
+If dropout is used the average loss is 0.1060 on test and 0.0960 on training data after 100 epochs.
+
 Input picture (left), groundtruth (top right), and prediction (bottom right)
 ![](docs/example_pred.png)
diff --git a/docs/example_pred.png b/docs/example_pred.png
index 49befab..f8e575c 100644
Binary files a/docs/example_pred.png and b/docs/example_pred.png differ
diff --git a/docs/example_probs.png b/docs/example_probs.png
index c3a97ee..9187b12 100644
Binary files a/docs/example_probs.png and b/docs/example_probs.png differ
diff --git a/src/data/Comma10kDataset.cpp b/src/data/Comma10kDataset.cpp
index cdee832..aa13959 100755
--- a/src/data/Comma10kDataset.cpp
+++ b/src/data/Comma10kDataset.cpp
@@ -60,7 +60,7 @@ Comma10kDataset::Comma10kDataset(std::string pathroot, ModeDataSplit mode, bool
 
   // Random order (ensure same random shuffle on both)
   // https://stackoverflow.com/a/16968342
-  if (randomize) {
+  if (mode == Comma10kDataset::ModeDataSplit::kTrain && randomize) {
     unsigned int seed = std::time(NULL);
     std::srand(seed);
     std::random_shuffle(paths_rgb.begin(), paths_rgb.end());
diff --git a/src/net_seg_test.cpp b/src/net_seg_test.cpp
index 0ea52ce..c582f58 100644
--- a/src/net_seg_test.cpp
+++ b/src/net_seg_test.cpp
@@ -77,9 +77,12 @@ int main(int argc, char *argv[]) {
 
   // Finally convert it to a unique pointer dataloader
   auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
-  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(1).workers(6));
+  auto sampler = torch::data::samplers::SequentialSampler(dataset.size().value());
+  auto options = torch::data::DataLoaderOptions().enforce_ordering(true).batch_size(1).workers(10);
+  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);
 
   // Loop through our batches of training data
+  bool visualize = true;
   double loss_sum = 0.0;
   size_t loss_ct = 0;
   size_t batch_idx = 0;
@@ -108,66 +111,68 @@ int main(int argc, char *argv[]) {
       std::cout << items_curr << "/" << items_total << " | loss = " << loss.item() << " | loss_avg = " << loss_avg << " (" << loss_ct << " samples)" << std::endl;
 
-      // Softmax the output to get our total class probabilities [N, classes, H, W]
-      // Thus across all classes, our probabilities should sum to 1
-      auto output_probs = torch::softmax(output, 1);
-
-      // Plot the first image, need to change to opencv format [H,W,C]
-      // Note that we arg max the softmax network output, then need to add an dimension
-      // We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
-      torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
-      torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
-      torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();
-
-      // Convert them all to 0..255 ranges
-      cv_input = cv_input.to(torch::kInt8);
-      cv_label = cv_label.to(torch::kInt8);
-      cv_output = cv_output.to(torch::kInt8);
-
-      // Point the cv::Mats to the transformed locations in memory
-      cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr());
-      cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr());
-      cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr());
-
-      // Convert labeled images to color
-      cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
-      cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
-      // img_label = 255.0 / (double)n_classes * img_label;
-      // img_output = 255.0 / (double)n_classes * img_output;
-
-      // Change both to be colored like the comma10k
-      img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
-      img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
-
-      // Finally stack and display in a window
-      cv::Mat outimg1, outimg2, outimg3;
-      cv::hconcat(img_input, img_label, outimg1);
-      cv::hconcat(img_input, img_output, outimg2);
-      cv::vconcat(outimg1, outimg2, outimg3);
-      cv::imshow("prediction", outimg3);
-
-      // Next we will visualize our probability distributions [N, classes, H, W]
-      torch::Tensor cv_probs = output_probs[0].clone().cpu();
-      cv_probs = cv_probs.to(torch::kFloat32);
-      cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
-      assert((size_t)output_probs.size(0) == 1);
-      assert((size_t)cv_probs.size(0) == n_classes);
-      for (int n = 0; n < (int)n_classes; n++) {
-        cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr());
-        imgtmp = 255 * imgtmp;
-        imgtmp.convertTo(imgtmp, CV_8UC1);
-        cv::Mat imgtmp_color;
-        cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
-        imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
+      // Visualize if we need to
+      if (visualize) {
+        // Softmax the output to get our total class probabilities [N, classes, H, W]
+        // Thus across all classes, our probabilities should sum to 1
+        auto output_probs = torch::softmax(output, 1);
+
+        // Plot the first image, need to change to opencv format [H,W,C]
+        // Note that we arg max the softmax network output, then need to add an dimension
+        // We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
+        torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
+        torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
+        torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();
+
+        // Convert them all to 0..255 ranges
+        cv_input = cv_input.to(torch::kInt8);
+        cv_label = cv_label.to(torch::kInt8);
+        cv_output = cv_output.to(torch::kInt8);
+
+        // Point the cv::Mats to the transformed locations in memory
+        cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr());
+        cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr());
+        cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr());
+
+        // Convert labeled images to color
+        cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
+        cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
+        // img_label = 255.0 / (double)n_classes * img_label;
+        // img_output = 255.0 / (double)n_classes * img_output;
+
+        // Change both to be colored like the comma10k
+        img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
+        img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
+
+        // Finally stack and display in a window
+        cv::Mat outimg1, outimg2, outimg3;
+        cv::hconcat(img_input, img_label, outimg1);
+        cv::hconcat(img_input, img_output, outimg2);
+        cv::vconcat(outimg1, outimg2, outimg3);
+        cv::imshow("prediction", outimg3);
+
+        // Next we will visualize our probability distributions [N, classes, H, W]
+        torch::Tensor cv_probs = output_probs[0].clone().cpu();
+        cv_probs = cv_probs.to(torch::kFloat32);
+        cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
+        assert((size_t)output_probs.size(0) == 1);
+        assert((size_t)cv_probs.size(0) == n_classes);
+        for (int n = 0; n < (int)n_classes; n++) {
+          cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr());
+          imgtmp = 255 * imgtmp;
+          imgtmp.convertTo(imgtmp, CV_8UC1);
+          cv::Mat imgtmp_color;
+          cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
+          imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
+        }
+        cv::imshow("uncertainties", outimg4);
+        cv::waitKey(100);
+
+        // Save to file for readme
+        // cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
+        // cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
+        // std::exit(EXIT_FAILURE);
       }
-      cv::imshow("uncertainties", outimg4);
-      cv::waitKey(100);
-
-      // Save to file for readme
-      // cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
-      // cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
-      // std::exit(EXIT_FAILURE);
-
       batch_idx++;
     }
   }
diff --git a/src/net_seg_train.cpp b/src/net_seg_train.cpp
index d0329ec..3a4ecce 100755
--- a/src/net_seg_train.cpp
+++ b/src/net_seg_train.cpp
@@ -71,7 +71,9 @@ int main() {
 
   // Finally convert it to a unique pointer dataloader
   auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
-  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(5).workers(30));
+  auto sampler = torch::data::samplers::RandomSampler(dataset.size().value());
+  auto options = torch::data::DataLoaderOptions().enforce_ordering(false).batch_size(5).workers(30);
+  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);
 
   // Create the optimizer
   // torch::optim::SGD optimizer(model->parameters(), torch::optim::SGDOptions(0.01).momentum(0.5));
@@ -111,7 +113,7 @@ int main() {
       optimizer.step();
 
       // Print our the loss every once in a while
-      if (batch_idx % 10 == 0) {
+      if (batch_idx % 100 == 0) {
 
         // Debug printout
         size_t items_curr = batch_idx * batch.data.size(0);
diff --git a/src/network/blocks/UNetBlocks.h b/src/network/blocks/UNetBlocks.h
index c65c687..37f7484 100755
--- a/src/network/blocks/UNetBlocks.h
+++ b/src/network/blocks/UNetBlocks.h
@@ -63,7 +63,10 @@ struct UNetDownwardsImpl : torch::nn::Module {
   }
 
   // Forward propagation
-  torch::Tensor forward(torch::Tensor input) { return conv2(conv1(torch::max_pool2d(input, 2))); }
+  torch::Tensor forward(torch::Tensor input) {
+    auto output = conv2(conv1(torch::max_pool2d(input, 2)));
+    return torch::dropout(output, 0.5, this->is_training());
+  }
 
   // Parts of the network
   // NOTE: for submodules, we call the "empty holder" constructor
@@ -96,7 +99,8 @@ struct UNetUpwardsImpl : torch::nn::Module {
     input = torch::cat({input, bridge}, 1);
 
     // Finally do our convolutions and return
-    return conv2(conv1(input));
+    auto output = torch::dropout(input, 0.5, this->is_training());
+    return conv2(conv1(output));
   }
 
   // Parts of the network
diff --git a/src/network/models/UNetModel.h b/src/network/models/UNetModel.h
index 2e85ee8..c0cd84f 100755
--- a/src/network/models/UNetModel.h
+++ b/src/network/models/UNetModel.h
@@ -91,6 +91,7 @@ struct UNetModelImpl : torch::nn::Module {
     // First do our starting two convolutions
     x1 = inconv1(input);
     x1 = inconv2(x1);
+    x1 = torch::dropout(x1, 0.25, this->is_training());
 
     // Downscale to the bottleneck
     x2 = down1(x1);
diff --git a/src/utils/augmentations.h b/src/utils/augmentations.h
index 74bf73b..d61b4eb 100644
--- a/src/utils/augmentations.h
+++ b/src/utils/augmentations.h
@@ -135,9 +135,9 @@ inline void random_camera_model(cv::Mat &cv_rgb, cv::Mat &cv_label) {
   cam.at<float>(2, 1) = 0.0f;
   cam.at<float>(2, 2) = 1.0f;
   cv::Mat dist(5, 1, cv::DataType<float>::type);
-  dist.at<float>(0, 0) = 0.1 * unif_pn(rng);
-  dist.at<float>(1, 0) = 0.05 * unif_pn(rng);
-  dist.at<float>(2, 0) = 1e-3 * unif_pn(rng);
+  dist.at<float>(0, 0) = 0.20 * unif_pn(rng);
+  dist.at<float>(1, 0) = 0.10 * unif_pn(rng);
+  dist.at<float>(2, 0) = 1e-2 * unif_pn(rng);
   dist.at<float>(3, 0) = 1e-4 * unif_pn(rng);
   dist.at<float>(4, 0) = 1e-5 * unif_pn(rng);
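
A note on the `Comma10kDataset.cpp` hunk: `std::random_shuffle` is deprecated in C++14 and removed in C++17. A minimal sketch of the same train-only paired shuffle using `std::shuffle`, assuming the constructor also holds a matching `paths_label` vector (the "same random shuffle on both" comment implies one; the helper name is illustrative):

```cpp
#include <algorithm>
#include <random>
#include <string>
#include <vector>

// Shuffle the image and label path lists into the same random order by
// seeding two engines identically; each then produces the same permutation.
inline void paired_shuffle(std::vector<std::string> &paths_rgb,
                           std::vector<std::string> &paths_label, unsigned int seed) {
  std::mt19937 rng_rgb(seed);
  std::mt19937 rng_label(seed);
  std::shuffle(paths_rgb.begin(), paths_rgb.end(), rng_rgb);
  std::shuffle(paths_label.begin(), paths_label.end(), rng_label);
}
```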
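
On the `cv::Mat` construction in `net_seg_test.cpp`: the Mats alias the tensors' memory rather than copying it, so each tensor must be contiguous, on the CPU, and kept alive while its Mat is in use (the `.clone()` after `.permute()` is what materializes a contiguous buffer). A sketch of the general pattern with a hypothetical `tensor_to_mat` helper:

```cpp
#include <opencv2/opencv.hpp>
#include <torch/torch.h>

// Hypothetical helper: convert a [C,H,W] float tensor in 0..1 into an owned
// 8-bit, 3-channel cv::Mat. The intermediate Mat only aliases the tensor's
// buffer, so we deep-copy before the tensor goes out of scope.
inline cv::Mat tensor_to_mat(const torch::Tensor &chw) {
  torch::Tensor hwc = (255.0 * chw).permute({1, 2, 0}).contiguous().to(torch::kUInt8).cpu();
  cv::Mat view(cv::Size((int)hwc.size(1), (int)hwc.size(0)), CV_8UC3, hwc.data_ptr());
  return view.clone(); // deep copy so the Mat owns its pixels
}
```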
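
The ReadMe's idea of evaluating only "confident" network returns pairs naturally with the dropout added in `UNetBlocks.h` and `UNetModel.h`: leaving dropout active at test time gives Monte Carlo dropout. A sketch, assuming a `UNetModel` module holder as `UNetModelImpl` suggests (the function name and sample count are illustrative):

```cpp
#include <torch/torch.h>

// Monte Carlo dropout sketch: average several stochastic forward passes with
// dropout left on, then read the max class probability as per-pixel confidence.
inline torch::Tensor mc_dropout_confidence(UNetModel &model, const torch::Tensor &data,
                                           int num_samples = 10) {
  torch::NoGradGuard no_grad; // inference only, no autograd graph needed
  model->train();             // keep the dropout layers active on purpose
  torch::Tensor prob_sum;
  for (int i = 0; i < num_samples; i++) {
    torch::Tensor probs = torch::softmax(model->forward(data), 1); // [N, classes, H, W]
    prob_sum = (i == 0) ? probs : prob_sum + probs;
  }
  torch::Tensor prob_mean = prob_sum / (double)num_samples;
  return std::get<0>(prob_mean.max(1)); // per-pixel confidence [N, H, W]
}
```

Pixels whose confidence falls below some threshold could then be excluded from the loss average, which would make the "confident-only" evaluation concrete.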
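
For the stronger distortion sampled in `augmentations.h` (the five coefficients map to OpenCV's k1, k2, p1, p2, k3), one plausible way the `(cam, dist)` pair could warp both image and label; this is a sketch, not the repo's actual implementation below the hunk:

```cpp
#include <opencv2/opencv.hpp>

// Warp the image and its label with the same sampled camera model so the
// pixel-wise annotation stays aligned; nearest neighbor on the label avoids
// interpolating between class ids.
inline void apply_camera_model(cv::Mat &cv_rgb, cv::Mat &cv_label,
                               const cv::Mat &cam, const cv::Mat &dist) {
  cv::Mat map1, map2;
  cv::initUndistortRectifyMap(cam, dist, cv::Mat(), cam, cv_rgb.size(), CV_32FC1, map1, map2);
  cv::Mat rgb_warped, label_warped; // cv::remap cannot operate in-place
  cv::remap(cv_rgb, rgb_warped, map1, map2, cv::INTER_LINEAR);
  cv::remap(cv_label, label_warped, map1, map2, cv::INTER_NEAREST);
  cv_rgb = rgb_warped;
  cv_label = label_warped;
}
```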