From 9411c424fdcc6bbe898d1b942b48667f49ee44fe Mon Sep 17 00:00:00 2001
From: Elmo Moilanen <49366097+elmomoilanen@users.noreply.github.com>
Date: Sat, 10 Jun 2023 17:05:35 +0300
Subject: [PATCH] Update default thread count and threads naming convention

---
 README.md           |  2 +-
 benches/bench.rs    | 10 ++++++++++
 src/factor/mod.rs   | 43 ++++++++++++++++++++++++++-----------------
 src/factor/tests.rs |  6 +++---
 4 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 1a3ed1c..59e9700 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ where argument `num` is the mandatory natural number and option *-p* or *--prett
 
 ## Remarks ##
 
-- Elliptic-curve factorization must use few worker threads to be efficient. Default thread count is five which happened to be the most effective by rough empirical testing during the development period. Thread count can be changed by the *MAX_WORKERS* constant in the *factor* module but its value must be two at least (otherwise performance will deteriorate notably).
+- Elliptic-curve factorization must use OS threads to be efficient. The thread count should be set to a value of at least two and preferably below the number of CPU cores to optimize performance. Based on benchmarking, a lower value (2-5) seems to perform best, but large 128-bit semiprimes may be factorized faster with a larger thread count. The thread count can be changed via the *MAX_THREADS_SMALL* and *MAX_THREADS_LARGE* constants in the *factor* module.
 
 - Miller-Rabin and Baillie-PSW primality tests are probabilistic but do not contain counterexamples in the number range this program uses. Elliptic-curve factorization uses random initial points on the curves which can cause some deviation to execution times.
 
diff --git a/benches/bench.rs b/benches/bench.rs
index fe50064..9c34e67 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -13,6 +13,11 @@ fn bench_factorization(c: &mut Criterion) {
         b.iter(|| Factorization::run(number))
     });
 
+    group.bench_function("u64_semiprime", |b| {
+        let number = 9_804_659_461_513_846_513u64;
+        b.iter(|| Factorization::run(number))
+    });
+
     group.bench_function("u64::MAX", |b| {
         let number = u64::MAX;
         b.iter(|| Factorization::run(number))
@@ -23,6 +28,11 @@ fn bench_factorization(c: &mut Criterion) {
         b.iter(|| Factorization::run(number))
     });
 
+    group.bench_function("u128_many_factors", |b| {
+        let number = 340_282_366_920_938_463_463_374_607_431_768_211_455u128;
+        b.iter(|| Factorization::run(number))
+    });
+
     group.bench_function("u128_semiprime", |b| {
         let number = 5_316_911_983_139_663_122_320_058_796_740_706_329u128;
         b.iter(|| Factorization::run(number))
diff --git a/src/factor/mod.rs b/src/factor/mod.rs
index d6c9f68..52d3e72 100644
--- a/src/factor/mod.rs
+++ b/src/factor/mod.rs
@@ -4,13 +4,16 @@
 //! - Trial division with the first 1006 primes.
 //! - Fermat's factorization method, useful if the integer is of the form n=(a+b)*(a-b).
 //! - Primality test, consisting of Miller-Rabin and strong Baillie-PSW tests.
-//! - Lenstra elliptic-curve factorization with multiple of worker threads. Module `elliptic`
+//! - Lenstra elliptic-curve factorization with multiple OS threads. Module `elliptic`
 //! implements elliptic curve arithmetic needed during factorization.
 //!
-//! Constant `MAX_WORKERS` defines the maximal thread count. This value must be at least two and preferably
-//! between three and six (by rough empirical testing). First thread will actually run wheel factorization
-//! targeting smaller prime factors whereas other threads run the actual elliptic-curve factorization method.
-//! Thus, if the thread count has been set to one, only the wheel factorization will run.
+//! Constants `MAX_THREADS_SMALL` and `MAX_THREADS_LARGE` define the maximal thread counts. These values must
+//! be at least two and preferably below the number of CPU cores. Based on benchmarking, a lower value (2-5)
+//! seems to perform best, but large 128-bit semiprimes may be factorized faster with a larger thread count.
+//!
+//! The first thread runs wheel factorization targeting smaller prime factors, whereas the other threads
+//! run the actual elliptic-curve factorization method. Thus, if the thread count is set to one,
+//! only the wheel factorization will run.
 //!
 //! Factorization algorithm stops when the factored number equals one.
 //!
@@ -23,9 +26,9 @@ use num::integer;
 
 use crate::{arith::Arith, elliptic::EllipticCurve, prime, UInt};
 
-/// Thread count for elliptic curve factorization.
-/// Set between 3 and 6 (best efficiency by rough empirical testing).
-const MAX_WORKERS: usize = 5;
+/// Thread count for elliptic-curve factorization. Currently, the optimal count seems to be between 2 and 5.
+const MAX_THREADS_SMALL: usize = 2;
+const MAX_THREADS_LARGE: usize = 3;
 
 /// Max count of elliptic curves during single elliptic factorization run.
 const MAX_ELLIPTIC_CURVES: usize = 125;
@@ -308,7 +311,7 @@ impl<T: 'static + UInt> Factorization<T> {
     fn factorize_elliptic(&mut self, mut num: T) -> T {
         let mut ec_factors: Vec<(T, bool)> = Vec::new();
 
-        num = self.spawn_and_run_workers(num, &mut ec_factors);
+        num = self.spawn_and_run(num, &mut ec_factors);
 
         for (ec_factor, is_sure_prime) in ec_factors {
             if is_sure_prime || prime::is_odd_prime_factor(ec_factor) {
@@ -332,7 +335,7 @@ impl<T: 'static + UInt> Factorization<T> {
         num
     }
 
-    fn spawn_and_run_workers(&self, num: T, factors: &mut Vec<(T, bool)>) -> T {
+    fn spawn_and_run(&self, num: T, factors: &mut Vec<(T, bool)>) -> T {
         let (sender, receiver) = mpsc::channel();
 
         let maybe_factors_mtx = Arc::new(Mutex::new(MaybeFactors {
@@ -340,16 +343,22 @@ impl<T: 'static + UInt> Factorization<T> {
             factors: Vec::new(),
         }));
 
-        for worker in 0..MAX_WORKERS {
+        let max_threads = if num.into() <= u64::MAX as u128 {
+            MAX_THREADS_SMALL
+        } else {
+            MAX_THREADS_LARGE
+        };
+
+        for thread in 0..max_threads {
             let sender = sender.clone();
             let maybe_factors_mtx_clone = Arc::clone(&maybe_factors_mtx);
 
             thread::spawn(move || {
-                if worker == 0 {
+                if thread == 0 {
                     // Try to find smaller factors with wheel factorization
-                    Self::wheel_worker(maybe_factors_mtx_clone, num, sender);
+                    Self::wheel_runner(maybe_factors_mtx_clone, num, sender);
                 } else {
-                    Self::elliptic_worker(maybe_factors_mtx_clone, num, sender);
+                    Self::elliptic_runner(maybe_factors_mtx_clone, num, sender);
                 }
             });
         }
@@ -375,7 +384,7 @@ impl<T: 'static + UInt> Factorization<T> {
                 }
             }
             Err(_) => {
-                eprintln!("Error: all elliptic workers disconnected, channel closed.");
+                eprintln!("Error: all elliptic threads disconnected, channel closed.");
 
                 let maybe_factors_guard = maybe_factors_mtx.lock().unwrap();
 
@@ -388,7 +397,7 @@ impl<T: 'static + UInt> Factorization<T> {
         }
     }
 
-    fn elliptic_worker(
+    fn elliptic_runner(
         maybe_factors: Arc<Mutex<MaybeFactors<T>>>,
         mut num: T,
         sender: mpsc::Sender<bool>,
@@ -449,7 +458,7 @@ impl<T: 'static + UInt> Factorization<T> {
         if sender.send(num == T::one()).is_err() {}
     }
 
-    fn wheel_worker(
+    fn wheel_runner(
         maybe_factors: Arc<Mutex<MaybeFactors<T>>>,
         mut num: T,
         sender: mpsc::Sender<bool>,
diff --git a/src/factor/tests.rs b/src/factor/tests.rs
index d6760c0..3dccb36 100644
--- a/src/factor/tests.rs
+++ b/src/factor/tests.rs
@@ -280,7 +280,7 @@ fn factorize_fermat_mix_composites() {
 }
 
 #[test]
-fn wheel_factorization_as_worker() {
+fn wheel_factorization_as_runner() {
     let test_num = 51_384_281_238_756_235_937u128;
 
     let correct_factors: [u128; 5] = [7993, 8017, 8039, 8243, 12_101];
@@ -295,7 +295,7 @@ fn wheel_factorization_as_worker() {
 
     let maybe_factors_cln = Arc::clone(&maybe_factors);
 
-    Factorization::wheel_worker(maybe_factors_cln, test_num, tx);
+    Factorization::wheel_runner(maybe_factors_cln, test_num, tx);
 
     match rx.recv() {
         Ok(true) => {
@@ -305,7 +305,7 @@ fn wheel_factorization_as_worker() {
                 resulted_factors.push((*tuple).0);
             }
         }
-        Ok(false) => panic!("wheel worker returned `false`."),
+        Ok(false) => panic!("wheel thread returned `false`."),
         Err(_) => panic!("wheel factorization error"),
     }