diff --git a/README_ASIC.md b/README_ASIC.md index 7a925f06..2baea37e 100644 --- a/README_ASIC.md +++ b/README_ASIC.md @@ -58,10 +58,11 @@ List of options [default, min - max]: Allows connecting to a timelord running on a remote host. Useful when running multiple machines with VDF hardware connecting to a single timelord. --vdfs-mask N - mask for enabling VDF engines [7, 1 - 7] The ASIC has 3 VDF engines numbered 0, 1, 2. If not running all 3 engines, the mask can be specified to enable specific engines. It must be the result of bitwise OR of the engine bits (1, 2, 4 for engines 0, 1, 2). - --vdf-threads N - number of software threads per VDF engine [4, 2 - 64] + --vdf-threads N - max number of software threads per VDF engine [4, 2 - 64] Number of software threads computing intermediate values and proofs per VDF engine. - --proof-threads N - number of proof threads per VDF engine + --proof-threads N - max number of proof threads per VDF engine Number of software threads only computing proofs per VDF engine. Must be less than --vdf-threads. 
+ --segment-threads N - number of proof threads per segment [2, 1 - 8]
 --auto-freq-period N - auto-adjust frequency every N seconds [0, 10 - inf]
 --list - list available devices and exit
 ```
diff --git a/src/hw/hw_proof.cpp b/src/hw/hw_proof.cpp
index 3ea63ee6..23caaa4e 100644
--- a/src/hw/hw_proof.cpp
+++ b/src/hw/hw_proof.cpp
@@ -6,7 +6,7 @@
 #include
 #include
 
-static const uint32_t g_chkp_thres = 1000000;
+static const uint32_t g_chkp_thres = 500000;
 static const uint32_t g_skip_thres = 10;
 
 void report_bad_vdf_value(struct vdf_state *vdf, struct vdf_value *val)
@@ -297,13 +297,56 @@ bool hw_proof_should_queue(struct vdf_state *vdf, uint64_t iters)
     return iters < vdf->proofs[last_queued_idx].iters;
 }
 
+/*
+ * Queue the proofs needed for 'iters' iterations, chained after proof 'prev'.
+ * A request longer than g_chkp_thres iterations per segment thread is cut
+ * into two or three segments: all but the last are queued without flags
+ * (checkpoints) and the last one carries HW_VDF_PROOF_FLAG_IS_REQ.
+ */
+void hw_proof_queue_proofs(struct vdf_state *vdf, uint64_t iters, uint16_t prev)
+{
+    const uint32_t n_thr = vdf->segment_threads;
+    // Requests of up to this many iterations are queued as a single proof
+    const uint32_t max_seg_iters = g_chkp_thres * n_thr;
+    const double inv = 1.0 / n_thr;
+
+    if (iters <= max_seg_iters) {
+        hw_queue_proof(vdf, iters, prev, HW_VDF_PROOF_FLAG_IS_REQ);
+        return;
+    }
+
+    // Two segments: share of the total iterations given to the first one
+    double first_share = (inv + 1) / (2 * inv + 1);
+    uint64_t seg_iters = iters * first_share;
+
+    if (iters - seg_iters > max_seg_iters) {
+        // The second segment would still be too long, use three segments
+        const double num = inv + 1.0;
+        const double ratio = num / inv;
+        // Share of the total iterations given to the second segment (of 3)
+        const double second_share = num / (ratio * num + 2.0 * inv + 1.0);
+        first_share = second_share * ratio;
+
+        seg_iters = iters * first_share;
+        seg_iters = seg_iters / vdf->interval * vdf->interval;
+        prev = hw_queue_proof(vdf, seg_iters, prev, 0);
+
+        const uint64_t next_iters = iters * second_share;
+        iters -= seg_iters;
+        seg_iters = next_iters;
+    }
+
+    seg_iters = seg_iters / vdf->interval * vdf->interval;
+    prev = hw_queue_proof(vdf, seg_iters, prev, 0);
+    iters -= seg_iters;
+    hw_queue_proof(vdf, iters, prev, HW_VDF_PROOF_FLAG_IS_REQ);
+}
+
 void hw_proof_process_req(struct vdf_state *vdf)
 {
     uint64_t iters;
     uint64_t req_iters;
     uint64_t base_iters = 0;
-    uint64_t
chkp_iters; - uint32_t chkp_div = 4, chkp_mul = 3; uint8_t max_chkp_segments = 64 - 3; int i; uint16_t prev = HW_VDF_PROOF_NONE; @@ -348,27 +382,7 @@ void hw_proof_process_req(struct vdf_state *vdf) iters = req_iters - base_iters; - if (iters > g_chkp_thres) { - // Split iters as [75%, 25%] - chkp_iters = iters * chkp_mul / chkp_div; - if (iters - chkp_iters > g_chkp_thres) { - // Split iters as [69%, 23%, 8%] - uint32_t chkp2_mul[] = { 69, 69 + 23 }; - uint64_t chkp2_iters; - - chkp_iters = iters * chkp2_mul[0] / 100; - chkp_iters = chkp_iters / vdf->interval * vdf->interval; - prev = hw_queue_proof(vdf, chkp_iters, prev, 0); - - chkp2_iters = iters * chkp2_mul[1] / 100 - chkp_iters; - iters -= chkp_iters; - chkp_iters = chkp2_iters; - } - chkp_iters = chkp_iters / vdf->interval * vdf->interval; - prev = hw_queue_proof(vdf, chkp_iters, prev, 0); - iters -= chkp_iters; - } - hw_queue_proof(vdf, iters, prev, HW_VDF_PROOF_FLAG_IS_REQ); + hw_proof_queue_proofs(vdf, iters, prev); { ProofCmp cmp(vdf->proofs); @@ -431,7 +445,7 @@ void hw_proof_process_work(struct vdf_state *vdf) vdf->idx, i, iters, proof->seg_iters, is_chkp ? 
" [checkpoint]" : ""); vdf->queued_proofs.erase(vdf->queued_proofs.begin()); vdf->aux_threads_busy |= 1UL << i; - vdf->n_proof_threads += PARALLEL_PROVER_N_THREADS; + vdf->n_proof_threads += vdf->segment_threads; proof->flags |= HW_VDF_PROOF_FLAG_STARTED; std::thread(hw_compute_proof, vdf, idx, proof, i).detach(); } @@ -569,7 +583,7 @@ void hw_stop_proof(struct vdf_state *vdf) class HwProver : public ParallelProver { public: HwProver(Segment segm, integer D, struct vdf_state *vdf) - : ParallelProver(segm, D) + : ParallelProver(segm, D, vdf->segment_threads) { this->vdf = vdf; k = FindK(segm.length); @@ -677,7 +691,7 @@ void hw_compute_proof(struct vdf_state *vdf, size_t proof_idx, struct vdf_proof Segment seg(start_iters, proof_iters - start_iters, x, y); HwProver prover(seg, vdf->D, vdf); - if (!is_chkp && seg.length > g_chkp_thres) { + if (!is_chkp && seg.length > g_chkp_thres * vdf->segment_threads) { LOG_INFO("VDF %d: Warning: too long final proof segment length=%lu", vdf->idx, seg.length); } @@ -727,7 +741,7 @@ void hw_compute_proof(struct vdf_state *vdf, size_t proof_idx, struct vdf_proof out: if (thr_idx < vdf->max_aux_threads) { vdf->aux_threads_busy &= ~(1UL << thr_idx); - vdf->n_proof_threads -= PARALLEL_PROVER_N_THREADS; + vdf->n_proof_threads -= vdf->segment_threads; } } @@ -806,6 +820,10 @@ void init_vdf_state(struct vdf_state *vdf, struct vdf_proof_opts *opts, const ch if (opts && opts->max_proof_threads) { vdf->max_proof_threads = opts->max_proof_threads; } + vdf->segment_threads = 2; + if (opts && opts->segment_threads) { + vdf->segment_threads = opts->segment_threads; + } mpz_set_str(vdf->D.impl, d_str, 0); mpz_set(vdf->L.impl, vdf->D.impl); diff --git a/src/hw/hw_proof.hpp b/src/hw/hw_proof.hpp index 36684131..b70a5151 100644 --- a/src/hw/hw_proof.hpp +++ b/src/hw/hw_proof.hpp @@ -51,6 +51,7 @@ struct vdf_proof { struct vdf_proof_opts { uint8_t max_aux_threads; uint8_t max_proof_threads; + uint8_t segment_threads; }; struct vdf_state { @@ -85,6 
+86,7 @@ struct vdf_state { uint8_t idx; uint8_t max_aux_threads; uint8_t max_proof_threads; + uint8_t segment_threads; bool completed; bool stopping; bool init_done; diff --git a/src/hw/hw_vdf_client.cpp b/src/hw/hw_vdf_client.cpp index c27ac51a..2a00d6df 100644 --- a/src/hw/hw_vdf_client.cpp +++ b/src/hw/hw_vdf_client.cpp @@ -419,6 +419,7 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts) {"vdfs-mask", required_argument, NULL, 1}, {"vdf-threads", required_argument, NULL, 1}, {"proof-threads", required_argument, NULL, 1}, + {"segment-threads", required_argument, NULL, 1}, {"list", no_argument, NULL, 1}, {"auto-freq-period", required_argument, NULL, 1}, {0} @@ -435,6 +436,7 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts) opts->auto_freq = false; opts->vpo.max_aux_threads = HW_VDF_DEFAULT_MAX_AUX_THREADS; opts->vpo.max_proof_threads = 0; + opts->vpo.segment_threads = 0; opts->vdfs_mask = 0; while ((ret = getopt_long(argc, argv, "", long_opts, &long_idx)) == 1) { @@ -451,8 +453,10 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts) } else if (long_idx == 5) { opts->vpo.max_proof_threads = strtoul(optarg, NULL, 0); } else if (long_idx == 6) { - opts->do_list = true; + opts->vpo.segment_threads = strtoul(optarg, NULL, 0); } else if (long_idx == 7) { + opts->do_list = true; + } else if (long_idx == 8) { opts->auto_freq = true; opts->auto_freq_period = strtoul(optarg, NULL, 0); } @@ -493,6 +497,10 @@ int parse_opts(int argc, char **argv, struct vdf_client_opts *opts) LOG_SIMPLE("Number of proof threads must be less than VDF threads"); return -1; } + if (opts->vpo.segment_threads > 8) { + LOG_SIMPLE("Number of proof threads per segment must be between 1 and 8"); + return -1; + } if (opts->auto_freq && opts->auto_freq_period < 10) { LOG_SIMPLE("Invalid auto freq period"); return -1; @@ -525,8 +533,9 @@ int hw_vdf_client_main(int argc, char **argv) " --voltage N - set board voltage [%.2f, 0.7 - 1.0]\n" " --ip 
A.B.C.D - timelord IP address [localhost]\n"
         " --vdfs-mask N - mask for enabling VDF engines [7, 1 - 7]\n"
-        " --vdf-threads N - number of software threads per VDF engine [4, 2 - 64]\n"
-        " --proof-threads N - number of proof threads per VDF engine\n"
+        " --vdf-threads N - max number of software threads per VDF engine [4, 2 - 64]\n"
+        " --proof-threads N - max number of proof threads per VDF engine\n"
+        " --segment-threads N - number of proof threads per segment [2, 1 - 8]\n"
         " --auto-freq-period N - auto-adjust frequency every N seconds [0, 10 - inf]\n"
         " --list - list available devices and exit",
         argv[0], (int)HW_VDF_DEF_FREQ, HW_VDF_DEF_VOLTAGE);
diff --git a/src/prover_parallel.hpp b/src/prover_parallel.hpp
index 6967477e..18551db0 100644
--- a/src/prover_parallel.hpp
+++ b/src/prover_parallel.hpp
@@ -6,7 +6,7 @@
 #include "proof_common.h"
 #include "util.h"
 
-#define PARALLEL_PROVER_N_THREADS 2
+#define PROVER_MAX_SEGMENT_THREADS 8
 
 class ParallelProver : public Prover {
   private:
@@ -76,33 +76,57 @@ class ParallelProver : public Prover {
         x_vals[thr_idx] = x;
     }
   public:
-    ParallelProver(Segment segm, integer D) : Prover(segm, D) {}
+    ParallelProver(Segment segm, integer D, size_t n_thr) : Prover(segm, D) {
+        // GenerateProof() divides by n_threads and indexes arrays of
+        // PROVER_MAX_SEGMENT_THREADS entries, so force it into [1, max].
+        if (n_thr < 1) {
+            n_thr = 1;
+        } else if (n_thr > PROVER_MAX_SEGMENT_THREADS) {
+            n_thr = PROVER_MAX_SEGMENT_THREADS;
+        }
+        this->n_threads = n_thr;
+    }
     void GenerateProof();
 
   protected:
     integer B;
     integer L;
     form id;
-    form x_vals[PARALLEL_PROVER_N_THREADS];
+    form x_vals[PROVER_MAX_SEGMENT_THREADS];
+    size_t n_threads;
 };
 
 void ParallelProver::GenerateProof() {
     PulmarkReducer reducer;
+    uint32_t len = l / n_threads;
+    uint32_t rem = l % n_threads;
+    uint32_t start = l;
+    std::thread threads[PROVER_MAX_SEGMENT_THREADS];
 
     this->B = GetB(D, segm.x, segm.y);
     this->L = root(-D, 4);
     this->id = form::identity(D);
 
-    uint32_t l0 = l / 2;
-    uint32_t l1 = l - l0;
-    std::thread proof_thr(ParallelProver::ProofThread, this, 0, l, l0);
-    ProvePart(1, l1, l1);
+    // Split the l steps into n_threads nearly equal parts; the first
+    // (l % n_threads) parts get one extra step.
+    for (size_t i = 0; i < n_threads; i++) {
+        uint32_t cur_len = len + (i < rem);
+        threads[i] = std::thread(ParallelProver::ProofThread, this, i, start, cur_len);
+        start -= cur_len;
+    }
 
-    proof_thr.join();
+    for (size_t i = 0; i < n_threads; i++) {
+        threads[i].join();
+    }
     if (!PerformExtraStep()) {
         return;
     }
-    nucomp_form(proof, x_vals[0], x_vals[1], D, L);
+
+    // Compose the per-thread partial results into the final proof
+    proof = x_vals[0];
+    for (size_t i = 1; i < n_threads; i++) {
+        nucomp_form(proof, proof, x_vals[i], D, L);
+    }
     reducer.reduce(proof);
     OnFinish();
 }
diff --git a/src/vdf_base.hpp b/src/vdf_base.hpp
index dba3c29b..61cc9328 100644
--- a/src/vdf_base.hpp
+++ b/src/vdf_base.hpp
@@ -295,17 +295,27 @@ class Prover {
     std::atomic is_finished;
 };
 
-#define PARALLEL_PROVER_N_THREADS 2
+#define PROVER_MAX_SEGMENT_THREADS 8
 class ParallelProver : public Prover {
   public:
-    ParallelProver(Segment segm, integer D) : Prover(segm, D) {}
+    ParallelProver(Segment segm, integer D, size_t n_thr) : Prover(segm, D) {
+        // Keep n_threads in [1, PROVER_MAX_SEGMENT_THREADS]: zero threads
+        // cannot do any work and x_vals has only that many entries.
+        if (n_thr < 1) {
+            n_thr = 1;
+        } else if (n_thr > PROVER_MAX_SEGMENT_THREADS) {
+            n_thr = PROVER_MAX_SEGMENT_THREADS;
+        }
+        this->n_threads = n_thr;
+    }
     void GenerateProof();
 
   protected:
     integer B;
     integer L;
     form id;
-    form x_vals[PARALLEL_PROVER_N_THREADS];
+    form x_vals[PROVER_MAX_SEGMENT_THREADS];
+    size_t n_threads;
 };
 
 void nudupl_form(form &a, form &b, integer &D, integer &L);