From 52054719037b1db304862c4b0963dc58e01de491 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 17 Nov 2025 14:10:28 -0800 Subject: [PATCH 01/45] Add constraints and adjust dual simplex to warm start from the current basis --- cpp/src/dual_simplex/basis_updates.cpp | 157 ++++++++++++++++ cpp/src/dual_simplex/basis_updates.hpp | 2 + cpp/src/dual_simplex/solve.cpp | 191 ++++++++++++++++++++ cpp/src/dual_simplex/solve.hpp | 13 ++ cpp/src/dual_simplex/sparse_matrix.cpp | 45 +++++ cpp/src/dual_simplex/sparse_matrix.hpp | 2 + cpp/src/dual_simplex/sparse_vector.cpp | 15 ++ cpp/src/dual_simplex/sparse_vector.hpp | 2 + cpp/tests/dual_simplex/unit_tests/solve.cpp | 108 +++++++++++ 9 files changed, 535 insertions(+) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 3e16411f4..2a8c34d1c 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1108,6 +1108,163 @@ i_t basis_update_t::lower_triangular_multiply(const csc_matrix_t +i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts_basic) +{ + const i_t m = L0_.m; + + // Solve for U^T W^T = C_B^T + // We do this one row at a time of C_B + csc_matrix_t WT(m, cuts_basic.m, 0); + printf("Constructing WT\n"); + + i_t WT_nz = 0; + for (i_t k = 0; k < cuts_basic.m; k++) { + sparse_vector_t rhs(cuts_basic, k); + u_transpose_solve(rhs); + WT.col_start[k] = WT_nz; + for (i_t q = 0; q < rhs.i.size(); q++) { + WT.i.push_back(rhs.i[q]); + WT.x.push_back(rhs.x[q]); + WT_nz++; + } + } + WT.col_start[cuts_basic.m] = WT_nz; + + printf("Constructing V (num updates %d)\n", num_updates_); + + csc_matrix_t V(cuts_basic.m, m, 0); + if (num_updates_ > 0) { + // W = V T_0 ... T_{num_updates_ - 1} + // or V = W T_{num_updates_ - 1}^{-1} ... T_0^{-1} + // or V^T = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T + // We can compute V^T column by column so that we have + // V^T(:, h) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h) + // or + // V(h, :) = T_0^{-T} ... 
T_{num_updates_ - 1}^{-T} W^T(:, h) + + csr_matrix_t V_row(cuts_basic.m, m, 0); + i_t V_nz = 0; + const f_t zero_tol = 1e-13; + for (i_t h = 0; h < cuts_basic.m; h++) { + sparse_vector_t rhs(WT, h); + scatter_into_workspace(rhs); + i_t nz = rhs.i.size(); + for (i_t k = num_updates_ - 1; k >= 0; --k) { + // T_k^{-T} = ( I - v u^T/(1 + u^T v)) + // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu + + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = u^T * b + f_t dot = dot_product(u_col, xi_workspace_, x_workspace_); + const f_t theta = dot / mu; + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_); + } + } + gather_into_sparse_vector(nz, rhs); + V_row.row_start[h] = V_nz; + for (i_t q = 0; q < rhs.i.size(); q++) { + V_row.j.push_back(rhs.i[q]); + V_row.x.push_back(rhs.x[q]); + V_nz++; + } + } + V_row.row_start[cuts_basic.m] = V_nz; + + V_row.to_compressed_col(V); + } + else + { + // W = V + WT.transpose(V); + } + + // Extend u_i, v_i for i = 0, ..., num_updates_ - 1 + S_.m += cuts_basic.m; + + // Adjust L and U + // L = [ L0 0 ] + // [ V I ] + printf("Adjusting L\n"); + + i_t V_nz = V.col_start[m]; + i_t L_nz = L0_.col_start[m]; + csc_matrix_t new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m); + L_nz = 0; + for (i_t j = 0; j < m; ++j) { + new_L.col_start[j] = L_nz; + const i_t col_start = L0_.col_start[j]; + const i_t col_end = L0_.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + new_L.i[L_nz] = L0_.i[p]; + new_L.x[L_nz] = L0_.x[p]; + L_nz++; + } + const i_t V_col_start = V.col_start[j]; + const i_t V_col_end = V.col_start[j + 1]; + for (i_t p = V_col_start; p < V_col_end; ++p) { + new_L.i[L_nz] = V.i[p] + m; + new_L.x[L_nz] = V.x[p]; + L_nz++; + } + } + for (i_t j = m; j < m + cuts_basic.m; ++j) { + new_L.col_start[j] = L_nz; + new_L.i[L_nz] = j; + new_L.x[L_nz] = 1.0; + L_nz++; + } + new_L.col_start[m + cuts_basic.m] = L_nz; + + L0_ = new_L; + + + // Adjust U + // U = [ U0 0 ] + // [ 0 I ] + printf("Adjusting U\n"); + + i_t U_nz = U0_.col_start[m]; + U0_.col_start.resize(m + cuts_basic.m + 1); + U0_.i.resize(U_nz + cuts_basic.m); + U0_.x.resize(U_nz + cuts_basic.m); + for (i_t k = m; k < m + cuts_basic.m; ++k) { + U0_.col_start[k] = U_nz; + U0_.i[U_nz] = k; + U0_.x[U_nz] = 1.0; + U_nz++; + } + U0_.col_start[m + cuts_basic.m] = U_nz; + U0_.n = m + cuts_basic.m; + U0_.m = m + cuts_basic.m; + + printf("Computing transposes\n"); + compute_transposes(); + + + // Adjust row_permutation_ and inverse_row_permutation_ + printf("Adjusting row_permutation_ and inverse_row_permutation_\n"); + row_permutation_.resize(m + cuts_basic.m); + inverse_row_permutation_.resize(m + cuts_basic.m); + for (i_t k = m; k < m + cuts_basic.m; ++k) { + row_permutation_[k] = k; + } + inverse_permutation(row_permutation_, inverse_row_permutation_); + + // Adjust workspace sizes + printf("Adjusting workspace sizes\n"); + xi_workspace_.resize(2 * (m + cuts_basic.m), 0); + x_workspace_.resize(m + cuts_basic.m, 0.0); + + return 0; +} + template void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 078dfffeb..283a1513e 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -291,6 +291,8 @@ class basis_update_mpf_t { reset_stats(); } + i_t append_cuts(const csr_matrix_t& 
cuts_basic); + f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool& use_hypersparse) const { num_calls++; diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 5c5f9e165..5aa74906d 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -293,6 +293,184 @@ lp_status_t solve_linear_program_with_advanced_basis( return lp_status; } +template +lp_status_t solve_linear_program_with_cuts( + const f_t start_time, + const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms) { + // Given a set of cuts: C*x <= d that are currently violated + // by the current solution x* (i.e. C*x* > d), this function + // adds the cuts into the LP and solves again. + + const i_t p = cuts.m; + if (cut_rhs.size() != static_cast(p)) { + settings.log.printf("cut_rhs must have the same number of rows as cuts\n"); + return lp_status_t::NUMERICAL_ISSUES; + } + printf("Number of cuts %d\n", p); + printf("Original lp rows %d\n", lp.num_rows); + printf("Original lp cols %d\n", lp.num_cols); + + csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); + printf("Converting A to compressed row\n"); + lp.A.to_compressed_row(new_A_row); + + + printf("Appening cuts\n"); + new_A_row.append_rows(cuts); + + printf("Converting back to compressed column\n"); + csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); + new_A_row.to_compressed_col(new_A_col); + printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n); + + + printf("Adding slacks\n"); + // Add in slacks variables for the new rows + lp.lower.resize(lp.num_cols + p); + lp.upper.resize(lp.num_cols + p); + lp.objective.resize(lp.num_cols + p); + i_t nz = new_A_col.col_start[lp.num_cols]; + new_A_col.col_start.resize(lp.num_cols + p + 1); + new_A_col.i.resize(nz + p); + new_A_col.x.resize(nz + p); + i_t k = lp.num_rows; + for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) { + new_A_col.col_start[j] = nz; + new_A_col.i[nz] = k++; + new_A_col.x[nz] = 1.0; + nz++; + lp.lower[j] = 0.0; + lp.upper[j] = inf; + lp.objective[j] = 0.0; + } + new_A_col.col_start[lp.num_cols + p] = nz; + new_A_col.n = lp.num_cols + p; + printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n); + printf("new A nnz %d\n", new_A_col.col_start[lp.num_cols + p]); + + lp.A = new_A_col; + i_t old_rows = lp.num_rows; + lp.num_rows += p; + printf("lp rows %d A rows %d\n", lp.num_rows, lp.A.m); + i_t old_cols = lp.num_cols; + lp.num_cols += p; + printf("lp cols %d A cols %d\n", lp.num_cols, lp.A.n); + + printf("New A matrix\n"); + lp.A.print_matrix(stdout); + + printf("Adding rhs\n"); + lp.rhs.resize(lp.num_rows); + for (i_t k = old_rows; k < old_rows + p; k++) { + const i_t h = k - old_rows; + lp.rhs[k] = cut_rhs[h]; + } + + + printf("Constructing column degree\n"); + // Construct C_B = C(:, basic_list) + std::vector C_col_degree(p, 0); + i_t cuts_nz = cuts.row_start[p]; + for (i_t q = 0; q < cuts_nz; q++) { + const i_t j = cuts.j[q]; + C_col_degree[j]++; + } + + std::vector in_basis(old_cols, 0); + const i_t num_basic = static_cast(basic_list.size()); + i_t C_B_nz = 0; + for (i_t k = 0; k < num_basic; k++) { + const i_t j = basic_list[k]; + in_basis[j] = 1; + C_B_nz += C_col_degree[j]; + } + + printf("Constructing C_B\n"); + csr_matrix_t C_B(num_basic, num_basic, C_B_nz); + nz = 0; + for (i_t i = 
0; i < p; i++) { + C_B.row_start[i] = nz; + const i_t row_start = cuts.row_start[i]; + const i_t row_end = cuts.row_start[i+1]; + for (i_t q = row_start; q < row_end; q++) { + const i_t j = cuts.j[q]; + if (in_basis[j] == 0) { continue; } + C_B.j[nz] = j; + C_B.x[nz] = cuts.x[q]; + nz++; + } + } + C_B.row_start[p] = nz; + settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); + if (nz != C_B_nz) { + exit(1); + } + + printf("Adjusting basis update\n"); + // Adjust the basis update to include the new cuts + basis_update.append_cuts(C_B); + + + // Adjust the vstatus + vstatus.resize(lp.num_cols); + for (i_t j = old_cols; j < lp.num_cols; j++) { + vstatus[j] = variable_status_t::BASIC; + } + + basic_list.resize(lp.num_rows, 0); + i_t h = old_cols; + for (i_t j = old_rows; j < lp.num_rows; j++) { + basic_list[j] = h++; + } + + printf("basic list\n"); + for (i_t k = 0; k < basic_list.size(); k++) { + printf("%d ", basic_list[k]); + } + printf("\n"); + + // Adjust the solution + solution.x.resize(lp.num_cols, 0.0); + solution.y.resize(lp.num_rows, 0.0); + solution.z.resize(lp.num_cols, 0.0); + + // For now just clear the edge norms + edge_norms.clear(); + i_t iter = 0; + dual::status_t status = dual_phase2_with_advanced_basis(2, + 0, + false, + start_time, + lp, + settings, + vstatus, + basis_update, + basic_list, + nonbasic_list, + solution, + iter, + edge_norms); + + lp_status_t lp_status; + if (status == dual::status_t::OPTIMAL) { lp_status = lp_status_t::OPTIMAL; } + if (status == dual::status_t::DUAL_UNBOUNDED) { lp_status = lp_status_t::INFEASIBLE; } + if (status == dual::status_t::TIME_LIMIT) { lp_status = lp_status_t::TIME_LIMIT; } + if (status == dual::status_t::ITERATION_LIMIT) { lp_status = lp_status_t::ITERATION_LIMIT; } + if (status == dual::status_t::CONCURRENT_LIMIT) { lp_status = lp_status_t::CONCURRENT_LIMIT; } + if (status == dual::status_t::NUMERICAL) { lp_status = lp_status_t::NUMERICAL_ISSUES; } + if (status == dual::status_t::CUTOFF) { lp_status = lp_status_t::CUTOFF; } + return lp_status; +} + template lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, @@ -661,6 +839,19 @@ template lp_status_t solve_linear_program_with_advanced_basis( std::vector& vstatus, std::vector& edge_norms); +template lp_status_t solve_linear_program_with_cuts( + const double start_time, + const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms); + template lp_status_t solve_linear_program_with_barrier( const user_problem_t& user_problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/solve.hpp b/cpp/src/dual_simplex/solve.hpp index e96229784..d659d6282 100644 --- a/cpp/src/dual_simplex/solve.hpp +++ b/cpp/src/dual_simplex/solve.hpp @@ -61,6 +61,19 @@ lp_status_t solve_linear_program_with_advanced_basis( std::vector& vstatus, std::vector& edge_norms); +template +lp_status_t solve_linear_program_with_cuts(const f_t start_time, + const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms); + template lp_status_t 
solve_linear_program_with_barrier(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index cdd45f720..c2fc343ce 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -357,6 +357,51 @@ i_t csc_matrix_t::remove_row(i_t row) return 0; } +template +i_t csr_matrix_t::append_rows(const csr_matrix_t& C) +{ + const i_t old_m = this->m; + const i_t n = this->n; + const i_t old_nz = this->row_start[old_m]; + const i_t C_row = C.m; + if (0 && C.n != n) { + printf("C n %d != n %d\n", C.n, n); + return -1; + } + const i_t C_nz = C.row_start[C_row]; + const i_t new_nz = old_nz + C_nz; + const i_t new_m = old_m + C_row; + printf("old m %d C_row %d new m %d\n", old_m, C_row, new_m); + + this->j.resize(new_nz); + this->x.resize(new_nz); + this->row_start.resize(new_m + 1); + + i_t nz = old_nz; + for (i_t i = old_m; i < new_m; i++) { + const i_t k = i - old_m; + const i_t nz_row = C.row_start[k+1] - C.row_start[k]; + this->row_start[i] = nz; + nz += nz_row; + } + this->row_start[new_m] = nz; + + for (i_t p = old_nz; p < new_nz; p++) { + const i_t q = p - old_nz; + this->j[p] = C.j[q]; + } + + for (i_t p = old_nz; p < new_nz; p++) { + const i_t q = p - old_nz; + this->x[p] = C.x[q]; + } + + this->m = new_m; + this->nz_max = new_nz; + return 0; +} + + template void csc_matrix_t::print_matrix(FILE* fid) const { diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index c14e6d0f1..eefc31b1f 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -136,6 +136,8 @@ class csr_matrix_t { // Create a new matrix with the marked rows removed i_t remove_rows(std::vector& row_marker, csr_matrix_t& Aout) const; + i_t append_rows(const csr_matrix_t& C); + // Ensures no repeated column indices within a row void check_matrix() const; diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index 2d4745650..f33903fc5 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -28,6 +28,21 @@ sparse_vector_t::sparse_vector_t(const csc_matrix_t& A, i_t } } +template +sparse_vector_t::sparse_vector_t(const csr_matrix_t& A, i_t row) +{ + const i_t row_start = A.row_start[row]; + const i_t row_end = A.row_start[row+1]; + const i_t nz = row_end - row_start; + n = A.n; + i.reserve(nz); + x.reserve(nz); + for (i_t k = row_start; k < row_end; ++k) { + i.push_back(A.j[k]); + x.push_back(A.x[k]); + } +} + template void sparse_vector_t::from_dense(const std::vector& in) { diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index 7acfdc8b5..afa559926 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -25,6 +25,8 @@ class sparse_vector_t { sparse_vector_t(const std::vector& in) { from_dense(in); } // Construct a sparse vector from a column of a CSC matrix sparse_vector_t(const csc_matrix_t& A, i_t col); + // Construct a sparse vector from a row of a CSR matrix + sparse_vector_t(const csr_matrix_t& A, i_t row); // gather a dense vector into a sparse vector void from_dense(const std::vector& in); // convert a sparse vector into a CSC matrix with a single column diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp index 7aed72fe0..0f5c1802b 100644 --- 
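For reference, the row-append logic added to csr_matrix_t above can be sketched with a plain std::vector CSR triple. This is a simplified, standalone illustration under the assumption of the same (row_start, j, x) layout and C.n <= A.n; the struct and function below are illustrative stand-ins, not the repository types, and the sample data mirrors the unit test (a single row [1 1 1] with the cut row [0 0 -1] appended).

#include <cstdio>
#include <vector>

// Minimal CSR triple: row_start has m + 1 entries, j/x hold the nonzeros row by row.
struct csr {
  int m, n;
  std::vector<int> row_start, j;
  std::vector<double> x;
};

// Append the rows of C below A (assumes C.n <= A.n), mirroring csr_matrix_t::append_rows:
// the new row pointers are C's pointers shifted by A's old nonzero count, and the column
// indices and values are copied unchanged.
void append_rows(csr& A, const csr& C)
{
  const int old_nz = A.row_start[A.m];
  for (int i = 0; i < C.m; ++i) {
    A.row_start.push_back(old_nz + C.row_start[i + 1]);
  }
  A.j.insert(A.j.end(), C.j.begin(), C.j.end());
  A.x.insert(A.x.end(), C.x.begin(), C.x.end());
  A.m += C.m;
}

int main()
{
  csr A{1, 3, {0, 3}, {0, 1, 2}, {1.0, 1.0, 1.0}};  // row [1 1 1]
  csr C{1, 3, {0, 1}, {2}, {-1.0}};                 // cut row [0 0 -1]
  append_rows(A, C);
  for (int i = 0; i < A.m; ++i) {
    for (int p = A.row_start[i]; p < A.row_start[i + 1]; ++p) {
      std::printf("A(%d,%d) = %g\n", i, A.j[p], A.x[p]);
    }
  }
  return 0;
}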
a/cpp/tests/dual_simplex/unit_tests/solve.cpp +++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp @@ -326,4 +326,112 @@ TEST(dual_simplex, dual_variable_greater_than) EXPECT_NEAR(solution.z[1], 0.0, 1e-6); } + +TEST(dual_simplex, simple_cuts) +{ + // minimize x + y + 2 z + // subject to x + y + z == 1 + // x, y, z >= 0 + + raft::handle_t handle{}; + cuopt::linear_programming::dual_simplex::user_problem_t user_problem(&handle); + constexpr int m = 1; + constexpr int n = 3; + constexpr int nz = 3; + + user_problem.num_rows = m; + user_problem.num_cols = n; + user_problem.objective.resize(n); + user_problem.objective[0] = 1.0; + user_problem.objective[1] = 1.0; + user_problem.objective[2] = 2.0; + user_problem.A.m = m; + user_problem.A.n = n; + user_problem.A.nz_max = nz; + user_problem.A.reallocate(nz); + user_problem.A.col_start.resize(n + 1); + user_problem.A.col_start[0] = 0; + user_problem.A.col_start[1] = 1; + user_problem.A.col_start[2] = 2; + user_problem.A.col_start[3] = 3; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = 1.0; + user_problem.lower.resize(n, 0.0); + user_problem.upper.resize(n, dual_simplex::inf); + user_problem.num_range_rows = 0; + user_problem.problem_name = "simple_cuts"; + user_problem.obj_scale = 1.0; + user_problem.obj_constant = 0.0; + user_problem.rhs.resize(m, 1.0); + user_problem.row_sense.resize(m, 'E'); + user_problem.var_types.resize(n, cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS); + + cuopt::init_logger_t logger("", true); + + cuopt::linear_programming::dual_simplex::lp_problem_t lp(user_problem.handle_ptr, 1, 1, 1); + cuopt::linear_programming::dual_simplex::simplex_solver_settings_t settings; + settings.barrier = false; + settings.barrier_presolve = false; + settings.log.log = true; + settings.log.log_to_console = true; + settings.log.printf("Test print\n"); + std::vector new_slacks; + cuopt::linear_programming::dual_simplex::dualize_info_t dualize_info; + cuopt::linear_programming::dual_simplex::convert_user_problem(user_problem, settings, lp, new_slacks, dualize_info); + cuopt::linear_programming::dual_simplex::lp_solution_t solution(lp.num_rows, lp.num_cols); + std::vector vstatus; + std::vector edge_norms; + std::vector basic_list(lp.num_rows); + std::vector nonbasic_list; + cuopt::linear_programming::dual_simplex::basis_update_mpf_t basis_update(lp.num_cols, settings.refactor_frequency); + double start_time = dual_simplex::tic(); + printf("Calling solve linear program with advanced basis\n"); + EXPECT_EQ((cuopt::linear_programming::dual_simplex::solve_linear_program_with_advanced_basis( + lp, start_time, settings, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms)), + cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); + printf("Solution objective: %e\n", solution.objective); + printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); + printf("Solution y: %e\n", solution.y[0]); + printf("Solution z: %e %e %e\n", solution.z[0], solution.z[1], solution.z[2]); + EXPECT_NEAR(solution.objective, 1.0, 1e-6); + EXPECT_NEAR(solution.x[0], 1.0, 1e-6); + + + // Add a cut z >= 1/3. 
Needs to be in the form C*x <= d + csr_matrix_t cuts(1, n, 1); + cuts.row_start[0] = 0; + cuts.j[0] = 2; + cuts.x[0] = -1.0; + cuts.row_start[1] = 1; + printf("cuts m %d n %d\n", cuts.m, cuts.n); + std::vector cut_rhs(1); + cut_rhs[0] = -1.0 / 3.0; + EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts( + start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms), + cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); + printf("Solution objective: %e\n", solution.objective); + printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); + EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6); + + cuts.row_start.resize(3); + cuts.j[0] = 1; + cuts.row_start[2] = 2; + cuts.j[1] = 0; + cuts.x[1] = 1.0; + cuts.m = 2; + cut_rhs.resize(2); + cut_rhs[1] = 0.0; + + EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts( + start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms), + cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); + printf("Solution objective: %e\n", solution.objective); + printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); + EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6); +} + } // namespace cuopt::linear_programming::dual_simplex::test From 058433717476a417b56ee135d6e7c4c599742324 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 17 Nov 2025 14:11:43 -0800 Subject: [PATCH 02/45] Style fixes --- cpp/src/dual_simplex/basis_updates.cpp | 36 ++++----- cpp/src/dual_simplex/solve.cpp | 54 ++++++------- cpp/src/dual_simplex/sparse_matrix.cpp | 21 +++-- cpp/src/dual_simplex/sparse_vector.cpp | 6 +- cpp/tests/dual_simplex/unit_tests/solve.cpp | 87 ++++++++++++++------- 5 files changed, 111 insertions(+), 93 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 2a8c34d1c..55a48c2ae 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1146,13 +1146,13 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // V(h, :) = T_0^{-T} ... 
T_{num_updates_ - 1}^{-T} W^T(:, h) csr_matrix_t V_row(cuts_basic.m, m, 0); - i_t V_nz = 0; + i_t V_nz = 0; const f_t zero_tol = 1e-13; for (i_t h = 0; h < cuts_basic.m; h++) { - sparse_vector_t rhs(WT, h); - scatter_into_workspace(rhs); - i_t nz = rhs.i.size(); - for (i_t k = num_updates_ - 1; k >= 0; --k) { + sparse_vector_t rhs(WT, h); + scatter_into_workspace(rhs); + i_t nz = rhs.i.size(); + for (i_t k = num_updates_ - 1; k >= 0; --k) { // T_k^{-T} = ( I - v u^T/(1 + u^T v)) // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu @@ -1161,7 +1161,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts const f_t mu = mu_values_[k]; // dot = u^T * b - f_t dot = dot_product(u_col, xi_workspace_, x_workspace_); + f_t dot = dot_product(u_col, xi_workspace_, x_workspace_); const f_t theta = dot / mu; if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_); @@ -1178,9 +1178,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts V_row.row_start[cuts_basic.m] = V_nz; V_row.to_compressed_col(V); - } - else - { + } else { // W = V WT.transpose(V); } @@ -1198,16 +1196,16 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts csc_matrix_t new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m); L_nz = 0; for (i_t j = 0; j < m; ++j) { - new_L.col_start[j] = L_nz; + new_L.col_start[j] = L_nz; const i_t col_start = L0_.col_start[j]; - const i_t col_end = L0_.col_start[j + 1]; + const i_t col_end = L0_.col_start[j + 1]; for (i_t p = col_start; p < col_end; ++p) { new_L.i[L_nz] = L0_.i[p]; new_L.x[L_nz] = L0_.x[p]; L_nz++; } const i_t V_col_start = V.col_start[j]; - const i_t V_col_end = V.col_start[j + 1]; + const i_t V_col_end = V.col_start[j + 1]; for (i_t p = V_col_start; p < V_col_end; ++p) { new_L.i[L_nz] = V.i[p] + m; new_L.x[L_nz] = V.x[p]; @@ -1216,15 +1214,14 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts } for (i_t j = m; j < m + cuts_basic.m; ++j) { new_L.col_start[j] = L_nz; - new_L.i[L_nz] = j; - new_L.x[L_nz] = 1.0; + new_L.i[L_nz] = j; + new_L.x[L_nz] = 1.0; L_nz++; } new_L.col_start[m + cuts_basic.m] = L_nz; L0_ = new_L; - // Adjust U // U = [ U0 0 ] // [ 0 I ] @@ -1236,18 +1233,17 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts U0_.x.resize(U_nz + cuts_basic.m); for (i_t k = m; k < m + cuts_basic.m; ++k) { U0_.col_start[k] = U_nz; - U0_.i[U_nz] = k; - U0_.x[U_nz] = 1.0; + U0_.i[U_nz] = k; + U0_.x[U_nz] = 1.0; U_nz++; } U0_.col_start[m + cuts_basic.m] = U_nz; - U0_.n = m + cuts_basic.m; - U0_.m = m + cuts_basic.m; + U0_.n = m + cuts_basic.m; + U0_.m = m + cuts_basic.m; printf("Computing transposes\n"); compute_transposes(); - // Adjust row_permutation_ and inverse_row_permutation_ printf("Adjusting row_permutation_ and inverse_row_permutation_\n"); row_permutation_.resize(m + cuts_basic.m); diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 5aa74906d..2754fa677 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -294,18 +294,18 @@ lp_status_t solve_linear_program_with_advanced_basis( } template -lp_status_t solve_linear_program_with_cuts( - const f_t start_time, - const simplex_solver_settings_t& settings, - const csr_matrix_t& cuts, - const std::vector& cut_rhs, - lp_problem_t& lp, - lp_solution_t& solution, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list, - std::vector& vstatus, - std::vector& edge_norms) { +lp_status_t 
solve_linear_program_with_cuts(const f_t start_time, + const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms) +{ // Given a set of cuts: C*x <= d that are currently violated // by the current solution x* (i.e. C*x* > d), this function // adds the cuts into the LP and solves again. @@ -323,7 +323,6 @@ lp_status_t solve_linear_program_with_cuts( printf("Converting A to compressed row\n"); lp.A.to_compressed_row(new_A_row); - printf("Appening cuts\n"); new_A_row.append_rows(cuts); @@ -332,7 +331,6 @@ lp_status_t solve_linear_program_with_cuts( new_A_row.to_compressed_col(new_A_col); printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n); - printf("Adding slacks\n"); // Add in slacks variables for the new rows lp.lower.resize(lp.num_cols + p); @@ -345,19 +343,19 @@ lp_status_t solve_linear_program_with_cuts( i_t k = lp.num_rows; for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) { new_A_col.col_start[j] = nz; - new_A_col.i[nz] = k++; - new_A_col.x[nz] = 1.0; + new_A_col.i[nz] = k++; + new_A_col.x[nz] = 1.0; nz++; - lp.lower[j] = 0.0; - lp.upper[j] = inf; + lp.lower[j] = 0.0; + lp.upper[j] = inf; lp.objective[j] = 0.0; } new_A_col.col_start[lp.num_cols + p] = nz; - new_A_col.n = lp.num_cols + p; + new_A_col.n = lp.num_cols + p; printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n); printf("new A nnz %d\n", new_A_col.col_start[lp.num_cols + p]); - lp.A = new_A_col; + lp.A = new_A_col; i_t old_rows = lp.num_rows; lp.num_rows += p; printf("lp rows %d A rows %d\n", lp.num_rows, lp.A.m); @@ -372,10 +370,9 @@ lp_status_t solve_linear_program_with_cuts( lp.rhs.resize(lp.num_rows); for (i_t k = old_rows; k < old_rows + p; k++) { const i_t h = k - old_rows; - lp.rhs[k] = cut_rhs[h]; + lp.rhs[k] = cut_rhs[h]; } - printf("Constructing column degree\n"); // Construct C_B = C(:, basic_list) std::vector C_col_degree(p, 0); @@ -387,7 +384,7 @@ lp_status_t solve_linear_program_with_cuts( std::vector in_basis(old_cols, 0); const i_t num_basic = static_cast(basic_list.size()); - i_t C_B_nz = 0; + i_t C_B_nz = 0; for (i_t k = 0; k < num_basic; k++) { const i_t j = basic_list[k]; in_basis[j] = 1; @@ -398,9 +395,9 @@ lp_status_t solve_linear_program_with_cuts( csr_matrix_t C_B(num_basic, num_basic, C_B_nz); nz = 0; for (i_t i = 0; i < p; i++) { - C_B.row_start[i] = nz; + C_B.row_start[i] = nz; const i_t row_start = cuts.row_start[i]; - const i_t row_end = cuts.row_start[i+1]; + const i_t row_end = cuts.row_start[i + 1]; for (i_t q = row_start; q < row_end; q++) { const i_t j = cuts.j[q]; if (in_basis[j] == 0) { continue; } @@ -411,15 +408,12 @@ lp_status_t solve_linear_program_with_cuts( } C_B.row_start[p] = nz; settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); - if (nz != C_B_nz) { - exit(1); - } + if (nz != C_B_nz) { exit(1); } printf("Adjusting basis update\n"); // Adjust the basis update to include the new cuts basis_update.append_cuts(C_B); - // Adjust the vstatus vstatus.resize(lp.num_cols); for (i_t j = old_cols; j < lp.num_cols; j++) { @@ -445,7 +439,7 @@ lp_status_t solve_linear_program_with_cuts( // For now just clear the edge norms edge_norms.clear(); - i_t iter = 0; + i_t iter = 0; dual::status_t status = dual_phase2_with_advanced_basis(2, 0, false, diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp 
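To summarize the construction in solve_linear_program_with_cuts, in the notation of the code comments: after the cuts C*x <= d are appended with slack variables s >= 0 (so that C*x + s = d), the constraint matrix and right-hand side become

  A' = [ A  0 ]      b' = [ b ]
       [ C  I ]           [ d ]

and the p new slack columns are placed in the basis. Sketching the case with no product-form updates and ignoring the row permutation for clarity, the new basis then factors as

  B' = [ B    0 ]  =  [ L0  0 ] [ U0  0 ]
       [ C_B  I ]     [ V   I ] [ 0   I ]

where C_B = C(:, basic_list) and V satisfies V*U0 = C_B, i.e. U0^T V^T = C_B^T, which is what the u_transpose_solve loop in append_cuts computes one cut row at a time. When product-form updates are present, the intermediate W is additionally pushed through the T_k^{-T} transformations to obtain V, as described in the comments above. This is why the existing factorization can be extended in place rather than refactorized from scratch.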
b/cpp/src/dual_simplex/sparse_matrix.cpp index c2fc343ce..1263fa63e 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -360,17 +360,17 @@ i_t csc_matrix_t::remove_row(i_t row) template i_t csr_matrix_t::append_rows(const csr_matrix_t& C) { - const i_t old_m = this->m; - const i_t n = this->n; + const i_t old_m = this->m; + const i_t n = this->n; const i_t old_nz = this->row_start[old_m]; - const i_t C_row = C.m; + const i_t C_row = C.m; if (0 && C.n != n) { printf("C n %d != n %d\n", C.n, n); return -1; } - const i_t C_nz = C.row_start[C_row]; + const i_t C_nz = C.row_start[C_row]; const i_t new_nz = old_nz + C_nz; - const i_t new_m = old_m + C_row; + const i_t new_m = old_m + C_row; printf("old m %d C_row %d new m %d\n", old_m, C_row, new_m); this->j.resize(new_nz); @@ -379,8 +379,8 @@ i_t csr_matrix_t::append_rows(const csr_matrix_t& C) i_t nz = old_nz; for (i_t i = old_m; i < new_m; i++) { - const i_t k = i - old_m; - const i_t nz_row = C.row_start[k+1] - C.row_start[k]; + const i_t k = i - old_m; + const i_t nz_row = C.row_start[k + 1] - C.row_start[k]; this->row_start[i] = nz; nz += nz_row; } @@ -388,20 +388,19 @@ i_t csr_matrix_t::append_rows(const csr_matrix_t& C) for (i_t p = old_nz; p < new_nz; p++) { const i_t q = p - old_nz; - this->j[p] = C.j[q]; + this->j[p] = C.j[q]; } for (i_t p = old_nz; p < new_nz; p++) { const i_t q = p - old_nz; - this->x[p] = C.x[q]; + this->x[p] = C.x[q]; } - this->m = new_m; + this->m = new_m; this->nz_max = new_nz; return 0; } - template void csc_matrix_t::print_matrix(FILE* fid) const { diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index f33903fc5..46bae286c 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -32,9 +32,9 @@ template sparse_vector_t::sparse_vector_t(const csr_matrix_t& A, i_t row) { const i_t row_start = A.row_start[row]; - const i_t row_end = A.row_start[row+1]; - const i_t nz = row_end - row_start; - n = A.n; + const i_t row_end = A.row_start[row + 1]; + const i_t nz = row_end - row_start; + n = A.n; i.reserve(nz); x.reserve(nz); for (i_t k = row_start; k < row_end; ++k) { diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp index 0f5c1802b..41a3a8e62 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve.cpp +++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp @@ -326,7 +326,6 @@ TEST(dual_simplex, dual_variable_greater_than) EXPECT_NEAR(solution.z[1], 0.0, 1e-6); } - TEST(dual_simplex, simple_cuts) { // minimize x + y + 2 z @@ -354,44 +353,57 @@ TEST(dual_simplex, simple_cuts) user_problem.A.col_start[1] = 1; user_problem.A.col_start[2] = 2; user_problem.A.col_start[3] = 3; - user_problem.A.i[0] = 0; - user_problem.A.x[0] = 1.0; - user_problem.A.i[1] = 0; - user_problem.A.x[1] = 1.0; - user_problem.A.i[2] = 0; - user_problem.A.x[2] = 1.0; + user_problem.A.i[0] = 0; + user_problem.A.x[0] = 1.0; + user_problem.A.i[1] = 0; + user_problem.A.x[1] = 1.0; + user_problem.A.i[2] = 0; + user_problem.A.x[2] = 1.0; user_problem.lower.resize(n, 0.0); user_problem.upper.resize(n, dual_simplex::inf); user_problem.num_range_rows = 0; user_problem.problem_name = "simple_cuts"; - user_problem.obj_scale = 1.0; - user_problem.obj_constant = 0.0; + user_problem.obj_scale = 1.0; + user_problem.obj_constant = 0.0; user_problem.rhs.resize(m, 1.0); user_problem.row_sense.resize(m, 'E'); - user_problem.var_types.resize(n, 
cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS); + user_problem.var_types.resize( + n, cuopt::linear_programming::dual_simplex::variable_type_t::CONTINUOUS); cuopt::init_logger_t logger("", true); - cuopt::linear_programming::dual_simplex::lp_problem_t lp(user_problem.handle_ptr, 1, 1, 1); + cuopt::linear_programming::dual_simplex::lp_problem_t lp( + user_problem.handle_ptr, 1, 1, 1); cuopt::linear_programming::dual_simplex::simplex_solver_settings_t settings; - settings.barrier = false; - settings.barrier_presolve = false; - settings.log.log = true; + settings.barrier = false; + settings.barrier_presolve = false; + settings.log.log = true; settings.log.log_to_console = true; settings.log.printf("Test print\n"); std::vector new_slacks; cuopt::linear_programming::dual_simplex::dualize_info_t dualize_info; - cuopt::linear_programming::dual_simplex::convert_user_problem(user_problem, settings, lp, new_slacks, dualize_info); - cuopt::linear_programming::dual_simplex::lp_solution_t solution(lp.num_rows, lp.num_cols); + cuopt::linear_programming::dual_simplex::convert_user_problem( + user_problem, settings, lp, new_slacks, dualize_info); + cuopt::linear_programming::dual_simplex::lp_solution_t solution(lp.num_rows, + lp.num_cols); std::vector vstatus; std::vector edge_norms; std::vector basic_list(lp.num_rows); std::vector nonbasic_list; - cuopt::linear_programming::dual_simplex::basis_update_mpf_t basis_update(lp.num_cols, settings.refactor_frequency); + cuopt::linear_programming::dual_simplex::basis_update_mpf_t basis_update( + lp.num_cols, settings.refactor_frequency); double start_time = dual_simplex::tic(); printf("Calling solve linear program with advanced basis\n"); EXPECT_EQ((cuopt::linear_programming::dual_simplex::solve_linear_program_with_advanced_basis( - lp, start_time, settings, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms)), + lp, + start_time, + settings, + solution, + basis_update, + basic_list, + nonbasic_list, + vstatus, + edge_norms)), cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); printf("Solution objective: %e\n", solution.objective); printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); @@ -400,34 +412,51 @@ TEST(dual_simplex, simple_cuts) EXPECT_NEAR(solution.objective, 1.0, 1e-6); EXPECT_NEAR(solution.x[0], 1.0, 1e-6); - // Add a cut z >= 1/3. 
Needs to be in the form C*x <= d csr_matrix_t cuts(1, n, 1); cuts.row_start[0] = 0; - cuts.j[0] = 2; - cuts.x[0] = -1.0; + cuts.j[0] = 2; + cuts.x[0] = -1.0; cuts.row_start[1] = 1; printf("cuts m %d n %d\n", cuts.m, cuts.n); std::vector cut_rhs(1); cut_rhs[0] = -1.0 / 3.0; - EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts( - start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms), + EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time, + settings, + cuts, + cut_rhs, + lp, + solution, + basis_update, + basic_list, + nonbasic_list, + vstatus, + edge_norms), cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); printf("Solution objective: %e\n", solution.objective); printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6); cuts.row_start.resize(3); - cuts.j[0] = 1; + cuts.j[0] = 1; cuts.row_start[2] = 2; - cuts.j[1] = 0; - cuts.x[1] = 1.0; - cuts.m = 2; + cuts.j[1] = 0; + cuts.x[1] = 1.0; + cuts.m = 2; cut_rhs.resize(2); cut_rhs[1] = 0.0; - EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts( - start_time, settings, cuts, cut_rhs, lp, solution, basis_update, basic_list, nonbasic_list, vstatus, edge_norms), + EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time, + settings, + cuts, + cut_rhs, + lp, + solution, + basis_update, + basic_list, + nonbasic_list, + vstatus, + edge_norms), cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); printf("Solution objective: %e\n", solution.objective); printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); From 74fff991b4ca0a47bf076b9edc767cdcb745c038 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 17 Nov 2025 14:17:05 -0800 Subject: [PATCH 03/45] Remove debugging --- cpp/src/dual_simplex/basis_updates.cpp | 8 ------- cpp/src/dual_simplex/solve.cpp | 30 ++++---------------------- cpp/src/dual_simplex/sparse_matrix.cpp | 4 +--- 3 files changed, 5 insertions(+), 37 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 55a48c2ae..5c7834cda 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1118,7 +1118,6 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Solve for U^T W^T = C_B^T // We do this one row at a time of C_B csc_matrix_t WT(m, cuts_basic.m, 0); - printf("Constructing WT\n"); i_t WT_nz = 0; for (i_t k = 0; k < cuts_basic.m; k++) { @@ -1133,8 +1132,6 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts } WT.col_start[cuts_basic.m] = WT_nz; - printf("Constructing V (num updates %d)\n", num_updates_); - csc_matrix_t V(cuts_basic.m, m, 0); if (num_updates_ > 0) { // W = V T_0 ... 
T_{num_updates_ - 1} @@ -1189,7 +1186,6 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Adjust L and U // L = [ L0 0 ] // [ V I ] - printf("Adjusting L\n"); i_t V_nz = V.col_start[m]; i_t L_nz = L0_.col_start[m]; @@ -1225,7 +1221,6 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // Adjust U // U = [ U0 0 ] // [ 0 I ] - printf("Adjusting U\n"); i_t U_nz = U0_.col_start[m]; U0_.col_start.resize(m + cuts_basic.m + 1); @@ -1241,11 +1236,9 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts U0_.n = m + cuts_basic.m; U0_.m = m + cuts_basic.m; - printf("Computing transposes\n"); compute_transposes(); // Adjust row_permutation_ and inverse_row_permutation_ - printf("Adjusting row_permutation_ and inverse_row_permutation_\n"); row_permutation_.resize(m + cuts_basic.m); inverse_row_permutation_.resize(m + cuts_basic.m); for (i_t k = m; k < m + cuts_basic.m; ++k) { @@ -1254,7 +1247,6 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts inverse_permutation(row_permutation_, inverse_row_permutation_); // Adjust workspace sizes - printf("Adjusting workspace sizes\n"); xi_workspace_.resize(2 * (m + cuts_basic.m), 0); x_workspace_.resize(m + cuts_basic.m, 0.0); diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 2754fa677..f8fbd66c1 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -315,23 +315,18 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, settings.log.printf("cut_rhs must have the same number of rows as cuts\n"); return lp_status_t::NUMERICAL_ISSUES; } - printf("Number of cuts %d\n", p); - printf("Original lp rows %d\n", lp.num_rows); - printf("Original lp cols %d\n", lp.num_cols); + settings.log.printf("Number of cuts %d\n", p); + settings.log.printf("Original lp rows %d\n", lp.num_rows); + settings.log.printf("Original lp cols %d\n", lp.num_cols); csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); - printf("Converting A to compressed row\n"); lp.A.to_compressed_row(new_A_row); - printf("Appening cuts\n"); new_A_row.append_rows(cuts); - printf("Converting back to compressed column\n"); csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); new_A_row.to_compressed_col(new_A_col); - printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n); - printf("Adding slacks\n"); // Add in slacks variables for the new rows lp.lower.resize(lp.num_cols + p); lp.upper.resize(lp.num_cols + p); @@ -352,28 +347,20 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, } new_A_col.col_start[lp.num_cols + p] = nz; new_A_col.n = lp.num_cols + p; - printf("new A col rows %d cols %d\n", new_A_col.m, new_A_col.n); - printf("new A nnz %d\n", new_A_col.col_start[lp.num_cols + p]); lp.A = new_A_col; i_t old_rows = lp.num_rows; lp.num_rows += p; - printf("lp rows %d A rows %d\n", lp.num_rows, lp.A.m); i_t old_cols = lp.num_cols; lp.num_cols += p; - printf("lp cols %d A cols %d\n", lp.num_cols, lp.A.n); - printf("New A matrix\n"); - lp.A.print_matrix(stdout); - printf("Adding rhs\n"); lp.rhs.resize(lp.num_rows); for (i_t k = old_rows; k < old_rows + p; k++) { const i_t h = k - old_rows; lp.rhs[k] = cut_rhs[h]; } - printf("Constructing column degree\n"); // Construct C_B = C(:, basic_list) std::vector C_col_degree(p, 0); i_t cuts_nz = cuts.row_start[p]; @@ -391,7 +378,6 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, C_B_nz += C_col_degree[j]; } - printf("Constructing C_B\n"); csr_matrix_t C_B(num_basic, num_basic, C_B_nz); nz = 0; for (i_t i = 
0; i < p; i++) { @@ -408,9 +394,8 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, } C_B.row_start[p] = nz; settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); - if (nz != C_B_nz) { exit(1); } + if (nz != C_B_nz) { return lp_status_t::NUMERICAL_ISSUES; } - printf("Adjusting basis update\n"); // Adjust the basis update to include the new cuts basis_update.append_cuts(C_B); @@ -425,13 +410,6 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, for (i_t j = old_rows; j < lp.num_rows; j++) { basic_list[j] = h++; } - - printf("basic list\n"); - for (i_t k = 0; k < basic_list.size(); k++) { - printf("%d ", basic_list[k]); - } - printf("\n"); - // Adjust the solution solution.x.resize(lp.num_cols, 0.0); solution.y.resize(lp.num_rows, 0.0); diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 1263fa63e..3e01c2f9c 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -364,14 +364,12 @@ i_t csr_matrix_t::append_rows(const csr_matrix_t& C) const i_t n = this->n; const i_t old_nz = this->row_start[old_m]; const i_t C_row = C.m; - if (0 && C.n != n) { - printf("C n %d != n %d\n", C.n, n); + if (C.n > n) { return -1; } const i_t C_nz = C.row_start[C_row]; const i_t new_nz = old_nz + C_nz; const i_t new_m = old_m + C_row; - printf("old m %d C_row %d new m %d\n", old_m, C_row, new_m); this->j.resize(new_nz); this->x.resize(new_nz); From 18828927b62bab0c6fd649aba5ec7ea5b9960c91 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 24 Nov 2025 17:04:09 -0800 Subject: [PATCH 04/45] Fix issues in adding cuts. Add gomory cuts. Temporarily disable MIP heursitics and MIP presolve for experimentation --- cpp/src/dual_simplex/basis_updates.cpp | 61 ++++ cpp/src/dual_simplex/branch_and_bound.cpp | 410 ++++++++++++++++++++-- cpp/src/dual_simplex/phase2.cpp | 33 ++ cpp/src/dual_simplex/solve.cpp | 72 +++- cpp/src/dual_simplex/sparse_matrix.cpp | 27 ++ cpp/src/dual_simplex/sparse_matrix.hpp | 4 + cpp/src/mip/solver.cu | 10 +- 7 files changed, 575 insertions(+), 42 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 5c7834cda..2590c5226 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1132,6 +1132,27 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts } WT.col_start[cuts_basic.m] = WT_nz; + +#ifdef CHECK_W + { + for (i_t k = 0; k < cuts_basic.m; k++) { + std::vector WT_col(m, 0.0); + WT.load_a_column(k, WT_col); + std::vector CBT_col(m, 0.0); + matrix_transpose_vector_multiply(U0_, 1.0, WT_col, 0.0, CBT_col); + sparse_vector_t CBT_col_sparse(cuts_basic, k); + std::vector CBT_col_dense(m); + CBT_col_sparse.to_dense(CBT_col_dense); + for (i_t h = 0; h < m; h++) { + if (std::abs(CBT_col_dense[h] - CBT_col[h]) > 1e-6) { + printf("col %d CBT_col_dense[%d] = %e CBT_col[%d] = %e\n", k, h, CBT_col_dense[h], h, CBT_col[h]); + exit(1); + } + } + } + } +#endif + csc_matrix_t V(cuts_basic.m, m, 0); if (num_updates_ > 0) { // W = V T_0 ... T_{num_updates_ - 1} @@ -1141,6 +1162,8 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts // V^T(:, h) = T_0^{-T} ... T_{num_updates_ - 1}^{-T} W^T(:, h) // or // V(h, :) = T_0^{-T} ... 
T_{num_updates_ - 1}^{-T} W^T(:, h) + // So we can form V row by row in CSR and then covert it to CSC + // for appending to L0 csr_matrix_t V_row(cuts_basic.m, m, 0); i_t V_nz = 0; @@ -1175,6 +1198,39 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts V_row.row_start[cuts_basic.m] = V_nz; V_row.to_compressed_col(V); + + +#ifdef CHECK_V + csc_matrix_t CB_col(cuts_basic.m, m, 0); + cuts_basic.to_compressed_col(CB_col); + for (i_t k = 0; k < m; k++) { + std::vector U_col(m, 0.0); + U0_.load_a_column(k, U_col); + for (i_t h = num_updates_ - 1; h >= 0; --h) { + // T_h = ( I + u_h v_h^T) + // T_h * x = x + u_h * v_h^T * x = x + theta * u_h + const i_t u_col = 2 * h; + const i_t v_col = 2 * h + 1; + f_t theta = dot_product(v_col, U_col); + const i_t col_start = S_.col_start[u_col]; + const i_t col_end = S_.col_start[u_col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S_.i[p]; + U_col[i] += theta * S_.x[p]; + } + } + std::vector CB_column(cuts_basic.m, 0.0); + matrix_vector_multiply(V, 1.0, U_col, 0.0, CB_column); + std::vector CB_col_dense(cuts_basic.m); + CB_col.load_a_column(k, CB_col_dense); + for (i_t l = 0; l < cuts_basic.m; l++) { + if (std::abs(CB_col_dense[l] - CB_column[l]) > 1e-6) { + printf("col %d CB_col_dense[%d] = %e CB_column[%d] = %e\n", k, l, CB_col_dense[l], l, CB_column[l]); + exit(1); + } + } + } +#endif } else { // W = V WT.transpose(V); @@ -1190,6 +1246,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts i_t V_nz = V.col_start[m]; i_t L_nz = L0_.col_start[m]; csc_matrix_t new_L(m + cuts_basic.m, m + cuts_basic.m, L_nz + V_nz + cuts_basic.m); + i_t predicted_nz = L_nz + V_nz + cuts_basic.m; L_nz = 0; for (i_t j = 0; j < m; ++j) { new_L.col_start[j] = L_nz; @@ -1215,6 +1272,10 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts L_nz++; } new_L.col_start[m + cuts_basic.m] = L_nz; + if (L_nz != predicted_nz) { + printf("L_nz %d predicted_nz %d\n", L_nz, predicted_nz); + exit(1); + } L0_ = new_L; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 9f207b6a6..aed49ad4f 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -206,6 +207,10 @@ branch_and_bound_t::branch_and_bound_t( { stats_.start_time = tic(); dualize_info_t dualize_info; +#ifdef PRINT_A + settings_.log.printf("A"); + original_problem_.A.print_matrix(); +#endif convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); @@ -1062,8 +1067,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("Solving LP root relaxation\n"); simplex_solver_settings_t lp_settings = settings_; lp_settings.inside_mip = 1; - lp_status_t root_status = solve_linear_program_advanced( - original_lp_, stats_.start_time, lp_settings, root_relax_soln_, root_vstatus_, edge_norms_); + lp_settings.scale_columns = false; + std::vector basic_list(original_lp_.num_rows); + std::vector nonbasic_list; + basis_update_mpf_t basis_update(original_lp_.num_rows, settings_.refactor_frequency); + lp_status_t root_status = solve_linear_program_with_advanced_basis( + original_lp_, stats_.start_time, lp_settings, root_relax_soln_, basis_update, basic_list, nonbasic_list, root_vstatus_, edge_norms_); stats_.total_lp_iters = root_relax_soln_.iterations; stats_.total_lp_solve_time = 
toc(stats_.start_time); if (root_status == lp_status_t::INFEASIBLE) { @@ -1111,31 +1120,384 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } std::vector fractional; - const i_t num_fractional = + i_t num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); - if (num_fractional == 0) { - mutex_upper_.lock(); - incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); - upper_bound_ = root_objective_; - mutex_upper_.unlock(); - // We should be done here - uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); - solution.objective = incumbent_.objective; - solution.lower_bound = root_objective_; - solution.nodes_explored = 0; - solution.simplex_iterations = root_relax_soln_.iterations; - settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", - compute_user_objective(original_lp_, root_objective_), - toc(stats_.start_time)); + csc_matrix_t Arow(1, 1, 1); + original_lp_.A.transpose(Arow); - if (settings_.solution_callback != nullptr) { - settings_.solution_callback(solution.x, solution.objective); - } - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); + for (i_t cut_pass = 0; cut_pass < 10; cut_pass++) { + if (num_fractional == 0) { + for (i_t j = 0; j < original_lp_.num_cols; j++) { + if (var_types_[j] == variable_type_t::INTEGER) { + settings_.log.printf("Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]); + } + } + mutex_upper_.lock(); + incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); + upper_bound_ = root_objective_; + mutex_upper_.unlock(); + // We should be done here + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = root_objective_; + solution.nodes_explored = 0; + solution.simplex_iterations = root_relax_soln_.iterations; + settings_.log.printf("Optimal solution found at root node. Objective %.16e. 
Time %.2f.\n", + compute_user_objective(original_lp_, root_objective_), + toc(stats_.start_time)); + + if (settings_.solution_callback != nullptr) { + settings_.solution_callback(solution.x, solution.objective); + } + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + return mip_status_t::OPTIMAL; + } else { + settings_.log.printf("Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass); + for (i_t j: fractional) { + settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]); + } + // Let's look for cuts + // Compute b_bar + std::vector b_bar(original_lp_.num_rows); + basis_update.b_solve(original_lp_.rhs, b_bar); + + std::vector nonbasic_mark(original_lp_.num_cols, 0); + for (i_t j : nonbasic_list) { + nonbasic_mark[j] = 1; + } + + std::vector x_workspace(original_lp_.num_cols, 0.0); + std::vector x_mark(original_lp_.num_cols, 0); + + std::vector abar_indices; + abar_indices.reserve(original_lp_.num_cols); + + std::vector has_lower(original_lp_.num_cols, 0); + std::vector has_upper(original_lp_.num_cols, 0); + for (i_t j = 0; j < original_lp_.num_cols; j++) { + if (original_lp_.lower[j] < 0) { + settings_.log.printf( + "Variable %d has negative lower bound %e\n", j, original_lp_.lower[j]); + exit(1); + } + const f_t uj = original_lp_.upper[j]; + const f_t lj = original_lp_.lower[j]; + const f_t xstar_j = root_relax_soln_.x[j]; + if (uj < inf) { + if (uj - xstar_j <= xstar_j - lj) { + has_upper[j] = 1; + //settings_.log.printf("Variable %d in upper\n", j); + } else { + has_lower[j] = 1; + //settings_.log.printf("Variable %d in lower\n", j); + } + continue; + } + + if (lj > -inf) { + has_lower[j] = 1; + //settings_.log.printf("Variable %d in lower\n", j); + } + } + + csr_matrix_t C(0, original_lp_.num_cols, 0); + C.row_start[0] = 0; + std::vector cut_rhs; + + for (i_t i = 0; i < original_lp_.num_rows; i++) { + const i_t j = basic_list[i]; + //settings_.log.printf( + // "Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]); + if (var_types_[j] != variable_type_t::INTEGER) { continue; } + const f_t x_j = root_relax_soln_.x[j]; + if (std::abs(x_j - std::round(x_j)) < settings_.integer_tol) { continue; } + + settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i); +#ifdef PRINT_BASIS + for (i_t h = 0; h < basic_list.size(); h++) { + settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]); + } +#endif + + // Solve B^T u_bar = e_i + sparse_vector_t e_i(original_lp_.num_rows, 1); + e_i.i[0] = i; + e_i.x[0] = 1.0; + sparse_vector_t u_bar(original_lp_.num_rows, 0); + basis_update.b_transpose_solve(e_i, u_bar); + + std::vector u_bar_dense(original_lp_.num_rows); + u_bar.to_dense(u_bar_dense); + + std::vector BTu_bar(original_lp_.num_rows); + b_transpose_multiply(original_lp_, basic_list, u_bar_dense, BTu_bar); + for (i_t k = 0; k < original_lp_.num_rows; k++) { + if (k == i) { + if (std::abs(BTu_bar[k] - 1.0) > 1e-6) { + settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); + exit(1); + } + } else { + if (std::abs(BTu_bar[k]) > 1e-6) { + settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); + exit(1); + } + } + } + + // Compute a_bar = N^T u_bar + const i_t nz_ubar = u_bar.i.size(); + for (i_t k = 0; k < nz_ubar; k++) { + const i_t ii = u_bar.i[k]; + const f_t u_bar_i = u_bar.x[k]; + const i_t row_start = Arow.col_start[ii]; + const i_t row_end = 
Arow.col_start[ii + 1]; + for (i_t p = row_start; p < row_end; p++) { + const i_t jj = Arow.i[p]; + if (nonbasic_mark[jj] == 1) { + x_workspace[jj] += u_bar_i * Arow.x[p]; + if (!x_mark[jj]) { + x_mark[jj] = 1; + abar_indices.push_back(jj); + } + } + } + } + + sparse_vector_t a_bar(original_lp_.num_cols, abar_indices.size() + 1); + for (i_t k = 0; k < abar_indices.size(); k++) { + const i_t jj = abar_indices[k]; + a_bar.i[k] = jj; + a_bar.x[k] = x_workspace[jj]; + } + + // Clear the workspace + for (i_t jj : abar_indices) { + x_workspace[jj] = 0.0; + x_mark[jj] = 0; + } + abar_indices.clear(); + + // We should now have the base inequality + // x_j + a_bar^T x_N >= b_bar_i + // We add x_j into a_bar so that everything is in a single sparse_vector_t + a_bar.i[a_bar.i.size() - 1] = j; + a_bar.x[a_bar.x.size() - 1] = 1.0; + + std::vector a_bar_dense(original_lp_.num_cols); + a_bar.to_dense(a_bar_dense); + + f_t a_bar_dense_dot = dot(a_bar_dense, root_relax_soln_.x); + settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]); + + settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); + + // Print out the base inequality + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + settings_.log.printf("a_bar[%d] = %e\n", k, aj); + } + settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]); + + auto f = [](f_t q_1, f_t q_2) -> f_t { + f_t q_1_hat = q_1 - std::floor(q_1); + f_t q_2_hat = q_2 - std::floor(q_2); + return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1); + }; + + auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; + + f_t R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]); + std::vector cut_indices; + cut_indices.reserve(a_bar.i.size()); + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + f_t aj = a_bar.x[k]; + if (var_types_[jj] == variable_type_t::INTEGER) { + x_workspace[jj] += f(aj, b_bar[i]); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } else { + x_workspace[jj] += h(aj); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } + } + +#ifdef CMIR + // Compute r + f_t r = b_bar[i]; + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + if (has_upper[jj]) { + const f_t uj = original_lp_.upper[jj]; + r -= uj * a_bar.x[k]; + continue; + } + if (has_lower[jj]) { + const f_t lj = original_lp_.lower[jj]; + r -= lj * a_bar.x[k]; + } + } + + // Compute R + f_t R = std::ceil(r) * (r - std::floor(r)); + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + if (has_upper[jj]) { + const f_t uj = original_lp_.upper[jj]; + if (var_types_[jj] == variable_type_t::INTEGER) { + R -= f(-aj, r) * uj; + } else { + R -= h(-aj) * uj; + } + } else if (has_lower[jj]) { + const f_t lj = original_lp_.lower[jj]; + if (var_types_[jj] == variable_type_t::INTEGER) { + R += f(aj, r) * lj; + } else { + R += h(aj) * lj; + } + } + } + + // Compute the cut coefficients + std::vector cut_indices; + cut_indices.reserve(a_bar.i.size()); + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + if (has_upper[jj]) { + if (var_types_[jj] == variable_type_t::INTEGER) { + // Upper intersect I + x_workspace[jj] -= f(-aj, r); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } else { + // Upper intersect C + x_workspace[jj] -= h(-aj); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } + } 
else if (var_types_[jj] == variable_type_t::INTEGER) { + // I \ Upper + x_workspace[jj] -= f(aj, r); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } else { + // C \ Upper + x_workspace[jj] += h(aj); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } + } +#endif + + sparse_vector_t cut(original_lp_.num_cols, cut_indices.size()); + for (i_t k = 0; k < cut_indices.size(); k++) { + const i_t jj = cut_indices[k]; + cut.i[k] = jj; + cut.x[k] = x_workspace[jj]; + } + + // Clear the workspace + for (i_t jj : cut_indices) { + x_workspace[jj] = 0.0; + x_mark[jj] = 0; + } + + // Sort the coefficients by their index + cut.sort(); + // The new cut is: g'*x >= R + // But we want to have it in the form h'*x <= b + for (i_t k = 0; k < cut.x.size(); k++) { + cut.x[k] *= -1.0; + } + + C.append_row(cut); + cut_rhs.push_back(-R); + } + + csc_matrix_t C_col(C.m, C.n, 0); + C.to_compressed_col(C_col); + +#ifdef PRINT_CUTS + C_col.print_matrix(); +#endif + + C.check_matrix(); +#ifdef PRINT_CUT_RHS + for (i_t k = 0; k < cut_rhs.size(); k++) { + lp_settings.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]); + } +#endif + + lp_settings.log.printf("C nz %d\n", C.row_start[C.m]); + lp_settings.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size()); + lp_settings.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols); + +#ifdef PRINT_OPTIMAL + for (i_t j = 0; j < original_lp_.num_cols; j++) { + lp_settings.log.printf("x[%d] = %e\n", j, root_relax_soln_.x[j]); + } +#endif + + // Check to see that this is a cut i.e C*x > d + std::vector Cx(C.m); + matrix_vector_multiply(C_col, 1.0, root_relax_soln_.x, 0.0, Cx); + for (i_t k = 0; k < Cx.size(); k++) { + //lp_settings.log.printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]); + if (Cx[k] <= cut_rhs[k]) { + lp_settings.log.printf("C*x <= d for cut %d\n", k); + exit(1); + } + } + + // Resolve the LP with the new cuts + lp_settings.log.printf("Solving LP with %d cuts\n", C.m); + + lp_status_t cut_status = solve_linear_program_with_cuts(stats_.start_time, + lp_settings, + C, + cut_rhs, + original_lp_, + root_relax_soln_, + basis_update, + basic_list, + nonbasic_list, + root_vstatus_, + edge_norms_); + + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + + if (cut_status != lp_status_t::OPTIMAL) { + lp_settings.log.printf("Cut status %d\n", cut_status); + exit(1); + } + + original_lp_.A.transpose(Arow); + var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); + + fractional.clear(); + num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); } - return mip_status_t::OPTIMAL; } pc_.resize(original_lp_.num_cols); @@ -1177,8 +1539,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " "| Time |\n"); - csc_matrix_t Arow(1, 1, 1); - original_lp_.A.transpose(Arow); stats_.nodes_explored = 0; stats_.nodes_unexplored = 2; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 39ea9b465..2ff075c15 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2397,6 +2397,39 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, } timers.pricing_time += timers.stop_timer(); if (leaving_index == -1) { + + +#ifdef CHECK_BASIS_UPDATE + for (i_t k = 0; k < basic_list.size(); k++) { + const i_t jj = basic_list[k]; + sparse_vector_t ei_sparse(m, 1); + ei_sparse.i[0] = k; + ei_sparse.x[0] = 1.0; + 
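+        // Debug dump of the current factorization: for each basic position k we solve
+        // B^T ubar = e_k below and then form B^T * ubar again with b_transpose_multiply.
+        // In exact arithmetic this reproduces e_k, so large off-unit entries in the
+        // printout indicate that the LU updates no longer match basic_list.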
sparse_vector_t ubar_sparse(m, 0); + ft.b_transpose_solve(ei_sparse, ubar_sparse); + std::vector ubar_dense(m); + ubar_sparse.to_dense(ubar_dense); + std::vector BTu_dense(m); + b_transpose_multiply(lp, basic_list, ubar_dense, BTu_dense); + for (i_t l = 0; l < m; l++) { + if (l != k) { + settings.log.printf("BTu_dense[%d] = %e i %d\n", l, BTu_dense[l], k); + } else { + settings.log.printf("BTu_dense[%d] = %e != 1.0 i %d\n", l, BTu_dense[l], k); + } + } + for (i_t h = 0; h < m; h++) { + settings.log.printf("i %d ubar_dense[%d] = %.16e\n", k, h, ubar_dense[h]); + } + } + settings.log.printf("ft.num_updates() %d\n", ft.num_updates()); + for (i_t h = 0; h < m; h++) { + settings.log.printf("basic_list[%d] = %d\n", h, basic_list[h]); + } + +#endif + + phase2::prepare_optimality(lp, settings, ft, diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index f8fbd66c1..8874bf420 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -310,6 +311,21 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, // by the current solution x* (i.e. C*x* > d), this function // adds the cuts into the LP and solves again. + + { + csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); + basis_update.multiply_lu(Btest); + csc_matrix_t B(lp.num_rows, lp.num_rows, 1); + form_b(lp.A, basic_list, B); + csc_matrix_t Diff(lp.num_rows, lp.num_rows, 1); + add(Btest, B, 1.0, -1.0, Diff); + const f_t err = Diff.norm1(); + settings.log.printf("Before || B - L*U || %e\n", err); + if (err > 1e-6) { + exit(1); + } + } + const i_t p = cuts.m; if (cut_rhs.size() != static_cast(p)) { settings.log.printf("cut_rhs must have the same number of rows as cuts\n"); @@ -362,23 +378,31 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, } // Construct C_B = C(:, basic_list) - std::vector C_col_degree(p, 0); + std::vector C_col_degree(lp.num_cols, 0); i_t cuts_nz = cuts.row_start[p]; for (i_t q = 0; q < cuts_nz; q++) { const i_t j = cuts.j[q]; + if (j >= lp.num_cols) { + settings.log.printf("j %d is greater than p %d\n", j, p); + exit(1); + } C_col_degree[j]++; } - std::vector in_basis(old_cols, 0); + std::vector in_basis(old_cols, -1); const i_t num_basic = static_cast(basic_list.size()); i_t C_B_nz = 0; for (i_t k = 0; k < num_basic; k++) { const i_t j = basic_list[k]; - in_basis[j] = 1; - C_B_nz += C_col_degree[j]; + in_basis[j] = k; + if (j < p) + { + C_B_nz += C_col_degree[j]; + } } + settings.log.printf("Done estimating C_B_nz\n"); - csr_matrix_t C_B(num_basic, num_basic, C_B_nz); + csr_matrix_t C_B(p, num_basic, C_B_nz); nz = 0; for (i_t i = 0; i < p; i++) { C_B.row_start[i] = nz; @@ -386,30 +410,49 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, const i_t row_end = cuts.row_start[i + 1]; for (i_t q = row_start; q < row_end; q++) { const i_t j = cuts.j[q]; - if (in_basis[j] == 0) { continue; } - C_B.j[nz] = j; + const i_t j_basis = in_basis[j]; + if (j_basis == -1) { continue; } + C_B.j[nz] = j_basis; C_B.x[nz] = cuts.x[q]; nz++; } } C_B.row_start[p] = nz; settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); - if (nz != C_B_nz) { return lp_status_t::NUMERICAL_ISSUES; } + if (nz != C_B_nz) { exit(1); return lp_status_t::NUMERICAL_ISSUES; } + settings.log.printf("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); + // Adjust the basis update to include the new cuts basis_update.append_cuts(C_B); + basic_list.resize(lp.num_rows, 0); + i_t h = old_cols; + for (i_t 
j = old_rows; j < lp.num_rows; j++) { + basic_list[j] = h++; + } + + // Check the basis update + csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); + basis_update.multiply_lu(Btest); + + csc_matrix_t B(lp.num_rows, lp.num_rows, 1); + form_b(lp.A, basic_list, B); + + csc_matrix_t Diff(lp.num_rows, lp.num_rows, 1); + add(Btest, B, 1.0, -1.0, Diff); + const f_t err = Diff.norm1(); + settings.log.printf("After || B - L*U || %e\n", err); + if (err > 1e-6) { + Diff.print_matrix(); + exit(1); + } // Adjust the vstatus vstatus.resize(lp.num_cols); for (i_t j = old_cols; j < lp.num_cols; j++) { vstatus[j] = variable_status_t::BASIC; } - basic_list.resize(lp.num_rows, 0); - i_t h = old_cols; - for (i_t j = old_rows; j < lp.num_rows; j++) { - basic_list[j] = h++; - } // Adjust the solution solution.x.resize(lp.num_cols, 0.0); solution.y.resize(lp.num_rows, 0.0); @@ -418,9 +461,10 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, // For now just clear the edge norms edge_norms.clear(); i_t iter = 0; + bool initialize_basis = false; dual::status_t status = dual_phase2_with_advanced_basis(2, 0, - false, + initialize_basis, start_time, lp, settings, diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 3e01c2f9c..0da4f90e9 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -399,6 +399,33 @@ i_t csr_matrix_t::append_rows(const csr_matrix_t& C) return 0; } +template +i_t csr_matrix_t::append_row(const sparse_vector_t& c) +{ + const i_t old_m = this->m; + const i_t n = this->n; + const i_t old_nz = this->row_start[old_m]; + const i_t c_nz = c.i.size(); + const i_t new_nz = old_nz + c_nz; + const i_t new_m = old_m + 1; + + this->j.resize(new_nz); + this->x.resize(new_nz); + this->row_start.resize(new_m + 1); + this->row_start[new_m] = new_nz; + + i_t nz = old_nz; + for (i_t k = 0; k < c_nz; k++) { + this->j[nz] = c.i[k]; + this->x[nz] = c.x[k]; + nz++; + } + + this->m = new_m; + this->nz_max = new_nz; + return 0; +} + template void csc_matrix_t::print_matrix(FILE* fid) const { diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index eefc31b1f..49c5c185a 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -136,8 +136,12 @@ class csr_matrix_t { // Create a new matrix with the marked rows removed i_t remove_rows(std::vector& row_marker, csr_matrix_t& Aout) const; + // Append rows from another CSR matrix i_t append_rows(const csr_matrix_t& C); + // Append a row from a sparse vector + i_t append_row(const sparse_vector_t& c); + // Ensures no repeated column indices within a row void check_matrix() const; diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 28659cccd..e8978e17e 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -109,7 +109,8 @@ solution_t mip_solver_t::run_solver() diversity_manager_t dm(context); dm.timer = timer_; - bool presolve_success = dm.run_presolve(timer_.remaining_time()); + //bool presolve_success = dm.run_presolve(timer_.remaining_time()); + bool presolve_success = true; if (!presolve_success) { CUOPT_LOG_INFO("Problem proven infeasible in presolve"); solution_t sol(*context.problem_ptr); @@ -117,7 +118,7 @@ solution_t mip_solver_t::run_solver() context.problem_ptr->post_process_solution(sol); return sol; } - if (context.problem_ptr->empty) { + if (0 && context.problem_ptr->empty) { CUOPT_LOG_INFO("Problem full reduced in presolve"); solution_t 
sol(*context.problem_ptr); sol.set_problem_fully_reduced(); @@ -126,7 +127,7 @@ solution_t mip_solver_t::run_solver() } // if the problem was reduced to a LP: run concurrent LP - if (context.problem_ptr->n_integer_vars == 0) { + if (0 && context.problem_ptr->n_integer_vars == 0) { CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP"); pdlp_solver_settings_t settings{}; settings.time_limit = timer_.remaining_time(); @@ -224,6 +225,9 @@ solution_t mip_solver_t::run_solver() std::ref(branch_and_bound_solution)); } + auto bb_status = branch_and_bound_status_future.get(); + exit(1); + // Start the primal heuristics auto sol = dm.run_solver(); if (!context.settings.heuristics_only) { From 96ed3864b2595b2d837b403c6c63147e164bce6a Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 24 Nov 2025 17:13:19 -0800 Subject: [PATCH 05/45] Fix unit test --- cpp/tests/dual_simplex/unit_tests/solve.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp index 41a3a8e62..95623f4b2 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve.cpp +++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp @@ -438,8 +438,12 @@ TEST(dual_simplex, simple_cuts) EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6); cuts.row_start.resize(3); + cuts.j.resize(2); + cuts.x.resize(2); + // Add cut y >= 1/3 cuts.j[0] = 1; cuts.row_start[2] = 2; + // Add cut x <= 0.0 cuts.j[1] = 0; cuts.x[1] = 1.0; cuts.m = 2; From 6ff7952f4772ff577c0f220475119e95b2f15198 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 24 Nov 2025 17:20:28 -0800 Subject: [PATCH 06/45] Fix issue when computing nonzeros in C_B --- cpp/src/dual_simplex/solve.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 8874bf420..3fba1a5ac 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -395,7 +395,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, for (i_t k = 0; k < num_basic; k++) { const i_t j = basic_list[k]; in_basis[j] = k; - if (j < p) + if (j < cuts.n) { C_B_nz += C_col_degree[j]; } From 20b5777156b48c55fa598fbd9d48d449987c62ea Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 24 Nov 2025 17:26:01 -0800 Subject: [PATCH 07/45] Check solution values at end of unit test --- cpp/tests/dual_simplex/unit_tests/solve.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp index 95623f4b2..cd66e63f1 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve.cpp +++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp @@ -465,6 +465,10 @@ TEST(dual_simplex, simple_cuts) printf("Solution objective: %e\n", solution.objective); printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); EXPECT_NEAR(solution.objective, 4.0 / 3.0, 1e-6); + EXPECT_NEAR(solution.x[0], 0.0, 1e-6); + EXPECT_NEAR(solution.x[1], 2.0 / 3.0, 1e-6); + EXPECT_NEAR(solution.x[2], 1.0 / 3.0, 1e-6); + } } // namespace cuopt::linear_programming::dual_simplex::test From ca571a04401a505e155d2ab169fd51dd69186937 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 25 Nov 2025 13:13:20 -0800 Subject: [PATCH 08/45] Enable c-MIR cuts --- cpp/src/dual_simplex/branch_and_bound.cpp | 173 ++++++++++++---------- 1 file changed, 91 insertions(+), 82 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 
aed49ad4f..f9c323133 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1159,7 +1159,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut for (i_t j: fractional) { settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]); } - // Let's look for cuts + // Let's look for Gomory cuts // Compute b_bar std::vector b_bar(original_lp_.num_rows); basis_update.b_solve(original_lp_.rhs, b_bar); @@ -1177,6 +1177,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector has_lower(original_lp_.num_cols, 0); std::vector has_upper(original_lp_.num_cols, 0); + + bool needs_complement = false; for (i_t j = 0; j < original_lp_.num_cols; j++) { if (original_lp_.lower[j] < 0) { settings_.log.printf( @@ -1185,23 +1187,27 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } const f_t uj = original_lp_.upper[j]; const f_t lj = original_lp_.lower[j]; + if (uj != inf || lj != 0.0) { + needs_complement = true; + } const f_t xstar_j = root_relax_soln_.x[j]; if (uj < inf) { if (uj - xstar_j <= xstar_j - lj) { has_upper[j] = 1; - //settings_.log.printf("Variable %d in upper\n", j); + settings_.log.printf("Variable %d in upper\n", j); } else { has_lower[j] = 1; - //settings_.log.printf("Variable %d in lower\n", j); + settings_.log.printf("Variable %d in lower\n", j); } continue; } if (lj > -inf) { has_lower[j] = 1; - //settings_.log.printf("Variable %d in lower\n", j); + settings_.log.printf("Variable %d in lower\n", j); } } + settings_.log.printf("needs_complement %d\n", needs_complement); csr_matrix_t C(0, original_lp_.num_cols, 0); C.row_start[0] = 0; @@ -1249,6 +1255,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } // Compute a_bar = N^T u_bar + // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused. 
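+      // In dense terms the loop below forms row i of the simplex tableau,
+      //   a_bar^T = e_i^T B^{-1} N,   i.e.   a_bar[j] = u_bar' * A(:, j) for each nonbasic j.
+      // It only walks the rows of A (via Arow) where u_bar is nonzero and scatters the
+      // partial products into x_workspace, recording the touched columns in abar_indices.
+      // An equivalent but slower dense sketch (illustration only, not part of this change):
+      //   for (i_t jj : nonbasic_list) {
+      //     f_t sum = 0.0;
+      //     for (i_t p = original_lp_.A.col_start[jj]; p < original_lp_.A.col_start[jj + 1]; ++p)
+      //       sum += u_bar_dense[original_lp_.A.i[p]] * original_lp_.A.x[p];
+      //     x_workspace[jj] = sum;
+      //   }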
const i_t nz_ubar = u_bar.i.size(); for (i_t k = 0; k < nz_ubar; k++) { const i_t ii = u_bar.i[k]; @@ -1295,6 +1302,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); +#ifdef PRINT_BASE_INEQUALITY // Print out the base inequality for (i_t k = 0; k < a_bar.i.size(); k++) { const i_t jj = a_bar.i[k]; @@ -1302,6 +1310,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("a_bar[%d] = %e\n", k, aj); } settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]); +#endif auto f = [](f_t q_1, f_t q_2) -> f_t { f_t q_1_hat = q_1 - std::floor(q_1); @@ -1311,104 +1320,105 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; - f_t R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]); + std::vector cut_indices; cut_indices.reserve(a_bar.i.size()); - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - f_t aj = a_bar.x[k]; - if (var_types_[jj] == variable_type_t::INTEGER) { - x_workspace[jj] += f(aj, b_bar[i]); - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } else { - x_workspace[jj] += h(aj); - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } - } + f_t R; + if (!needs_complement) { + R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]); -#ifdef CMIR - // Compute r - f_t r = b_bar[i]; - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - if (has_upper[jj]) { - const f_t uj = original_lp_.upper[jj]; - r -= uj * a_bar.x[k]; - continue; - } - if (has_lower[jj]) { - const f_t lj = original_lp_.lower[jj]; - r -= lj * a_bar.x[k]; - } - } - - // Compute R - f_t R = std::ceil(r) * (r - std::floor(r)); - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - const f_t aj = a_bar.x[k]; - if (has_upper[jj]) { - const f_t uj = original_lp_.upper[jj]; + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + f_t aj = a_bar.x[k]; if (var_types_[jj] == variable_type_t::INTEGER) { - R -= f(-aj, r) * uj; + x_workspace[jj] += f(aj, b_bar[i]); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } } else { - R -= h(-aj) * uj; + x_workspace[jj] += h(aj); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } } - } else if (has_lower[jj]) { - const f_t lj = original_lp_.lower[jj]; - if (var_types_[jj] == variable_type_t::INTEGER) { - R += f(aj, r) * lj; - } else { - R += h(aj) * lj; + } + } else { + // Compute r + f_t r = b_bar[i]; + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + if (has_upper[jj]) { + const f_t uj = original_lp_.upper[jj]; + r -= uj * a_bar.x[k]; + continue; + } + if (has_lower[jj]) { + const f_t lj = original_lp_.lower[jj]; + r -= lj * a_bar.x[k]; } } - } - // Compute the cut coefficients - std::vector cut_indices; - cut_indices.reserve(a_bar.i.size()); - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - const f_t aj = a_bar.x[k]; - if (has_upper[jj]) { - if (var_types_[jj] == variable_type_t::INTEGER) { - // Upper intersect I - x_workspace[jj] -= f(-aj, r); + // Compute R + R = std::ceil(r) * (r - std::floor(r)); + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + if (has_upper[jj]) { + const f_t uj = original_lp_.upper[jj]; + if (var_types_[jj] == variable_type_t::INTEGER) { + R -= f(-aj, r) * uj; + } else { + R -= h(-aj) * uj; + } + } else if 
(has_lower[jj]) { + const f_t lj = original_lp_.lower[jj]; + if (var_types_[jj] == variable_type_t::INTEGER) { + R += f(aj, r) * lj; + } else { + R += h(aj) * lj; + } + } + } + + // Compute the cut coefficients + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + if (has_upper[jj]) { + if (var_types_[jj] == variable_type_t::INTEGER) { + // Upper intersect I + x_workspace[jj] -= f(-aj, r); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } else { + // Upper intersect C + x_workspace[jj] -= h(-aj); + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } + } + } else if (var_types_[jj] == variable_type_t::INTEGER) { + // I \ Upper + x_workspace[jj] += f(aj, r); if (!x_mark[jj]) { x_mark[jj] = 1; cut_indices.push_back(jj); } } else { - // Upper intersect C - x_workspace[jj] -= h(-aj); + // C \ Upper + x_workspace[jj] += h(aj); if (!x_mark[jj]) { x_mark[jj] = 1; cut_indices.push_back(jj); } } - } else if (var_types_[jj] == variable_type_t::INTEGER) { - // I \ Upper - x_workspace[jj] -= f(aj, r); - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } else { - // C \ Upper - x_workspace[jj] += h(aj); - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } } } -#endif sparse_vector_t cut(original_lp_.num_cols, cut_indices.size()); for (i_t k = 0; k < cut_indices.size(); k++) { @@ -1437,7 +1447,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut csc_matrix_t C_col(C.m, C.n, 0); C.to_compressed_col(C_col); - #ifdef PRINT_CUTS C_col.print_matrix(); #endif From 9dea7ce5c8e6ac5a920450f5b4fead70e7a24b4d Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 25 Nov 2025 21:01:55 -0800 Subject: [PATCH 09/45] Add integer infeasibility info. Remove inactive cuts. 
Add mip_cut_passes parameter --- .../cuopt/linear_programming/constants.h | 1 + .../mip/solver_settings.hpp | 1 + cpp/src/dual_simplex/basis_updates.cpp | 2 +- cpp/src/dual_simplex/branch_and_bound.cpp | 236 +++++++++++++++--- cpp/src/dual_simplex/mip_node.hpp | 10 +- .../dual_simplex/simplex_solver_settings.hpp | 2 + cpp/src/dual_simplex/solve.cpp | 13 +- cpp/src/math_optimization/solver_settings.cu | 3 +- cpp/src/mip/solver.cu | 1 + 9 files changed, 233 insertions(+), 36 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index cc051ab49..15082254e 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -57,6 +57,7 @@ #define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" #define CUOPT_MIP_SCALING "mip_scaling" #define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" #define CUOPT_SOLUTION_FILE "solution_file" #define CUOPT_NUM_CPU_THREADS "num_cpu_threads" #define CUOPT_USER_PROBLEM_FILE "user_problem_file" diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 2c62f1b44..72026d7d1 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -81,6 +81,7 @@ class mip_solver_settings_t { f_t time_limit = std::numeric_limits::infinity(); bool heuristics_only = false; i_t num_cpu_threads = -1; // -1 means use default number of threads in branch and bound + i_t max_cut_passes = 10; // number of cut passes to make bool log_to_console = true; std::string log_file; std::string sol_file; diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 2590c5226..fd70194e1 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -2267,7 +2267,7 @@ int basis_update_mpf_t::refactor_basis( q, deficient, slacks_needed) == -1) { - settings.log.debug("Initial factorization failed\n"); + settings.log.printf("Initial factorization failed\n"); basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); #ifdef CHECK_BASIS_REPAIR diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index f9c323133..bb225ddbc 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -517,12 +517,13 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, f_t lower_bound = get_lower_bound(); f_t obj = compute_user_objective(original_lp_, upper_bound_); f_t lower = compute_user_objective(original_lp_, lower_bound); - settings_.log.printf("%c%10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + settings_.log.printf("%c%10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", thread_type, nodes_explored, nodes_unexplored, obj, lower, + 0, leaf_depth, nodes_explored > 0 ? 
stats_.total_lp_iters / nodes_explored : 0, user_mip_gap(obj, lower).c_str(), @@ -665,7 +666,7 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& assert(leaf_vstatus.size() == leaf_problem.num_cols); search_tree.branch( - node_ptr, branch_var, leaf_solution.x[branch_var], leaf_vstatus, original_lp_, log); + node_ptr, branch_var, leaf_solution.x[branch_var], leaf_num_fractional, leaf_vstatus, original_lp_, log); node_ptr->status = node_status_t::HAS_CHILDREN; return node_status_t::HAS_CHILDREN; @@ -741,11 +742,12 @@ void branch_and_bound_t::exploration_ramp_up(search_tree_t* f_t user_lower = compute_user_objective(original_lp_, root_objective_); std::string gap_user = user_mip_gap(obj, user_lower); - settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", nodes_explored, nodes_unexplored, obj, user_lower, + node->integer_infeasible, node->depth, nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, gap_user.c_str(), @@ -841,11 +843,12 @@ void branch_and_bound_t::explore_subtree(i_t task_id, f_t obj = compute_user_objective(original_lp_, upper_bound); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string gap_user = user_mip_gap(obj, user_lower); - settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %7.1e %s %9.2f\n", + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", nodes_explored, nodes_unexplored, obj, user_lower, + node_ptr->integer_infeasible, node_ptr->depth, nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, gap_user.c_str(), @@ -1065,6 +1068,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_relax_soln_.resize(original_lp_.num_rows, original_lp_.num_cols); settings_.log.printf("Solving LP root relaxation\n"); + i_t original_rows = original_lp_.num_rows; simplex_solver_settings_t lp_settings = settings_; lp_settings.inside_mip = 1; lp_settings.scale_columns = false; @@ -1126,13 +1130,21 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut csc_matrix_t Arow(1, 1, 1); original_lp_.A.transpose(Arow); - for (i_t cut_pass = 0; cut_pass < 10; cut_pass++) { + if (num_fractional != 0) { + settings_.log.printf( + " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | Gap " + "| Time |\n"); + } + + for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { +#ifdef PRINT_SOLUTION for (i_t j = 0; j < original_lp_.num_cols; j++) { if (var_types_[j] == variable_type_t::INTEGER) { settings_.log.printf("Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]); } } +#endif mutex_upper_.lock(); incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); upper_bound_ = root_objective_; @@ -1155,10 +1167,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } return mip_status_t::OPTIMAL; } else { +#ifdef PRINT_FRACTIONAL_INFO settings_.log.printf("Found %d fractional variables on cut pass %d\n", num_fractional, cut_pass); for (i_t j: fractional) { settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]); } +#endif // Let's look for Gomory cuts // Compute b_bar std::vector b_bar(original_lp_.num_rows); @@ -1166,6 +1180,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector nonbasic_mark(original_lp_.num_cols, 0); for (i_t j : nonbasic_list) { + if (j < 0 || j >= 
original_lp_.num_cols) { + settings_.log.printf("nonbasic_list out of bounds %d num_cols %d\n", j, original_lp_.num_cols); + exit(1); + } nonbasic_mark[j] = 1; } @@ -1194,20 +1212,19 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (uj < inf) { if (uj - xstar_j <= xstar_j - lj) { has_upper[j] = 1; - settings_.log.printf("Variable %d in upper\n", j); } else { has_lower[j] = 1; - settings_.log.printf("Variable %d in lower\n", j); } continue; } if (lj > -inf) { has_lower[j] = 1; - settings_.log.printf("Variable %d in lower\n", j); } } +#ifdef PRINT_COMPLEMENT_INFO settings_.log.printf("needs_complement %d\n", needs_complement); +#endif csr_matrix_t C(0, original_lp_.num_cols, 0); C.row_start[0] = 0; @@ -1220,8 +1237,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (var_types_[j] != variable_type_t::INTEGER) { continue; } const f_t x_j = root_relax_soln_.x[j]; if (std::abs(x_j - std::round(x_j)) < settings_.integer_tol) { continue; } - +#ifdef PRINT_CUT_INFO settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i); +#endif #ifdef PRINT_BASIS for (i_t h = 0; h < basic_list.size(); h++) { settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]); @@ -1298,9 +1316,18 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut a_bar.to_dense(a_bar_dense); f_t a_bar_dense_dot = dot(a_bar_dense, root_relax_soln_.x); - settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]); + if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) { + settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]); + settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); + exit(1); + } - settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); + // Skip cuts that are shallow + const f_t shallow_tol = 1e-2; + if (std::abs(x_j - std::round(x_j)) < shallow_tol) { + //settings_.log.printf("Skipping shallow cut %d. 
b_bar[%d] = %e x_j %e\n", i, i, b_bar[i], x_j); + continue; + } #ifdef PRINT_BASE_INEQUALITY // Print out the base inequality @@ -1396,10 +1423,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } else { // Upper intersect C - x_workspace[jj] -= h(-aj); - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); + f_t h_j = h(-aj); + if (h_j != 0.0) { + x_workspace[jj] -= h_j; + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } } } } else if (var_types_[jj] == variable_type_t::INTEGER) { @@ -1411,10 +1441,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } else { // C \ Upper - x_workspace[jj] += h(aj); - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); + f_t h_j = h(aj); + if (h_j != 0.0) { + x_workspace[jj] += h_j; + if (!x_mark[jj]) { + x_mark[jj] = 1; + cut_indices.push_back(jj); + } } } } @@ -1454,13 +1487,15 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut C.check_matrix(); #ifdef PRINT_CUT_RHS for (i_t k = 0; k < cut_rhs.size(); k++) { - lp_settings.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]); + settings_.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]); } #endif - lp_settings.log.printf("C nz %d\n", C.row_start[C.m]); - lp_settings.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size()); - lp_settings.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols); +#ifdef PRINT_CUT_INFO + settings_.log.printf("C nz %d\n", C.row_start[C.m]); + settings_.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size()); + settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols); +#endif #ifdef PRINT_OPTIMAL for (i_t j = 0; j < original_lp_.num_cols; j++) { @@ -1471,16 +1506,22 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Check to see that this is a cut i.e C*x > d std::vector Cx(C.m); matrix_vector_multiply(C_col, 1.0, root_relax_soln_.x, 0.0, Cx); + f_t min_cut_violation = inf; for (i_t k = 0; k < Cx.size(); k++) { //lp_settings.log.printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]); if (Cx[k] <= cut_rhs[k]) { - lp_settings.log.printf("C*x <= d for cut %d\n", k); + settings_.log.printf("C*x <= d for cut %d\n", k); exit(1); } + min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]); } +#ifdef PRINT_MIN_CUT_VIOLATION + settings_.log.printf("Min cut violation %e\n", min_cut_violation); +#endif // Resolve the LP with the new cuts - lp_settings.log.printf("Solving LP with %d cuts\n", C.m); + settings_.log.printf("Solving LP with %d cuts (%d nonzeros). Total constraints %d\n", C.m, C.row_start[C.m], C.m + original_lp_.num_rows); + lp_settings.log.log = false; lp_status_t cut_status = solve_linear_program_with_cuts(stats_.start_time, lp_settings, @@ -1493,7 +1534,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, root_vstatus_, edge_norms_); - + settings_.log.printf("Cut LP iterations %d. 
A nz %d\n", root_relax_soln_.iterations, original_lp_.A.col_start[original_lp_.A.n]); + stats_.total_lp_iters += root_relax_soln_.iterations; root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); if (cut_status != lp_status_t::OPTIMAL) { @@ -1503,9 +1545,146 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_.A.transpose(Arow); var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); + std::vector cuts_to_remove; + cuts_to_remove.reserve(original_lp_.num_rows - original_rows); + std::vector slacks_to_remove; + slacks_to_remove.reserve(original_lp_.num_rows - original_rows); + const f_t dual_tol = 1e-10; + for (i_t k = original_rows; k < original_lp_.num_rows; k++) { + if (std::abs(root_relax_soln_.y[k]) < dual_tol) { + const i_t row_start = Arow.col_start[k]; + const i_t row_end = Arow.col_start[k + 1]; + i_t last_slack = -1; + const f_t slack_tol = 1e-3; + for (i_t p = row_start; p < row_end; p++) { + const i_t jj = Arow.i[p]; + const i_t col_len = original_lp_.A.col_start[jj + 1] - original_lp_.A.col_start[jj]; + if (var_types_[jj] == variable_type_t::CONTINUOUS && + Arow.x[p] == 1.0 && + original_lp_.lower[jj] == 0.0 && + original_lp_.upper[jj] == inf && + root_vstatus_[jj] == variable_status_t::BASIC && + col_len == 1 && + root_relax_soln_.x[jj] > slack_tol) { + last_slack = jj; + } + } + if (last_slack != -1) { + cuts_to_remove.push_back(k); + slacks_to_remove.push_back(last_slack); + } + } + } + + if (cuts_to_remove.size() > 0) { + settings_.log.printf("Removing %d cuts\n", cuts_to_remove.size()); + std::vector marked_rows(original_lp_.num_rows, 0); + for (i_t i : cuts_to_remove) { + marked_rows[i] = 1; + } + std::vector marked_cols(original_lp_.num_cols, 0); + for (i_t j : slacks_to_remove) { + marked_cols[j] = 1; + } + + std::vector new_rhs(original_lp_.num_rows - cuts_to_remove.size()); + std::vector new_solution_y(original_lp_.num_rows - cuts_to_remove.size()); + i_t h = 0; + for (i_t i = 0; i < original_lp_.num_rows; i++) { + if (!marked_rows[i]) { + new_rhs[h] = original_lp_.rhs[i]; + new_solution_y[h] = root_relax_soln_.y[i]; + h++; + } + } + + + Arow.remove_columns(marked_rows); + Arow.transpose(original_lp_.A); + + std::vector new_objective(original_lp_.num_cols - slacks_to_remove.size()); + std::vector new_lower(original_lp_.num_cols - slacks_to_remove.size()); + std::vector new_upper(original_lp_.num_cols - slacks_to_remove.size()); + std::vector new_var_types(original_lp_.num_cols - slacks_to_remove.size()); + std::vector new_vstatus(original_lp_.num_cols - slacks_to_remove.size()); + std::vector new_basic_list; + new_basic_list.reserve(original_lp_.num_rows - slacks_to_remove.size()); + std::vector new_nonbasic_list; + new_nonbasic_list.reserve(nonbasic_list.size()); + std::vector new_solution_x(original_lp_.num_cols - slacks_to_remove.size()); + std::vector new_solution_z(original_lp_.num_cols - slacks_to_remove.size()); + h = 0; + for (i_t k = 0; k < original_lp_.num_cols; k++) { + if (!marked_cols[k]) { + new_objective[h] = original_lp_.objective[k]; + new_lower[h] = original_lp_.lower[k]; + new_upper[h] = original_lp_.upper[k]; + new_var_types[h] = var_types_[k]; + new_vstatus[h] = root_vstatus_[k]; + new_solution_x[h] = root_relax_soln_.x[k]; + new_solution_z[h] = root_relax_soln_.z[k]; + if (new_vstatus[h] != variable_status_t::BASIC) { + new_nonbasic_list.push_back(h); + } else { + new_basic_list.push_back(h); + } + h++; + } + } + original_lp_.A.remove_columns(marked_cols); + original_lp_.A.transpose(Arow); + 
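+        // At this point the inactive cut rows (dual value below dual_tol whose unit slack
+        // column is basic and strictly positive) and those slack columns have been deleted
+        // from both A and Arow. The compacted objective, bounds, rhs, variable types, basis
+        // lists and solution vectors built above are now swapped into original_lp_ so the
+        // problem, the basis and the root relaxation solution stay consistent.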
original_lp_.objective = new_objective; + original_lp_.lower = new_lower; + original_lp_.upper = new_upper; + original_lp_.rhs = new_rhs; + var_types_ = new_var_types; + original_lp_.num_cols = original_lp_.A.n; + original_lp_.num_rows = original_lp_.A.m; + basic_list = new_basic_list; + nonbasic_list = new_nonbasic_list; + root_vstatus_ = new_vstatus; + root_relax_soln_.x = new_solution_x; + root_relax_soln_.y = new_solution_y; + root_relax_soln_.z = new_solution_z; + +#ifdef PRINT_SIZES + settings_.log.printf("A %d x %d\n", original_lp_.A.m, original_lp_.A.n); + settings_.log.printf("basic_list size %d\n", basic_list.size()); + settings_.log.printf("nonbasic_list size %d\n", nonbasic_list.size()); + settings_.log.printf("root_vstatus_ size %d\n", root_vstatus_.size()); + settings_.log.printf("original_lp_.num_rows %d\n", original_lp_.num_rows); + settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols); + settings_.log.printf("root_relax_soln_.x size %d\n", root_relax_soln_.x.size()); + settings_.log.printf("root_relax_soln_.y size %d\n", root_relax_soln_.y.size()); + settings_.log.printf("root_relax_soln_.z size %d\n", root_relax_soln_.z.size()); + settings_.log.printf("rhs size %ld\n", original_lp_.rhs.size()); + settings_.log.printf("lower size %ld\n", original_lp_.lower.size()); + settings_.log.printf("upper size %ld\n", original_lp_.upper.size()); + settings_.log.printf("objective size %ld\n", original_lp_.objective.size()); + settings_.log.printf("var_types_ size %ld\n", var_types_.size()); +#endif + + basis_update.resize(original_lp_.num_rows); + basis_update.refactor_basis(original_lp_.A, settings_, basic_list, nonbasic_list, root_vstatus_); + } fractional.clear(); num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); + + // TODO: Get upper bound from heuristics + std::string gap = num_fractional != 0 ? " - " : "0.0%"; + f_t obj = num_fractional != 0 ? 
inf : compute_user_objective(original_lp_, root_objective_); + + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", + 0, + 0, + obj, + compute_user_objective(original_lp_, root_objective_), + num_fractional, + 0, + stats_.total_lp_iters.load(), + gap.c_str(), + toc(stats_.start_time)); } } @@ -1534,6 +1713,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut search_tree.branch(&search_tree.root, branch_var, root_relax_soln_.x[branch_var], + num_fractional, root_vstatus_, original_lp_, log); @@ -1544,9 +1724,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.num_diving_threads, settings_.num_threads); - settings_.log.printf( - " | Explored | Unexplored | Objective | Bound | Depth | Iter/Node | Gap " - "| Time |\n"); + stats_.nodes_explored = 0; diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 9034bfa22..f18ae0072 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -41,6 +41,7 @@ class mip_node_t { node_id(0), branch_var(-1), branch_dir(-1), + integer_infeasible(-1), vstatus(basis) { children[0] = nullptr; @@ -53,6 +54,7 @@ class mip_node_t { i_t branch_variable, i_t branch_direction, f_t branch_var_value, + i_t integer_inf, const std::vector& basis) : status(node_status_t::ACTIVE), lower_bound(parent_node->lower_bound), @@ -62,8 +64,8 @@ class mip_node_t { branch_var(branch_variable), branch_dir(branch_direction), fractional_val(branch_var_value), + integer_infeasible(integer_inf), vstatus(basis) - { branch_var_lower = branch_direction == 0 ? problem.lower[branch_var] : std::ceil(branch_var_value); @@ -217,6 +219,7 @@ class mip_node_t { f_t branch_var_lower; f_t branch_var_upper; f_t fractional_val; + i_t integer_infeasible; mip_node_t* parent; std::unique_ptr children[2]; @@ -272,6 +275,7 @@ class search_tree_t { void branch(mip_node_t* parent_node, const i_t branch_var, const f_t fractional_val, + const i_t integer_infeasible, const std::vector& parent_vstatus, const lp_problem_t& original_lp, logger_t& log) @@ -280,13 +284,13 @@ class search_tree_t { // down child auto down_child = std::make_unique>( - original_lp, parent_node, ++id, branch_var, 0, fractional_val, parent_vstatus); + original_lp, parent_node, ++id, branch_var, 0, fractional_val, integer_infeasible, parent_vstatus); graphviz_edge(log, parent_node, down_child.get(), branch_var, 0, std::floor(fractional_val)); // up child auto up_child = std::make_unique>( - original_lp, parent_node, ++id, branch_var, 1, fractional_val, parent_vstatus); + original_lp, parent_node, ++id, branch_var, 1, fractional_val, integer_infeasible, parent_vstatus); graphviz_edge(log, parent_node, up_child.get(), branch_var, 1, std::ceil(fractional_val)); diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 8e54c40bb..6fffbfe1f 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -69,6 +69,7 @@ struct simplex_solver_settings_t { num_threads(omp_get_max_threads() - 1), num_bfs_threads(std::min(num_threads / 4, 1)), num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), + max_cut_passes(10), random_seed(0), inside_mip(0), solution_callback(nullptr), @@ -134,6 +135,7 @@ struct simplex_solver_settings_t { i_t random_seed; // random seed i_t num_bfs_threads; // number of threads dedicated to the best-first search i_t num_diving_threads; // number of threads dedicated to diving + 
i_t max_cut_passes; // number of cut passes to make i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 3fba1a5ac..45a4967e9 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -361,6 +361,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, lp.upper[j] = inf; lp.objective[j] = 0.0; } + settings.log.printf("Done adding slacks\n"); new_A_col.col_start[lp.num_cols + p] = nz; new_A_col.n = lp.num_cols + p; @@ -376,6 +377,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, const i_t h = k - old_rows; lp.rhs[k] = cut_rhs[h]; } + settings.log.printf("Done adding rhs\n"); // Construct C_B = C(:, basic_list) std::vector C_col_degree(lp.num_cols, 0); @@ -388,12 +390,17 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, } C_col_degree[j]++; } + settings.log.printf("Done computing C_col_degree\n"); std::vector in_basis(old_cols, -1); const i_t num_basic = static_cast(basic_list.size()); i_t C_B_nz = 0; for (i_t k = 0; k < num_basic; k++) { const i_t j = basic_list[k]; + if (j < 0 || j >= old_cols) { + settings.log.printf("basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols); + exit(1); + } in_basis[j] = k; if (j < cuts.n) { @@ -444,7 +451,8 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, const f_t err = Diff.norm1(); settings.log.printf("After || B - L*U || %e\n", err); if (err > 1e-6) { - Diff.print_matrix(); + settings.log.printf("Diff matrix\n"); + //Diff.print_matrix(); exit(1); } // Adjust the vstatus @@ -475,7 +483,8 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, solution, iter, edge_norms); - + settings.log.printf("Phase 2 iterations %d\n", iter); + solution.iterations = iter; lp_status_t lp_status; if (status == dual::status_t::OPTIMAL) { lp_status = lp_status_t::OPTIMAL; } if (status == dual::status_t::DUAL_UNBOUNDED) { lp_status = lp_status_t::INFEASIBLE; } diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index b5da4f095..0d3874321 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -86,7 +86,8 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_FOLDING, &pdlp_settings.folding, -1, 1, -1}, {CUOPT_DUALIZE, &pdlp_settings.dualize, -1, 1, -1}, {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1}, - {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1} + {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1}, + {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits::max(), 10} }; // Bool parameters diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index e8978e17e..68fb0c698 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -168,6 +168,7 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; if (context.settings.num_cpu_threads < 0) { 
branch_and_bound_settings.num_threads = omp_get_max_threads() - 1; From 42af00cc1c80ded53b891d14b41a9fb17f8096f7 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 2 Dec 2025 10:08:36 -0800 Subject: [PATCH 10/45] Remove small coefficients from cut --- cpp/src/dual_simplex/branch_and_bound.cpp | 59 ++++++++++++++++++----- cpp/src/dual_simplex/solve.cpp | 4 ++ 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index bb225ddbc..ae53d1ba3 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1329,6 +1329,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut continue; } + const f_t f_val = b_bar[i] - std::floor(b_bar[i]); + if (f_val < 0.01 || f_val > 0.99) { + settings_.log.printf("Skipping cut %d. b_bar[%d] = %e f_val %e\n", i, i, b_bar[i], f_val); + continue; + } + #ifdef PRINT_BASE_INEQUALITY // Print out the base inequality for (i_t k = 0; k < a_bar.i.size(); k++) { @@ -1359,13 +1365,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t aj = a_bar.x[k]; if (var_types_[jj] == variable_type_t::INTEGER) { x_workspace[jj] += f(aj, b_bar[i]); - if (!x_mark[jj]) { + if (!x_mark[jj] && x_workspace[jj] != 0.0) { x_mark[jj] = 1; cut_indices.push_back(jj); } } else { x_workspace[jj] += h(aj); - if (!x_mark[jj]) { + if (!x_mark[jj] && x_workspace[jj] != 0.0) { x_mark[jj] = 1; cut_indices.push_back(jj); } @@ -1417,7 +1423,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (var_types_[jj] == variable_type_t::INTEGER) { // Upper intersect I x_workspace[jj] -= f(-aj, r); - if (!x_mark[jj]) { + if (!x_mark[jj] && x_workspace[jj] != 0.0) { x_mark[jj] = 1; cut_indices.push_back(jj); } @@ -1435,7 +1441,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } else if (var_types_[jj] == variable_type_t::INTEGER) { // I \ Upper x_workspace[jj] += f(aj, r); - if (!x_mark[jj]) { + if (!x_mark[jj] && x_workspace[jj] != 0.0) { x_mark[jj] = 1; cut_indices.push_back(jj); } @@ -1453,11 +1459,31 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } - sparse_vector_t cut(original_lp_.num_cols, cut_indices.size()); + sparse_vector_t cut(original_lp_.num_cols, 0); + cut.i.reserve(cut_indices.size()); + cut.x.reserve(cut_indices.size()); for (i_t k = 0; k < cut_indices.size(); k++) { const i_t jj = cut_indices[k]; - cut.i[k] = jj; - cut.x[k] = x_workspace[jj]; + + // Check for small coefficients + const f_t aj = x_workspace[jj]; + if (std::abs(aj) < 1e-6) { + if (aj >= 0.0 && original_lp_.upper[jj] < inf) { + // Move this to the right-hand side + //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.upper[jj], jj); + R -= aj * original_lp_.upper[jj]; + continue; + } else if (aj <= 0.0 && original_lp_.lower[jj] > -inf) { + //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.lower[jj], jj); + R += aj * original_lp_.lower[jj]; + continue; + } + else { + //settings_.log.printf("Small coefficient %e for variable %d lower %e upper %e\n", aj, jj, original_lp_.lower[jj], original_lp_.upper[jj]); + } + } + cut.i.push_back(jj); + cut.x.push_back(x_workspace[jj]); } // Clear the workspace @@ -1466,14 +1492,21 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut x_mark[jj] = 0; } + if (cut.x.size() == 0) + { + continue; + } + if (cut.x.size() >= 0.7 * original_lp_.num_cols) + { + settings_.log.printf("Cut %d has 
%d nonzeros. Skipping because it is too dense %.2f\n", i, cut.x.size(), static_cast(cut.x.size()) / original_lp_.num_cols); + continue; + } + // Sort the coefficients by their index cut.sort(); // The new cut is: g'*x >= R // But we want to have it in the form h'*x <= b - for (i_t k = 0; k < cut.x.size(); k++) { - cut.x[k] *= -1.0; - } - + cut.negate(); C.append_row(cut); cut_rhs.push_back(-R); } @@ -1663,6 +1696,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("objective size %ld\n", original_lp_.objective.size()); settings_.log.printf("var_types_ size %ld\n", var_types_.size()); #endif + settings_.log.printf("After removal %d rows %d columns %d nonzeros\n", + original_lp_.num_rows, + original_lp_.num_cols, + original_lp_.A.col_start[original_lp_.A.n]); basis_update.resize(original_lp_.num_rows); basis_update.refactor_basis(original_lp_.A, settings_, basic_list, nonbasic_list, root_vstatus_); diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 45a4967e9..fcd8a6386 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -312,6 +312,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, // adds the cuts into the LP and solves again. +#ifdef CHECK_BASIS { csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); basis_update.multiply_lu(Btest); @@ -325,6 +326,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, exit(1); } } +#endif const i_t p = cuts.m; if (cut_rhs.size() != static_cast(p)) { @@ -439,6 +441,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, basic_list[j] = h++; } +#ifdef CHECK_BASIS // Check the basis update csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); basis_update.multiply_lu(Btest); @@ -455,6 +458,7 @@ lp_status_t solve_linear_program_with_cuts(const f_t start_time, //Diff.print_matrix(); exit(1); } +#endif // Adjust the vstatus vstatus.resize(lp.num_cols); for (i_t j = old_cols; j < lp.num_cols; j++) { From dddf42dcb64645eea9695a999b62425443a10aad Mon Sep 17 00:00:00 2001 From: Alice Boucher <160623740+aliceb-nv@users.noreply.github.com> Date: Thu, 11 Dec 2025 17:52:36 +0100 Subject: [PATCH 11/45] CI crash fixes (#691) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary by CodeRabbit ## Release Notes * **Performance** * Optimized branch-and-bound algorithm with improved search termination conditions * **Improvements** * Enhanced concurrency control mechanisms across solver components * Improved logger initialization and lifecycle management for better resource handling ✏️ Tip: You can customize this high-level summary in your review settings. 
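The logger lifecycle change summarized in the release notes above replaces an unconditional reset in the initializer's destructor with a ref-counted guard: the first initializer configures the sinks, later initializers simply share the live guard, and the global logger is reset only when the last holder goes away. A minimal, generic sketch of that pattern, with illustrative names only (config_guard and scoped_init are not the identifiers used in the diff below):

    #include <memory>
    #include <mutex>

    struct config_guard {
      ~config_guard() { /* reset the shared/global state here */ }
    };

    class scoped_init {
      std::shared_ptr<config_guard> guard_;

     public:
      scoped_init()
      {
        static std::weak_ptr<config_guard> active;  // weak: does not keep the guard alive
        static std::mutex active_mutex;
        std::lock_guard<std::mutex> lock(active_mutex);
        if (auto existing = active.lock()) {  // already configured: share ownership
          guard_ = existing;
          return;
        }
        // ... perform the one-time configuration ...
        guard_ = std::make_shared<config_guard>();
        active = guard_;  // remembered so later instances can reuse it
      }
    };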
Authors: - Alice Boucher (https://github.com/aliceb-nv) Approvers: - Nicolas Blin (https://github.com/Kh4ster) URL: https://github.com/NVIDIA/cuopt/pull/691 --- .../pdlp/solver_settings.hpp | 2 +- cpp/src/dual_simplex/branch_and_bound.cpp | 5 ++++ cpp/src/dual_simplex/branch_and_bound.hpp | 4 +-- .../dual_simplex/simplex_solver_settings.hpp | 4 +-- cpp/src/linear_programming/solve.cu | 2 +- cpp/src/mip/diversity/diversity_manager.cuh | 2 +- cpp/src/mip/relaxed_lp/relaxed_lp.cuh | 16 ++++++------ cpp/src/utilities/logger.cpp | 26 ++++++++++++++++--- cpp/src/utilities/logger.hpp | 6 +++-- 9 files changed, 47 insertions(+), 20 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index ea697e1e1..76388504e 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -212,7 +212,7 @@ class pdlp_solver_settings_t { method_t method{method_t::Concurrent}; bool inside_mip{false}; // For concurrent termination - volatile int* concurrent_halt{nullptr}; + std::atomic* concurrent_halt{nullptr}; static constexpr f_t minimal_absolute_tolerance = 1.0e-12; private: diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 77acca8f7..6161f4d3f 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1135,6 +1135,7 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A if (get_upper_bound() < start_node->node.lower_bound) { continue; } bool recompute_bounds_and_basis = true; + i_t nodes_explored = 0; search_tree_t subtree(std::move(start_node->node)); std::deque*> stack; stack.push_front(&subtree.root); @@ -1152,6 +1153,8 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; } + if (nodes_explored >= 1000) { break; } + node_solve_info_t status = solve_node(node_ptr, subtree, leaf_problem, @@ -1165,6 +1168,8 @@ void branch_and_bound_t::diving_thread(const csr_matrix_t& A start_node->upper, log); + nodes_explored++; + recompute_bounds_and_basis = !has_children(status); if (status == node_solve_info_t::TIME_LIMIT) { diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 7891711f7..38438cc9e 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -113,7 +113,7 @@ class branch_and_bound_t { f_t get_lower_bound(); i_t get_heap_size(); bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } - volatile int* get_root_concurrent_halt() { return &root_concurrent_halt_; } + std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } lp_status_t solve_root_relaxation(simplex_solver_settings_t const& lp_settings); @@ -170,7 +170,7 @@ class branch_and_bound_t { std::vector edge_norms_; std::atomic root_crossover_solution_set_{false}; bool enable_concurrent_lp_root_solve_{false}; - volatile int root_concurrent_halt_{0}; + std::atomic root_concurrent_halt_{0}; // Pseudocosts pseudo_costs_t pc_; diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 98be9d4cb..a1cc049e7 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ 
b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -145,8 +145,8 @@ struct simplex_solver_settings_t { std::function heuristic_preemption_callback; std::function&, std::vector&, f_t)> set_simplex_solution_callback; mutable logger_t log; - volatile int* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should - // continue, 1 if solver should halt + std::atomic* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should + // continue, 1 if solver should halt }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index ed141a0c4..ab418bf5a 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -306,7 +306,7 @@ void setup_device_symbols(rmm::cuda_stream_view stream_view) detail::set_pdlp_hyper_parameters(stream_view); } -volatile int global_concurrent_halt; +std::atomic global_concurrent_halt{0}; template optimization_problem_solution_t convert_dual_simplex_sol( diff --git a/cpp/src/mip/diversity/diversity_manager.cuh b/cpp/src/mip/diversity/diversity_manager.cuh index 4a78f6cff..9f3b4c90f 100644 --- a/cpp/src/mip/diversity/diversity_manager.cuh +++ b/cpp/src/mip/diversity/diversity_manager.cuh @@ -95,7 +95,7 @@ class diversity_manager_t { // mutex for the simplex solution update std::mutex relaxed_solution_mutex; // atomic for signalling pdlp to stop - volatile int global_concurrent_halt{0}; + std::atomic global_concurrent_halt{0}; rins_t rins; diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh index 01931a3dd..0094f5982 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cuh +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cuh @@ -17,14 +17,14 @@ namespace cuopt::linear_programming::detail { struct relaxed_lp_settings_t { - double tolerance = 1e-4; - double time_limit = 1.0; - bool check_infeasibility = true; - bool return_first_feasible = false; - bool save_state = true; - bool per_constraint_residual = true; - bool has_initial_primal = true; - volatile int* concurrent_halt = nullptr; + double tolerance = 1e-4; + double time_limit = 1.0; + bool check_infeasibility = true; + bool return_first_feasible = false; + bool save_state = true; + bool per_constraint_residual = true; + bool has_initial_primal = true; + std::atomic* concurrent_halt = nullptr; }; template diff --git a/cpp/src/utilities/logger.cpp b/cpp/src/utilities/logger.cpp index a16c49c11..217f9c64c 100644 --- a/cpp/src/utilities/logger.cpp +++ b/cpp/src/utilities/logger.cpp @@ -137,9 +137,26 @@ void reset_default_logger() default_logger().flush_on(rapids_logger::level_enum::debug); } +// Guard object whose destructor resets the logger +struct logger_config_guard { + ~logger_config_guard() { cuopt::reset_default_logger(); } +}; + +// Weak reference to detect if any init_logger_t instance is still alive +static std::weak_ptr g_active_guard; +static std::mutex g_guard_mutex; + init_logger_t::init_logger_t(std::string log_file, bool log_to_console) { - // until this function is called, the default sink is the buffer sink + std::lock_guard lock(g_guard_mutex); + + auto existing_guard = g_active_guard.lock(); + if (existing_guard) { + // Reuse existing configuration, just hold a reference to keep it alive + guard_ = existing_guard; + return; + } + cuopt::default_logger().sinks().clear(); // re-initialize sinks @@ -164,8 +181,11 @@ init_logger_t::init_logger_t(std::string log_file, bool log_to_console) for (const auto& entry : buffered_messages) { 
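+    // Replay messages that were buffered before any real sink existed, so output logged
+    // prior to initialization is not lost once the console/file sinks are attached.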
cuopt::default_logger().log(entry.level, entry.msg.c_str()); } -} -init_logger_t::~init_logger_t() { cuopt::reset_default_logger(); } + // Create guard and store weak reference for future instances to find + auto guard = std::make_shared(); + g_active_guard = guard; + guard_ = guard; +} } // namespace cuopt diff --git a/cpp/src/utilities/logger.hpp b/cpp/src/utilities/logger.hpp index 13c5e36e3..08556a4c7 100644 --- a/cpp/src/utilities/logger.hpp +++ b/cpp/src/utilities/logger.hpp @@ -33,11 +33,13 @@ rapids_logger::logger& default_logger(); */ void reset_default_logger(); +// Ref-counted logger initializer class init_logger_t { + // Using shared_ptr for ref-counting + std::shared_ptr guard_; + public: init_logger_t(std::string log_file, bool log_to_console); - - ~init_logger_t(); }; } // namespace cuopt From d97ff6b4762f9df4459534ccf56d70c8e172bc81 Mon Sep 17 00:00:00 2001 From: Jake Awe Date: Thu, 11 Dec 2025 13:28:18 -0600 Subject: [PATCH 12/45] REL v25.12.00 release --- .pre-commit-config.yaml | 9 ++- .../all_cuda-129_arch-aarch64.yaml | 14 ++--- .../all_cuda-129_arch-x86_64.yaml | 14 ++--- .../all_cuda-130_arch-aarch64.yaml | 14 ++--- .../all_cuda-130_arch-x86_64.yaml | 14 ++--- dependencies.yaml | 58 +++++++++---------- .../cuopt/linear_programming/pyproject.toml | 6 +- python/cuopt/pyproject.toml | 26 ++++----- python/cuopt_self_hosted/pyproject.toml | 2 +- python/cuopt_server/pyproject.toml | 2 +- python/libcuopt/pyproject.toml | 12 ++-- 11 files changed, 87 insertions(+), 84 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 799eb291f..447b03a32 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,6 +56,11 @@ repos: - repo: https://github.com/rapidsai/pre-commit-hooks rev: v1.2.1 hooks: + - id: verify-alpha-spec + args: + - --fix + - --mode + - release - id: verify-copyright args: [--fix, --spdx] files: | @@ -83,7 +88,5 @@ repos: entry: python ci/utils/update_doc_versions.py language: system files: docs/cuopt/source/versions1.json - - default_language_version: - python: python3 + python: python3 diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 59d6b43c0..7a4a54a1d 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -19,7 +19,7 @@ dependencies: - cuda-python>=12.9.2,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 -- cudf==25.12.*,>=0.0.0a0 +- cudf==25.12.* - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -35,8 +35,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libraft-headers==25.12.* +- librmm==25.12.* - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 @@ -53,16 +53,16 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.12.* - pyrsistent - pytest-cov - pytest<8 - python>=3.10,<3.14 - rapids-build-backend>=0.4.0,<0.5.0.dev0 -- rapids-dask-dependency==25.12.*,>=0.0.0a0 -- rapids-logger==0.2.*,>=0.0.0a0 +- rapids-dask-dependency==25.12.* +- rapids-logger==0.2.* - requests -- rmm==25.12.*,>=0.0.0a0 +- rmm==25.12.* - scikit-build-core>=0.10.0 - sphinx - sphinx-copybutton diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 4aad50d00..6c5ae382e 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -19,7 +19,7 @@ 
dependencies: - cuda-python>=12.9.2,<13.0a0 - cuda-sanitizer-api - cuda-version=12.9 -- cudf==25.12.*,>=0.0.0a0 +- cudf==25.12.* - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -35,8 +35,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libraft-headers==25.12.* +- librmm==25.12.* - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 @@ -53,16 +53,16 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.12.* - pyrsistent - pytest-cov - pytest<8 - python>=3.10,<3.14 - rapids-build-backend>=0.4.0,<0.5.0.dev0 -- rapids-dask-dependency==25.12.*,>=0.0.0a0 -- rapids-logger==0.2.*,>=0.0.0a0 +- rapids-dask-dependency==25.12.* +- rapids-logger==0.2.* - requests -- rmm==25.12.*,>=0.0.0a0 +- rmm==25.12.* - scikit-build-core>=0.10.0 - sphinx - sphinx-copybutton diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index 5adb8aeae..82c250f01 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -19,7 +19,7 @@ dependencies: - cuda-python>=13.0.1,<14.0a0 - cuda-sanitizer-api - cuda-version=13.0 -- cudf==25.12.*,>=0.0.0a0 +- cudf==25.12.* - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -35,8 +35,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libraft-headers==25.12.* +- librmm==25.12.* - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 @@ -53,16 +53,16 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.12.* - pyrsistent - pytest-cov - pytest<8 - python>=3.10,<3.14 - rapids-build-backend>=0.4.0,<0.5.0.dev0 -- rapids-dask-dependency==25.12.*,>=0.0.0a0 -- rapids-logger==0.2.*,>=0.0.0a0 +- rapids-dask-dependency==25.12.* +- rapids-logger==0.2.* - requests -- rmm==25.12.*,>=0.0.0a0 +- rmm==25.12.* - scikit-build-core>=0.10.0 - sphinx - sphinx-copybutton diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index 11c24d589..e89e322d6 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -19,7 +19,7 @@ dependencies: - cuda-python>=13.0.1,<14.0a0 - cuda-sanitizer-api - cuda-version=13.0 -- cudf==25.12.*,>=0.0.0a0 +- cudf==25.12.* - cupy>=13.6.0 - cxx-compiler - cython>=3.0.3 @@ -35,8 +35,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libraft-headers==25.12.*,>=0.0.0a0 -- librmm==25.12.*,>=0.0.0a0 +- libraft-headers==25.12.* +- librmm==25.12.* - make - msgpack-numpy==0.4.8 - msgpack-python==1.1.0 @@ -53,16 +53,16 @@ dependencies: - pip - pre-commit - psutil>=6.0.0 -- pylibraft==25.12.*,>=0.0.0a0 +- pylibraft==25.12.* - pyrsistent - pytest-cov - pytest<8 - python>=3.10,<3.14 - rapids-build-backend>=0.4.0,<0.5.0.dev0 -- rapids-dask-dependency==25.12.*,>=0.0.0a0 -- rapids-logger==0.2.*,>=0.0.0a0 +- rapids-dask-dependency==25.12.* +- rapids-logger==0.2.* - requests -- rmm==25.12.*,>=0.0.0a0 +- rmm==25.12.* - scikit-build-core>=0.10.0 - sphinx - sphinx-copybutton diff --git a/dependencies.yaml b/dependencies.yaml index 393b99293..c46f11b74 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -297,8 +297,8 @@ dependencies: packages: - boost - cpp-argparse - - librmm==25.12.*,>=0.0.0a0 - - libraft-headers==25.12.*,>=0.0.0a0 + - librmm==25.12.* + - 
libraft-headers==25.12.* - tbb-devel - zlib - bzip2 @@ -313,7 +313,7 @@ dependencies: common: - output_types: [conda] packages: - - libcuopt-tests==25.12.*,>=0.0.0a0 + - libcuopt-tests==25.12.* build_wheels: common: - output_types: [requirements, pyproject] @@ -350,7 +350,7 @@ dependencies: packages: - numba-cuda>=0.19.1,<0.20.0a0 - numba>=0.60.0 - - rapids-dask-dependency==25.12.*,>=0.0.0a0 + - rapids-dask-dependency==25.12.* - &pandas pandas>=2.0 - output_types: requirements packages: @@ -420,7 +420,7 @@ dependencies: common: - output_types: conda packages: - - &libcuopt_unsuffixed libcuopt==25.12.*,>=0.0.0a0 + - &libcuopt_unsuffixed libcuopt==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -433,18 +433,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcuopt-cu12==25.12.*,>=0.0.0a0 + - libcuopt-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libcuopt-cu13==25.12.*,>=0.0.0a0 + - libcuopt-cu13==25.12.* - {matrix: null, packages: [*libcuopt_unsuffixed]} depends_on_cuopt: common: - output_types: conda packages: - - &cuopt_unsuffixed cuopt==25.12.*,>=0.0.0a0 + - &cuopt_unsuffixed cuopt==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -457,18 +457,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cuopt-cu12==25.12.*,>=0.0.0a0 + - cuopt-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cuopt-cu13==25.12.*,>=0.0.0a0 + - cuopt-cu13==25.12.* - {matrix: null, packages: [*cuopt_unsuffixed]} depends_on_cuopt_server: common: - output_types: conda packages: - - &cuopt_server_unsuffixed cuopt-server==25.12.*,>=0.0.0a0 + - &cuopt_server_unsuffixed cuopt-server==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -481,18 +481,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cuopt-server-cu12==25.12.*,>=0.0.0a0 + - cuopt-server-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cuopt-server-cu13==25.12.*,>=0.0.0a0 + - cuopt-server-cu13==25.12.* - {matrix: null, packages: [*cuopt_server_unsuffixed]} depends_on_cuopt_sh_client: common: - output_types: [conda, requirements, pyproject] packages: - - &cuopt_sh_client_unsuffixed cuopt-sh-client==25.12.*,>=0.0.0a0 + - &cuopt_sh_client_unsuffixed cuopt-sh-client==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -502,7 +502,7 @@ dependencies: common: - output_types: [requirements, pyproject, conda] packages: - - cuopt-mps-parser==25.12.*,>=0.0.0a0 + - cuopt-mps-parser==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -512,12 +512,12 @@ dependencies: common: - output_types: conda packages: - - libraft-headers==25.12.*,>=0.0.0a0 + - libraft-headers==25.12.* depends_on_librmm: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==25.12.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -530,12 +530,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==25.12.*,>=0.0.0a0 + - librmm-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - librmm-cu13==25.12.*,>=0.0.0a0 + - 
librmm-cu13==25.12.* - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_cupy: common: @@ -560,7 +560,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-logger==0.2.*,>=0.0.0a0 + - rapids-logger==0.2.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -570,7 +570,7 @@ dependencies: common: - output_types: conda packages: - - &rmm_unsuffixed rmm==25.12.*,>=0.0.0a0 + - &rmm_unsuffixed rmm==25.12.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -583,12 +583,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==25.12.*,>=0.0.0a0 + - rmm-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - rmm-cu13==25.12.*,>=0.0.0a0 + - rmm-cu13==25.12.* - matrix: packages: - *rmm_unsuffixed @@ -597,7 +597,7 @@ dependencies: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==25.12.*,>=0.0.0a0 + - &cudf_unsuffixed cudf==25.12.* - output_types: requirements packages: - --extra-index-url=https://pypi.nvidia.com @@ -609,12 +609,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==25.12.*,>=0.0.0a0 + - cudf-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - cudf-cu13==25.12.*,>=0.0.0a0 + - cudf-cu13==25.12.* - matrix: packages: - *cudf_unsuffixed @@ -623,7 +623,7 @@ dependencies: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==25.12.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==25.12.* - output_types: requirements packages: - --extra-index-url=https://pypi.nvidia.com @@ -635,12 +635,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==25.12.*,>=0.0.0a0 + - pylibraft-cu12==25.12.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - pylibraft-cu13==25.12.*,>=0.0.0a0 + - pylibraft-cu13==25.12.* - matrix: packages: - *pylibraft_unsuffixed diff --git a/python/cuopt/cuopt/linear_programming/pyproject.toml b/python/cuopt/cuopt/linear_programming/pyproject.toml index c11cd3a58..24907d83f 100644 --- a/python/cuopt/cuopt/linear_programming/pyproject.toml +++ b/python/cuopt/cuopt/linear_programming/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "numpy>=1.23.5,<3.0a0", - "rapids-logger==0.2.*,>=0.0.0a0", + "rapids-logger==0.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -39,7 +39,7 @@ Source = "https://github.com/nvidia/cuopt" test = [ "pytest-cov", "pytest<8", - "rapids-logger==0.2.*,>=0.0.0a0", + "rapids-logger==0.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`. [tool.setuptools] @@ -83,5 +83,5 @@ requires = [ "cython>=3.0.3", "ninja", "numpy>=1.23.5,<3.0a0", - "rapids-logger==0.2.*,>=0.0.0a0", + "rapids-logger==0.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`. 
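(Context on the pin edits throughout this release commit; this is my reading of the RAPIDS packaging convention, not text from the patch. Under PEP 440 pre-release handling, a specifier that explicitly names a pre-release, such as the `>=0.0.0a0` clause being dropped here, lets nightly/alpha builds satisfy the requirement during development, while a bare `==25.12.*` matches final releases only. That is also why the `verify-alpha-spec` hook above now runs with `--mode release`. A minimal sketch of the two forms, using a package already pinned in this patch:

    cudf==25.12.*,>=0.0.0a0   # development pin: the explicit alpha clause admits pre-release nightlies
    cudf==25.12.*             # release pin: only final 25.12 builds match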
diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml index f3f2dbbf8..f27e3f23a 100644 --- a/python/cuopt/pyproject.toml +++ b/python/cuopt/pyproject.toml @@ -20,18 +20,18 @@ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ "cuda-python>=13.0.1,<14.0a0", - "cudf==25.12.*,>=0.0.0a0", - "cuopt-mps-parser==25.12.*,>=0.0.0a0", + "cudf==25.12.*", + "cuopt-mps-parser==25.12.*", "cupy-cuda13x>=13.6.0", - "libcuopt==25.12.*,>=0.0.0a0", + "libcuopt==25.12.*", "numba-cuda>=0.19.1,<0.20.0a0", "numba>=0.60.0", "numpy>=1.23.5,<3.0a0", "pandas>=2.0", - "pylibraft==25.12.*,>=0.0.0a0", - "rapids-dask-dependency==25.12.*,>=0.0.0a0", - "rapids-logger==0.2.*,>=0.0.0a0", - "rmm==25.12.*,>=0.0.0a0", + "pylibraft==25.12.*", + "rapids-dask-dependency==25.12.*", + "rapids-logger==0.2.*", + "rmm==25.12.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -48,7 +48,7 @@ test = [ "numpy>=1.23.5,<3.0a0", "pytest-cov", "pytest<8", - "rapids-logger==0.2.*,>=0.0.0a0", + "rapids-logger==0.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] @@ -112,12 +112,12 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "cuopt-mps-parser==25.12.*,>=0.0.0a0", + "cuopt-mps-parser==25.12.*", "cupy-cuda13x>=13.6.0", "cython>=3.0.3", - "libcuopt==25.12.*,>=0.0.0a0", + "libcuopt==25.12.*", "ninja", - "pylibraft==25.12.*,>=0.0.0a0", - "rapids-logger==0.2.*,>=0.0.0a0", - "rmm==25.12.*,>=0.0.0a0", + "pylibraft==25.12.*", + "rapids-logger==0.2.*", + "rmm==25.12.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml index 5b94fb74f..79b2de079 100644 --- a/python/cuopt_self_hosted/pyproject.toml +++ b/python/cuopt_self_hosted/pyproject.toml @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cuopt-mps-parser==25.12.*,>=0.0.0a0", + "cuopt-mps-parser==25.12.*", "msgpack-numpy==0.4.8", "msgpack==1.1.0", "requests", diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index 4e4dc15a2..cbf9429c7 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -20,7 +20,7 @@ authors = [ license = { text = "Apache-2.0" } requires-python = ">=3.10" dependencies = [ - "cuopt==25.12.*,>=0.0.0a0", + "cuopt==25.12.*", "cupy-cuda13x>=13.6.0", "fastapi", "jsonref==1.1.0", diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index 1fdd8ad2c..5e955f3ba 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -31,8 +31,8 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "cuopt-mps-parser==25.12.*,>=0.0.0a0", - "librmm==25.12.*,>=0.0.0a0", + "cuopt-mps-parser==25.12.*", + "librmm==25.12.*", "nvidia-cublas", "nvidia-cudart", "nvidia-cudss", @@ -40,7 +40,7 @@ dependencies = [ "nvidia-cusolver", "nvidia-cusparse", "nvidia-nvtx", - "rapids-logger==0.2.*,>=0.0.0a0", + "rapids-logger==0.2.*", ] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] @@ -82,8 +82,8 @@ dependencies-file = "../../dependencies.yaml" matrix-entry = "cuda_suffixed=true;use_cuda_wheels=true" requires = [ "cmake>=3.30.4", - "cuopt-mps-parser==25.12.*,>=0.0.0a0", - "librmm==25.12.*,>=0.0.0a0", + "cuopt-mps-parser==25.12.*", + "librmm==25.12.*", "ninja", - "rapids-logger==0.2.*,>=0.0.0a0", + "rapids-logger==0.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From f341e345d2a11736b18c2c787e70ed6d494e4b95 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 11 Dec 2025 18:52:50 -0800 Subject: [PATCH 13/45] Fix incorrect infeasible list --- cpp/src/dual_simplex/basis_solves.cpp | 14 +++++++- cpp/src/dual_simplex/basis_solves.hpp | 2 ++ cpp/src/dual_simplex/basis_updates.cpp | 4 ++- cpp/src/dual_simplex/basis_updates.hpp | 2 ++ cpp/src/dual_simplex/crossover.cpp | 6 ++-- cpp/src/dual_simplex/phase2.cpp | 45 ++++++++++++++------------ cpp/src/dual_simplex/primal.cpp | 2 +- 7 files changed, 49 insertions(+), 26 deletions(-) diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index db24f55a2..3080f269d 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -613,6 +613,8 @@ i_t factorize_basis(const csc_matrix_t& A, template i_t basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, @@ -658,7 +660,15 @@ i_t basis_repair(const csc_matrix_t& A, nonbasic_list[nonbasic_map[replace_j]] = bad_j; vstatus[replace_j] = variable_status_t::BASIC; // This is the main issue. What value should bad_j take on. 
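// A sketch of the reasoning behind the change below, inferred from the diff and
// the commit title "Fix incorrect infeasible list" (not the author's own wording):
// the repaired column bad_j leaves the basis, so its nonbasic status must be
// consistent with its bounds. Marking a bounded variable NONBASIC_FREE can leave
// it at a value inconsistent with those bounds, so the primal point rebuilt from
// the basis, and the list of infeasible basic variables derived from it, come out
// wrong. The fix keeps NONBASIC_FREE only for genuinely free variables and
// otherwise places bad_j on its finite lower or upper bound.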
- vstatus[bad_j] = variable_status_t::NONBASIC_FREE; + if (lower[bad_j] == -inf && upper[bad_j] == inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_FREE; + } else if (lower[bad_j] > -inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_LOWER; + } else if (upper[bad_j] < inf) { + vstatus[bad_j] = variable_status_t::NONBASIC_UPPER; + } else { + assert(1 == 0); + } } return 0; @@ -849,6 +859,8 @@ template int factorize_basis(const csc_matrix_t& A, template int basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index b668c0f46..0745806a6 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -42,6 +42,8 @@ i_t factorize_basis(const csc_matrix_t& A, template i_t basis_repair(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, const std::vector& deficient, const std::vector& slacks_needed, std::vector& basis_list, diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 6b79f3c86..11056a65e 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -2046,6 +2046,8 @@ template int basis_update_mpf_t::refactor_basis( const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus) @@ -2066,7 +2068,7 @@ int basis_update_mpf_t::refactor_basis( deficient, slacks_needed) == -1) { settings.log.debug("Initial factorization failed\n"); - basis_repair(A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); #ifdef CHECK_BASIS_REPAIR const i_t m = A.m; diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index cea907074..9b5d3e614 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -373,6 +373,8 @@ class basis_update_mpf_t { // Compute L*U = A(p, basic_list) int refactor_basis(const csc_matrix_t& A, const simplex_solver_settings_t& settings, + const std::vector& lower, + const std::vector& upper, std::vector& basic_list, std::vector& nonbasic_list, std::vector& vstatus); diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 23d9a0e8e..3dd61b152 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -786,7 +786,7 @@ i_t primal_push(const lp_problem_t& lp, if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); basis_repair( - lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. 
rank %d m %d\n", rank, m); @@ -1132,7 +1132,7 @@ crossover_status_t crossover(const lp_problem_t& lp, rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1323,7 +1323,7 @@ crossover_status_t crossover(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 56298ef4d..e0ac7239e 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -623,14 +623,17 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& x, std::vector& squared_infeasibilities, - std::vector& infeasibility_indices) + std::vector& infeasibility_indices, + f_t& primal_inf) { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - squared_infeasibilities.resize(n, 0.0); + squared_infeasibilities.resize(n); + std::fill(squared_infeasibilities.begin(), squared_infeasibilities.end(), 0.0); infeasibility_indices.reserve(n); infeasibility_indices.clear(); - f_t primal_inf = 0.0; + f_t primal_inf_squared = 0.0; + primal_inf = 0.0; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; const f_t lower_infeas = lp.lower[j] - x[j]; @@ -640,10 +643,11 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, const f_t square_infeas = infeas * infeas; squared_infeasibilities[j] = square_infeas; infeasibility_indices.push_back(j); - primal_inf += square_infeas; + primal_inf_squared += square_infeas; + primal_inf += infeas; } } - return primal_inf; + return primal_inf_squared; } template @@ -2241,7 +2245,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); - if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { return dual::status_t::NUMERICAL; } @@ -2268,7 +2272,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #ifdef COMPUTE_DUAL_RESIDUAL std::vector dual_res1; - compute_dual_residual(lp.A, objective, y, z, dual_res1); + phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); if (dual_res_norm > settings.tight_tol) { settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm); @@ -2357,8 +2361,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector bounded_variables(n, 0); 
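// Note on the variables introduced below (my reading of this diff, not the
// author's comment): compute_initial_primal_infeasibilities has always returned
// the sum of squared bound violations over the basic variables, even though the
// caller stored it as `primal_infeasibility`. This patch renames that total to
// `primal_infeasibility_squared` and adds an out-parameter carrying the plain
// (unsquared) sum, which is what the "Updated primal infeasibility" messages
// print; the squared total continues to feed update_primal_infeasibilities and
// the iteration log.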
phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); - f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + f_t primal_infeasibility; + f_t primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); @@ -2557,8 +2562,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); x = unperturbed_x; - primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; @@ -2594,8 +2599,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); x = unperturbed_x; - primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); @@ -2810,7 +2815,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, delta_xB_0_sparse.i, squared_infeasibilities, infeasibility_indices, - primal_infeasibility); + primal_infeasibility_squared); // Update primal infeasibilities due to changes in basic variables // from the leaving and entering variables phase2::update_primal_infeasibilities(lp, @@ -2822,7 +2827,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, scaled_delta_xB_sparse.i, squared_infeasibilities, infeasibility_indices, - primal_infeasibility); + primal_infeasibility_squared); // Update the entering variable phase2::update_single_primal_infeasibility(lp.lower, lp.upper, @@ -2883,14 +2888,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif if (should_refactor) { bool should_recompute_x = false; - if (ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { should_recompute_x = true; settings.log.printf("Failed to factorize basis. Iteration %d\n", iter); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } i_t count = 0; i_t deficient_size; while ((deficient_size = - ft.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus)) > 0) { + ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) { settings.log.printf("Failed to repair basis. Iteration %d. 
%d deficient columns.\n", iter, static_cast(deficient_size)); @@ -2912,8 +2917,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); x = unperturbed_x; } - phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); @@ -2951,7 +2956,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, iter, compute_user_objective(lp, obj), infeasibility_indices.size(), - primal_infeasibility, + primal_infeasibility_squared, sum_perturb, now); } diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 80406dcf0..445177fac 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -298,7 +298,7 @@ primal::status_t primal_phase2(i_t phase, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); From b8e99593006c50e1be680a2f3f03b1128a31ee8a Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 12 Dec 2025 15:08:47 -0800 Subject: [PATCH 14/45] Style fixes --- cpp/src/dual_simplex/basis_updates.cpp | 3 +- cpp/src/dual_simplex/crossover.cpp | 31 ++++++++++++-- cpp/src/dual_simplex/phase2.cpp | 56 +++++++++++++++++++------- cpp/src/dual_simplex/primal.cpp | 10 ++++- 4 files changed, 79 insertions(+), 21 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 11056a65e..e44e3b21c 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -2068,7 +2068,8 @@ int basis_update_mpf_t::refactor_basis( deficient, slacks_needed) == -1) { settings.log.debug("Initial factorization failed\n"); - basis_repair(A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair( + A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); #ifdef CHECK_BASIS_REPAIR const i_t m = A.m; diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 3dd61b152..41844729e 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -785,8 +785,15 @@ i_t primal_push(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. 
rank %d m %d\n", rank, m); - basis_repair( - lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1132,7 +1139,15 @@ crossover_status_t crossover(const lp_problem_t& lp, rank = factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. rank %d m %d\n", rank, m); @@ -1323,7 +1338,15 @@ crossover_status_t crossover(const lp_problem_t& lp, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis( lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. 
rank %d m %d\n", rank, m); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index e0ac7239e..3aeef35e1 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -633,7 +633,7 @@ f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, infeasibility_indices.reserve(n); infeasibility_indices.clear(); f_t primal_inf_squared = 0.0; - primal_inf = 0.0; + primal_inf = 0.0; for (i_t k = 0; k < m; ++k) { const i_t j = basic_list[k]; const f_t lower_infeas = lp.lower[j] - x[j]; @@ -2245,7 +2245,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, assert(superbasic_list.size() == 0); assert(nonbasic_list.size() == n - m); - if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > + 0) { return dual::status_t::NUMERICAL; } @@ -2362,8 +2363,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); f_t primal_infeasibility; - f_t primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + f_t primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); @@ -2561,9 +2568,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector unperturbed_x(n); phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; - primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + x = unperturbed_x; + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); objective = lp.objective; @@ -2598,9 +2611,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, std::vector unperturbed_x(n); phase2::compute_primal_solution_from_basis( lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); - x = unperturbed_x; - primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + x = unperturbed_x; + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); const f_t orig_dual_infeas = phase2::dual_infeasibility( lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); @@ -2888,14 +2907,15 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif if (should_refactor) { bool should_recompute_x = false; - if (ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { + if (ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus) > 0) { should_recompute_x = true; settings.log.printf("Failed to factorize basis. 
Iteration %d\n", iter); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } i_t count = 0; i_t deficient_size; - while ((deficient_size = - ft.refactor_basis(lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) { + while ((deficient_size = ft.refactor_basis( + lp.A, settings, lp.lower, lp.upper, basic_list, nonbasic_list, vstatus)) > 0) { settings.log.printf("Failed to repair basis. Iteration %d. %d deficient columns.\n", iter, static_cast(deficient_size)); @@ -2917,8 +2937,14 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); x = unperturbed_x; } - primal_infeasibility_squared = phase2::compute_initial_primal_infeasibilities( - lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices, primal_infeasibility); + primal_infeasibility_squared = + phase2::compute_initial_primal_infeasibilities(lp, + settings, + basic_list, + x, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); } #ifdef CHECK_BASIC_INFEASIBILITIES phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 445177fac..3d9849fbe 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -298,7 +298,15 @@ primal::status_t primal_phase2(i_t phase, factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed); if (rank != m) { settings.log.debug("Failed to factorize basis. rank %d m %d\n", rank, m); - basis_repair(lp.A, settings, lp.lower, lp.upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); + basis_repair(lp.A, + settings, + lp.lower, + lp.upper, + deficient, + slacks_needed, + basic_list, + nonbasic_list, + vstatus); if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { settings.log.printf("Failed to factorize basis after repair. 
rank %d m %d\n", rank, m); From 3c3683668e9e031003c95f399aa99ff34fd8b218 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 17 Dec 2025 17:16:39 -0800 Subject: [PATCH 15/45] Separate out cuts logic into several classes --- cpp/src/dual_simplex/CMakeLists.txt | 3 +- cpp/src/dual_simplex/branch_and_bound.cpp | 623 ++--------- cpp/src/dual_simplex/branch_and_bound.hpp | 3 + cpp/src/dual_simplex/cuts.cpp | 1050 +++++++++++++++++++ cpp/src/dual_simplex/cuts.hpp | 221 ++++ cpp/src/dual_simplex/solve.cpp | 219 ---- cpp/src/dual_simplex/solve.hpp | 13 - cpp/src/dual_simplex/sparse_matrix.cpp | 2 +- cpp/src/mip/diversity/diversity_manager.cu | 2 +- cpp/src/mip/diversity/lns/rins.cu | 1 + cpp/src/mip/solver.cu | 4 +- cpp/tests/dual_simplex/unit_tests/solve.cpp | 10 +- 12 files changed, 1396 insertions(+), 755 deletions(-) create mode 100644 cpp/src/dual_simplex/cuts.cpp create mode 100644 cpp/src/dual_simplex/cuts.hpp diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index a376ee23d..157a00a07 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -10,6 +10,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cuts.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp ${CMAKE_CURRENT_SOURCE_DIR}/folding.cpp ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp @@ -33,7 +34,7 @@ set(DUAL_SIMPLEX_SRC_FILES ) # Uncomment to enable debug info -#set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1") +set_source_files_properties(${DUAL_SIMPLEX_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1") set(CUOPT_SRC_FILES ${CUOPT_SRC_FILES} ${DUAL_SIMPLEX_SRC_FILES} PARENT_SCOPE) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index ae53d1ba3..945d84215 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -5,10 +5,9 @@ */ /* clang-format on */ -#include -#include #include #include +#include #include #include #include @@ -20,6 +19,9 @@ #include #include +#include + +#include #include #include #include @@ -255,6 +257,7 @@ i_t branch_and_bound_t::get_heap_size() template void branch_and_bound_t::set_new_solution(const std::vector& solution) { + mutex_original_lp_.lock(); if (solution.size() != original_problem_.num_cols) { settings_.log.printf( "Solution size mismatch %ld %d\n", solution.size(), original_problem_.num_cols); @@ -263,16 +266,22 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu crush_primal_solution( original_problem_, original_lp_, solution, new_slacks_, crushed_solution); f_t obj = compute_objective(original_lp_, crushed_solution); + mutex_original_lp_.unlock(); bool is_feasible = false; bool attempt_repair = false; mutex_upper_.lock(); - if (obj < upper_bound_) { + f_t current_upper_bound = upper_bound_; + mutex_upper_.unlock(); + if (obj < current_upper_bound) { f_t primal_err; f_t bound_err; i_t num_fractional; + mutex_original_lp_.lock(); is_feasible = check_guess( original_lp_, settings_, var_types_, crushed_solution, primal_err, bound_err, num_fractional); - if (is_feasible) { + mutex_original_lp_.unlock(); + mutex_upper_.lock(); + if (is_feasible && obj < upper_bound_) { upper_bound_ = obj; incumbent_.set_incumbent_solution(obj, crushed_solution); } else { @@ 
-287,8 +296,8 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu num_fractional); } } + mutex_upper_.unlock(); } - mutex_upper_.unlock(); if (is_feasible) { if (status_ == mip_exploration_status_t::RUNNING) { @@ -297,7 +306,7 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu std::string gap = user_mip_gap(user_obj, user_lower); settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", + "H %+13.6e %+10.6e %s %9.2f\n", user_obj, user_lower, gap.c_str(), @@ -410,7 +419,7 @@ void branch_and_bound_t::repair_heuristic_solutions() std::string user_gap = user_mip_gap(obj, lower); settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", + "H %+13.6e %+10.6e %s %9.2f\n", obj, lower, user_gap.c_str(), @@ -1136,6 +1145,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut "| Time |\n"); } + cut_pool_t cut_pool(original_lp_.num_cols, settings_); + cut_generation_t cut_generation(cut_pool); + for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { #ifdef PRINT_SOLUTION @@ -1173,538 +1185,117 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("Fractional variable %d lower %e value %e upper %e\n", j, original_lp_.lower[j], root_relax_soln_.x[j], original_lp_.upper[j]); } #endif - // Let's look for Gomory cuts - // Compute b_bar - std::vector b_bar(original_lp_.num_rows); - basis_update.b_solve(original_lp_.rhs, b_bar); - - std::vector nonbasic_mark(original_lp_.num_cols, 0); - for (i_t j : nonbasic_list) { - if (j < 0 || j >= original_lp_.num_cols) { - settings_.log.printf("nonbasic_list out of bounds %d num_cols %d\n", j, original_lp_.num_cols); - exit(1); - } - nonbasic_mark[j] = 1; - } - - std::vector x_workspace(original_lp_.num_cols, 0.0); - std::vector x_mark(original_lp_.num_cols, 0); - - std::vector abar_indices; - abar_indices.reserve(original_lp_.num_cols); - - std::vector has_lower(original_lp_.num_cols, 0); - std::vector has_upper(original_lp_.num_cols, 0); - bool needs_complement = false; - for (i_t j = 0; j < original_lp_.num_cols; j++) { - if (original_lp_.lower[j] < 0) { - settings_.log.printf( - "Variable %d has negative lower bound %e\n", j, original_lp_.lower[j]); - exit(1); - } - const f_t uj = original_lp_.upper[j]; - const f_t lj = original_lp_.lower[j]; - if (uj != inf || lj != 0.0) { - needs_complement = true; - } - const f_t xstar_j = root_relax_soln_.x[j]; - if (uj < inf) { - if (uj - xstar_j <= xstar_j - lj) { - has_upper[j] = 1; - } else { - has_lower[j] = 1; - } - continue; - } + // Generate cuts and add them to the cut pool + cut_generation.generate_cuts(original_lp_, settings_, Arow, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); - if (lj > -inf) { - has_lower[j] = 1; - } - } -#ifdef PRINT_COMPLEMENT_INFO - settings_.log.printf("needs_complement %d\n", needs_complement); -#endif - - csr_matrix_t C(0, original_lp_.num_cols, 0); - C.row_start[0] = 0; + // Score the cuts + cut_pool.score_cuts(root_relax_soln_.x); + // Get the best cuts from the cut pool + csr_matrix_t cuts_to_add(0, original_lp_.num_cols, 0); std::vector cut_rhs; + i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs); - for (i_t i = 0; i < original_lp_.num_rows; i++) { - const i_t j = basic_list[i]; - //settings_.log.printf( - // "Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]); - if (var_types_[j] != variable_type_t::INTEGER) { continue; } - const f_t x_j = root_relax_soln_.x[j]; - if (std::abs(x_j - std::round(x_j)) < 
settings_.integer_tol) { continue; } -#ifdef PRINT_CUT_INFO - settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i); -#endif -#ifdef PRINT_BASIS - for (i_t h = 0; h < basic_list.size(); h++) { - settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]); - } -#endif - - // Solve B^T u_bar = e_i - sparse_vector_t e_i(original_lp_.num_rows, 1); - e_i.i[0] = i; - e_i.x[0] = 1.0; - sparse_vector_t u_bar(original_lp_.num_rows, 0); - basis_update.b_transpose_solve(e_i, u_bar); - - std::vector u_bar_dense(original_lp_.num_rows); - u_bar.to_dense(u_bar_dense); - - std::vector BTu_bar(original_lp_.num_rows); - b_transpose_multiply(original_lp_, basic_list, u_bar_dense, BTu_bar); - for (i_t k = 0; k < original_lp_.num_rows; k++) { - if (k == i) { - if (std::abs(BTu_bar[k] - 1.0) > 1e-6) { - settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); - exit(1); - } - } else { - if (std::abs(BTu_bar[k]) > 1e-6) { - settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); - exit(1); - } - } - } - - // Compute a_bar = N^T u_bar - // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused. - const i_t nz_ubar = u_bar.i.size(); - for (i_t k = 0; k < nz_ubar; k++) { - const i_t ii = u_bar.i[k]; - const f_t u_bar_i = u_bar.x[k]; - const i_t row_start = Arow.col_start[ii]; - const i_t row_end = Arow.col_start[ii + 1]; - for (i_t p = row_start; p < row_end; p++) { - const i_t jj = Arow.i[p]; - if (nonbasic_mark[jj] == 1) { - x_workspace[jj] += u_bar_i * Arow.x[p]; - if (!x_mark[jj]) { - x_mark[jj] = 1; - abar_indices.push_back(jj); - } - } - } - } - - sparse_vector_t a_bar(original_lp_.num_cols, abar_indices.size() + 1); - for (i_t k = 0; k < abar_indices.size(); k++) { - const i_t jj = abar_indices[k]; - a_bar.i[k] = jj; - a_bar.x[k] = x_workspace[jj]; - } - - // Clear the workspace - for (i_t jj : abar_indices) { - x_workspace[jj] = 0.0; - x_mark[jj] = 0; - } - abar_indices.clear(); - - // We should now have the base inequality - // x_j + a_bar^T x_N >= b_bar_i - // We add x_j into a_bar so that everything is in a single sparse_vector_t - a_bar.i[a_bar.i.size() - 1] = j; - a_bar.x[a_bar.x.size() - 1] = 1.0; - - std::vector a_bar_dense(original_lp_.num_cols); - a_bar.to_dense(a_bar_dense); - - f_t a_bar_dense_dot = dot(a_bar_dense, root_relax_soln_.x); - if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) { - settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]); - settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); - exit(1); - } - - // Skip cuts that are shallow - const f_t shallow_tol = 1e-2; - if (std::abs(x_j - std::round(x_j)) < shallow_tol) { - //settings_.log.printf("Skipping shallow cut %d. b_bar[%d] = %e x_j %e\n", i, i, b_bar[i], x_j); - continue; - } - - const f_t f_val = b_bar[i] - std::floor(b_bar[i]); - if (f_val < 0.01 || f_val > 0.99) { - settings_.log.printf("Skipping cut %d. 
b_bar[%d] = %e f_val %e\n", i, i, b_bar[i], f_val); - continue; - } - -#ifdef PRINT_BASE_INEQUALITY - // Print out the base inequality - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - const f_t aj = a_bar.x[k]; - settings_.log.printf("a_bar[%d] = %e\n", k, aj); - } - settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]); -#endif - - auto f = [](f_t q_1, f_t q_2) -> f_t { - f_t q_1_hat = q_1 - std::floor(q_1); - f_t q_2_hat = q_2 - std::floor(q_2); - return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1); - }; - - auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; - - - std::vector cut_indices; - cut_indices.reserve(a_bar.i.size()); - f_t R; - if (!needs_complement) { - R = (b_bar[i] - std::floor(b_bar[i])) * std::ceil(b_bar[i]); - - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - f_t aj = a_bar.x[k]; - if (var_types_[jj] == variable_type_t::INTEGER) { - x_workspace[jj] += f(aj, b_bar[i]); - if (!x_mark[jj] && x_workspace[jj] != 0.0) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } else { - x_workspace[jj] += h(aj); - if (!x_mark[jj] && x_workspace[jj] != 0.0) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } - } - } else { - // Compute r - f_t r = b_bar[i]; - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - if (has_upper[jj]) { - const f_t uj = original_lp_.upper[jj]; - r -= uj * a_bar.x[k]; - continue; - } - if (has_lower[jj]) { - const f_t lj = original_lp_.lower[jj]; - r -= lj * a_bar.x[k]; - } - } - - // Compute R - R = std::ceil(r) * (r - std::floor(r)); - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - const f_t aj = a_bar.x[k]; - if (has_upper[jj]) { - const f_t uj = original_lp_.upper[jj]; - if (var_types_[jj] == variable_type_t::INTEGER) { - R -= f(-aj, r) * uj; - } else { - R -= h(-aj) * uj; - } - } else if (has_lower[jj]) { - const f_t lj = original_lp_.lower[jj]; - if (var_types_[jj] == variable_type_t::INTEGER) { - R += f(aj, r) * lj; - } else { - R += h(aj) * lj; - } - } - } - - // Compute the cut coefficients - for (i_t k = 0; k < a_bar.i.size(); k++) { - const i_t jj = a_bar.i[k]; - const f_t aj = a_bar.x[k]; - if (has_upper[jj]) { - if (var_types_[jj] == variable_type_t::INTEGER) { - // Upper intersect I - x_workspace[jj] -= f(-aj, r); - if (!x_mark[jj] && x_workspace[jj] != 0.0) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } else { - // Upper intersect C - f_t h_j = h(-aj); - if (h_j != 0.0) { - x_workspace[jj] -= h_j; - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } - } - } else if (var_types_[jj] == variable_type_t::INTEGER) { - // I \ Upper - x_workspace[jj] += f(aj, r); - if (!x_mark[jj] && x_workspace[jj] != 0.0) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } else { - // C \ Upper - f_t h_j = h(aj); - if (h_j != 0.0) { - x_workspace[jj] += h_j; - if (!x_mark[jj]) { - x_mark[jj] = 1; - cut_indices.push_back(jj); - } - } - } - } - } - - sparse_vector_t cut(original_lp_.num_cols, 0); - cut.i.reserve(cut_indices.size()); - cut.x.reserve(cut_indices.size()); - for (i_t k = 0; k < cut_indices.size(); k++) { - const i_t jj = cut_indices[k]; - - // Check for small coefficients - const f_t aj = x_workspace[jj]; - if (std::abs(aj) < 1e-6) { - if (aj >= 0.0 && original_lp_.upper[jj] < inf) { - // Move this to the right-hand side - //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.upper[jj], jj); - R -= aj * original_lp_.upper[jj]; - continue; - } else if (aj 
<= 0.0 && original_lp_.lower[jj] > -inf) { - //settings_.log.printf("Moving %e to the right-hand side for variable %d\n", aj * original_lp_.lower[jj], jj); - R += aj * original_lp_.lower[jj]; - continue; - } - else { - //settings_.log.printf("Small coefficient %e for variable %d lower %e upper %e\n", aj, jj, original_lp_.lower[jj], original_lp_.upper[jj]); - } - } - cut.i.push_back(jj); - cut.x.push_back(x_workspace[jj]); - } - - // Clear the workspace - for (i_t jj : cut_indices) { - x_workspace[jj] = 0.0; - x_mark[jj] = 0; - } - - if (cut.x.size() == 0) - { - continue; - } - if (cut.x.size() >= 0.7 * original_lp_.num_cols) - { - settings_.log.printf("Cut %d has %d nonzeros. Skipping because it is too dense %.2f\n", i, cut.x.size(), static_cast(cut.x.size()) / original_lp_.num_cols); - continue; - } - - // Sort the coefficients by their index - cut.sort(); - // The new cut is: g'*x >= R - // But we want to have it in the form h'*x <= b - cut.negate(); - C.append_row(cut); - cut_rhs.push_back(-R); - } + cuts_to_add.check_matrix(); - csc_matrix_t C_col(C.m, C.n, 0); - C.to_compressed_col(C_col); -#ifdef PRINT_CUTS - C_col.print_matrix(); -#endif - - C.check_matrix(); -#ifdef PRINT_CUT_RHS - for (i_t k = 0; k < cut_rhs.size(); k++) { - settings_.log.printf("cut_rhs[%d] = %e\n", k, cut_rhs[k]); - } -#endif - -#ifdef PRINT_CUT_INFO - settings_.log.printf("C nz %d\n", C.row_start[C.m]); - settings_.log.printf("C m %d cut rhs size %d\n", C.m, cut_rhs.size()); - settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols); -#endif - -#ifdef PRINT_OPTIMAL - for (i_t j = 0; j < original_lp_.num_cols; j++) { - lp_settings.log.printf("x[%d] = %e\n", j, root_relax_soln_.x[j]); - } -#endif - - // Check to see that this is a cut i.e C*x > d - std::vector Cx(C.m); - matrix_vector_multiply(C_col, 1.0, root_relax_soln_.x, 0.0, Cx); - f_t min_cut_violation = inf; - for (i_t k = 0; k < Cx.size(); k++) { - //lp_settings.log.printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]); - if (Cx[k] <= cut_rhs[k]) { - settings_.log.printf("C*x <= d for cut %d\n", k); - exit(1); - } - min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]); - } #ifdef PRINT_MIN_CUT_VIOLATION + f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x); settings_.log.printf("Min cut violation %e\n", min_cut_violation); #endif // Resolve the LP with the new cuts - settings_.log.printf("Solving LP with %d cuts (%d nonzeros). Total constraints %d\n", C.m, C.row_start[C.m], C.m + original_lp_.num_rows); + settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", + num_cuts, + cuts_to_add.row_start[cuts_to_add.m], + cut_pool.pool_size(), + cuts_to_add.m + original_lp_.num_rows); lp_settings.log.log = false; - lp_status_t cut_status = solve_linear_program_with_cuts(stats_.start_time, - lp_settings, - C, - cut_rhs, - original_lp_, - root_relax_soln_, - basis_update, - basic_list, - nonbasic_list, - root_vstatus_, - edge_norms_); - settings_.log.printf("Cut LP iterations %d. 
A nz %d\n", root_relax_soln_.iterations, original_lp_.A.col_start[original_lp_.A.n]); - stats_.total_lp_iters += root_relax_soln_.iterations; - root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - - if (cut_status != lp_status_t::OPTIMAL) { - lp_settings.log.printf("Cut status %d\n", cut_status); + mutex_original_lp_.lock(); + i_t add_cuts_status = add_cuts(settings_, + cuts_to_add, + cut_rhs, + original_lp_, + root_relax_soln_, + basis_update, + basic_list, + nonbasic_list, + root_vstatus_, + edge_norms_); + mutex_original_lp_.unlock(); + if (add_cuts_status != 0) { + settings_.log.printf("Failed to add cuts\n"); exit(1); } - original_lp_.A.transpose(Arow); + // Try to do bound strengthening var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); - std::vector cuts_to_remove; - cuts_to_remove.reserve(original_lp_.num_rows - original_rows); - std::vector slacks_to_remove; - slacks_to_remove.reserve(original_lp_.num_rows - original_rows); - const f_t dual_tol = 1e-10; - for (i_t k = original_rows; k < original_lp_.num_rows; k++) { - if (std::abs(root_relax_soln_.y[k]) < dual_tol) { - const i_t row_start = Arow.col_start[k]; - const i_t row_end = Arow.col_start[k + 1]; - i_t last_slack = -1; - const f_t slack_tol = 1e-3; - for (i_t p = row_start; p < row_end; p++) { - const i_t jj = Arow.i[p]; - const i_t col_len = original_lp_.A.col_start[jj + 1] - original_lp_.A.col_start[jj]; - if (var_types_[jj] == variable_type_t::CONTINUOUS && - Arow.x[p] == 1.0 && - original_lp_.lower[jj] == 0.0 && - original_lp_.upper[jj] == inf && - root_vstatus_[jj] == variable_status_t::BASIC && - col_len == 1 && - root_relax_soln_.x[jj] > slack_tol) { - last_slack = jj; - } - } - if (last_slack != -1) { - cuts_to_remove.push_back(k); - slacks_to_remove.push_back(last_slack); - } - } - } - if (cuts_to_remove.size() > 0) { - settings_.log.printf("Removing %d cuts\n", cuts_to_remove.size()); - std::vector marked_rows(original_lp_.num_rows, 0); - for (i_t i : cuts_to_remove) { - marked_rows[i] = 1; - } - std::vector marked_cols(original_lp_.num_cols, 0); - for (i_t j : slacks_to_remove) { - marked_cols[j] = 1; - } + std::vector bounds_changed(original_lp_.num_cols, true); + std::vector row_sense; - std::vector new_rhs(original_lp_.num_rows - cuts_to_remove.size()); - std::vector new_solution_y(original_lp_.num_rows - cuts_to_remove.size()); - i_t h = 0; - for (i_t i = 0; i < original_lp_.num_rows; i++) { - if (!marked_rows[i]) { - new_rhs[h] = original_lp_.rhs[i]; - new_solution_y[h] = root_relax_soln_.y[i]; - h++; - } - } + settings_.log.printf("Before A check\n"); + original_lp_.A.check_matrix(); + settings_.log.printf("Before A transpose\n"); + original_lp_.A.transpose(Arow); + settings_.log.printf("After A transpose\n"); + bool feasible = + bound_strengthening(row_sense, settings_, original_lp_, Arow, var_types_, bounds_changed); + if (!feasible) { + settings_.log.printf("Bound strengthening failed\n"); + exit(1); + } - Arow.remove_columns(marked_rows); - Arow.transpose(original_lp_.A); - - std::vector new_objective(original_lp_.num_cols - slacks_to_remove.size()); - std::vector new_lower(original_lp_.num_cols - slacks_to_remove.size()); - std::vector new_upper(original_lp_.num_cols - slacks_to_remove.size()); - std::vector new_var_types(original_lp_.num_cols - slacks_to_remove.size()); - std::vector new_vstatus(original_lp_.num_cols - slacks_to_remove.size()); - std::vector new_basic_list; - new_basic_list.reserve(original_lp_.num_rows - slacks_to_remove.size()); - std::vector 
new_nonbasic_list; - new_nonbasic_list.reserve(nonbasic_list.size()); - std::vector new_solution_x(original_lp_.num_cols - slacks_to_remove.size()); - std::vector new_solution_z(original_lp_.num_cols - slacks_to_remove.size()); - h = 0; - for (i_t k = 0; k < original_lp_.num_cols; k++) { - if (!marked_cols[k]) { - new_objective[h] = original_lp_.objective[k]; - new_lower[h] = original_lp_.lower[k]; - new_upper[h] = original_lp_.upper[k]; - new_var_types[h] = var_types_[k]; - new_vstatus[h] = root_vstatus_[k]; - new_solution_x[h] = root_relax_soln_.x[k]; - new_solution_z[h] = root_relax_soln_.z[k]; - if (new_vstatus[h] != variable_status_t::BASIC) { - new_nonbasic_list.push_back(h); - } else { - new_basic_list.push_back(h); - } - h++; - } - } - original_lp_.A.remove_columns(marked_cols); - original_lp_.A.transpose(Arow); - original_lp_.objective = new_objective; - original_lp_.lower = new_lower; - original_lp_.upper = new_upper; - original_lp_.rhs = new_rhs; - var_types_ = new_var_types; - original_lp_.num_cols = original_lp_.A.n; - original_lp_.num_rows = original_lp_.A.m; - basic_list = new_basic_list; - nonbasic_list = new_nonbasic_list; - root_vstatus_ = new_vstatus; - root_relax_soln_.x = new_solution_x; - root_relax_soln_.y = new_solution_y; - root_relax_soln_.z = new_solution_z; - -#ifdef PRINT_SIZES - settings_.log.printf("A %d x %d\n", original_lp_.A.m, original_lp_.A.n); - settings_.log.printf("basic_list size %d\n", basic_list.size()); - settings_.log.printf("nonbasic_list size %d\n", nonbasic_list.size()); - settings_.log.printf("root_vstatus_ size %d\n", root_vstatus_.size()); - settings_.log.printf("original_lp_.num_rows %d\n", original_lp_.num_rows); - settings_.log.printf("original_lp_.num_cols %d\n", original_lp_.num_cols); - settings_.log.printf("root_relax_soln_.x size %d\n", root_relax_soln_.x.size()); - settings_.log.printf("root_relax_soln_.y size %d\n", root_relax_soln_.y.size()); - settings_.log.printf("root_relax_soln_.z size %d\n", root_relax_soln_.z.size()); - settings_.log.printf("rhs size %ld\n", original_lp_.rhs.size()); - settings_.log.printf("lower size %ld\n", original_lp_.lower.size()); - settings_.log.printf("upper size %ld\n", original_lp_.upper.size()); - settings_.log.printf("objective size %ld\n", original_lp_.objective.size()); - settings_.log.printf("var_types_ size %ld\n", var_types_.size()); -#endif - settings_.log.printf("After removal %d rows %d columns %d nonzeros\n", - original_lp_.num_rows, - original_lp_.num_cols, - original_lp_.A.col_start[original_lp_.A.n]); + // Adjust the solution + root_relax_soln_.x.resize(original_lp_.num_cols, 0.0); + root_relax_soln_.y.resize(original_lp_.num_rows, 0.0); + root_relax_soln_.z.resize(original_lp_.num_cols, 0.0); + + // For now just clear the edge norms + edge_norms_.clear(); + i_t iter = 0; + bool initialize_basis = false; + dual::status_t cut_status = dual_phase2_with_advanced_basis(2, + 0, + initialize_basis, + stats_.start_time, + original_lp_, + lp_settings, + root_vstatus_, + basis_update, + basic_list, + nonbasic_list, + root_relax_soln_, + iter, + edge_norms_); + + settings_.log.printf("Cut LP iterations %d. 
A nz %d\n", + iter, + original_lp_.A.col_start[original_lp_.A.n]); + stats_.total_lp_iters += root_relax_soln_.iterations; + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - basis_update.resize(original_lp_.num_rows); - basis_update.refactor_basis(original_lp_.A, settings_, basic_list, nonbasic_list, root_vstatus_); + if (cut_status != dual::status_t::OPTIMAL) { + settings_.log.printf("Cut status %d\n", cut_status); + exit(1); } + local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + + remove_cuts(original_lp_, + settings_, + Arow, + original_rows, + var_types_, + root_vstatus_, + root_relax_soln_.x, + root_relax_soln_.y, + root_relax_soln_.z, + basic_list, + nonbasic_list, + basis_update); + fractional.clear(); num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 5b304addd..ccbad335a 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -149,6 +149,9 @@ class branch_and_bound_t { // Local lower bounds for each thread std::vector> local_lower_bounds_; + // Mutex for the original LP + omp_mutex_t mutex_original_lp_; + // Mutex for upper bound omp_mutex_t mutex_upper_; diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp new file mode 100644 index 000000000..4b8fc94dd --- /dev/null +++ b/cpp/src/dual_simplex/cuts.cpp @@ -0,0 +1,1050 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include + + +namespace cuopt::linear_programming::dual_simplex { + + +template +void cut_pool_t::add_cut(i_t n, const sparse_vector_t& cut, f_t rhs) +{ + // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool + + if (n > cut_storage_.n) { + cut_storage_.n = n; + } + + for (i_t p = 0; p < cut.i.size(); p++) { + const i_t j = cut.i[p]; + if (j >= original_vars_) { + settings_.log.printf( + "Cut has variable %d that is greater than original_vars_ %d\n", j, original_vars_); + return; + } + } + + cut_storage_.append_row(cut); + rhs_storage_.push_back(rhs); + cut_age_.push_back(0); +} + + +template +f_t cut_pool_t::cut_distance(i_t row, const std::vector& x, f_t& cut_violation, f_t &cut_norm) +{ + const i_t row_start = cut_storage_.row_start[row]; + const i_t row_end = cut_storage_.row_start[row + 1]; + f_t cut_x = 0.0; + f_t dot = 0.0; + for (i_t p = row_start; p < row_end; p++) { + const i_t j = cut_storage_.j[p]; + const f_t cut_coeff = cut_storage_.x[p]; + cut_x += cut_coeff * x[j]; + dot += cut_coeff * cut_coeff; + } + cut_violation = rhs_storage_[row] - cut_x; + cut_norm = std::sqrt(dot); + const f_t distance = cut_violation / cut_norm; + return distance; +} + +template +f_t cut_pool_t::cut_density(i_t row) +{ + const i_t row_start = cut_storage_.row_start[row]; + const i_t row_end = cut_storage_.row_start[row + 1]; + const i_t cut_nz = row_end - row_start; + const i_t original_vars = original_vars_; + return static_cast(cut_nz) / original_vars; +} + +template +f_t cut_pool_t::cut_orthogonality(i_t i, i_t j) +{ + const i_t i_start = cut_storage_.row_start[i]; + const i_t i_end = cut_storage_.row_start[i + 1]; + const i_t i_nz = i_end - i_start; + const i_t j_start = cut_storage_.row_start[j]; + const i_t j_end = cut_storage_.row_start[j + 1]; + const i_t j_nz = j_end - 
j_start; + + f_t dot = sparse_dot(cut_storage_.j.data() + i_start, cut_storage_.x.data() + i_start, i_nz, + cut_storage_.j.data() + j_start, cut_storage_.x.data() + j_start, j_nz); + + f_t norm_i = cut_norms_[i]; + f_t norm_j = cut_norms_[j]; + return 1.0 - std::abs(dot) / (norm_i * norm_j); +} + +template +void cut_pool_t::score_cuts(std::vector& x_relax) +{ + const f_t weight_distance = 1.0; + const f_t weight_orthogonality = 1.0; + cut_distances_.resize(cut_storage_.m, 0.0); + cut_norms_.resize(cut_storage_.m, 0.0); + cut_orthogonality_.resize(cut_storage_.m, 1); + cut_scores_.resize(cut_storage_.m, 0.0); + for (i_t i = 0; i < cut_storage_.m; i++) { + f_t violation; + cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]); + cut_scores_[i] = weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; + //settings_.log.printf("Cut %d distance %e violation %e orthogonality %e score %e\n", i, cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); + } + + std::vector sorted_indices(cut_storage_.m); + std::iota(sorted_indices.begin(), sorted_indices.end(), 0); + std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { + return cut_scores_[a] > cut_scores_[b]; + }); + + std::vector indices; + indices.reserve(sorted_indices.size()); + + + const i_t max_cuts = 2000; + const f_t min_orthogonality = 0.5; + const f_t min_cut_distance = 1e-4; + best_cuts_.reserve(std::min(max_cuts, cut_storage_.m)); + + while (scored_cuts_ < max_cuts && !sorted_indices.empty()) { + const i_t i = sorted_indices[0]; + + if (cut_distances_[i] <= min_cut_distance) { + break; + } + + if (cut_age_[i] > 0) { + settings_.log.printf("Adding cut with age %d\n", cut_age_[i]); + } + //settings_.log.printf("Scored cuts %d. 
Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); + + best_cuts_.push_back(i); + scored_cuts_++; + + // Recompute the orthogonality for the remaining cuts + for (i_t k = 1; k < sorted_indices.size(); k++) { + const i_t j = sorted_indices[k]; + cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j)); + if (cut_orthogonality_[j] >= min_orthogonality) { + indices.push_back(j); + cut_scores_[j] = weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; + //settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]); + } + } + + sorted_indices = indices; + indices.clear(); + //settings_.log.printf("Sorting %d cuts\n", sorted_indices.size()); + + std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { + return cut_scores_[a] > cut_scores_[b]; + }); + } +} + +template +i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs) +{ + best_cuts.m = 0; + best_cuts.n = cut_storage_.n; + best_cuts.row_start.clear(); + best_cuts.j.clear(); + best_cuts.x.clear(); + best_cuts.row_start.reserve(scored_cuts_ + 1); + best_cuts.row_start.push_back(0); + + for (i_t i: best_cuts_) { + sparse_vector_t cut(cut_storage_, i); + cut.negate(); + best_cuts.append_row(cut); + //settings_.log.printf("Best cuts nz %d\n", best_cuts.row_start[best_cuts.m]); + best_rhs.push_back(-rhs_storage_[i]); + } + + return static_cast(best_cuts_.size()); +} + + +template +void cut_pool_t::age_cuts() +{ + for (i_t i = 0; i < cut_age_.size(); i++) { + cut_age_[i]++; + } +} + +template +void cut_pool_t::drop_cuts() +{ + // TODO: Implement this +} + +template +void cut_generation_t::generate_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + basis_update_mpf_t& basis_update, + const std::vector& xstar, + const std::vector& basic_list, + const std::vector& nonbasic_list) +{ + // Generate Gomory Cuts + generate_gomory_cuts( + lp, settings, Arow, var_types, basis_update, xstar, basic_list, nonbasic_list); + + + // Generate MIR cuts + // generate_mir_cuts(lp, settings, Arow, var_types, xstar); +} + +template +void cut_generation_t::generate_mir_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + const std::vector& xstar) +{ + mixed_integer_rounding_cut_t mir(lp.num_cols, settings); + mir.initialize(lp, xstar); + + for (i_t i = 0; i < lp.num_rows; i++) { + sparse_vector_t inequality(Arow, i); + f_t inequality_rhs = lp.rhs[i]; + + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + i_t last_slack = -1; + for (i_t p = row_start; p < row_end; p++) { + const i_t j = Arow.i[p]; + const f_t a = Arow.x[p]; + if (var_types[j] == variable_type_t::CONTINUOUS && a == 1.0 && lp.lower[j] == 0.0) { + last_slack = j; + } + } + + if (last_slack != -1) { + // Remove the slack from the equality to get an inequality + for (i_t k = 0; k < inequality.i.size(); k++) { + const i_t j = inequality.i[k]; + if (j == last_slack) { + inequality.x[k] = 0.0; + } + } + + // inequaility'*x <= inequality_rhs + // But for MIR we need: inequality'*x >= inequality_rhs + inequality_rhs *= -1; + inequality.negate(); + + sparse_vector_t cut(lp.num_cols, 0); + f_t cut_rhs; + i_t mir_status = mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs); + if (mir_status == 0) { + f_t dot = 0.0; + f_t cut_norm = 0.0; + for (i_t k = 0; k < cut.i.size(); 
k++) { + const i_t jj = cut.i[k]; + const f_t aj = cut.x[k]; + dot += aj * xstar[jj]; + cut_norm += aj * aj; + } + if (dot >= cut_rhs) { + continue; + } + } + + settings.log.printf("Adding MIR cut %d\n", i); + cut_pool_.add_cut(lp.num_cols, cut, cut_rhs); + } + } +} + +template +void cut_generation_t::generate_gomory_cuts( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + basis_update_mpf_t& basis_update, + const std::vector& xstar, + const std::vector& basic_list, + const std::vector& nonbasic_list) +{ + mixed_integer_gomory_base_inequality_t gomory(lp, basis_update, nonbasic_list); + mixed_integer_rounding_cut_t mir(lp.num_cols, settings); + + mir.initialize(lp, xstar); + + for (i_t i = 0; i < lp.num_rows; i++) { + sparse_vector_t inequality(lp.num_cols, 0); + f_t inequality_rhs; + const i_t j = basic_list[i]; + if (var_types[j] != variable_type_t::INTEGER) { continue; } + const f_t x_j = xstar[j]; + if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { continue; } + i_t gomory_status = gomory.generate_base_inequality(lp, + settings, + Arow, + var_types, + basis_update, + xstar, + basic_list, + nonbasic_list, + i, + inequality, + inequality_rhs); + if (gomory_status == 0) { + // Given the base inequality, generate a MIR cut + sparse_vector_t cut_A(lp.num_cols, 0); + f_t cut_A_rhs; + i_t mir_status = + mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_A, cut_A_rhs); + bool A_valid = false; + f_t cut_A_distance = 0.0; + if (mir_status == 0) { + // Check that the cut is violated + f_t dot = 0.0; + f_t cut_norm = 0.0; + for (i_t k = 0; k < cut_A.i.size(); k++) { + const i_t jj = cut_A.i[k]; + const f_t aj = cut_A.x[k]; + dot += aj * xstar[jj]; + cut_norm += aj * aj; + } + if (dot >= cut_A_rhs) { + settings.log.printf("Cut %d is not violated. Skipping\n", i); + continue; + } + cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm); + A_valid = true; + //cut_pool_.add_cut(lp.num_cols, cut, cut_rhs); + } + + // Negate the base inequality + inequality.negate(); + inequality_rhs *= -1; + + sparse_vector_t cut_B(lp.num_cols, 0); + f_t cut_B_rhs; + + mir_status = + mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_B, cut_B_rhs); + bool B_valid = false; + f_t cut_B_distance = 0.0; + if (mir_status == 0) { + // Check that the cut is violated + f_t dot = 0.0; + f_t cut_norm = 0.0; + for (i_t k = 0; k < cut_B.i.size(); k++) { + const i_t jj = cut_B.i[k]; + const f_t aj = cut_B.x[k]; + dot += aj * xstar[jj]; + cut_norm += aj * aj; + } + if (dot >= cut_B_rhs) { + settings.log.printf("Cut %d is not violated. 
Skipping\n", i); + continue; + } + cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm); + B_valid = true; + // cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs); + } + + if ((cut_A_distance > cut_B_distance) && A_valid) { + cut_pool_.add_cut(lp.num_cols, cut_A, cut_A_rhs); + } else if (B_valid) { + cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs); + } + } + } +} + +template +i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + basis_update_mpf_t& basis_update, + const std::vector& xstar, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t i, + sparse_vector_t& inequality, + f_t& inequality_rhs) +{ + // Let's look for Gomory cuts + const i_t j = basic_list[i]; + if (var_types[j] != variable_type_t::INTEGER) { return -1; } + const f_t x_j = xstar[j]; + if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { return -1; } +#ifdef PRINT_CUT_INFO + settings_.log.printf("Generating cut for variable %d relaxed value %e row %d\n", j, x_j, i); +#endif +#ifdef PRINT_BASIS + for (i_t h = 0; h < basic_list.size(); h++) { + settings_.log.printf("basic_list[%d] = %d\n", h, basic_list[h]); + } +#endif + + // Solve B^T u_bar = e_i + sparse_vector_t e_i(lp.num_rows, 1); + e_i.i[0] = i; + e_i.x[0] = 1.0; + sparse_vector_t u_bar(lp.num_rows, 0); + basis_update.b_transpose_solve(e_i, u_bar); + + +#ifdef CHECK_B_TRANSPOSE_SOLVE + std::vector u_bar_dense(lp.num_rows); + u_bar.to_dense(u_bar_dense); + + std::vector BTu_bar(lp.num_rows); + b_transpose_multiply(lp, basic_list, u_bar_dense, BTu_bar); + for (i_t k = 0; k < lp.num_rows; k++) { + if (k == i) { + if (std::abs(BTu_bar[k] - 1.0) > 1e-6) { + settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); + exit(1); + } + } else { + if (std::abs(BTu_bar[k]) > 1e-6) { + settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); + exit(1); + } + } + } +#endif + + // Compute a_bar = N^T u_bar + // TODO: This is similar to a function in phase2 of dual simplex. See if it can be reused. 
+ const i_t nz_ubar = u_bar.i.size(); + std::vector abar_indices; + abar_indices.reserve(nz_ubar); + for (i_t k = 0; k < nz_ubar; k++) { + const i_t ii = u_bar.i[k]; + const f_t u_bar_i = u_bar.x[k]; + const i_t row_start = Arow.col_start[ii]; + const i_t row_end = Arow.col_start[ii + 1]; + for (i_t p = row_start; p < row_end; p++) { + const i_t jj = Arow.i[p]; + if (nonbasic_mark_[jj] == 1) { + x_workspace_[jj] += u_bar_i * Arow.x[p]; + if (!x_mark_[jj]) { + x_mark_[jj] = 1; + abar_indices.push_back(jj); + } + } + } + } + + sparse_vector_t a_bar(lp.num_cols, abar_indices.size() + 1); + for (i_t k = 0; k < abar_indices.size(); k++) { + const i_t jj = abar_indices[k]; + a_bar.i[k] = jj; + a_bar.x[k] = x_workspace_[jj]; + } + + // Clear the workspace + for (i_t jj : abar_indices) { + x_workspace_[jj] = 0.0; + x_mark_[jj] = 0; + } + abar_indices.clear(); + + // We should now have the base inequality + // x_j + a_bar^T x_N >= b_bar_i + // We add x_j into a_bar so that everything is in a single sparse_vector_t + a_bar.i[a_bar.i.size() - 1] = j; + a_bar.x[a_bar.x.size() - 1] = 1.0; + +#ifdef CHECK_A_BAR_DENSE_DOT + std::vector a_bar_dense(lp.num_cols); + a_bar.to_dense(a_bar_dense); + + f_t a_bar_dense_dot = dot(a_bar_dense, xstar); + if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) { + settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]); + settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); + exit(1); + } +#endif + + // We have that x_j + a_bar^T x_N == b_bar_i + // So x_j + a_bar^T x_N >= b_bar_i + // And x_j + a_bar^T x_N <= b_bar_i + // Or -x_j - a_bar^T x_N >= -b_bar_i + +#ifdef PRINT_CUT + { + settings_.log.printf("Cut %d\n", i); + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + settings_.log.printf("(%d, %e) ", jj, aj); + } + settings_.log.printf("\nEnd cut %d b_bar[%d] = %e\n", i, b_bar[i]); + } +#endif + + // Skip cuts that are shallow + const f_t shallow_tol = 1e-2; + if (std::abs(x_j - std::round(x_j)) < shallow_tol) { + //settings_.log.printf("Skipping shallow cut %d. b_bar[%d] = %e x_j %e\n", i, i, b_bar[i], x_j); + return -1; + } + + const f_t f_val = b_bar_[i] - std::floor(b_bar_[i]); + if (f_val < 0.01 || f_val > 0.99) { + //settings_.log.printf("Skipping cut %d. 
b_bar[%d] = %e f_val %e\n", i, i, b_bar[i], f_val); + return -1; + } + +#ifdef PRINT_BASE_INEQUALITY + // Print out the base inequality + for (i_t k = 0; k < a_bar.i.size(); k++) { + const i_t jj = a_bar.i[k]; + const f_t aj = a_bar.x[k]; + settings_.log.printf("a_bar[%d] = %e\n", k, aj); + } + settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]); +#endif + + inequality = a_bar; + inequality_rhs = b_bar_[i]; + + return 0; +} + +template +void mixed_integer_rounding_cut_t::initialize(const lp_problem_t& lp, + const std::vector& xstar) +{ + + if (lp.num_cols != num_vars_) { + num_vars_ = lp.num_cols; + x_workspace_.resize(num_vars_, 0.0); + x_mark_.resize(num_vars_, 0); + has_lower_.resize(num_vars_, 0); + has_upper_.resize(num_vars_, 0); + } + + + needs_complement_ = false; + for (i_t j = 0; j < lp.num_cols; j++) { + if (lp.lower[j] < 0) { + settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]); + exit(1); + } + const f_t uj = lp.upper[j]; + const f_t lj = lp.lower[j]; + if (uj != inf || lj != 0.0) { needs_complement_ = true; } + const f_t xstar_j = xstar[j]; + if (uj < inf) { + if (uj - xstar_j <= xstar_j - lj) { + has_upper_[j] = 1; + } else { + has_lower_[j] = 1; + } + continue; + } + + if (lj > -inf) { has_lower_[j] = 1; } + } +} + +template +i_t mixed_integer_rounding_cut_t::generate_cut( + const sparse_vector_t& a, + f_t beta, + const std::vector& upper_bounds, + const std::vector& lower_bounds, + const std::vector& var_types, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + auto f = [](f_t q_1, f_t q_2) -> f_t { + f_t q_1_hat = q_1 - std::floor(q_1); + f_t q_2_hat = q_2 - std::floor(q_2); + return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1); + }; + + auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; + + std::vector cut_indices; + cut_indices.reserve(a.i.size()); + f_t R; + if (!needs_complement_) { + R = (beta - std::floor(beta)) * std::ceil(beta); + + for (i_t k = 0; k < a.i.size(); k++) { + const i_t jj = a.i[k]; + f_t aj = a.x[k]; + if (var_types[jj] == variable_type_t::INTEGER) { + x_workspace_[jj] += f(aj, beta); + if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { + x_mark_[jj] = 1; + cut_indices.push_back(jj); + } + } else { + x_workspace_[jj] += h(aj); + if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { + x_mark_[jj] = 1; + cut_indices.push_back(jj); + } + } + } + } else { + // Compute r + f_t r = beta; + for (i_t k = 0; k < a.i.size(); k++) { + const i_t jj = a.i[k]; + if (has_upper_[jj]) { + const f_t uj = upper_bounds[jj]; + r -= uj * a.x[k]; + continue; + } + if (has_lower_[jj]) { + const f_t lj = lower_bounds[jj]; + r -= lj * a.x[k]; + } + } + + // Compute R + R = std::ceil(r) * (r - std::floor(r)); + for (i_t k = 0; k < a.i.size(); k++) { + const i_t jj = a.i[k]; + const f_t aj = a.x[k]; + if (has_upper_[jj]) { + const f_t uj = upper_bounds[jj]; + if (var_types[jj] == variable_type_t::INTEGER) { + R -= f(-aj, r) * uj; + } else { + R -= h(-aj) * uj; + } + } else if (has_lower_[jj]) { + const f_t lj = lower_bounds[jj]; + if (var_types[jj] == variable_type_t::INTEGER) { + R += f(aj, r) * lj; + } else { + R += h(aj) * lj; + } + } + } + + // Compute the cut coefficients + for (i_t k = 0; k < a.i.size(); k++) { + const i_t jj = a.i[k]; + const f_t aj = a.x[k]; + if (has_upper_[jj]) { + if (var_types[jj] == variable_type_t::INTEGER) { + // Upper intersect I + x_workspace_[jj] -= f(-aj, r); + if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { + x_mark_[jj] = 1; + cut_indices.push_back(jj); + } + } else { + // Upper intersect C + f_t h_j = h(-aj); 
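        // (Reviewer annotation, not part of this patch.)  For a continuous
        // variable complemented at its upper bound, the MIR coefficient used
        // here is -h(-a_j) = -max(-a_j, 0); it is recorded only when nonzero
        // so the workspace and marker bookkeeping below stay sparse.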
+ if (h_j != 0.0) { + x_workspace_[jj] -= h_j; + if (!x_mark_[jj]) { + x_mark_[jj] = 1; + cut_indices.push_back(jj); + } + } + } + } else if (var_types[jj] == variable_type_t::INTEGER) { + // I \ Upper + x_workspace_[jj] += f(aj, r); + if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { + x_mark_[jj] = 1; + cut_indices.push_back(jj); + } + } else { + // C \ Upper + f_t h_j = h(aj); + if (h_j != 0.0) { + x_workspace_[jj] += h_j; + if (!x_mark_[jj]) { + x_mark_[jj] = 1; + cut_indices.push_back(jj); + } + } + } + } + } + + cut.i.reserve(cut_indices.size()); + cut.x.reserve(cut_indices.size()); + for (i_t k = 0; k < cut_indices.size(); k++) { + const i_t jj = cut_indices[k]; + + // Check for small coefficients + const f_t aj = x_workspace_[jj]; + if (std::abs(aj) < 1e-6) { + if (aj >= 0.0 && upper_bounds[jj] < inf) { + // Move this to the right-hand side + R -= aj * upper_bounds[jj]; + continue; + } else if (aj <= 0.0 && lower_bounds[jj] > -inf) { + R += aj * lower_bounds[jj]; + continue; + } else { + } + } + cut.i.push_back(jj); + cut.x.push_back(x_workspace_[jj]); + } + + // Clear the workspace + for (i_t jj : cut_indices) { + x_workspace_[jj] = 0.0; + x_mark_[jj] = 0; + } + + + // The new cut is: g'*x >= R + // But we want to have it in the form h'*x <= b + cut.sort(); + + cut_rhs = R; + return 0; +} + +template +i_t add_cuts(const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms) + +{ + // Given a set of cuts: C*x <= d that are currently violated + // by the current solution x* (i.e. C*x* > d), this function + // adds the cuts into the LP and solves again. 
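// --- Illustrative note (reviewer annotation, not part of this patch) --------
// With p new cuts C x <= d and their slacks s >= 0 (zero objective,
// bounds [0, inf)), the augmented LP assembled below has the block form
//
//     A_new = [ A  0 ]      rhs_new = [ b ]
//             [ C  I ]                [ d ]
//
// Marking the new slacks basic extends the previous basis B to
//
//     B_new = [ B    0 ]
//             [ C_B  I ]
//
// where C_B = C(:, basic_list).  B_new is nonsingular whenever B is (it is
// block lower triangular with nonsingular diagonal blocks), which is why
// basis_update.append_cuts(C_B) below can keep the existing basis valid after
// the rows are appended and why the new slack variables are simply marked
// BASIC.  The caller then re-runs dual phase 2 from this basis (see the
// dual_phase2_with_advanced_basis call with initialize_basis = false earlier
// in branch_and_bound.cpp) rather than rebuilding a basis from scratch.
// ----------------------------------------------------------------------------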
+ +#ifdef CHECK_BASIS + { + csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); + basis_update.multiply_lu(Btest); + csc_matrix_t B(lp.num_rows, lp.num_rows, 1); + form_b(lp.A, basic_list, B); + csc_matrix_t Diff(lp.num_rows, lp.num_rows, 1); + add(Btest, B, 1.0, -1.0, Diff); + const f_t err = Diff.norm1(); + settings.log.printf("Before || B - L*U || %e\n", err); + if (err > 1e-6) { exit(1); } + } +#endif + + const i_t p = cuts.m; + if (cut_rhs.size() != static_cast(p)) { + settings.log.printf("cut_rhs must have the same number of rows as cuts\n"); + return -1; + } + settings.log.debug("Number of cuts %d\n", p); + settings.log.debug("Original lp rows %d\n", lp.num_rows); + settings.log.debug("Original lp cols %d\n", lp.num_cols); + + csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); + lp.A.to_compressed_row(new_A_row); + + new_A_row.append_rows(cuts); + + csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); + new_A_row.to_compressed_col(new_A_col); + + // Add in slacks variables for the new rows + lp.lower.resize(lp.num_cols + p); + lp.upper.resize(lp.num_cols + p); + lp.objective.resize(lp.num_cols + p); + i_t nz = new_A_col.col_start[lp.num_cols]; + new_A_col.col_start.resize(lp.num_cols + p + 1); + new_A_col.i.resize(nz + p); + new_A_col.x.resize(nz + p); + i_t k = lp.num_rows; + for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) { + new_A_col.col_start[j] = nz; + new_A_col.i[nz] = k++; + new_A_col.x[nz] = 1.0; + nz++; + lp.lower[j] = 0.0; + lp.upper[j] = inf; + lp.objective[j] = 0.0; + } + settings.log.debug("Done adding slacks\n"); + new_A_col.col_start[lp.num_cols + p] = nz; + new_A_col.n = lp.num_cols + p; + + lp.A = new_A_col; + i_t old_rows = lp.num_rows; + lp.num_rows += p; + i_t old_cols = lp.num_cols; + lp.num_cols += p; + + lp.rhs.resize(lp.num_rows); + for (i_t k = old_rows; k < old_rows + p; k++) { + const i_t h = k - old_rows; + lp.rhs[k] = cut_rhs[h]; + } + settings.log.debug("Done adding rhs\n"); + + // Construct C_B = C(:, basic_list) + std::vector C_col_degree(lp.num_cols, 0); + i_t cuts_nz = cuts.row_start[p]; + for (i_t q = 0; q < cuts_nz; q++) { + const i_t j = cuts.j[q]; + if (j >= lp.num_cols) { + settings.log.printf("j %d is greater than p %d\n", j, p); + return -1; + } + C_col_degree[j]++; + } + settings.log.debug("Done computing C_col_degree\n"); + + std::vector in_basis(old_cols, -1); + const i_t num_basic = static_cast(basic_list.size()); + i_t C_B_nz = 0; + for (i_t k = 0; k < num_basic; k++) { + const i_t j = basic_list[k]; + if (j < 0 || j >= old_cols) { + settings.log.printf( + "basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols); + return -1; + } + in_basis[j] = k; + if (j < cuts.n) { C_B_nz += C_col_degree[j]; } + } + settings.log.debug("Done estimating C_B_nz\n"); + + csr_matrix_t C_B(p, num_basic, C_B_nz); + nz = 0; + for (i_t i = 0; i < p; i++) { + C_B.row_start[i] = nz; + const i_t row_start = cuts.row_start[i]; + const i_t row_end = cuts.row_start[i + 1]; + for (i_t q = row_start; q < row_end; q++) { + const i_t j = cuts.j[q]; + const i_t j_basis = in_basis[j]; + if (j_basis == -1) { continue; } + C_B.j[nz] = j_basis; + C_B.x[nz] = cuts.x[q]; + nz++; + } + } + C_B.row_start[p] = nz; + + if (nz != C_B_nz) { + settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); + return -1; + } + settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); + + // Adjust the basis update to include the new cuts + basis_update.append_cuts(C_B); + + basic_list.resize(lp.num_rows, 0); + i_t h = old_cols; + for (i_t j = 
old_rows; j < lp.num_rows; j++) { + basic_list[j] = h++; + } + +#ifdef CHECK_BASIS + // Check the basis update + csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); + basis_update.multiply_lu(Btest); + + csc_matrix_t B(lp.num_rows, lp.num_rows, 1); + form_b(lp.A, basic_list, B); + + csc_matrix_t Diff(lp.num_rows, lp.num_rows, 1); + add(Btest, B, 1.0, -1.0, Diff); + const f_t err = Diff.norm1(); + settings.log.printf("After || B - L*U || %e\n", err); + if (err > 1e-6) { + settings.log.printf("Diff matrix\n"); + // Diff.print_matrix(); + exit(1); + } +#endif + // Adjust the vstatus + vstatus.resize(lp.num_cols); + for (i_t j = old_cols; j < lp.num_cols; j++) { + vstatus[j] = variable_status_t::BASIC; + } + + return 0; +} + +template +void remove_cuts(lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + i_t original_rows, + std::vector& var_types, + std::vector& vstatus, + std::vector& x, + std::vector& y, + std::vector& z, + std::vector& basic_list, + std::vector& nonbasic_list, + basis_update_mpf_t& basis_update) +{ + std::vector cuts_to_remove; + cuts_to_remove.reserve(lp.num_rows - original_rows); + std::vector slacks_to_remove; + slacks_to_remove.reserve(lp.num_rows - original_rows); + const f_t dual_tol = 1e-10; + for (i_t k = original_rows; k < lp.num_rows; k++) { + if (std::abs(y[k]) < dual_tol) { + const i_t row_start = Arow.col_start[k]; + const i_t row_end = Arow.col_start[k + 1]; + i_t last_slack = -1; + const f_t slack_tol = 1e-3; + for (i_t p = row_start; p < row_end; p++) { + const i_t jj = Arow.i[p]; + const i_t col_len = lp.A.col_start[jj + 1] - lp.A.col_start[jj]; + if (col_len == 1 && var_types[jj] == variable_type_t::CONTINUOUS && Arow.x[p] == 1.0 && + lp.lower[jj] == 0.0) { + if (vstatus[jj] == variable_status_t::BASIC && x[jj] > slack_tol) { last_slack = jj; } + } + } + if (last_slack != -1) { + cuts_to_remove.push_back(k); + slacks_to_remove.push_back(last_slack); + } + } + } + + if (cuts_to_remove.size() > 0) { + settings.log.printf("Removing %d cuts\n", cuts_to_remove.size()); + std::vector marked_rows(lp.num_rows, 0); + for (i_t i : cuts_to_remove) { + marked_rows[i] = 1; + } + std::vector marked_cols(lp.num_cols, 0); + for (i_t j : slacks_to_remove) { + marked_cols[j] = 1; + } + + std::vector new_rhs(lp.num_rows - cuts_to_remove.size()); + std::vector new_solution_y(lp.num_rows - cuts_to_remove.size()); + i_t h = 0; + for (i_t i = 0; i < lp.num_rows; i++) { + if (!marked_rows[i]) { + new_rhs[h] = lp.rhs[i]; + new_solution_y[h] = y[i]; + h++; + } + } + + Arow.remove_columns(marked_rows); + Arow.transpose(lp.A); + + std::vector new_objective(lp.num_cols - slacks_to_remove.size()); + std::vector new_lower(lp.num_cols - slacks_to_remove.size()); + std::vector new_upper(lp.num_cols - slacks_to_remove.size()); + std::vector new_var_types(lp.num_cols - slacks_to_remove.size()); + std::vector new_vstatus(lp.num_cols - slacks_to_remove.size()); + std::vector new_basic_list; + new_basic_list.reserve(lp.num_rows - slacks_to_remove.size()); + std::vector new_nonbasic_list; + new_nonbasic_list.reserve(nonbasic_list.size()); + std::vector new_solution_x(lp.num_cols - slacks_to_remove.size()); + std::vector new_solution_z(lp.num_cols - slacks_to_remove.size()); + h = 0; + for (i_t k = 0; k < lp.num_cols; k++) { + if (!marked_cols[k]) { + new_objective[h] = lp.objective[k]; + new_lower[h] = lp.lower[k]; + new_upper[h] = lp.upper[k]; + new_var_types[h] = var_types[k]; + new_vstatus[h] = vstatus[k]; + new_solution_x[h] = x[k]; + new_solution_z[h] = 
z[k]; + if (new_vstatus[h] != variable_status_t::BASIC) { + new_nonbasic_list.push_back(h); + } else { + new_basic_list.push_back(h); + } + h++; + } + } + lp.A.remove_columns(marked_cols); + lp.A.transpose(Arow); + lp.objective = new_objective; + lp.lower = new_lower; + lp.upper = new_upper; + lp.rhs = new_rhs; + var_types = new_var_types; + lp.num_cols = lp.A.n; + lp.num_rows = lp.A.m; + basic_list = new_basic_list; + nonbasic_list = new_nonbasic_list; + vstatus = new_vstatus; + x = new_solution_x; + y = new_solution_y; + z = new_solution_z; + + settings.log.printf("After removal %d rows %d columns %d nonzeros\n", + lp.num_rows, + lp.num_cols, + lp.A.col_start[lp.A.n]); + + basis_update.resize(lp.num_rows); + basis_update.refactor_basis(lp.A, settings, basic_list, nonbasic_list, vstatus); + } +} + + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template class cut_pool_t; +template class cut_generation_t; +template class mixed_integer_gomory_base_inequality_t; +template class mixed_integer_rounding_cut_t; + +template +int add_cuts(const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms); + +template +void remove_cuts(lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + int original_rows, + std::vector& var_types, + std::vector& vstatus, + std::vector& x, + std::vector& y, + std::vector& z, + std::vector& basic_list, + std::vector& nonbasic_list, + basis_update_mpf_t& basis_update); +#endif + +} // namespace cuopt::linear_programming::dual_simplex + + diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp new file mode 100644 index 000000000..14b6d0e1f --- /dev/null +++ b/cpp/src/dual_simplex/cuts.hpp @@ -0,0 +1,221 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ +#pragma once + +#include +#include +#include +#include +#include +#include + + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +f_t minimum_violation(const csr_matrix_t& C, + const std::vector& cut_rhs, + const std::vector& x) +{ + // Check to see that this is a cut i.e C*x > d + std::vector Cx(C.m); + csc_matrix_t C_col(C.m, C.n, 0); + C.to_compressed_col(C_col); + matrix_vector_multiply(C_col, 1.0, x, 0.0, Cx); + f_t min_cut_violation = inf; + for (i_t k = 0; k < Cx.size(); k++) { + if (Cx[k] <= cut_rhs[k]) { + printf("C*x <= d for cut %d. C*x %e rhs %e\n", k, Cx[k], cut_rhs[k]); + } + min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]); + } + return min_cut_violation; +} + +template +class cut_pool_t { + public: + cut_pool_t(i_t original_vars, const simplex_solver_settings_t& settings) + : original_vars_(original_vars), + settings_(settings), + cut_storage_(0, original_vars, 0), + rhs_storage_(0), + cut_age_(0), + scored_cuts_(0) + { + } + + // Add a cut in the form: cut'*x >= rhs. 
+ // We expect that the cut is violated by the current relaxation + // cut'*xstart < rhs + void add_cut(i_t n, const sparse_vector_t& cut, f_t rhs); + + void score_cuts(std::vector& x_relax); + + // We return the cuts in the form best_cuts*x <= best_rhs + i_t get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs); + + void age_cuts(); + + void drop_cuts(); + + i_t pool_size() const { return cut_storage_.m; } + + private: + f_t cut_distance(i_t row, const std::vector& x, f_t& cut_violation, f_t &cut_norm); + f_t cut_density(i_t row); + f_t cut_orthogonality(i_t i, i_t j); + + i_t original_vars_; + const simplex_solver_settings_t& settings_; + + csr_matrix_t cut_storage_; + std::vector rhs_storage_; + std::vector cut_age_; + + i_t scored_cuts_; + std::vector cut_distances_; + std::vector cut_norms_; + std::vector cut_orthogonality_; + std::vector cut_scores_; + std::vector best_cuts_; +}; + +template +class cut_generation_t { + public: + cut_generation_t(cut_pool_t& cut_pool) : cut_pool_(cut_pool) {} + + + void generate_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + basis_update_mpf_t& basis_update, + const std::vector& xstar, + const std::vector& basic_list, + const std::vector& nonbasic_list); + private: + + void generate_gomory_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + basis_update_mpf_t& basis_update, + const std::vector& xstar, + const std::vector& basic_list, + const std::vector& nonbasic_list); + + void generate_mir_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + const std::vector& xstar); + cut_pool_t& cut_pool_; +}; + +template +class mixed_integer_gomory_base_inequality_t { + public: + mixed_integer_gomory_base_inequality_t(const lp_problem_t& lp, + basis_update_mpf_t& basis_update, + const std::vector nonbasic_list) + : b_bar_(lp.num_rows, 0.0), + nonbasic_mark_(lp.num_cols, 0), + x_workspace_(lp.num_cols, 0.0), + x_mark_(lp.num_cols, 0) + { + basis_update.b_solve(lp.rhs, b_bar_); + for (i_t j : nonbasic_list) { + nonbasic_mark_[j] = 1; + } + } + + // Generates the base inequalities: C*x == d that will be turned into cuts + i_t generate_base_inequality(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& var_types, + basis_update_mpf_t& basis_update, + const std::vector& xstar, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t i, + sparse_vector_t& inequality, + f_t& inequality_rhs); + + private: + std::vector b_bar_; + std::vector nonbasic_mark_; + std::vector x_workspace_; + std::vector x_mark_; +}; + +template +class mixed_integer_rounding_cut_t { + public: + mixed_integer_rounding_cut_t(i_t num_vars, const simplex_solver_settings_t& settings) + : num_vars_(num_vars), + settings_(settings), + x_workspace_(num_vars, 0.0), + x_mark_(num_vars, 0), + has_lower_(num_vars, 0), + has_upper_(num_vars, 0), + needs_complement_(false) + { + } + + void initialize(const lp_problem_t& lp, const std::vector& xstar); + + i_t generate_cut(const sparse_vector_t& a, + f_t beta, + const std::vector& upper_bounds, + const std::vector& lower_bounds, + const std::vector& var_types, + sparse_vector_t& cut, + f_t& cut_rhs); + + private: + i_t num_vars_; + const simplex_solver_settings_t& settings_; + std::vector x_workspace_; + std::vector x_mark_; + std::vector 
has_lower_; + std::vector has_upper_; + bool needs_complement_; +}; + +template +i_t add_cuts(const simplex_solver_settings_t& settings, + const csr_matrix_t& cuts, + const std::vector& cut_rhs, + lp_problem_t& lp, + lp_solution_t& solution, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + std::vector& vstatus, + std::vector& edge_norms); + +template +void remove_cuts(lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + i_t original_rows, + std::vector& var_types, + std::vector& vstatus, + std::vector& x, + std::vector& y, + std::vector& z, + std::vector& basic_list, + std::vector& nonbasic_list, + basis_update_mpf_t& basis_update); + +} + diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index fcd8a6386..951945092 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -294,212 +294,6 @@ lp_status_t solve_linear_program_with_advanced_basis( return lp_status; } -template -lp_status_t solve_linear_program_with_cuts(const f_t start_time, - const simplex_solver_settings_t& settings, - const csr_matrix_t& cuts, - const std::vector& cut_rhs, - lp_problem_t& lp, - lp_solution_t& solution, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list, - std::vector& vstatus, - std::vector& edge_norms) -{ - // Given a set of cuts: C*x <= d that are currently violated - // by the current solution x* (i.e. C*x* > d), this function - // adds the cuts into the LP and solves again. - - -#ifdef CHECK_BASIS - { - csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); - basis_update.multiply_lu(Btest); - csc_matrix_t B(lp.num_rows, lp.num_rows, 1); - form_b(lp.A, basic_list, B); - csc_matrix_t Diff(lp.num_rows, lp.num_rows, 1); - add(Btest, B, 1.0, -1.0, Diff); - const f_t err = Diff.norm1(); - settings.log.printf("Before || B - L*U || %e\n", err); - if (err > 1e-6) { - exit(1); - } - } -#endif - - const i_t p = cuts.m; - if (cut_rhs.size() != static_cast(p)) { - settings.log.printf("cut_rhs must have the same number of rows as cuts\n"); - return lp_status_t::NUMERICAL_ISSUES; - } - settings.log.printf("Number of cuts %d\n", p); - settings.log.printf("Original lp rows %d\n", lp.num_rows); - settings.log.printf("Original lp cols %d\n", lp.num_cols); - - csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); - lp.A.to_compressed_row(new_A_row); - - new_A_row.append_rows(cuts); - - csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); - new_A_row.to_compressed_col(new_A_col); - - // Add in slacks variables for the new rows - lp.lower.resize(lp.num_cols + p); - lp.upper.resize(lp.num_cols + p); - lp.objective.resize(lp.num_cols + p); - i_t nz = new_A_col.col_start[lp.num_cols]; - new_A_col.col_start.resize(lp.num_cols + p + 1); - new_A_col.i.resize(nz + p); - new_A_col.x.resize(nz + p); - i_t k = lp.num_rows; - for (i_t j = lp.num_cols; j < lp.num_cols + p; j++) { - new_A_col.col_start[j] = nz; - new_A_col.i[nz] = k++; - new_A_col.x[nz] = 1.0; - nz++; - lp.lower[j] = 0.0; - lp.upper[j] = inf; - lp.objective[j] = 0.0; - } - settings.log.printf("Done adding slacks\n"); - new_A_col.col_start[lp.num_cols + p] = nz; - new_A_col.n = lp.num_cols + p; - - lp.A = new_A_col; - i_t old_rows = lp.num_rows; - lp.num_rows += p; - i_t old_cols = lp.num_cols; - lp.num_cols += p; - - - lp.rhs.resize(lp.num_rows); - for (i_t k = old_rows; k < old_rows + p; k++) { - const i_t h = k - old_rows; - lp.rhs[k] = cut_rhs[h]; - } - settings.log.printf("Done adding rhs\n"); - - 
// Construct C_B = C(:, basic_list) - std::vector C_col_degree(lp.num_cols, 0); - i_t cuts_nz = cuts.row_start[p]; - for (i_t q = 0; q < cuts_nz; q++) { - const i_t j = cuts.j[q]; - if (j >= lp.num_cols) { - settings.log.printf("j %d is greater than p %d\n", j, p); - exit(1); - } - C_col_degree[j]++; - } - settings.log.printf("Done computing C_col_degree\n"); - - std::vector in_basis(old_cols, -1); - const i_t num_basic = static_cast(basic_list.size()); - i_t C_B_nz = 0; - for (i_t k = 0; k < num_basic; k++) { - const i_t j = basic_list[k]; - if (j < 0 || j >= old_cols) { - settings.log.printf("basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols); - exit(1); - } - in_basis[j] = k; - if (j < cuts.n) - { - C_B_nz += C_col_degree[j]; - } - } - settings.log.printf("Done estimating C_B_nz\n"); - - csr_matrix_t C_B(p, num_basic, C_B_nz); - nz = 0; - for (i_t i = 0; i < p; i++) { - C_B.row_start[i] = nz; - const i_t row_start = cuts.row_start[i]; - const i_t row_end = cuts.row_start[i + 1]; - for (i_t q = row_start; q < row_end; q++) { - const i_t j = cuts.j[q]; - const i_t j_basis = in_basis[j]; - if (j_basis == -1) { continue; } - C_B.j[nz] = j_basis; - C_B.x[nz] = cuts.x[q]; - nz++; - } - } - C_B.row_start[p] = nz; - settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); - if (nz != C_B_nz) { exit(1); return lp_status_t::NUMERICAL_ISSUES; } - settings.log.printf("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); - - - // Adjust the basis update to include the new cuts - basis_update.append_cuts(C_B); - - basic_list.resize(lp.num_rows, 0); - i_t h = old_cols; - for (i_t j = old_rows; j < lp.num_rows; j++) { - basic_list[j] = h++; - } - -#ifdef CHECK_BASIS - // Check the basis update - csc_matrix_t Btest(lp.num_rows, lp.num_rows, 1); - basis_update.multiply_lu(Btest); - - csc_matrix_t B(lp.num_rows, lp.num_rows, 1); - form_b(lp.A, basic_list, B); - - csc_matrix_t Diff(lp.num_rows, lp.num_rows, 1); - add(Btest, B, 1.0, -1.0, Diff); - const f_t err = Diff.norm1(); - settings.log.printf("After || B - L*U || %e\n", err); - if (err > 1e-6) { - settings.log.printf("Diff matrix\n"); - //Diff.print_matrix(); - exit(1); - } -#endif - // Adjust the vstatus - vstatus.resize(lp.num_cols); - for (i_t j = old_cols; j < lp.num_cols; j++) { - vstatus[j] = variable_status_t::BASIC; - } - - // Adjust the solution - solution.x.resize(lp.num_cols, 0.0); - solution.y.resize(lp.num_rows, 0.0); - solution.z.resize(lp.num_cols, 0.0); - - // For now just clear the edge norms - edge_norms.clear(); - i_t iter = 0; - bool initialize_basis = false; - dual::status_t status = dual_phase2_with_advanced_basis(2, - 0, - initialize_basis, - start_time, - lp, - settings, - vstatus, - basis_update, - basic_list, - nonbasic_list, - solution, - iter, - edge_norms); - settings.log.printf("Phase 2 iterations %d\n", iter); - solution.iterations = iter; - lp_status_t lp_status; - if (status == dual::status_t::OPTIMAL) { lp_status = lp_status_t::OPTIMAL; } - if (status == dual::status_t::DUAL_UNBOUNDED) { lp_status = lp_status_t::INFEASIBLE; } - if (status == dual::status_t::TIME_LIMIT) { lp_status = lp_status_t::TIME_LIMIT; } - if (status == dual::status_t::ITERATION_LIMIT) { lp_status = lp_status_t::ITERATION_LIMIT; } - if (status == dual::status_t::CONCURRENT_LIMIT) { lp_status = lp_status_t::CONCURRENT_LIMIT; } - if (status == dual::status_t::NUMERICAL) { lp_status = lp_status_t::NUMERICAL_ISSUES; } - if (status == dual::status_t::CUTOFF) { lp_status = lp_status_t::CUTOFF; } - return lp_status; -} - 
template lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, @@ -868,19 +662,6 @@ template lp_status_t solve_linear_program_with_advanced_basis( std::vector& vstatus, std::vector& edge_norms); -template lp_status_t solve_linear_program_with_cuts( - const double start_time, - const simplex_solver_settings_t& settings, - const csr_matrix_t& cuts, - const std::vector& cut_rhs, - lp_problem_t& lp, - lp_solution_t& solution, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list, - std::vector& vstatus, - std::vector& edge_norms); - template lp_status_t solve_linear_program_with_barrier( const user_problem_t& user_problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/solve.hpp b/cpp/src/dual_simplex/solve.hpp index d659d6282..e96229784 100644 --- a/cpp/src/dual_simplex/solve.hpp +++ b/cpp/src/dual_simplex/solve.hpp @@ -61,19 +61,6 @@ lp_status_t solve_linear_program_with_advanced_basis( std::vector& vstatus, std::vector& edge_norms); -template -lp_status_t solve_linear_program_with_cuts(const f_t start_time, - const simplex_solver_settings_t& settings, - const csr_matrix_t& cuts, - const std::vector& cut_rhs, - lp_problem_t& lp, - lp_solution_t& solution, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list, - std::vector& vstatus, - std::vector& edge_norms); - template lp_status_t solve_linear_program_with_barrier(const user_problem_t& user_problem, const simplex_solver_settings_t& settings, diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 0da4f90e9..8398065a7 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -403,7 +403,6 @@ template i_t csr_matrix_t::append_row(const sparse_vector_t& c) { const i_t old_m = this->m; - const i_t n = this->n; const i_t old_nz = this->row_start[old_m]; const i_t c_nz = c.i.size(); const i_t new_nz = old_nz + c_nz; @@ -625,6 +624,7 @@ void csr_matrix_t::check_matrix() const const i_t row_end = this->row_start[i + 1]; for (i_t p = row_start; p < row_end; ++p) { const i_t j = this->j[p]; + if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); } if (col_marker[j] == i) { printf("CSR Error: repeated column index %d in row %d\n", j, i); } col_marker[j] = i; } diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 78f2b9fa0..cbc59b2ab 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -409,7 +409,7 @@ solution_t diversity_manager_t::run_solver() run_fj_alone(sol); return sol; } - rins.enable(); + //rins.enable(); generate_solution(timer.remaining_time(), false); if (timer.check_time_limit()) { diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index 1efc971b2..23522f07d 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -245,6 +245,7 @@ void rins_t::run_rins() branch_and_bound_settings.num_bfs_threads = 1; branch_and_bound_settings.num_diving_threads = 1; branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.max_cut_passes = 0; branch_and_bound_settings.solution_callback = [this, &rins_solution_queue]( std::vector& solution, f_t objective) { rins_solution_queue.push_back(solution); diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu 
index 68fb0c698..cc9f9f6c5 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -226,8 +226,8 @@ solution_t mip_solver_t::run_solver() std::ref(branch_and_bound_solution)); } - auto bb_status = branch_and_bound_status_future.get(); - exit(1); + //auto bb_status = branch_and_bound_status_future.get(); + //CUOPT_LOG_INFO("BB status: %d", bb_status); // Start the primal heuristics auto sol = dm.run_solver(); diff --git a/cpp/tests/dual_simplex/unit_tests/solve.cpp b/cpp/tests/dual_simplex/unit_tests/solve.cpp index cd66e63f1..66a2347d1 100644 --- a/cpp/tests/dual_simplex/unit_tests/solve.cpp +++ b/cpp/tests/dual_simplex/unit_tests/solve.cpp @@ -326,6 +326,7 @@ TEST(dual_simplex, dual_variable_greater_than) EXPECT_NEAR(solution.z[1], 0.0, 1e-6); } +#if 0 TEST(dual_simplex, simple_cuts) { // minimize x + y + 2 z @@ -421,6 +422,8 @@ TEST(dual_simplex, simple_cuts) printf("cuts m %d n %d\n", cuts.m, cuts.n); std::vector cut_rhs(1); cut_rhs[0] = -1.0 / 3.0; + + std::vector var_types; EXPECT_EQ(cuopt::linear_programming::dual_simplex::solve_linear_program_with_cuts(start_time, settings, cuts, @@ -431,7 +434,8 @@ TEST(dual_simplex, simple_cuts) basic_list, nonbasic_list, vstatus, - edge_norms), + edge_norms, + var_types), cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); printf("Solution objective: %e\n", solution.objective); printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); @@ -460,7 +464,8 @@ TEST(dual_simplex, simple_cuts) basic_list, nonbasic_list, vstatus, - edge_norms), + edge_norms, + var_types), cuopt::linear_programming::dual_simplex::lp_status_t::OPTIMAL); printf("Solution objective: %e\n", solution.objective); printf("Solution x: %e %e %e\n", solution.x[0], solution.x[1], solution.x[2]); @@ -470,5 +475,6 @@ TEST(dual_simplex, simple_cuts) EXPECT_NEAR(solution.x[2], 1.0 / 3.0, 1e-6); } +#endif } // namespace cuopt::linear_programming::dual_simplex::test From 369e75512cbbf06d16c8ba263b9fd8438f0e53b6 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 19 Dec 2025 15:49:11 -0800 Subject: [PATCH 16/45] Only perform cuts on the original variables. 
Substitute out slack variables --- cpp/src/dual_simplex/branch_and_bound.cpp | 33 ++-- cpp/src/dual_simplex/cuts.cpp | 175 +++++++++++++++++----- cpp/src/dual_simplex/cuts.hpp | 18 ++- cpp/src/dual_simplex/sparse_matrix.cpp | 5 + cpp/src/dual_simplex/sparse_vector.cpp | 11 ++ cpp/src/dual_simplex/sparse_vector.hpp | 2 + 6 files changed, 194 insertions(+), 50 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 945d84215..6f6917fec 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -306,7 +306,7 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu std::string gap = user_mip_gap(user_obj, user_lower); settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", + "H %+13.6e %+10.6e %s %9.2f\n", user_obj, user_lower, gap.c_str(), @@ -1139,6 +1139,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut csc_matrix_t Arow(1, 1, 1); original_lp_.A.transpose(Arow); + status_ = mip_exploration_status_t::RUNNING; + lower_bound_ceiling_ = inf; + if (num_fractional != 0) { settings_.log.printf( " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | Gap " @@ -1187,7 +1190,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #endif // Generate cuts and add them to the cut pool - cut_generation.generate_cuts(original_lp_, settings_, Arow, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); + cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); // Score the cuts cut_pool.score_cuts(root_relax_soln_.x); @@ -1204,7 +1207,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #endif // Resolve the LP with the new cuts - settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", + settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", num_cuts, cuts_to_add.row_start[cuts_to_add.m], cut_pool.pool_size(), @@ -1216,6 +1219,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cuts_to_add, cut_rhs, original_lp_, + new_slacks_, root_relax_soln_, basis_update, basic_list, @@ -1233,12 +1237,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector bounds_changed(original_lp_.num_cols, true); std::vector row_sense; - +#ifdef CHECK_MATRICES settings_.log.printf("Before A check\n"); original_lp_.A.check_matrix(); - settings_.log.printf("Before A transpose\n"); +#endif original_lp_.A.transpose(Arow); - settings_.log.printf("After A transpose\n"); bool feasible = bound_strengthening(row_sense, settings_, original_lp_, Arow, var_types_, bounds_changed); @@ -1270,7 +1273,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut iter, edge_norms_); - settings_.log.printf("Cut LP iterations %d. A nz %d\n", + settings_.log.debug("Cut LP iterations %d. 
A nz %d\n", iter, original_lp_.A.col_start[original_lp_.A.n]); stats_.total_lp_iters += root_relax_soln_.iterations; @@ -1286,6 +1289,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut remove_cuts(original_lp_, settings_, Arow, + new_slacks_, original_rows, var_types_, root_vstatus_, @@ -1300,14 +1304,18 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); // TODO: Get upper bound from heuristics - std::string gap = num_fractional != 0 ? " - " : "0.0%"; - f_t obj = num_fractional != 0 ? inf : compute_user_objective(original_lp_, root_objective_); + f_t upper_bound = get_upper_bound(); + f_t obj = num_fractional != 0 ? get_upper_bound() : compute_user_objective(original_lp_, root_objective_); + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = compute_user_objective(original_lp_, root_objective_); + std::string gap = num_fractional != 0 ? user_mip_gap(user_obj, user_lower) : "0.0%"; + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", 0, 0, - obj, - compute_user_objective(original_lp_, root_objective_), + user_obj, + user_lower, num_fractional, 0, stats_.total_lp_iters.load(), @@ -1361,8 +1369,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut stats_.last_log = tic(); active_subtrees_ = 0; min_diving_queue_size_ = 4 * settings_.num_diving_threads; - status_ = mip_exploration_status_t::RUNNING; - lower_bound_ceiling_ = inf; + #pragma omp parallel num_threads(settings_.num_threads) { diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 4b8fc94dd..8fbf1a275 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -12,14 +12,10 @@ namespace cuopt::linear_programming::dual_simplex { template -void cut_pool_t::add_cut(i_t n, const sparse_vector_t& cut, f_t rhs) +void cut_pool_t::add_cut(const sparse_vector_t& cut, f_t rhs) { // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool - if (n > cut_storage_.n) { - cut_storage_.n = n; - } - for (i_t p = 0; p < cut.i.size(); p++) { const i_t j = cut.i[p]; if (j >= original_vars_) { @@ -153,7 +149,7 @@ template i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs) { best_cuts.m = 0; - best_cuts.n = cut_storage_.n; + best_cuts.n = original_vars_; best_cuts.row_start.clear(); best_cuts.j.clear(); best_cuts.x.clear(); @@ -190,6 +186,7 @@ template void cut_generation_t::generate_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + const std::vector& new_slacks, const std::vector& var_types, basis_update_mpf_t& basis_update, const std::vector& xstar, @@ -198,7 +195,7 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, { // Generate Gomory Cuts generate_gomory_cuts( - lp, settings, Arow, var_types, basis_update, xstar, basic_list, nonbasic_list); + lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); // Generate MIR cuts @@ -209,11 +206,12 @@ template void cut_generation_t::generate_mir_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + const std::vector& new_slacks, const std::vector& var_types, const std::vector& xstar) { mixed_integer_rounding_cut_t mir(lp.num_cols, settings); - mir.initialize(lp, xstar); + mir.initialize(lp, new_slacks, xstar); for (i_t i = 0; i < lp.num_rows; i++) { sparse_vector_t inequality(Arow, i); @@ -262,16 
+260,18 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } settings.log.printf("Adding MIR cut %d\n", i); - cut_pool_.add_cut(lp.num_cols, cut, cut_rhs); + cut_pool_.add_cut(cut, cut_rhs); } } } + template void cut_generation_t::generate_gomory_cuts( const lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + const std::vector& new_slacks, const std::vector& var_types, basis_update_mpf_t& basis_update, const std::vector& xstar, @@ -281,7 +281,7 @@ void cut_generation_t::generate_gomory_cuts( mixed_integer_gomory_base_inequality_t gomory(lp, basis_update, nonbasic_list); mixed_integer_rounding_cut_t mir(lp.num_cols, settings); - mir.initialize(lp, xstar); + mir.initialize(lp, new_slacks, xstar); for (i_t i = 0; i < lp.num_rows; i++) { sparse_vector_t inequality(lp.num_cols, 0); @@ -310,15 +310,10 @@ void cut_generation_t::generate_gomory_cuts( bool A_valid = false; f_t cut_A_distance = 0.0; if (mir_status == 0) { + mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs); // Check that the cut is violated - f_t dot = 0.0; - f_t cut_norm = 0.0; - for (i_t k = 0; k < cut_A.i.size(); k++) { - const i_t jj = cut_A.i[k]; - const f_t aj = cut_A.x[k]; - dot += aj * xstar[jj]; - cut_norm += aj * aj; - } + f_t dot = cut_A.dot(xstar); + f_t cut_norm = cut_A.norm2_squared(); if (dot >= cut_A_rhs) { settings.log.printf("Cut %d is not violated. Skipping\n", i); continue; @@ -340,15 +335,10 @@ void cut_generation_t::generate_gomory_cuts( bool B_valid = false; f_t cut_B_distance = 0.0; if (mir_status == 0) { + mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs); // Check that the cut is violated - f_t dot = 0.0; - f_t cut_norm = 0.0; - for (i_t k = 0; k < cut_B.i.size(); k++) { - const i_t jj = cut_B.i[k]; - const f_t aj = cut_B.x[k]; - dot += aj * xstar[jj]; - cut_norm += aj * aj; - } + f_t dot = cut_B.dot(xstar); + f_t cut_norm = cut_B.norm2_squared(); if (dot >= cut_B_rhs) { settings.log.printf("Cut %d is not violated. 
Skipping\n", i); continue; @@ -359,9 +349,9 @@ void cut_generation_t::generate_gomory_cuts( } if ((cut_A_distance > cut_B_distance) && A_valid) { - cut_pool_.add_cut(lp.num_cols, cut_A, cut_A_rhs); + cut_pool_.add_cut(cut_A, cut_A_rhs); } else if (B_valid) { - cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs); + cut_pool_.add_cut(cut_B, cut_B_rhs); } } } @@ -526,7 +516,8 @@ i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( template void mixed_integer_rounding_cut_t::initialize(const lp_problem_t& lp, - const std::vector& xstar) + const std::vector& new_slacks, + const std::vector& xstar) { if (lp.num_cols != num_vars_) { @@ -537,6 +528,17 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t::generate_cut( return 0; } +template +void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_t& lp, + csc_matrix_t& Arow, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + // Remove slacks from the cut + // So that the cut is only over the original variables + bool found_slack = false; + i_t cut_nz = 0; + std::vector cut_indices; + cut_indices.reserve(cut.i.size()); + for (i_t k = 0; k < cut.i.size(); k++) { + const i_t j = cut.i[k]; + const f_t cj = cut.x[k]; + if (is_slack_[j]) { + found_slack = true; + // Do the substitution + // Slack variable s_j participates in row i of the constraint matrix + // Row i is of the form: + // sum_{k != j} A(i, k) * x_k + A(i, j) * s_j = rhs_i + /// So we have that + // s_j = rhs_i - sum_{k != j} A(i, k) * x_k + + // Our cut is of the form: + // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs + // So the cut becomes + // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{k != j} A(i, k) * x_k) >= cut_rhs + // This is equivalent to: + // sum_{k != j} C(k) * x_k + sum_{k != j} -C(k) * A(i, k) * x_k >= cut_rhs - C(j) * rhs_i + const i_t i = slack_rows_[j]; + cut_rhs -= cj * lp.rhs[i]; + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + for (i_t q = row_start; q < row_end; q++) { + const i_t k = Arow.i[q]; + if (k != j) { + const f_t aik = Arow.x[q]; + x_workspace_[k] -= cj * aik; + if (!x_mark_[k]) { + x_mark_[k] = 1; + cut_indices.push_back(k); + cut_nz++; + } + } + } + + } else { + x_workspace_[j] += cj; + if (!x_mark_[j]) { + x_mark_[j] = 1; + cut_indices.push_back(j); + cut_nz++; + } + } + } + + if (found_slack) { + //printf("Found slack. 
Nz increased from %d to %d: %d\n", cut.i.size(), cut_nz, cut_nz - cut.i.size()); + cut.i.reserve(cut_nz); + cut.x.reserve(cut_nz); + cut.i.clear(); + cut.x.clear(); + + for (i_t k = 0; k < cut_nz; k++) { + const i_t j = cut_indices[k]; + cut.i.push_back(j); + cut.x.push_back(x_workspace_[j]); + } + // Sort the cut + cut.sort(); + + // Clear the workspace + for (i_t jj : cut_indices) { + x_workspace_[jj] = 0.0; + x_mark_[jj] = 0; + } + } +} + template i_t add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, const std::vector& cut_rhs, lp_problem_t& lp, + std::vector& new_slacks, lp_solution_t& solution, basis_update_mpf_t& basis_update, std::vector& basic_list, @@ -764,7 +847,11 @@ i_t add_cuts(const simplex_solver_settings_t& settings, csr_matrix_t new_A_row(lp.num_rows, lp.num_cols, 1); lp.A.to_compressed_row(new_A_row); - new_A_row.append_rows(cuts); + i_t append_status = new_A_row.append_rows(cuts); + if (append_status != 0) { + settings.log.printf("append_rows error: %d\n", append_status); + exit(1); + } csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); new_A_row.to_compressed_col(new_A_col); @@ -786,6 +873,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, lp.lower[j] = 0.0; lp.upper[j] = inf; lp.objective[j] = 0.0; + new_slacks.push_back(j); } settings.log.debug("Done adding slacks\n"); new_A_col.col_start[lp.num_cols + p] = nz; @@ -895,6 +983,7 @@ template void remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + std::vector& new_slacks, i_t original_rows, std::vector& var_types, std::vector& vstatus, @@ -910,6 +999,12 @@ void remove_cuts(lp_problem_t& lp, std::vector slacks_to_remove; slacks_to_remove.reserve(lp.num_rows - original_rows); const f_t dual_tol = 1e-10; + + std::vector is_slack(lp.num_cols, 0); + for (i_t j : new_slacks) { + is_slack[j] = 1; + } + for (i_t k = original_rows; k < lp.num_rows; k++) { if (std::abs(y[k]) < dual_tol) { const i_t row_start = Arow.col_start[k]; @@ -917,11 +1012,9 @@ void remove_cuts(lp_problem_t& lp, i_t last_slack = -1; const f_t slack_tol = 1e-3; for (i_t p = row_start; p < row_end; p++) { - const i_t jj = Arow.i[p]; - const i_t col_len = lp.A.col_start[jj + 1] - lp.A.col_start[jj]; - if (col_len == 1 && var_types[jj] == variable_type_t::CONTINUOUS && Arow.x[p] == 1.0 && - lp.lower[jj] == 0.0) { - if (vstatus[jj] == variable_status_t::BASIC && x[jj] > slack_tol) { last_slack = jj; } + const i_t j = Arow.i[p]; + if (is_slack[j]) { + if (vstatus[j] == variable_status_t::BASIC && x[j] > slack_tol) { last_slack = j; } } } if (last_slack != -1) { @@ -967,6 +1060,7 @@ void remove_cuts(lp_problem_t& lp, new_nonbasic_list.reserve(nonbasic_list.size()); std::vector new_solution_x(lp.num_cols - slacks_to_remove.size()); std::vector new_solution_z(lp.num_cols - slacks_to_remove.size()); + std::vector new_is_slacks(lp.num_cols - slacks_to_remove.size(), 0); h = 0; for (i_t k = 0; k < lp.num_cols; k++) { if (!marked_cols[k]) { @@ -977,6 +1071,7 @@ void remove_cuts(lp_problem_t& lp, new_vstatus[h] = vstatus[k]; new_solution_x[h] = x[k]; new_solution_z[h] = z[k]; + new_is_slacks[h] = is_slack[k]; if (new_vstatus[h] != variable_status_t::BASIC) { new_nonbasic_list.push_back(h); } else { @@ -994,6 +1089,14 @@ void remove_cuts(lp_problem_t& lp, var_types = new_var_types; lp.num_cols = lp.A.n; lp.num_rows = lp.A.m; + + new_slacks.clear(); + new_slacks.resize(lp.num_cols); + for (i_t j = 0; j < lp.num_cols; j++) { + if (new_is_slacks[j]) { + new_slacks.push_back(j); + } + } 
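+  // With the cut rows and their slack columns deleted, swap the re-indexed basis
+  // lists, variable statuses and solution vectors back into place; everything
+  // downstream now refers to the compacted row/column numbering.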
basic_list = new_basic_list; nonbasic_list = new_nonbasic_list; vstatus = new_vstatus; @@ -1023,6 +1126,7 @@ int add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, const std::vector& cut_rhs, lp_problem_t& lp, + std::vector& new_slacks, lp_solution_t& solution, basis_update_mpf_t& basis_update, std::vector& basic_list, @@ -1034,6 +1138,7 @@ template void remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + std::vector& new_slacks, int original_rows, std::vector& var_types, std::vector& vstatus, diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index 14b6d0e1f..9113b926e 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -54,7 +54,7 @@ class cut_pool_t { // Add a cut in the form: cut'*x >= rhs. // We expect that the cut is violated by the current relaxation // cut'*xstart < rhs - void add_cut(i_t n, const sparse_vector_t& cut, f_t rhs); + void add_cut(const sparse_vector_t& cut, f_t rhs); void score_cuts(std::vector& x_relax); @@ -96,6 +96,7 @@ class cut_generation_t { void generate_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + const std::vector& new_slacks, const std::vector& var_types, basis_update_mpf_t& basis_update, const std::vector& xstar, @@ -106,6 +107,7 @@ class cut_generation_t { void generate_gomory_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + const std::vector& new_slacks, const std::vector& var_types, basis_update_mpf_t& basis_update, const std::vector& xstar, @@ -115,6 +117,7 @@ class cut_generation_t { void generate_mir_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + const std::vector& new_slacks, const std::vector& var_types, const std::vector& xstar); cut_pool_t& cut_pool_; @@ -171,7 +174,9 @@ class mixed_integer_rounding_cut_t { { } - void initialize(const lp_problem_t& lp, const std::vector& xstar); + void initialize(const lp_problem_t& lp, + const std::vector& new_slacks, + const std::vector& xstar); i_t generate_cut(const sparse_vector_t& a, f_t beta, @@ -181,6 +186,11 @@ class mixed_integer_rounding_cut_t { sparse_vector_t& cut, f_t& cut_rhs); + void substitute_slacks(const lp_problem_t& lp, + csc_matrix_t& Arow, + sparse_vector_t& cut, + f_t& cut_rhs); + private: i_t num_vars_; const simplex_solver_settings_t& settings_; @@ -188,6 +198,8 @@ class mixed_integer_rounding_cut_t { std::vector x_mark_; std::vector has_lower_; std::vector has_upper_; + std::vector is_slack_; + std::vector slack_rows_; bool needs_complement_; }; @@ -196,6 +208,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, const std::vector& cut_rhs, lp_problem_t& lp, + std::vector& new_slacks, lp_solution_t& solution, basis_update_mpf_t& basis_update, std::vector& basic_list, @@ -207,6 +220,7 @@ template void remove_cuts(lp_problem_t& lp, const simplex_solver_settings_t& settings, csc_matrix_t& Arow, + std::vector& new_slacks, i_t original_rows, std::vector& var_types, std::vector& vstatus, diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 8398065a7..6160cf1b4 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -365,6 +365,7 @@ i_t csr_matrix_t::append_rows(const csr_matrix_t& C) const i_t old_nz = this->row_start[old_m]; const i_t C_row = C.m; if (C.n > n) { + printf("append_rows error: C.n %d n %d\n", C.n, 
n); return -1; } const i_t C_nz = C.row_start[C_row]; @@ -566,6 +567,10 @@ i_t csc_matrix_t::check_matrix() const { std::vector row_marker(this->m, -1); for (i_t j = 0; j < this->n; ++j) { + if (j >= col_start.size()) { + printf("Col start too small size %ld n %d\n", col_start.size(), this->n); + return -1; + } const i_t col_start = this->col_start[j]; const i_t col_end = this->col_start[j + 1]; if (col_start > col_end || col_start > this->col_start[this->n]) { diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index 46bae286c..3ba981539 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -121,6 +121,17 @@ void sparse_vector_t::inverse_permute_vector(const std::vector& p y.i = i_perm; } +template +f_t sparse_vector_t::dot(const std::vector& x_dense) const +{ + const i_t nz = i.size(); + f_t dot = 0.0; + for (i_t k = 0; k < nz; ++k) { + dot += x[k] * x_dense[i[k]]; + } + return dot; +} + template f_t sparse_vector_t::sparse_dot(const csc_matrix_t& Y, i_t y_col) const { diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index afa559926..3badeed12 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -40,6 +40,8 @@ class sparse_vector_t { void inverse_permute_vector(const std::vector& p); // inverse permute a sparse vector into another sparse vector void inverse_permute_vector(const std::vector& p, sparse_vector_t& y) const; + // compute the dot product of a sparse vector with a dense vector + f_t dot(const std::vector& x) const; // compute the dot product of a sparse vector with a column of a CSC matrix f_t sparse_dot(const csc_matrix_t& Y, i_t y_col) const; // ensure the coefficients in the sparse vectory are sorted in terms of increasing index From b48e05b2dc2fcab55df692d622af7591992199e6 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 5 Jan 2026 09:00:39 -0800 Subject: [PATCH 17/45] Knapsack cuts from before the winter break --- cpp/src/dual_simplex/branch_and_bound.cpp | 24 +- cpp/src/dual_simplex/cuts.cpp | 496 ++++++++++++++++++++-- cpp/src/dual_simplex/cuts.hpp | 82 +++- cpp/src/dual_simplex/dense_matrix.hpp | 2 + 4 files changed, 560 insertions(+), 44 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 6f6917fec..9d4af809a 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1149,7 +1149,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } cut_pool_t cut_pool(original_lp_.num_cols, settings_); - cut_generation_t cut_generation(cut_pool); + cut_generation_t cut_generation(cut_pool, original_lp_, settings_, Arow, new_slacks_, var_types_); for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { @@ -1191,23 +1191,37 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Generate cuts and add them to the cut pool cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); + settings_.log.printf("Generated cuts\n"); // Score the cuts cut_pool.score_cuts(root_relax_soln_.x); + settings_.log.printf("Scored cuts\n"); // Get the best cuts from the cut pool csr_matrix_t cuts_to_add(0, original_lp_.num_cols, 0); std::vector cut_rhs; - i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs); + std::vector cut_types; + i_t num_cuts = 
cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); + settings_.log.printf("Got best cuts\n"); + print_cut_types(cut_types, settings_); cuts_to_add.check_matrix(); -#ifdef PRINT_MIN_CUT_VIOLATION +#ifdef PRINT_CUTS + csc_matrix_t cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]); + cuts_to_add.to_compressed_col(cuts_to_add_col); + cuts_to_add_col.print_matrix(); + for (i_t i = 0; i < cut_rhs.size(); i++) { + printf("cut_rhs[%d] = %g\n", i, cut_rhs[i]); + } +#endif + +#if 1 f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x); settings_.log.printf("Min cut violation %e\n", min_cut_violation); #endif // Resolve the LP with the new cuts - settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", + settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", num_cuts, cuts_to_add.row_start[cuts_to_add.m], cut_pool.pool_size(), @@ -1242,6 +1256,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_.A.check_matrix(); #endif original_lp_.A.transpose(Arow); +#if 1 bool feasible = bound_strengthening(row_sense, settings_, original_lp_, Arow, var_types_, bounds_changed); @@ -1249,6 +1264,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("Bound strengthening failed\n"); exit(1); } +#endif // Adjust the solution root_relax_soln_.x.resize(original_lp_.num_cols, 0.0); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 8fbf1a275..606c46f24 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -6,13 +6,14 @@ /* clang-format on */ #include +#include namespace cuopt::linear_programming::dual_simplex { template -void cut_pool_t::add_cut(const sparse_vector_t& cut, f_t rhs) +void cut_pool_t::add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs) { // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool @@ -26,7 +27,9 @@ void cut_pool_t::add_cut(const sparse_vector_t& cut, f_t rhs } cut_storage_.append_row(cut); + settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1); rhs_storage_.push_back(rhs); + cut_type_.push_back(cut_type); cut_age_.push_back(0); } @@ -108,6 +111,8 @@ void cut_pool_t::score_cuts(std::vector& x_relax) const f_t min_orthogonality = 0.5; const f_t min_cut_distance = 1e-4; best_cuts_.reserve(std::min(max_cuts, cut_storage_.m)); + best_cuts_.clear(); + scored_cuts_ = 0; while (scored_cuts_ < max_cuts && !sorted_indices.empty()) { const i_t i = sorted_indices[0]; @@ -119,7 +124,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) if (cut_age_[i] > 0) { settings_.log.printf("Adding cut with age %d\n", cut_age_[i]); } - //settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); + settings_.log.printf("Scored cuts %d. 
Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); best_cuts_.push_back(i); scored_cuts_++; @@ -146,7 +151,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) } template -i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs) +i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, std::vector& best_cut_types) { best_cuts.m = 0; best_cuts.n = original_vars_; @@ -162,6 +167,7 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std:: best_cuts.append_row(cut); //settings_.log.printf("Best cuts nz %d\n", best_cuts.row_start[best_cuts.m]); best_rhs.push_back(-rhs_storage_[i]); + best_cut_types.push_back(cut_type_[i]); } return static_cast(best_cuts_.size()); @@ -182,6 +188,331 @@ void cut_pool_t::drop_cuts() // TODO: Implement this } +template +knapsack_generation_t::knapsack_generation_t( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types) +{ + knapsack_constraints_.reserve(lp.num_rows); + + is_slack_.resize(lp.num_cols, 0); + for (i_t j : new_slacks) { + is_slack_[j] = 1; + } + + for (i_t i = 0; i < lp.num_rows; i++) { + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + bool is_knapsack = true; + f_t sum_pos = 0.0; + //printf("i %d ", i); + for (i_t p = row_start; p < row_end; p++) { + const i_t j = Arow.i[p]; + if (is_slack_[j]) { continue; } + const f_t aj = Arow.x[p]; + //printf(" j %d (%e < %e) aj %e\n", j, lp.lower[j], lp.upper[j], aj); + if (std::abs(aj - std::round(aj)) > settings.integer_tol) { + is_knapsack = false; + break; + } + if (var_types[j] != variable_type_t::INTEGER || lp.lower[j] != 0.0 || lp.upper[j] != 1.0) { + is_knapsack = false; + break; + } + if (aj < 0.0) { + is_knapsack = false; + break; + } + sum_pos += aj; + } + // printf("sum_pos %e\n", sum_pos); + + if (is_knapsack) { + const f_t beta = lp.rhs[i]; + printf("Knapsack constraint %d beta %e sum_pos %e\n", i, beta, sum_pos); + if (std::abs(beta - std::round(beta)) <= settings.integer_tol) { + if (beta >= 0.0 && beta <= sum_pos) { + knapsack_constraints_.push_back(i); + } + } + } + } + + i_t num_knapsack_constraints = knapsack_constraints_.size(); + settings.log.printf("Number of knapsack constraints %d\n", num_knapsack_constraints); +} + +template +i_t knapsack_generation_t::generate_knapsack_cuts( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types, + const std::vector& xstar, + i_t knapsack_row, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + // Get the row associated with the knapsack constraint + sparse_vector_t knapsack_inequality(Arow, knapsack_row); + f_t knapsack_rhs = lp.rhs[knapsack_row]; + + // Remove the slacks from the inequality + f_t seperation_rhs = 0.0; + printf(" Knapsack : "); + for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { + const i_t j = knapsack_inequality.i[k]; + if (is_slack_[j]) { + knapsack_inequality.x[k] = 0.0; + } else { + printf(" %g x%d +", knapsack_inequality.x[k], j); + seperation_rhs += knapsack_inequality.x[k]; + } + } + printf(" <= %g\n", knapsack_rhs); + seperation_rhs -= (knapsack_rhs + 1); + + printf("\t"); + for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { + const i_t j = knapsack_inequality.i[k]; + if (!is_slack_[j]) { + if (std::abs(xstar[j]) > 1e-3) { + printf("x_relax[%d]= %g ", j, xstar[j]); + } + } + } + printf("\n"); + + printf("seperation_rhs 
%g\n", seperation_rhs); + if (seperation_rhs <= 0.0) { return -1; } + + std::vector values; + values.resize(knapsack_inequality.i.size() - 1); + std::vector weights; + weights.resize(knapsack_inequality.i.size() - 1); + i_t h = 0; + f_t objective_constant = 0.0; + for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { + const i_t j = knapsack_inequality.i[k]; + if (!is_slack_[j]) { + const f_t vj = 1.0 - xstar[j]; + objective_constant += vj; + values[h] = vj; + weights[h] = knapsack_inequality.x[k]; + h++; + } + } + std::vector solution; + solution.resize(knapsack_inequality.i.size() - 1); + + printf("Calling solve_knapsack_problem\n"); + f_t objective = solve_knapsack_problem(values, weights, seperation_rhs, solution); + if (objective != objective) { return -1; } + printf("objective %e objective_constant %e\n", objective, objective_constant); + + f_t seperation_value = -objective + objective_constant; + printf("seperation_value %e\n", seperation_value); + const f_t tol = 1e-6; + if (seperation_value >= 1.0 - tol) { return -1; } + + i_t cover_size = 0; + for (i_t k = 0; k < solution.size(); k++) { + if (solution[k] == 0.0) { cover_size++; } + } + + cut.i.clear(); + cut.x.clear(); + cut.i.reserve(cover_size); + cut.x.reserve(cover_size); + + h = 0; + for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { + const i_t j = knapsack_inequality.i[k]; + if (!is_slack_[j]) { + if (solution[h] == 0.0) { + cut.i.push_back(j); + cut.x.push_back(-1.0); + } + h++; + } + } + cut_rhs = -cover_size + 1; + cut.sort(); + + // The cut is in the form: - sum_{j in cover} x_j >= -cover_size + 1 + // Which is equivalent to: sum_{j in cover} x_j <= cover_size - 1 + + // Verify the cut is violated + f_t dot = cut.dot(xstar); + f_t violation = dot - cut_rhs; + printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); + + if (violation <= tol) { return -1; } + return 0; +} + +template +f_t knapsack_generation_t::greedy_knapsack_problem(const std::vector& values, + const std::vector& weights, + f_t rhs, + std::vector& solution) +{ + i_t n = weights.size(); + solution.assign(n, 0.0); + + // Build permutation + std::vector perm(n); + std::iota(perm.begin(), perm.end(), 0); + + std::vector ratios; + ratios.resize(n); + for (i_t i = 0; i < n; i++) { + ratios[i] = values[i] / weights[i]; + } + + // Sort by value / weight ratio + std::sort(perm.begin(), perm.end(), [&](i_t i, i_t j) { return ratios[i] > ratios[j]; }); + + // Greedy select items with the best value / weight ratio until the remaining capacity is exhausted + f_t remaining = rhs; + f_t total_value = 0.0; + + for (i_t j : perm) { + if (weights[j] <= remaining) { + solution[j] = 1.0; + remaining -= weights[j]; + total_value += values[j]; + } + } + + // Best single-item fallback + f_t best_single_value = 0.0; + i_t best_single_idx = -1; + + for (i_t j = 0; j < n; ++j) { + if (weights[j] <= rhs && values[j] > best_single_value) { + best_single_value = values[j]; + best_single_idx = j; + } + } + + if (best_single_value > total_value) { + solution.assign(n, 0.0); + solution[best_single_idx] = 1.0; + return best_single_value; + } + + return total_value; +} + +template +f_t knapsack_generation_t::solve_knapsack_problem(const std::vector& values, + const std::vector& weights, + f_t rhs, + std::vector& solution) +{ + // Solve the knapsack problem + // maximize sum_{j=0}^n values[j] * solution[j] + // subject to sum_{j=0}^n weights[j] * solution[j] <= rhs + // values: values of the items + // weights: weights of the items + // return the value of the 
solution + + // Using approximate dynamic programming + + i_t n = weights.size(); + f_t objective = std::numeric_limits::quiet_NaN(); + + // Compute the maximum value + f_t vmax = *std::max_element(values.begin(), values.end()); + + // Check if all the values are integers + bool all_integers = true; + const f_t integer_tol = 1e-5; + for (i_t j = 0; j < n; j++) { + if (std::abs(values[j] - std::round(values[j])) > integer_tol) { + all_integers = false; + break; + } + } + + printf("all_integers %d\n", all_integers); + + // Compute the scaling factor and comptue the scaled integer values + f_t scale = 1.0; + std::vector scaled_values(n); + if (all_integers) { + for (i_t j = 0; j < n; j++) { + scaled_values[j] = static_cast(std::floor(values[j])); + } + } else { + const f_t epsilon = 0.1; + scale = epsilon * vmax / static_cast(n); + if (scale <= 0.0) { return std::numeric_limits::quiet_NaN(); } + printf("scale %g epsilon %g vmax %g n %d\n", scale, epsilon, vmax, n); + for (i_t i = 0; i < n; ++i) { + scaled_values[i] = static_cast(std::floor(values[i] / scale)); + //printf("scaled_values[%d] %d values[%d] %g\n", i, scaled_values[i], i, values[i]); + } + } + + i_t sum_value = std::accumulate(scaled_values.begin(), scaled_values.end(), 0); + const i_t INT_INF = std::numeric_limits::max() / 2; + printf("sum value %d\n", sum_value); + const i_t max_size = 10000; + if (sum_value <= 0.0 || sum_value >= max_size) { + printf("sum value %d is negative or too large using greedy solution\n", sum_value); + return greedy_knapsack_problem(values, weights, rhs, solution); + } + + // dp(j, v) = minimum weight using first j items to get value v + dense_matrix_t dp(n + 1, sum_value + 1, INT_INF); + dense_matrix_t take(n + 1, sum_value + 1, 0); + dp(0, 0) = 0; + printf("start dp\n"); + + // 4. Dynamic programming + for (int j = 1; j <= n; ++j) { + for (int v = 0; v <= sum_value; ++v) { + // Do not take item i-1 + dp(j, v) = dp(j - 1, v); + + // Take item j-1 if possible + if (v >= scaled_values[j - 1]) { + i_t candidate = dp(j - 1, v - scaled_values[j - 1]) + static_cast(std::floor(weights[j - 1])); + if (candidate < dp(j, v)) { + dp(j, v) = candidate; + take(j, v) = 1; + } + } + } + } + + // 5. Find best achievable value within capacity + i_t best_value = 0; + for (i_t v = 0; v <= sum_value; ++v) { + if (dp(n, v) <= rhs) { best_value = v; } + } + + // 6. 
Backtrack to recover solution + i_t v = best_value; + for (i_t j = n; j >= 1; --j) { + if (take(j, v)) { + solution[j - 1] = 1.0; + v -= scaled_values[j - 1]; + } else { + solution[j - 1] = 0.0; + } + } + + objective = best_value * scale; + return objective; +} + template void cut_generation_t::generate_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -196,12 +527,41 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, // Generate Gomory Cuts generate_gomory_cuts( lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); + settings.log.printf("Generated Gomory cuts\n"); + // Generate Knapsack cuts + generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + settings.log.printf("Generated Knapsack cuts\n"); // Generate MIR cuts // generate_mir_cuts(lp, settings, Arow, var_types, xstar); } +template +void cut_generation_t::generate_knapsack_cuts( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types, + const std::vector& xstar) +{ + if (knapsack_generation_.num_knapsack_constraints() > 0) { + for (i_t knapsack_row : knapsack_generation_.get_knapsack_constraints()) { + sparse_vector_t cut(lp.num_cols, 0); + f_t cut_rhs; + i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts( + lp, settings, Arow, new_slacks, var_types, xstar, knapsack_row, cut, cut_rhs); + if (knapsack_status == 0) { + settings.log.printf("Adding Knapsack cut %d\n", knapsack_row); + cut_pool_.add_cut(cut_type_t::KNAPSACK, cut, cut_rhs); + } else { + settings.log.printf("Knapsack cut %d is not violated. Skipping\n", knapsack_row); + } + } + } +} + template void cut_generation_t::generate_mir_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -260,7 +620,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } settings.log.printf("Adding MIR cut %d\n", i); - cut_pool_.add_cut(cut, cut_rhs); + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); } } } @@ -310,16 +670,25 @@ void cut_generation_t::generate_gomory_cuts( bool A_valid = false; f_t cut_A_distance = 0.0; if (mir_status == 0) { - mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs); - // Check that the cut is violated - f_t dot = cut_A.dot(xstar); - f_t cut_norm = cut_A.norm2_squared(); - if (dot >= cut_A_rhs) { - settings.log.printf("Cut %d is not violated. Skipping\n", i); + if (cut_A.i.size() == 0) { + settings.log.printf("No coefficients in cut A\n"); continue; } - cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm); - A_valid = true; + mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs); + if (cut_A.i.size() == 0) { + settings.log.printf("No coefficients in cut A after substituting slacks\n"); + A_valid = false; + } else { + // Check that the cut is violated + f_t dot = cut_A.dot(xstar); + f_t cut_norm = cut_A.norm2_squared(); + if (dot >= cut_A_rhs) { + settings.log.printf("Cut %d is not violated. 
Skipping\n", i); + continue; + } + cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm); + A_valid = true; + } //cut_pool_.add_cut(lp.num_cols, cut, cut_rhs); } @@ -335,23 +704,34 @@ void cut_generation_t::generate_gomory_cuts( bool B_valid = false; f_t cut_B_distance = 0.0; if (mir_status == 0) { - mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs); - // Check that the cut is violated - f_t dot = cut_B.dot(xstar); - f_t cut_norm = cut_B.norm2_squared(); - if (dot >= cut_B_rhs) { - settings.log.printf("Cut %d is not violated. Skipping\n", i); + if (cut_B.i.size() == 0) { + settings.log.printf("No coefficients in cut B\n"); continue; } - cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm); - B_valid = true; + mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs); + if (cut_B.i.size() == 0) { + settings.log.printf("No coefficients in cut B after substituting slacks\n"); + B_valid = false; + } else { + // Check that the cut is violated + f_t dot = cut_B.dot(xstar); + f_t cut_norm = cut_B.norm2_squared(); + if (dot >= cut_B_rhs) { + settings.log.printf("Cut %d is not violated. Skipping\n", i); + continue; + } + cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm); + B_valid = true; + } // cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs); } if ((cut_A_distance > cut_B_distance) && A_valid) { - cut_pool_.add_cut(cut_A, cut_A_rhs); + printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid); + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs); } else if (B_valid) { - cut_pool_.add_cut(cut_B, cut_B_rhs); + printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid); + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs); } } } @@ -720,6 +1100,12 @@ i_t mixed_integer_rounding_cut_t::generate_cut( cut.sort(); cut_rhs = R; + + if (cut.i.size() == 0) { + settings_.log.printf("No coefficients in cut\n"); + return -1; + } + return 0; } @@ -735,38 +1121,62 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ i_t cut_nz = 0; std::vector cut_indices; cut_indices.reserve(cut.i.size()); + +#if 1 + for (i_t j = 0; j < x_workspace_.size(); j++) { + if (x_workspace_[j] != 0.0) { + printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + exit(1); + } + if (x_mark_[j] != 0) { + printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + exit(1); + } + } +#endif + + + for (i_t k = 0; k < cut.i.size(); k++) { const i_t j = cut.i[k]; const f_t cj = cut.x[k]; if (is_slack_[j]) { found_slack = true; + // Do the substitution // Slack variable s_j participates in row i of the constraint matrix // Row i is of the form: - // sum_{k != j} A(i, k) * x_k + A(i, j) * s_j = rhs_i + // sum_{k != j} A(i, k) * x_k + s_j = rhs_i /// So we have that // s_j = rhs_i - sum_{k != j} A(i, k) * x_k // Our cut is of the form: // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs // So the cut becomes - // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{k != j} A(i, k) * x_k) >= cut_rhs + // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs // This is equivalent to: - // sum_{k != j} C(k) * x_k + sum_{k != j} -C(k) * A(i, k) * x_k >= cut_rhs - C(j) * rhs_i + // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j) * A(i, h) * x_h >= cut_rhs - C(j) * rhs_i const i_t i = slack_rows_[j]; + //printf("Found slack %d in cut. lo %e up %e. 
Slack row %d\n", j, lp.lower[j], lp.upper[j], i); cut_rhs -= cj * lp.rhs[i]; const i_t row_start = Arow.col_start[i]; const i_t row_end = Arow.col_start[i + 1]; for (i_t q = row_start; q < row_end; q++) { - const i_t k = Arow.i[q]; - if (k != j) { - const f_t aik = Arow.x[q]; - x_workspace_[k] -= cj * aik; - if (!x_mark_[k]) { - x_mark_[k] = 1; - cut_indices.push_back(k); + const i_t h = Arow.i[q]; + if (h != j) { + const f_t aih = Arow.x[q]; + x_workspace_[h] -= cj * aih; + if (!x_mark_[h]) { + x_mark_[h] = 1; + cut_indices.push_back(h); cut_nz++; } + } else { + const f_t aij = Arow.x[q]; + if (aij != 1.0) { + printf("Slack row %d has non-unit coefficient for variable %d\n", i, j); + exit(1); + } } } @@ -794,13 +1204,27 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ } // Sort the cut cut.sort(); + } - // Clear the workspace - for (i_t jj : cut_indices) { - x_workspace_[jj] = 0.0; - x_mark_[jj] = 0; + // Clear the workspace + for (i_t jj : cut_indices) { + x_workspace_[jj] = 0.0; + x_mark_[jj] = 0; + } + + +#if 1 + for (i_t j = 0; j < x_workspace_.size(); j++) { + if (x_workspace_[j] != 0.0) { + printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + exit(1); + } + if (x_mark_[j] != 0) { + printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + exit(1); } } +#endif } template diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index 9113b926e..ec877c274 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -18,6 +18,30 @@ namespace cuopt::linear_programming::dual_simplex { +enum cut_type_t : int8_t { + MIXED_INTEGER_GOMORY = 0, + MIXED_INTEGER_ROUNDING = 1, + KNAPSACK = 2, +}; + +template +void print_cut_types(const std::vector& cut_types, const simplex_solver_settings_t& settings) { + i_t num_gomory_cuts = 0; + i_t num_mir_cuts = 0; + i_t num_knapsack_cuts = 0; + for (i_t i = 0; i < cut_types.size(); i++) { + if (cut_types[i] == cut_type_t::MIXED_INTEGER_GOMORY) { + num_gomory_cuts++; + } else if (cut_types[i] == cut_type_t::MIXED_INTEGER_ROUNDING) { + num_mir_cuts++; + } else if (cut_types[i] == cut_type_t::KNAPSACK) { + num_knapsack_cuts++; + } + } + settings.log.printf("Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d\n", num_gomory_cuts, num_mir_cuts, num_knapsack_cuts); +} + + template f_t minimum_violation(const csr_matrix_t& C, const std::vector& cut_rhs, @@ -32,6 +56,7 @@ f_t minimum_violation(const csr_matrix_t& C, for (i_t k = 0; k < Cx.size(); k++) { if (Cx[k] <= cut_rhs[k]) { printf("C*x <= d for cut %d. C*x %e rhs %e\n", k, Cx[k], cut_rhs[k]); + exit(1); } min_cut_violation = std::min(min_cut_violation, Cx[k] - cut_rhs[k]); } @@ -47,6 +72,7 @@ class cut_pool_t { cut_storage_(0, original_vars, 0), rhs_storage_(0), cut_age_(0), + cut_type_(0), scored_cuts_(0) { } @@ -54,12 +80,12 @@ class cut_pool_t { // Add a cut in the form: cut'*x >= rhs. 
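  // (For example, a cover cut over {x1, x2, x3} enters the pool as
  //  -x1 - x2 - x3 >= -2, the ">=" form of x1 + x2 + x3 <= 2.)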
// We expect that the cut is violated by the current relaxation // cut'*xstart < rhs - void add_cut(const sparse_vector_t& cut, f_t rhs); + void add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs); void score_cuts(std::vector& x_relax); // We return the cuts in the form best_cuts*x <= best_rhs - i_t get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs); + i_t get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, std::vector& best_cut_types); void age_cuts(); @@ -78,6 +104,7 @@ class cut_pool_t { csr_matrix_t cut_storage_; std::vector rhs_storage_; std::vector cut_age_; + std::vector cut_type_; i_t scored_cuts_; std::vector cut_distances_; @@ -88,10 +115,49 @@ class cut_pool_t { }; template -class cut_generation_t { +class knapsack_generation_t { public: - cut_generation_t(cut_pool_t& cut_pool) : cut_pool_(cut_pool) {} + knapsack_generation_t(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types); + + i_t generate_knapsack_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types, + const std::vector& xstar, + i_t knapsack_row, + sparse_vector_t& cut, + f_t& cut_rhs); + + i_t num_knapsack_constraints() const { return knapsack_constraints_.size(); } + const std::vector& get_knapsack_constraints() const { return knapsack_constraints_; } + + private: + f_t greedy_knapsack_problem(const std::vector& values, const std::vector& weights, f_t rhs, std::vector& solution); + f_t solve_knapsack_problem(const std::vector& values, const std::vector& weights, f_t rhs, std::vector& solution); + + + std::vector is_slack_; + std::vector knapsack_constraints_; +}; + +template +class cut_generation_t { + public: + cut_generation_t(cut_pool_t& cut_pool, + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types) + : cut_pool_(cut_pool), knapsack_generation_(lp, settings, Arow, new_slacks, var_types) + { + } void generate_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -120,7 +186,15 @@ class cut_generation_t { const std::vector& new_slacks, const std::vector& var_types, const std::vector& xstar); + + void generate_knapsack_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csc_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types, + const std::vector& xstar); cut_pool_t& cut_pool_; + knapsack_generation_t knapsack_generation_; }; template diff --git a/cpp/src/dual_simplex/dense_matrix.hpp b/cpp/src/dual_simplex/dense_matrix.hpp index b1fc521b3..3f5287113 100644 --- a/cpp/src/dual_simplex/dense_matrix.hpp +++ b/cpp/src/dual_simplex/dense_matrix.hpp @@ -18,6 +18,8 @@ class dense_matrix_t { public: dense_matrix_t(i_t rows, i_t cols) : m(rows), n(cols), values(rows * cols, 0.0) {} + dense_matrix_t(i_t rows, i_t cols, f_t value) : m(rows), n(cols), values(rows * cols, value) {} + void resize(i_t rows, i_t cols) { m = rows; From 78cb1dcb4a2ce8ddb056d9657b4be751716e68cc Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Mon, 5 Jan 2026 17:16:40 -0800 Subject: [PATCH 18/45] Turn off sub-mip. Fix edge norms which was leading to crazy depth on b-ball and swath1. Add reliability branching as an option. 
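Reliability branching, as added here, only trusts a variable's pseudo-costs once it has been branched on a few times; until then the candidate is probed with a short, iteration-limited LP solve in each direction, which is what trial_branching supplies. A minimal sketch of that selection rule, with illustrative names and a caller-supplied trial solve standing in for this patch's pc_ and dual_phase2 machinery:

#include <algorithm>
#include <functional>
#include <vector>

struct pseudo_cost_entry {
  double sum_down = 0.0, sum_up = 0.0;  // accumulated objective gain per unit of fractionality
  int n_down = 0, n_up = 0;             // number of observations in each direction
};

// candidates: fractional integer variables; frac[j] in (0,1) is the fractional part of x*_j;
// trial(j, up) returns the objective degradation of a short, iteration-limited LP solve after
// branching j up (ceil) or down (floor). reliability >= 1 is assumed.
inline int select_branch_variable(const std::vector<int>& candidates,
                                  const std::vector<double>& frac,
                                  std::vector<pseudo_cost_entry>& pc,
                                  const std::function<double(int, bool)>& trial,
                                  int reliability = 4)
{
  int best_var     = candidates.empty() ? -1 : candidates.front();
  double best_score = -1.0;
  for (int j : candidates) {
    pseudo_cost_entry& p = pc[j];
    if (p.n_down < reliability || p.n_up < reliability) {
      // Pseudo-costs not yet reliable: probe both children like strong branching,
      // but with the cheap, truncated LP solve.
      p.sum_down += trial(j, false) / frac[j];
      p.n_down++;
      p.sum_up += trial(j, true) / (1.0 - frac[j]);
      p.n_up++;
    }
    const double down_est = (p.sum_down / p.n_down) * frac[j];
    const double up_est   = (p.sum_up / p.n_up) * (1.0 - frac[j]);
    // Product rule: prefer variables that move the bound in both directions.
    const double score = std::max(down_est, 1e-6) * std::max(up_est, 1e-6);
    if (score > best_score) {
      best_score = score;
      best_var   = j;
    }
  }
  return best_var;
}

The iteration limit keeps each probe cheap, which is the same trade-off trial_branching makes below with its 200-iteration cap.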
Finally seeing good performance on swath1 --- cpp/src/dual_simplex/branch_and_bound.cpp | 203 +++++++++++++++++- cpp/src/dual_simplex/branch_and_bound.hpp | 1 + cpp/src/dual_simplex/cuts.cpp | 45 +++- cpp/src/dual_simplex/phase2.cpp | 51 ++++- cpp/src/dual_simplex/presolve.cpp | 2 +- cpp/src/dual_simplex/pseudo_costs.cpp | 134 ++++++++++++ cpp/src/dual_simplex/pseudo_costs.hpp | 10 + cpp/src/dual_simplex/sparse_vector.cpp | 24 +++ cpp/src/dual_simplex/sparse_vector.hpp | 2 + cpp/src/mip/diversity/recombiners/sub_mip.cuh | 2 +- 10 files changed, 450 insertions(+), 24 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 9d4af809a..65403404b 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -216,6 +216,14 @@ branch_and_bound_t::branch_and_bound_t( convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); + num_integer_variables_ = 0; + for (i_t j = 0; j < original_lp_.num_cols; j++) { + if (var_types_[j] == variable_type_t::INTEGER) { + num_integer_variables_++; + } + } + printf("num_integer_variables %d\n", num_integer_variables_); + mutex_upper_.lock(); upper_bound_ = inf; mutex_upper_.unlock(); @@ -475,6 +483,31 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t residual = original_lp_.rhs; + matrix_vector_multiply(original_lp_.A, 1.0, incumbent_.x, -1.0, residual); + printf("|| A*x - b ||_inf %e\n", vector_norm_inf(residual)); + auto hash_combine_f = [](size_t seed, f_t x) { + seed ^= std::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + }; + printf("incumbent size %ld original lp cols %d\n", incumbent_.x.size(), original_lp_.num_cols); + i_t n = original_lp_.num_cols; + size_t seed = n; + fprintf(fid, "%d\n", n); + for (i_t j = 0; j < n; ++j) { + fprintf(fid, "%.17g\n", incumbent_.x[j]); + seed = hash_combine_f(seed, incumbent_.x[j]); + } + printf("Solution hash: %20x\n", seed); + fclose(fid); + } +#endif if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) { settings_.log.printf("Optimal solution found within absolute MIP gap tolerance (%.1e)\n", settings_.absolute_mip_gap_tol); @@ -580,6 +613,16 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& { f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + if (node_ptr->depth >= num_integer_variables_) { + printf("Depth %d >= num_integer_variables %d\n", node_ptr->depth, num_integer_variables_); + mip_node_t* parent = node_ptr->parent; + while (parent != nullptr) { + printf("Parent depth %d\n", parent->depth); + printf("Parent branch var %d dir %d lower %e upper %e\n", parent->branch_var, parent->branch_dir, parent->branch_var_lower, parent->branch_var_upper); + parent = parent->parent; + } + } + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); std::vector& leaf_vstatus = node_ptr->vstatus; assert(leaf_vstatus.size() == leaf_problem.num_cols); @@ -602,12 +645,11 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& bound_strengthening(row_sense, lp_settings, leaf_problem, Arow, var_types_, bounds_changed); dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; if (feasible) { i_t node_iter = 0; f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - lp_status = dual_phase2(2, 0, lp_start_time, @@ -650,6 
+692,23 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& i_t leaf_num_fractional = fractional_variables(settings_, leaf_solution.x, var_types_, leaf_fractional); + // Check if any of the fractional variables were fixed to their bounds + for (i_t j : leaf_fractional) + { + if (leaf_problem.lower[j] == leaf_problem.upper[j]) + { + printf( + "Node %d: Fixed variable %d has a fractional value %e. Lower %e upper %e. Variable status %d\n", + node_ptr->node_id, + j, + leaf_solution.x[j], + leaf_problem.lower[j], + leaf_problem.upper[j], + leaf_vstatus[j]); + } + } + + f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); node_ptr->lower_bound = leaf_objective; search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); @@ -670,8 +729,14 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& } else if (leaf_objective <= upper_bound + abs_fathom_tol) { // Choose fractional variable to branch on + +#ifdef RELIABLE_BRANCHING + const i_t branch_var = + pc_.reliable_variable_selection(leaf_problem, lp_settings, var_types_, leaf_vstatus, leaf_edge_norms, leaf_fractional, leaf_solution.x, leaf_objective, lp_settings.log); +#else const i_t branch_var = pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); +#endif assert(leaf_vstatus.size() == leaf_problem.num_cols); search_tree.branch( @@ -1057,6 +1122,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut stats_.nodes_unexplored = 0; stats_.nodes_explored = 0; + printf("Branch and bound solve called\n"); + if (guess_.size() != 0) { std::vector crushed_guess; crush_primal_solution(original_problem_, original_lp_, guess_, new_slacks_, crushed_guess); @@ -1151,6 +1218,96 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cut_pool_t cut_pool(original_lp_.num_cols, settings_); cut_generation_t cut_generation(cut_pool, original_lp_, settings_, Arow, new_slacks_, var_types_); + std::vector saved_solution; +#if 1 + printf("Trying to open solution.dat\n"); + FILE* fid = NULL; + fid = fopen("solution.dat", "r"); + if (fid != NULL) + { + i_t n_solution_dat; + i_t count = fscanf(fid, "%d\n", &n_solution_dat); + printf("Solution.dat variables %d =? %d =? 
%ld count %d\n", n_solution_dat, original_lp_.num_cols, solution.x.size(), count); + bool good = true; + if (count == 1 && n_solution_dat == original_lp_.num_cols) + { + printf("Opened solution.dat with %d number of variables\n", n_solution_dat); + saved_solution.resize(n_solution_dat); + for (i_t j = 0; j < n_solution_dat; j++) + { + count = fscanf(fid, "%lf", &saved_solution[j]); + if (count != 1) + { + printf("bad read solution.dat: j %d count %d\n", j, count); + good = false; + break; + } + } + } else { + good = false; + } + fclose(fid); + + if (!good) + { + saved_solution.resize(0); + printf("Solution.dat is bad\n"); + } + else + { + printf("Read solution file\n"); + + auto hash_combine_f = [](size_t seed, f_t x) { + seed ^= std::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + }; + size_t seed = original_lp_.num_cols; + for (i_t j = 0; j < original_lp_.num_cols; ++j) + { + seed = hash_combine_f(seed, saved_solution[j]); + } + printf("Saved solution hash: %20x\n", seed); + + FILE* fid = NULL; + fid = fopen("solution.dat.2", "w"); + if (fid != NULL) { + printf("Writing solution.dat.2\n"); + i_t n = original_lp_.num_cols; + size_t seed = n; + fprintf(fid, "%d\n", n); + for (i_t j = 0; j < n; ++j) { + fprintf(fid, "%.17g\n", saved_solution[j]); + } + fclose(fid); + } + + // Compute || A * x - b ||_inf + std::vector residual = original_lp_.rhs; + matrix_vector_multiply(original_lp_.A, 1.0, saved_solution, -1.0, residual); + printf("Saved solution: || A*x - b ||_inf %e\n", vector_norm_inf(residual)); + f_t infeas = 0; + for (i_t j = 0; j < original_lp_.num_cols; j++) { + if (saved_solution[j] < original_lp_.lower[j] - 1e-6) { + f_t curr_infeas = (original_lp_.lower[j] - saved_solution[j]); + infeas += curr_infeas; + printf( + "j: %d saved solution %e lower %e\n", j, saved_solution[j], original_lp_.lower[j]); + } + if (saved_solution[j] > original_lp_.upper[j] + 1e-6) { + f_t curr_infeas = (saved_solution[j] - original_lp_.upper[j]); + infeas += curr_infeas; + printf( + "j %d saved solution %e upper %e\n", j, saved_solution[j], original_lp_.upper[j]); + } + } + printf("Bound infeasibility %e\n", infeas); + } + } else { + printf("Could not open solution.dat\n"); + } +#endif + + for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { #ifdef PRINT_SOLUTION @@ -1191,18 +1348,20 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Generate cuts and add them to the cut pool cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); - settings_.log.printf("Generated cuts\n"); // Score the cuts cut_pool.score_cuts(root_relax_soln_.x); - settings_.log.printf("Scored cuts\n"); // Get the best cuts from the cut pool csr_matrix_t cuts_to_add(0, original_lp_.num_cols, 0); std::vector cut_rhs; std::vector cut_types; i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); - settings_.log.printf("Got best cuts\n"); - print_cut_types(cut_types, settings_); + if (num_cuts == 0) + { + settings_.log.printf("No cuts found\n"); + break; + } + //print_cut_types(cut_types, settings_); cuts_to_add.check_matrix(); @@ -1217,9 +1376,26 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #if 1 f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x); - settings_.log.printf("Min cut violation %e\n", min_cut_violation); + if (min_cut_violation < 1e-6) { + settings_.log.printf("Min cut violation %e\n", 
min_cut_violation); + } #endif + // Check against saved solution + if (saved_solution.size() > 0) { + csc_matrix_t cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]); + cuts_to_add.to_compressed_col(cuts_to_add_col); + std::vector Cx(cuts_to_add.m); + matrix_vector_multiply(cuts_to_add_col, 1.0, saved_solution, 0.0, Cx); + for (i_t k = 0; k < num_cuts; k++) { + //printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]); + if (Cx[k] > cut_rhs[k] + 1e-6) { + printf("Cut %d is violated by saved solution. Cx %e cut_rhs %e\n", k, Cx[k], cut_rhs[k]); + exit(1); + } + } + } + // Resolve the LP with the new cuts settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", num_cuts, @@ -1340,6 +1516,19 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } + if (edge_norms_.size() != original_lp_.num_cols) + { + edge_norms_.resize(original_lp_.num_cols, -1.0); + } + for (i_t k = 0; k < original_lp_.num_rows; k++) + { + const i_t j = basic_list[k]; + if (edge_norms_[j] < 0.0) + { + edge_norms_[j] = 1e-4; + } + } + pc_.resize(original_lp_.num_cols); strong_branching(original_lp_, settings_, diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index ccbad335a..0943155a8 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -145,6 +145,7 @@ class branch_and_bound_t { lp_problem_t original_lp_; std::vector new_slacks_; std::vector var_types_; + i_t num_integer_variables_; // Local lower bounds for each thread std::vector> local_lower_bounds_; diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 606c46f24..643a1ee33 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -26,8 +26,10 @@ void cut_pool_t::add_cut(cut_type_t cut_type, const sparse_vector_t cut_squeezed; + cut.squeeze(cut_squeezed); + cut_storage_.append_row(cut_squeezed); + //settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1); rhs_storage_.push_back(rhs); cut_type_.push_back(cut_type); cut_age_.push_back(0); @@ -124,7 +126,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) if (cut_age_[i] > 0) { settings_.log.printf("Adding cut with age %d\n", cut_age_[i]); } - settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); + //settings_.log.printf("Scored cuts %d. 
Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); best_cuts_.push_back(i); scored_cuts_++; @@ -206,6 +208,7 @@ knapsack_generation_t::knapsack_generation_t( for (i_t i = 0; i < lp.num_rows; i++) { const i_t row_start = Arow.col_start[i]; const i_t row_end = Arow.col_start[i + 1]; + if (row_end - row_start < 3) { continue; } bool is_knapsack = true; f_t sum_pos = 0.0; //printf("i %d ", i); @@ -527,11 +530,11 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, // Generate Gomory Cuts generate_gomory_cuts( lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); - settings.log.printf("Generated Gomory cuts\n"); + //settings.log.printf("Generated Gomory cuts\n"); // Generate Knapsack cuts generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); - settings.log.printf("Generated Knapsack cuts\n"); + //settings.log.printf("Generated Knapsack cuts\n"); // Generate MIR cuts // generate_mir_cuts(lp, settings, Arow, var_types, xstar); @@ -727,10 +730,10 @@ void cut_generation_t::generate_gomory_cuts( } if ((cut_A_distance > cut_B_distance) && A_valid) { - printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid); + //printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid); cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs); } else if (B_valid) { - printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid); + //printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid); cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs); } } @@ -918,6 +921,10 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t::substitute_slacks(const lp_problem_ } else { const f_t aij = Arow.x[q]; if (aij != 1.0) { - printf("Slack row %d has non-unit coefficient for variable %d\n", i, j); + printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j); exit(1); } } @@ -1199,6 +1206,21 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ for (i_t k = 0; k < cut_nz; k++) { const i_t j = cut_indices[k]; + + // Check for small coefficients + const f_t aj = x_workspace_[j]; + if (std::abs(aj) < 1e-6) { + if (aj >= 0.0 && lp.upper[j] < inf) { + // Move this to the right-hand side + cut_rhs -= aj * lp.upper[j]; + continue; + } else if (aj <= 0.0 && lp.lower[j] > -inf) { + cut_rhs += aj * lp.lower[j]; + continue; + } else { + } + } + cut.i.push_back(j); cut.x.push_back(x_workspace_[j]); } @@ -1449,7 +1471,7 @@ void remove_cuts(lp_problem_t& lp, } if (cuts_to_remove.size() > 0) { - settings.log.printf("Removing %d cuts\n", cuts_to_remove.size()); + //settings.log.printf("Removing %d cuts\n", cuts_to_remove.size()); std::vector marked_rows(lp.num_rows, 0); for (i_t i : cuts_to_remove) { marked_rows[i] = 1; @@ -1515,7 +1537,7 @@ void remove_cuts(lp_problem_t& lp, lp.num_rows = lp.A.m; new_slacks.clear(); - new_slacks.resize(lp.num_cols); + new_slacks.reserve(lp.num_cols); for (i_t j = 0; j < lp.num_cols; j++) { if (new_is_slacks[j]) { new_slacks.push_back(j); @@ -1528,7 +1550,8 @@ void remove_cuts(lp_problem_t& lp, y = new_solution_y; z = new_solution_z; - settings.log.printf("After removal %d rows %d columns %d nonzeros\n", + settings.log.printf("Removed %d cuts. 
After removal %d rows %d columns %d nonzeros\n", + cuts_to_remove.size(), lp.num_rows, lp.num_cols, lp.A.col_start[lp.A.n]); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 2ff075c15..34bdfbb3d 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2014,7 +2014,8 @@ f_t amount_of_perturbation(const lp_problem_t& lp, const std::vector -void prepare_optimality(const lp_problem_t& lp, +void prepare_optimality(i_t info, + const lp_problem_t& lp, const simplex_solver_settings_t& settings, basis_update_mpf_t& ft, const std::vector& objective, @@ -2081,6 +2082,11 @@ void prepare_optimality(const lp_problem_t& lp, settings.log.printf("\n"); } } + + if (primal_infeas > settings.primal_tol) + { + printf("Primal infeasibility %e. Info %d\n", primal_infeas, info); + } } template @@ -2324,6 +2330,22 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, } } } else { + + // Check that none of the basic variables have a steepest edge that is nonpositive + for (i_t k = 0; k < m; k++) + { + const i_t j = basic_list[k]; + bool fix_needed = false; + if (delta_y_steepest_edge[j] <= 0.0) + { + fix_needed = true; + //printf("Basic variable %d has a nonpositive steepest edge %e\n", j, delta_y_steepest_edge[j]); + delta_y_steepest_edge[j] = 1e-4; + } + if (fix_needed) { + //printf("Basic variable had nonpositive steepest edge\n"); + } + } settings.log.printf("using exisiting steepest edge %e\n", vector_norm2(delta_y_steepest_edge)); } @@ -2429,8 +2451,27 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif + //primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + // lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + if (0 && primal_infeasibility > settings.primal_tol) { + + const i_t nz = infeasibility_indices.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + const f_t val = squared_infeas / delta_y_steepest_edge[j]; + if (squared_infeas >= 0.0 && delta_y_steepest_edge[j] < 0.0) { + printf("Iter %d potential leaving %d val %e squared infeas %e delta_y_steepest_edge %e\n", iter, j, val, squared_infeas, delta_y_steepest_edge[j]); + delta_y_steepest_edge[j] = 1e-4; + } + } - phase2::prepare_optimality(lp, + //printf("No leaving variable. 
Updated primal infeasibility: %e\n", primal_infeasibility); + continue; + } + + phase2::prepare_optimality(0, + lp, settings, ft, objective, @@ -2596,7 +2637,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, // Need to reset the objective value, since we have recomputed x obj = phase2::compute_perturbed_objective(objective, x); if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { - phase2::prepare_optimality(lp, + phase2::prepare_optimality(1, + lp, settings, ft, objective, @@ -2633,7 +2675,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if (primal_infeasibility <= settings.primal_tol && orig_dual_infeas <= settings.dual_tol) { - phase2::prepare_optimality(lp, + phase2::prepare_optimality(2, + lp, settings, ft, objective, diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 6c55f5623..d9e37b799 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -213,7 +213,7 @@ bool bound_strengthening(const std::vector& row_sense, if (new_lb > new_ub + 1e-6) { settings.log.printf( - "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub); + "Iter:: %d, Infeasible variable after update %d, new_lb = %e > %e = new_ub\n", iter, k, new_lb, new_ub); return false; } if (new_lb != old_lb || new_ub != old_ub) { diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index ca3e58041..4d3a1f830 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -132,6 +132,39 @@ void strong_branch_helper(i_t start, } } +template +f_t trial_branching(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& root_vstatus, + const std::vector& edge_norms, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper) +{ + lp_problem_t child_problem = original_lp; + child_problem.lower[branch_var] = branch_var_lower; + child_problem.upper[branch_var] = branch_var_upper; + + simplex_solver_settings_t child_settings = settings; + child_settings.set_log(false); + f_t lp_start_time = tic(); + child_settings.iteration_limit = 200; + lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); + i_t iter = 0; + std::vector vstatus = root_vstatus; + std::vector child_edge_norms = edge_norms; + dual::status_t status = dual_phase2( + 2, 0, lp_start_time, child_problem, child_settings, vstatus, solution, iter, child_edge_norms); + printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. 
Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, status, compute_objective(child_problem, solution.x)); + + if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) { + return compute_objective(child_problem, solution.x); + } else { + return std::numeric_limits::quiet_NaN(); + } +} + } // namespace template @@ -310,6 +343,107 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio return branch_var; } +template +i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const std::vector& fractional, + const std::vector& solution, + f_t current_obj, + logger_t& log) +{ + mutex.lock(); + + const i_t num_fractional = fractional.size(); + std::vector pseudo_cost_up(num_fractional); + std::vector pseudo_cost_down(num_fractional); + std::vector score(num_fractional); + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + mutex.unlock(); + + log.printf("PC: num initialized down %d up %d avg down %e up %e\n", + num_initialized_down, + num_initialized_up, + pseudo_cost_down_avg, + pseudo_cost_up_avg); + + + const i_t reliable_threshold = 1; + + for (i_t k = 0; k < num_fractional; k++) { + const i_t j = fractional[k]; + mutex.lock(); + if (pseudo_cost_num_down[j] >= reliable_threshold) { + pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + mutex.unlock(); + } else { + mutex.unlock(); + // Do trial branching on the down branch + f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], std::floor(solution[j])); + if (!std::isnan(obj)) { + f_t change_in_obj = obj - current_obj; + f_t change_in_x = solution[j] - std::floor(solution[j]); + mutex.lock(); + pseudo_cost_sum_down[j] += change_in_obj / change_in_x; + pseudo_cost_num_down[j]++; + pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + mutex.unlock(); + } + } + + mutex.lock(); + if (pseudo_cost_num_up[j] >= reliable_threshold) { + pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + mutex.unlock(); + } else { + mutex.unlock(); + // Do trial branching on the up branch + f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, std::ceil(solution[j]), lp.upper[j]); + if (!std::isnan(obj)) { + f_t change_in_obj = obj - current_obj; + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + mutex.lock(); + pseudo_cost_sum_up[j] += change_in_obj / change_in_x; + pseudo_cost_num_up[j]++; + pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + mutex.unlock(); + } + } + constexpr f_t eps = 1e-6; + const f_t f_down = solution[j] - std::floor(solution[j]); + const f_t f_up = std::ceil(solution[j]) - solution[j]; + score[k] = + std::max(f_down * pseudo_cost_down[k], eps) * std::max(f_up * pseudo_cost_up[k], eps); + } + + i_t branch_var = fractional[0]; + f_t max_score = -1; + i_t select = -1; + for (i_t k = 0; k < num_fractional; k++) { + if (score[k] > max_score) { + max_score = score[k]; + branch_var = fractional[k]; + select = k; + } + } + + log.printf( + "pc branching on %d. Value %e. 
Score %e\n", branch_var, solution[branch_var], score[select]); + + mutex.unlock(); + + return branch_var; +} + template void pseudo_costs_t::update_pseudo_costs_from_strong_branching( const std::vector& fractional, const std::vector& root_soln) diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 799cdc3ff..20b2198e4 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -47,6 +47,16 @@ class pseudo_costs_t { const std::vector& solution, logger_t& log); + i_t reliable_variable_selection(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const std::vector& fractional, + const std::vector& solution, + f_t current_obj, + logger_t& log); + void update_pseudo_costs_from_strong_branching(const std::vector& fractional, const std::vector& root_soln); std::vector pseudo_cost_sum_up; diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index 3ba981539..a8bd06afa 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -233,6 +233,30 @@ f_t sparse_vector_t::find_coefficient(i_t index) const return std::numeric_limits::quiet_NaN(); } +template +void sparse_vector_t::squeeze(sparse_vector_t& y) const +{ + y.n = n; + + i_t nz = 0; + const i_t n = x.size(); + for (i_t k = 0; k < n; k++) { + if (x[k] != 0.0) { + nz++; + } + } + y.i.reserve(nz); + y.x.reserve(nz); + y.i.clear(); + y.x.clear(); + for (i_t k = 0; k < n; k++) { + if (x[k] != 0.0) { + y.i.push_back(i[k]); + y.x.push_back(x[k]); + } + } +} + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class sparse_vector_t; #endif diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index 3badeed12..c56ebf6d9 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -51,6 +51,8 @@ class sparse_vector_t { void negate(); f_t find_coefficient(i_t index) const; + void squeeze(sparse_vector_t& y) const; + i_t n; std::vector i; std::vector x; diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 62fb52fe1..7ea53a73d 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -86,7 +86,7 @@ class sub_mip_recombiner_t : public recombiner_t { trivial_presolve(fixed_problem); fixed_problem.check_problem_representation(true); // brute force rounding threshold is 8 - const bool run_sub_mip = fixed_problem.n_integer_vars > 8; + const bool run_sub_mip = 0 && fixed_problem.n_integer_vars > 8; dual_simplex::mip_status_t branch_and_bound_status = dual_simplex::mip_status_t::UNSET; dual_simplex::mip_solution_t branch_and_bound_solution(1); if (run_sub_mip) { From 1e177432ebf286eaad917c6df441ce0c172f5aad Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 6 Jan 2026 11:58:19 -0800 Subject: [PATCH 19/45] Check for reduced cost variable fixings --- cpp/src/dual_simplex/branch_and_bound.cpp | 108 +++++++++++++++++++++- cpp/src/dual_simplex/branch_and_bound.hpp | 2 + cpp/src/dual_simplex/cuts.cpp | 53 +++++++++-- cpp/src/dual_simplex/solution.hpp | 2 +- 4 files changed, 154 insertions(+), 11 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 65403404b..d2175f997 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ 
b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -224,6 +224,24 @@ branch_and_bound_t::branch_and_bound_t( } printf("num_integer_variables %d\n", num_integer_variables_); + // Check slack + printf("slacks size %ld m %d\n", new_slacks_.size(), original_lp_.num_rows); + for (i_t slack : new_slacks_) { + const i_t col_start = original_lp_.A.col_start[slack]; + const i_t col_end = original_lp_.A.col_start[slack + 1]; + const i_t col_len = col_end - col_start; + if (col_len != 1) { + printf("Slack %d has %d nzs\n", slack, col_len); + exit(1); + } + const i_t i = original_lp_.A.i[col_start]; + const f_t x = original_lp_.A.x[col_start]; + if (std::abs(x) != 1.0) { + printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x); + exit(1); + } + } + mutex_upper_.lock(); upper_bound_ = inf; mutex_upper_.unlock(); @@ -262,6 +280,84 @@ i_t branch_and_bound_t::get_heap_size() return size; } +template +void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) +{ + printf("Finding reduced cost fixings\n"); + mutex_original_lp_.lock(); + std::vector reduced_costs = root_relax_soln_.z; + std::vector lower_bounds = original_lp_.lower; + std::vector upper_bounds = original_lp_.upper; + std::vector bounds_changed(original_lp_.num_cols, false); + const f_t root_obj = compute_objective(original_lp_, root_relax_soln_.x); + const f_t threshold = 1e-3; + const f_t weaken = 1e-5; + i_t num_improved = 0; + i_t num_fixed = 0; + for (i_t j = 0; j < original_lp_.num_cols; j++) { + //printf("Variable %d type %d reduced cost %e\n", j, var_types_[j], reduced_costs[j]); + if (var_types_[j] == variable_type_t::INTEGER && reduced_costs[j] > threshold) { + const f_t lower_j = original_lp_.lower[j]; + const f_t upper_j = original_lp_.upper[j]; + const f_t abs_gap = upper_bound - root_obj; + f_t reduced_cost_upper_bound = upper_j; + f_t reduced_cost_lower_bound = lower_j; + if (lower_j > -inf && reduced_costs[j] > 0) + { + const f_t new_upper_bound = lower_j + abs_gap/reduced_costs[j]; + reduced_cost_upper_bound = std::floor(new_upper_bound + weaken); + if (reduced_cost_upper_bound < upper_j) + { + //printf("Improved upper bound for variable %d from %e to %e (%e)\n", j, upper_j, reduced_cost_upper_bound, new_upper_bound); + num_improved++; + upper_bounds[j] = reduced_cost_upper_bound; + bounds_changed[j] = true; + } + } + if (upper_j < inf && reduced_costs[j] < 0) + { + const f_t new_lower_bound = upper_j + abs_gap/reduced_costs[j]; + reduced_cost_lower_bound = std::ceil(new_lower_bound - weaken); + if (reduced_cost_lower_bound > lower_j) + { + //printf("Improved lower bound for variable %d from %e to %e (%e)\n", j, lower_j, reduced_cost_lower_bound, new_lower_bound); + num_improved++; + lower_bounds[j] = reduced_cost_lower_bound; + bounds_changed[j] = true; + } + } + if (reduced_cost_upper_bound <= reduced_cost_lower_bound) + { + num_fixed++; + } + } + } + + printf("Reduced costs: Found %d improved bounds and %d fixed variables (%.1f%%)\n", num_improved, num_fixed, 100.0*static_cast(num_fixed)/static_cast(num_integer_variables_)); + + if (num_improved > 0) { + lp_problem_t new_lp = original_lp_; + new_lp.lower = lower_bounds; + new_lp.upper = upper_bounds; + std::vector row_sense; + csc_matrix_t Arow(original_lp_.num_rows, + original_lp_.num_cols, + original_lp_.A.col_start[original_lp_.num_cols]); + original_lp_.A.transpose(Arow); + bool feasible = + bound_strengthening(row_sense, settings_, new_lp, Arow, var_types_, bounds_changed); + + num_improved = 0; + for (i_t j = 0; j < original_lp_.num_cols; j++) { + 
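      // Editorial note (descriptive comment, not part of the original patch): this loop only
      // counts how many bounds the follow-up bound_strengthening pass tightened relative to
      // original_lp_, so the "Bound strengthening: Found %d improved bounds" line below can
      // report the effect of propagation on top of the reduced-cost fixings.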
if (new_lp.lower[j] > original_lp_.lower[j]) { num_improved++; } + if (new_lp.upper[j] < original_lp_.upper[j]) { num_improved++; } + } + printf("Bound strengthening: Found %d improved bounds\n", num_improved); + } + + mutex_original_lp_.unlock(); +} + template void branch_and_bound_t::set_new_solution(const std::vector& solution) { @@ -319,6 +415,8 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu user_lower, gap.c_str(), toc(stats_.start_time)); + + find_reduced_cost_fixings(obj); } else { settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", compute_user_objective(original_lp_, obj), @@ -438,6 +536,8 @@ void branch_and_bound_t::repair_heuristic_solutions() uncrush_primal_solution(original_problem_, original_lp_, repaired_solution, original_x); settings_.solution_callback(original_x, repaired_obj); } + + find_reduced_cost_fixings(obj); } mutex_upper_.unlock(); @@ -571,6 +671,8 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, user_mip_gap(obj, lower).c_str(), toc(stats_.start_time)); + find_reduced_cost_fixings(upper_bound_); + send_solution = true; } @@ -613,8 +715,8 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& { f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; - if (node_ptr->depth >= num_integer_variables_) { - printf("Depth %d >= num_integer_variables %d\n", node_ptr->depth, num_integer_variables_); + if (node_ptr->depth > num_integer_variables_) { + printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables_); mip_node_t* parent = node_ptr->parent; while (parent != nullptr) { printf("Parent depth %d\n", parent->depth); @@ -1478,6 +1580,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + mutex_original_lp_.lock(); remove_cuts(original_lp_, settings_, Arow, @@ -1491,6 +1594,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basic_list, nonbasic_list, basis_update); + mutex_original_lp_.unlock(); fractional.clear(); num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 0943155a8..1525b3d7c 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -131,6 +131,8 @@ class branch_and_bound_t { f_t get_lower_bound(); i_t get_heap_size(); + void find_reduced_cost_fixings(f_t upper_bound); + // The main entry routine. Returns the solver status and populates solution with the incumbent. 
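  // Editorial sketch for find_reduced_cost_fixings() declared above (illustrative only, not part
  // of the patch): the bound comes from LP duality. If x_j sits at its lower bound l_j in the
  // root relaxation (objective z_root) with reduced cost d_j > 0, then any solution that beats
  // the incumbent z_inc must satisfy
  //   x_j <= l_j + (z_inc - z_root) / d_j,
  // and the bound is floored (with a small weakening tolerance) for integer variables. A minimal
  // stand-alone version, with hypothetical names that are not from the codebase:
  //
  //   double rc_upper_bound(double l_j, double d_j, double z_root, double z_inc)
  //   {
  //     return l_j + (z_inc - z_root) / d_j;  // tighten, then std::floor() for integer variables
  //   }
  //
  // For example, l_j = 0, d_j = 2, z_root = 10 and incumbent z_inc = 13 give x_j <= 1.5, so an
  // integer x_j can be restricted to {0, 1}; if the tightened upper bound meets the lower bound,
  // the variable is effectively fixed.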
mip_status_t solve(mip_solution_t& solution); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 643a1ee33..9c163abd6 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -921,7 +921,7 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t::substitute_slacks(const lp_problem_ const f_t cj = cut.x[k]; if (is_slack_[j]) { found_slack = true; + const i_t slack_start = lp.A.col_start[j]; + const i_t slack_end = lp.A.col_start[j + 1]; + const i_t slack_len = slack_end - slack_start; + if (slack_len != 1) { + printf("Slack %d has %d nzs in colum\n", j, slack_len); + exit(1); + } + const f_t alpha = lp.A.x[slack_start]; + if (std::abs(alpha) != 1.0) { + printf("Slack %d has non-unit coefficient %e\n", j, alpha); + exit(1); + } // Do the substitution // Slack variable s_j participates in row i of the constraint matrix // Row i is of the form: - // sum_{k != j} A(i, k) * x_k + s_j = rhs_i + // sum_{k != j} A(i, k) * x_k + alpha * s_j = rhs_i + // where alpha = +1/-1 /// So we have that - // s_j = rhs_i - sum_{k != j} A(i, k) * x_k + // s_j = (rhs_i - sum_{k != j} A(i, k) * x_k)/alpha // Our cut is of the form: // sum_{k != j} C(k) * x_k + C(j) * s_j >= cut_rhs // So the cut becomes - // sum_{k != j} C(k) * x_k + C(j) * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs + // sum_{k != j} C(k) * x_k + C(j)/alpha * (rhs_i - sum_{h != j} A(i, h) * x_h) >= cut_rhs // This is equivalent to: - // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j) * A(i, h) * x_h >= cut_rhs - C(j) * rhs_i + // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j)/alpha * A(i, h) * x_h >= cut_rhs - C(j)/alpha * rhs_i const i_t i = slack_rows_[j]; //printf("Found slack %d in cut. lo %e up %e. Slack row %d\n", j, lp.lower[j], lp.upper[j], i); - cut_rhs -= cj * lp.rhs[i]; + cut_rhs -= cj * lp.rhs[i] / alpha; const i_t row_start = Arow.col_start[i]; const i_t row_end = Arow.col_start[i + 1]; for (i_t q = row_start; q < row_end; q++) { const i_t h = Arow.i[q]; if (h != j) { const f_t aih = Arow.x[q]; - x_workspace_[h] -= cj * aih; + x_workspace_[h] -= cj * aih / alpha; if (!x_mark_[h]) { x_mark_[h] = 1; cut_indices.push_back(h); @@ -1180,7 +1193,7 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ } } else { const f_t aij = Arow.x[q]; - if (aij != 1.0) { + if (std::abs(aij)!= 1.0) { printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j); exit(1); } @@ -1302,6 +1315,8 @@ i_t add_cuts(const simplex_solver_settings_t& settings, csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); new_A_row.to_compressed_col(new_A_col); + printf("slacks size %ld m %d\n", new_slacks.size(), lp.num_rows); + // Add in slacks variables for the new rows lp.lower.resize(lp.num_cols + p); lp.upper.resize(lp.num_cols + p); @@ -1320,12 +1335,26 @@ i_t add_cuts(const simplex_solver_settings_t& settings, lp.upper[j] = inf; lp.objective[j] = 0.0; new_slacks.push_back(j); + printf("Added slack %d\n", j); } settings.log.debug("Done adding slacks\n"); new_A_col.col_start[lp.num_cols + p] = nz; new_A_col.n = lp.num_cols + p; lp.A = new_A_col; + + // Check that all slack columns have length 1 + for (i_t slack: new_slacks) { + const i_t col_start = lp.A.col_start[slack]; + const i_t col_end = lp.A.col_start[slack + 1]; + const i_t col_len = col_end - col_start; + if (col_len != 1) { + printf("Add cuts: Slack %d has %d nzs in column\n", slack, col_len); + exit(1); + } + } + + i_t old_rows = lp.num_rows; lp.num_rows += p; i_t old_cols = lp.num_cols; @@ -1449,6 
+1478,14 @@ void remove_cuts(lp_problem_t& lp, std::vector is_slack(lp.num_cols, 0); for (i_t j : new_slacks) { is_slack[j] = 1; + // Check that slack column length is 1 + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + const i_t col_len = col_end - col_start; + if (col_len != 1) { + printf("Remove cuts: Slack %d has %d nzs in column\n", j, col_len); + exit(1); + } } for (i_t k = original_rows; k < lp.num_rows; k++) { diff --git a/cpp/src/dual_simplex/solution.hpp b/cpp/src/dual_simplex/solution.hpp index d1d745cbd..d882e21e2 100644 --- a/cpp/src/dual_simplex/solution.hpp +++ b/cpp/src/dual_simplex/solution.hpp @@ -39,7 +39,7 @@ class lp_solution_t { std::vector x; // Dual solution vector. Lagrange multipliers for equality constraints. std::vector y; - // Dual solution vector. Lagrange multipliers for inequality constraints. + // Reduced costs std::vector z; f_t objective; f_t user_objective; From 37445485fabc2b386be4df277d4a475c022d4ccb Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 6 Jan 2026 15:17:24 -0800 Subject: [PATCH 20/45] Also try to improve continuous variables with reduced cost strengthening --- cpp/src/dual_simplex/branch_and_bound.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 58540a53b..7bd32267d 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -315,7 +315,7 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) i_t num_fixed = 0; for (i_t j = 0; j < original_lp_.num_cols; j++) { //printf("Variable %d type %d reduced cost %e\n", j, var_types_[j], reduced_costs[j]); - if (var_types_[j] == variable_type_t::INTEGER && reduced_costs[j] > threshold) { + if (std::abs(reduced_costs[j]) > threshold) { const f_t lower_j = original_lp_.lower[j]; const f_t upper_j = original_lp_.upper[j]; const f_t abs_gap = upper_bound - root_obj; @@ -324,7 +324,7 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) if (lower_j > -inf && reduced_costs[j] > 0) { const f_t new_upper_bound = lower_j + abs_gap/reduced_costs[j]; - reduced_cost_upper_bound = std::floor(new_upper_bound + weaken); + reduced_cost_upper_bound = var_types_[j] == variable_type_t::INTEGER ? std::floor(new_upper_bound + weaken) : new_upper_bound; if (reduced_cost_upper_bound < upper_j) { //printf("Improved upper bound for variable %d from %e to %e (%e)\n", j, upper_j, reduced_cost_upper_bound, new_upper_bound); @@ -336,7 +336,7 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) if (upper_j < inf && reduced_costs[j] < 0) { const f_t new_lower_bound = upper_j + abs_gap/reduced_costs[j]; - reduced_cost_lower_bound = std::ceil(new_lower_bound - weaken); + reduced_cost_lower_bound = var_types_[j] == variable_type_t::INTEGER ? 
std::ceil(new_lower_bound - weaken) : new_lower_bound; if (reduced_cost_lower_bound > lower_j) { //printf("Improved lower bound for variable %d from %e to %e (%e)\n", j, lower_j, reduced_cost_lower_bound, new_lower_bound); @@ -345,7 +345,7 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) bounds_changed[j] = true; } } - if (reduced_cost_upper_bound <= reduced_cost_lower_bound) + if (var_types_[j] == variable_type_t::INTEGER && reduced_cost_upper_bound <= reduced_cost_lower_bound) { num_fixed++; } From f8e6fbecf94120dcb34b909ff604e156b1861439 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 6 Jan 2026 16:39:48 -0800 Subject: [PATCH 21/45] Fix performance bug in set_quadratic_objective_matrix --- .../optimization_problem.cu | 109 ++++++++++++------ 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index 72d75cdc7..69c8b7094 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -158,45 +158,88 @@ void optimization_problem_t::set_quadratic_objective_matrix( // Replace Q with Q + Q^T i_t qn = size_offsets - 1; // Number of variables i_t q_nnz = size_indices; - Q_offsets_.resize(qn + 1); - std::fill(Q_offsets_.begin(), Q_offsets_.end(), 0); - Q_indices_.reserve(2 * q_nnz); - Q_values_.reserve(2 * q_nnz); - - // TODO: This is very inefficient for large Q matrices - // Build a map from (row,col) to value for Q+Q^T - std::map, f_t> Q_map; - for (i_t row = 0; row < qn; ++row) { - size_t start = Q_offsets[row]; - size_t end = Q_offsets[row + 1]; - for (size_t idx = start; idx < end; ++idx) { - i_t col = Q_indices[idx]; - f_t val = Q_values[idx]; - auto ij = std::make_pair(row, col); - auto ji = std::make_pair(col, row); - Q_map[ij] += val; - Q_map[ji] += val; + + + // Construct H = Q + Q^T in triplet form first + // Then covert the triplet to CSR + + std::vector H_i; + std::vector H_j; + std::vector H_x; + + H_i.reserve(2 * q_nnz); + H_j.reserve(2 * q_nnz); + H_x.reserve(2 * q_nnz); + + for (i_t i = 0; i < qn; ++i) { + i_t row_start = Q_offsets[i]; + i_t row_end = Q_offsets[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + i_t j = Q_indices[p]; + f_t x = Q_values[p]; + // Add H(i,j) + H_i.push_back(i); + H_j.push_back(j); + H_x.push_back(x); + if (i != j) { + // Add H(j,i) + H_i.push_back(j); + H_j.push_back(i); + H_x.push_back(x); + } } } - // Write map into CSR format (rows are built in key order, so each row's columns are sorted) - for (i_t row = 0; row < qn; ++row) { - for (auto it = Q_map.lower_bound(std::make_pair(row, 0)); - it != Q_map.upper_bound(std::make_pair(row, std::numeric_limits::max())); - ++it) { - i_t col = it->first.second; - f_t v = it->second; - if (v != 0.0) { - Q_indices_.push_back(col); - Q_values_.push_back(v); - Q_offsets_[row + 1]++; + // Convert H to CSR format + // Get row counts + i_t H_nz = H_x.size(); + std::vector H_row_counts(qn, 0); + for (i_t k = 0; k < H_nz; ++k) { + H_row_counts[H_i[k]]++; + } + std::vector H_cumulative_counts(qn + 1, 0); + for (i_t k = 0; k < qn; ++k) { + H_cumulative_counts[k + 1] = H_cumulative_counts[k] + H_row_counts[k]; + } + std::vector H_row_starts = H_cumulative_counts; + std::vector H_indices(H_nz); + std::vector H_values(H_nz); + for (i_t k = 0; k < H_nz; ++k) { + i_t p = H_cumulative_counts[H_i[k]]++; + H_indices[p] = H_j[k]; + H_values[p] = H_x[k]; + } + + // H_row_starts, H_indices, H_values are the CSR 
representation of H + // But this contains duplicate entries + + std::vector workspace(qn, -1); + Q_offsets_.resize(qn + 1); + std::fill(Q_offsets_.begin(), Q_offsets_.end(), 0); + Q_indices_.resize(H_nz); + Q_values_.resize(H_nz); + i_t nz = 0; + for (i_t i = 0; i < qn; ++i) + { + i_t q = nz; // row i will start at q + const i_t row_start = H_row_starts[i]; + const i_t row_end = H_row_starts[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + i_t j = H_indices[p]; + if (workspace[j] >= q) { + Q_values_[workspace[j]] += H_values[p]; // H(i,j) is duplicate + } else { + workspace[j] = nz; // record where column j occurs + Q_indices_[nz] = j; // keep H(i,j) + Q_values_[nz] = H_values[p]; + nz++; } } + Q_offsets_[i] = q; // record start of row i } - // Convert Q_offsets_new to cumulative sum - for (i_t i = 0; i < qn; ++i) { - Q_offsets_[i + 1] += Q_offsets_[i]; - } + Q_offsets_[qn] = nz; // finalize Q + Q_indices_.resize(nz); + Q_values_.resize(nz); // FIX ME:: check for positive semi definite matrix } From 6fc7e990b4489e1a408dbec42982a0011c24a9d5 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 7 Jan 2026 15:39:51 -0800 Subject: [PATCH 22/45] Fix cut scoring when keeping around old cuts that may not be violated --- cpp/src/dual_simplex/cuts.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 5bc8b1b86..0687a7afe 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -88,6 +88,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) { const f_t weight_distance = 1.0; const f_t weight_orthogonality = 1.0; + const f_t min_cut_distance = 1e-4; cut_distances_.resize(cut_storage_.m, 0.0); cut_norms_.resize(cut_storage_.m, 0.0); cut_orthogonality_.resize(cut_storage_.m, 1); @@ -95,7 +96,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) for (i_t i = 0; i < cut_storage_.m; i++) { f_t violation; cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]); - cut_scores_[i] = weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; + cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; //settings_.log.printf("Cut %d distance %e violation %e orthogonality %e score %e\n", i, cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); } @@ -111,7 +112,6 @@ void cut_pool_t::score_cuts(std::vector& x_relax) const i_t max_cuts = 2000; const f_t min_orthogonality = 0.5; - const f_t min_cut_distance = 1e-4; best_cuts_.reserve(std::min(max_cuts, cut_storage_.m)); best_cuts_.clear(); scored_cuts_ = 0; @@ -120,6 +120,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) const i_t i = sorted_indices[0]; if (cut_distances_[i] <= min_cut_distance) { + //settings_.log.printf("Cut %d distance %e <= %e. Stopping\n", i, cut_distances_[i], min_cut_distance); break; } @@ -137,7 +138,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j)); if (cut_orthogonality_[j] >= min_orthogonality) { indices.push_back(j); - cut_scores_[j] = weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; + cut_scores_[j] = cut_distances_[j] <= min_cut_distance ? 
0.0 : weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; //settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]); } } @@ -149,6 +150,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { return cut_scores_[a] > cut_scores_[b]; }); + //settings_.log.printf("\t Sorted indicies %d\n", sorted_indices.size()); } } From 67b57c71bdbfaac51f4483042de55267974c31c9 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 7 Jan 2026 17:51:37 -0800 Subject: [PATCH 23/45] Reenable MIR cuts. Print out types of cut after root node. --- cpp/src/dual_simplex/basis_updates.cpp | 2 +- cpp/src/dual_simplex/branch_and_bound.cpp | 20 ++- cpp/src/dual_simplex/cuts.cpp | 154 ++++++++++++------ cpp/src/dual_simplex/cuts.hpp | 2 +- cpp/src/dual_simplex/phase2.cpp | 2 +- cpp/src/dual_simplex/pseudo_costs.cpp | 15 +- .../optimization_problem.cu | 46 +++++- 7 files changed, 171 insertions(+), 70 deletions(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index a5260be26..8ef19f236 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1169,7 +1169,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts i_t V_nz = 0; const f_t zero_tol = 1e-13; for (i_t h = 0; h < cuts_basic.m; h++) { - sparse_vector_t rhs(WT, h); + sparse_vector_t rhs(WT, h); scatter_into_workspace(rhs); i_t nz = rhs.i.size(); for (i_t k = num_updates_ - 1; k >= 0; --k) { diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 7bd32267d..c290f5bd4 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1702,7 +1702,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } #endif - + i_t num_gomory_cuts = 0; + i_t num_mir_cuts = 0; + i_t num_knapsack_cuts = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { #ifdef PRINT_SOLUTION @@ -1756,7 +1758,15 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("No cuts found\n"); break; } - //print_cut_types(cut_types, settings_); + for (i_t k = 0; k < cut_types.size(); k++) { + if (cut_types[k] == cut_type_t::MIXED_INTEGER_GOMORY) { + num_gomory_cuts++; + } else if (cut_types[k] == cut_type_t::MIXED_INTEGER_ROUNDING) { + num_mir_cuts++; + } else if (cut_types[k] == cut_type_t::KNAPSACK) { + num_knapsack_cuts++; + } + } cuts_to_add.check_matrix(); @@ -1914,6 +1924,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } + if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) { + settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); + settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); + settings_.log.printf("Knapsack cuts: %d\n", num_knapsack_cuts); + } + if (edge_norms_.size() != original_lp_.num_cols) { edge_norms_.resize(original_lp_.num_cols, -1.0); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 0687a7afe..fdce8099c 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -97,7 +97,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) f_t violation; cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]); cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 
0.0 : weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; - //settings_.log.printf("Cut %d distance %e violation %e orthogonality %e score %e\n", i, cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); + //settings_.log.printf("Cut %d type %d distance %e violation %e orthogonality %e score %e\n", i, static_cast(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); } std::vector sorted_indices(cut_storage_.m); @@ -532,14 +532,13 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, // Generate Gomory Cuts generate_gomory_cuts( lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); - //settings.log.printf("Generated Gomory cuts\n"); // Generate Knapsack cuts generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); //settings.log.printf("Generated Knapsack cuts\n"); // Generate MIR cuts - // generate_mir_cuts(lp, settings, Arow, var_types, xstar); + generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); } template @@ -578,54 +577,67 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& mixed_integer_rounding_cut_t mir(lp.num_cols, settings); mir.initialize(lp, new_slacks, xstar); + std::vector slack_map(lp.num_rows); + for (i_t slack : new_slacks) { + const i_t col_start = lp.A.col_start[slack]; + const i_t col_end = lp.A.col_start[slack + 1]; + const i_t col_len = col_end - col_start; + if (col_len != 1) { + printf("Generate MIR cuts: Slack %d has %d nzs in column\n", slack, col_len); + exit(1); + } + const i_t i = lp.A.i[col_start]; + slack_map[i] = slack; + } + for (i_t i = 0; i < lp.num_rows; i++) { sparse_vector_t inequality(Arow, i); f_t inequality_rhs = lp.rhs[i]; const i_t row_start = Arow.row_start[i]; const i_t row_end = Arow.row_start[i + 1]; - i_t last_slack = -1; - for (i_t p = row_start; p < row_end; p++) { - const i_t j = Arow.j[p]; - const f_t a = Arow.x[p]; - if (var_types[j] == variable_type_t::CONTINUOUS && a == 1.0 && lp.lower[j] == 0.0) { - last_slack = j; - } - } + i_t slack = slack_map[i]; - if (last_slack != -1) { - // Remove the slack from the equality to get an inequality - for (i_t k = 0; k < inequality.i.size(); k++) { - const i_t j = inequality.i[k]; - if (j == last_slack) { - inequality.x[k] = 0.0; - } - } - - // inequaility'*x <= inequality_rhs - // But for MIR we need: inequality'*x >= inequality_rhs - inequality_rhs *= -1; - inequality.negate(); - - sparse_vector_t cut(lp.num_cols, 0); - f_t cut_rhs; - i_t mir_status = mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs); - if (mir_status == 0) { - f_t dot = 0.0; - f_t cut_norm = 0.0; - for (i_t k = 0; k < cut.i.size(); k++) { - const i_t jj = cut.i[k]; - const f_t aj = cut.x[k]; - dot += aj * xstar[jj]; - cut_norm += aj * aj; - } - if (dot >= cut_rhs) { - continue; - } - } + // Remove the slack from the equality to get an inequality + for (i_t k = 0; k < inequality.i.size(); k++) { + const i_t j = inequality.i[k]; + if (j == slack) { inequality.x[k] = 0.0; } + } - settings.log.printf("Adding MIR cut %d\n", i); - cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); + // inequaility'*x <= inequality_rhs + // But for MIR we need: inequality'*x >= inequality_rhs + inequality_rhs *= -1; + inequality.negate(); + + sparse_vector_t cut(lp.num_cols, 0); + f_t cut_rhs; + i_t mir_status = + mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs); + bool add_cut = false; 
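    // Editorial note (illustrative, not part of the original patch): the cut "distance" checked
    // below is the Euclidean distance from the relaxation point xstar to the cut hyperplane
    // { x : cut' * x = cut_rhs }, i.e. (cut_rhs - cut' * xstar) / ||cut||_2 when the cut is
    // violated. For example, the cut x1 + x2 >= 1 with xstar = (0.2, 0.2) is violated by 0.6 and
    // lies at distance 0.6 / sqrt(2) ~= 0.42 from xstar; cuts whose distance falls below
    // min_cut_distance are treated as too shallow to be worth adding.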
+ const f_t min_cut_distance = 1e-4; + if (mir_status == 0) { + if (cut.i.size() == 0) { + continue; + } + mir.substitute_slacks(lp, Arow, cut, cut_rhs); + if (cut.i.size() == 0) { + continue; + } + // Check that the cut is violated + // The cut is of the form cut'*x >= cut_rhs + // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar + f_t dot = cut.dot(xstar); + f_t cut_norm = cut.norm2_squared(); + if (dot < cut_rhs && cut_norm > 0.0) { + // Cut is violated. Compute it's distance + f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm); + if (cut_distance > min_cut_distance) { + add_cut = true; + } + } + } + if (add_cut) { + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); } } } @@ -950,6 +962,19 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t -inf) { has_lower_[j] = 1; } } + +#if 0 + for (i_t j = 0; j < x_workspace_.size(); j++) { + if (x_workspace_[j] != 0.0) { + printf("Initialize: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + exit(1); + } + if (x_mark_[j] != 0) { + printf("Initialize: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + exit(1); + } + } +#endif } template @@ -962,6 +987,22 @@ i_t mixed_integer_rounding_cut_t::generate_cut( sparse_vector_t& cut, f_t& cut_rhs) { +#if 0 + for (i_t j = 0; j < x_workspace_.size(); j++) { + if (x_workspace_[j] != 0.0) { + printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + printf("num_vars_ %d\n", num_vars_); + printf("x_workspace_.size() %ld\n", x_workspace_.size()); + exit(1); + } + if (x_mark_[j] != 0) { + printf("Before generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + exit(1); + } + } +#endif + + auto f = [](f_t q_1, f_t q_2) -> f_t { f_t q_1_hat = q_1 - std::floor(q_1); f_t q_2_hat = q_2 - std::floor(q_2); @@ -1104,6 +1145,19 @@ i_t mixed_integer_rounding_cut_t::generate_cut( } +#if 0 + for (i_t j = 0; j < x_workspace_.size(); j++) { + if (x_workspace_[j] != 0.0) { + printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + exit(1); + } + if (x_mark_[j] != 0) { + printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + exit(1); + } + } +#endif + // The new cut is: g'*x >= R // But we want to have it in the form h'*x <= b cut.sort(); @@ -1111,7 +1165,7 @@ i_t mixed_integer_rounding_cut_t::generate_cut( cut_rhs = R; if (cut.i.size() == 0) { - settings_.log.printf("No coefficients in cut\n"); + //settings_.log.printf("MIR: No coefficients in cut\n"); return -1; } @@ -1131,14 +1185,14 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ std::vector cut_indices; cut_indices.reserve(cut.i.size()); -#if 1 +#if 0 for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { - printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + printf("Begin Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); exit(1); } if (x_mark_[j] != 0) { - printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + printf("Begin Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); exit(1); } } @@ -1250,14 +1304,14 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ } -#if 1 +#if 0 for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { - printf("Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); + printf("End Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); exit(1); } if (x_mark_[j] != 0) { - printf("Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); + printf("End Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); exit(1); } } diff 
--git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index 37359f6e8..323dfcfbb 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -229,7 +229,7 @@ class mixed_integer_gomory_base_inequality_t { private: std::vector b_bar_; - std::vector nonbasic_mark_; + std::vector nonbasic_mark_; std::vector x_workspace_; std::vector x_mark_; }; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index a3392aa07..de9077560 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2456,7 +2456,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, //primal_infeasibility = phase2::compute_initial_primal_infeasibilities( // lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); - if (0 && primal_infeasibility > settings.primal_tol) { + if (primal_infeasibility > settings.primal_tol) { const i_t nz = infeasibility_indices.size(); for (i_t k = 0; k < nz; ++k) { diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index 2d583213e..d65944f5e 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -389,11 +389,13 @@ i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t= reliable_threshold) { + bool down_reliable = pseudo_cost_num_down[j] >= reliable_threshold; + mutex.unlock(); + if (down_reliable) { + mutex.lock(); pseudo_cost_down[k] = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; mutex.unlock(); } else { - mutex.unlock(); // Do trial branching on the down branch f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, lp.lower[j], std::floor(solution[j])); if (!std::isnan(obj)) { @@ -402,17 +404,19 @@ i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t= reliable_threshold) { + bool up_reliable = pseudo_cost_num_up[j] >= reliable_threshold; + mutex.unlock(); + if (up_reliable) { + mutex.lock(); pseudo_cost_up[k] = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; mutex.unlock(); } else { - mutex.unlock(); // Do trial branching on the up branch f_t obj = trial_branching(lp, settings, var_types, vstatus, edge_norms, j, std::ceil(solution[j]), lp.upper[j]); if (!std::isnan(obj)) { @@ -446,7 +450,6 @@ i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t::set_quadratic_objective_matrix( H_i.push_back(i); H_j.push_back(j); H_x.push_back(x); - if (i != j) { - // Add H(j,i) - H_i.push_back(j); - H_j.push_back(i); - H_x.push_back(x); - } + // Add H(j,i) + H_i.push_back(j); + H_j.push_back(i); + H_x.push_back(x); } } @@ -202,13 +200,43 @@ void optimization_problem_t::set_quadratic_objective_matrix( H_cumulative_counts[k + 1] = H_cumulative_counts[k] + H_row_counts[k]; } std::vector H_row_starts = H_cumulative_counts; + std::vector H_map(H_nz); std::vector H_indices(H_nz); std::vector H_values(H_nz); for (i_t k = 0; k < H_nz; ++k) { - i_t p = H_cumulative_counts[H_i[k]]++; - H_indices[p] = H_j[k]; - H_values[p] = H_x[k]; + const i_t p = H_cumulative_counts[H_i[k]]++; + H_map[k] = p; } + rmm::device_uvector d_H_map(H_nz, stream_view_); + rmm::device_uvector d_H_j(H_nz, stream_view_); + rmm::device_uvector d_H_x(H_nz, stream_view_); + rmm::device_uvector d_H_indices(H_nz, stream_view_); + rmm::device_uvector d_H_values(H_nz, stream_view_); + + raft::copy(d_H_map.data(), H_map.data(), H_nz, stream_view_); + raft::copy(d_H_j.data(), H_j.data(), H_nz, stream_view_); + raft::copy(d_H_x.data(), H_x.data(), H_nz, stream_view_); + 
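    // Editorial note (illustrative, not part of the original patch): H_map[k] records the CSR
    // slot assigned to triplet k, so the two for_each_n kernels below are plain scatters. For
    // example, with H_i = {0, 1, 0} and row starts {0, 2, 3}, the triplets receive
    // H_map = {0, 2, 1}, and entry k is written to H_indices[H_map[k]] / H_values[H_map[k]] on
    // the device before the merged-duplicate pass builds the final Q arrays.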
stream_view_.synchronize(); + thrust::for_each_n(rmm::exec_policy(stream_view_), + thrust::make_counting_iterator(0), + H_nz, + [span_H_map = cuopt::make_span(d_H_map), + span_H_j = cuopt::make_span(d_H_j), + span_H_indices = cuopt::make_span(d_H_indices)] __device__(i_t k) { + span_H_indices[span_H_map[k]] = span_H_j[k]; + }); + thrust::for_each_n(rmm::exec_policy(stream_view_), + thrust::make_counting_iterator(0), + H_nz, + [span_H_map = cuopt::make_span(d_H_map), + span_H_x = cuopt::make_span(d_H_x), + span_H_values = cuopt::make_span(d_H_values)] __device__(i_t k) { + span_H_values[span_H_map[k]] = span_H_x[k]; + }); + + raft::copy(H_indices.data(), d_H_indices.data(), H_nz, stream_view_); + raft::copy(H_values.data(), d_H_values.data(), H_nz, stream_view_); + stream_view_.synchronize(); // H_row_starts, H_indices, H_values are the CSR representation of H // But this contains duplicate entries From b70439087b76e9ee13f205464e832a0d7eba8a36 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 8 Jan 2026 15:41:46 -0800 Subject: [PATCH 24/45] Fix bug in crossover leading to crash on glass-sc. Add node_limit and reliability_branching parameters --- .../cuopt/linear_programming/constants.h | 2 + .../mip/solver_settings.hpp | 2 + cpp/src/dual_simplex/branch_and_bound.cpp | 77 +++++++++++++------ cpp/src/dual_simplex/crossover.cpp | 26 ++++--- cpp/src/dual_simplex/cuts.cpp | 14 +++- cpp/src/dual_simplex/cuts.hpp | 14 ++-- cpp/src/dual_simplex/phase2.cpp | 6 +- .../dual_simplex/simplex_solver_settings.hpp | 2 + cpp/src/math_optimization/solver_settings.cu | 2 + cpp/src/mip/solver.cu | 2 + 10 files changed, 102 insertions(+), 45 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index ab78391c9..2a4a781c1 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -58,6 +58,8 @@ #define CUOPT_MIP_SCALING "mip_scaling" #define CUOPT_MIP_PRESOLVE "mip_presolve" #define CUOPT_MIP_CUT_PASSES "mip_cut_passes" +#define CUOPT_MIP_NODE_LIMIT "mip_node_limit" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" #define CUOPT_SOLUTION_FILE "solution_file" #define CUOPT_NUM_CPU_THREADS "num_cpu_threads" #define CUOPT_NUM_GPUS "num_gpus" diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 8f7efdea9..65a4d4bd0 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -79,6 +79,8 @@ class mip_solver_settings_t { tolerances_t tolerances; f_t time_limit = std::numeric_limits::infinity(); + i_t node_limit = std::numeric_limits::max(); + i_t reliability_branching = -1; bool heuristics_only = false; i_t num_cpu_threads = -1; // -1 means use default number of threads in branch and bound i_t max_cut_passes = 10; // number of cut passes to make diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index c290f5bd4..d851d723c 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -324,9 +324,10 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) if (lower_j > -inf && reduced_costs[j] > 0) { const f_t new_upper_bound = lower_j + abs_gap/reduced_costs[j]; - reduced_cost_upper_bound = var_types_[j] == variable_type_t::INTEGER ? 
std::floor(new_upper_bound + weaken) : new_upper_bound; - if (reduced_cost_upper_bound < upper_j) - { + reduced_cost_upper_bound = var_types_[j] == variable_type_t::INTEGER + ? std::floor(new_upper_bound + weaken) + : new_upper_bound; + if (reduced_cost_upper_bound < upper_j) { //printf("Improved upper bound for variable %d from %e to %e (%e)\n", j, upper_j, reduced_cost_upper_bound, new_upper_bound); num_improved++; upper_bounds[j] = reduced_cost_upper_bound; @@ -336,9 +337,10 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) if (upper_j < inf && reduced_costs[j] < 0) { const f_t new_lower_bound = upper_j + abs_gap/reduced_costs[j]; - reduced_cost_lower_bound = var_types_[j] == variable_type_t::INTEGER ? std::ceil(new_lower_bound - weaken) : new_lower_bound; - if (reduced_cost_lower_bound > lower_j) - { + reduced_cost_lower_bound = var_types_[j] == variable_type_t::INTEGER + ? std::ceil(new_lower_bound - weaken) + : new_lower_bound; + if (reduced_cost_lower_bound > lower_j) { //printf("Improved lower bound for variable %d from %e to %e (%e)\n", j, lower_j, reduced_cost_lower_bound, new_lower_bound); num_improved++; lower_bounds[j] = reduced_cost_lower_bound; @@ -902,13 +904,20 @@ node_solve_info_t branch_and_bound_t::solve_node( } else if (leaf_objective <= upper_bound + abs_fathom_tol) { // Choose fractional variable to branch on -#ifdef RELIABLE_BRANCHING - const i_t branch_var = - pc_.reliable_variable_selection(leaf_problem, lp_settings, var_types_, leaf_vstatus, leaf_edge_norms, leaf_fractional, leaf_solution.x, leaf_objective, lp_settings.log); -#else - const i_t branch_var = - pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); -#endif + i_t branch_var = -1; + if (lp_settings.reliability_branching > 0) { + branch_var = pc_.reliable_variable_selection(leaf_problem, + lp_settings, + var_types_, + leaf_vstatus, + leaf_edge_norms, + leaf_fractional, + leaf_solution.x, + leaf_objective, + lp_settings.log); + } else { + branch_var = pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); + } assert(leaf_vstatus.size() == leaf_problem.num_cols); search_tree.branch( @@ -1474,7 +1483,15 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( nonbasic_list.push_back(j); } } - + if (basic_list.size() != original_lp_.num_rows) { + printf("basic_list size %d != m %d\n", basic_list.size(), original_lp_.num_rows); + exit(1); + } + if (nonbasic_list.size() != original_lp_.num_cols - original_lp_.num_rows) { + printf("nonbasic_list size %d != n - m %d\n", nonbasic_list.size(), original_lp_.num_cols - original_lp_.num_rows); + exit(1); + } + root_crossover_settings.max_cut_passes = 3; // Populate the basis_update from the crossover vstatus basis_update.refactor_basis( original_lp_.A, root_crossover_settings, basic_list, nonbasic_list, crossover_vstatus_); @@ -1482,11 +1499,13 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( // Set the edge norms to a default value edge_norms.resize(original_lp_.num_cols, -1.0); set_uninitialized_steepest_edge_norms(edge_norms); - + printf("Using crossover solution\n"); } else { + printf("Using dual simplex solution 1\n"); root_status = root_status_future.get(); } } else { + printf("Using dual simplex solution\n"); root_status = root_status_future.get(); } return root_status; @@ -1534,6 +1553,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basis_update_mpf_t basis_update(original_lp_.num_rows, settings_.refactor_frequency); lp_status_t root_status; if 
(!enable_concurrent_lp_root_solve()) { + printf("Non concurrent LP root solve\n"); // RINS/SUBMIP path root_status = solve_linear_program_with_advanced_basis(original_lp_, exploration_stats_.start_time, @@ -1545,7 +1565,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, edge_norms_); } else { - root_status = solve_root_relaxation(lp_settings, root_relax_soln_, root_vstatus_, basis_update, basic_list, nonbasic_list, edge_norms_); + root_status = solve_root_relaxation(lp_settings, + root_relax_soln_, + root_vstatus_, + basis_update, + basic_list, + nonbasic_list, + edge_norms_); } exploration_stats_.total_lp_iters = root_relax_soln_.iterations; exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time); @@ -1705,6 +1731,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_gomory_cuts = 0; i_t num_mir_cuts = 0; i_t num_knapsack_cuts = 0; + i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { #ifdef PRINT_SOLUTION @@ -1755,7 +1782,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_cuts = cut_pool.get_best_cuts(cuts_to_add, cut_rhs, cut_types); if (num_cuts == 0) { - settings_.log.printf("No cuts found\n"); + //settings_.log.printf("No cuts found\n"); break; } for (i_t k = 0; k < cut_types.size(); k++) { @@ -1779,7 +1806,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } #endif -#if 1 +#if 0 f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x); if (min_cut_violation < 1e-6) { settings_.log.printf("Min cut violation %e\n", min_cut_violation); @@ -1801,8 +1828,10 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } + cut_pool_size = cut_pool.pool_size(); + // Resolve the LP with the new cuts - settings_.log.printf("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", + settings_.log.debug("Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. 
Total constraints %d\n", num_cuts, cuts_to_add.row_start[cuts_to_add.m], cut_pool.pool_size(), @@ -1918,16 +1947,18 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut user_lower, num_fractional, 0, - exploration_stats_.total_lp_iters.load(), + static_cast(iter), gap.c_str(), toc(exploration_stats_.start_time)); } } if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) { - settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); - settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); - settings_.log.printf("Knapsack cuts: %d\n", num_knapsack_cuts); + settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); + settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); + settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts); + settings_.log.printf("Cut pool size : %d\n", cut_pool_size); + settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]); } if (edge_norms_.size() != original_lp_.num_cols) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 23d9a0e8e..b46085b40 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -1355,18 +1355,22 @@ crossover_status_t crossover(const lp_problem_t& lp, settings.log.debug("Num flips %d\n", num_flips); solution = phase1_solution; print_crossover_info(lp, settings, vstatus, solution, "Dual phase 1 complete"); - std::vector edge_norms; - dual::status_t status = dual_phase2( - 2, iter == 0 ? 1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms); - if (toc(start_time) > settings.time_limit) { - settings.log.printf("Time limit exceeded\n"); - return crossover_status_t::TIME_LIMIT; - } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { - settings.log.printf("Concurrent halt\n"); - return crossover_status_t::CONCURRENT_LIMIT; + dual_infeas = dual_infeasibility(lp, settings, vstatus, solution.z); + dual::status_t status = dual::status_t::NUMERICAL; + if (dual_infeas <= settings.dual_tol) { + std::vector edge_norms; + status = dual_phase2( + 2, iter == 0 ? 
1 : 0, start_time, lp, settings, vstatus, solution, iter, edge_norms); + if (toc(start_time) > settings.time_limit) { + settings.log.printf("Time limit exceeded\n"); + return crossover_status_t::TIME_LIMIT; + } + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + settings.log.printf("Concurrent halt\n"); + return crossover_status_t::CONCURRENT_LIMIT; + } + solution.iterations += iter; } - solution.iterations += iter; primal_infeas = primal_infeasibility(lp, settings, vstatus, solution.x); dual_infeas = dual_infeasibility(lp, settings, vstatus, solution.z); primal_res = primal_residual(lp, solution); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index fdce8099c..7632397a3 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -1444,7 +1444,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, return -1; } in_basis[j] = k; - if (j < cuts.n) { C_B_nz += C_col_degree[j]; } + if (j < cuts.n) { C_B_nz += C_col_degree[j]; } else { printf("j >= cuts.n %d %d\n", j, cuts.n); } } settings.log.debug("Done estimating C_B_nz\n"); @@ -1466,7 +1466,15 @@ i_t add_cuts(const simplex_solver_settings_t& settings, C_B.row_start[p] = nz; if (nz != C_B_nz) { - settings.log.printf("predicted nz %d actual nz %d\n", C_B_nz, nz); + settings.log.printf("Add cuts: predicted nz %d actual nz %d\n", C_B_nz, nz); + for (i_t i = 0; i < p; i++) { + const i_t row_start = cuts.row_start[i]; + const i_t row_end = cuts.row_start[i + 1]; + for (i_t q = row_start; q < row_end; q++) { + const i_t j = cuts.j[q]; + printf("C(%d, %d) = %e\n", i, j, C_B.x[q]); + } + } return -1; } settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); @@ -1641,7 +1649,7 @@ void remove_cuts(lp_problem_t& lp, y = new_solution_y; z = new_solution_z; - settings.log.printf("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n", + settings.log.debug("Removed %d cuts. After removal %d rows %d columns %d nonzeros\n", cuts_to_remove.size(), lp.num_rows, lp.num_cols, diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index 323dfcfbb..e7014e546 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -78,7 +78,7 @@ class cut_pool_t { } // Add a cut in the form: cut'*x >= rhs. 
- // We expect that the cut is violated by the current relaxation + // We expect that the cut is violated by the current relaxation xstar // cut'*xstart < rhs void add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs); @@ -137,10 +137,14 @@ class knapsack_generation_t { const std::vector& get_knapsack_constraints() const { return knapsack_constraints_; } private: - - f_t greedy_knapsack_problem(const std::vector& values, const std::vector& weights, f_t rhs, std::vector& solution); - f_t solve_knapsack_problem(const std::vector& values, const std::vector& weights, f_t rhs, std::vector& solution); - + f_t greedy_knapsack_problem(const std::vector& values, + const std::vector& weights, + f_t rhs, + std::vector& solution); + f_t solve_knapsack_problem(const std::vector& values, + const std::vector& weights, + f_t rhs, + std::vector& solution); std::vector is_slack_; std::vector knapsack_constraints_; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index de9077560..88018b74e 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -2083,7 +2083,7 @@ void prepare_optimality(i_t info, } } - if (primal_infeas > settings.primal_tol) + if (primal_infeas > 10.0*settings.primal_tol) { printf("Primal infeasibility %e. Info %d\n", primal_infeas, info); } @@ -2465,12 +2465,12 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, const f_t val = squared_infeas / delta_y_steepest_edge[j]; if (squared_infeas >= 0.0 && delta_y_steepest_edge[j] < 0.0) { printf("Iter %d potential leaving %d val %e squared infeas %e delta_y_steepest_edge %e\n", iter, j, val, squared_infeas, delta_y_steepest_edge[j]); - delta_y_steepest_edge[j] = 1e-4; + //delta_y_steepest_edge[j] = 1e-4; } } //printf("No leaving variable. 
Updated primal infeasibility: %e\n", primal_infeasibility); - continue; + //continue; } phase2::prepare_optimality(0, diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 53ffcf209..7dbf0e1cc 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -75,6 +75,7 @@ struct simplex_solver_settings_t { max_cut_passes(10), random_seed(0), inside_mip(0), + reliability_branching(-1), solution_callback(nullptr), heuristic_preemption_callback(nullptr), concurrent_halt(nullptr) @@ -142,6 +143,7 @@ struct simplex_solver_settings_t { i_t num_diving_threads; // number of threads dedicated to diving i_t max_cut_passes; // number of cut passes to make i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node + i_t reliability_branching; // -1 automatic, 0 to disable, >0 to enable reliability branching std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; std::function heuristic_preemption_callback; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 93e8df5cd..8ae1fa51b 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -88,6 +88,8 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1}, {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1}, {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits::max(), 10}, + {CUOPT_MIP_NODE_LIMIT, &mip_settings.node_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, + {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1}, {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1} }; diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 4b39b4619..b6ffd04f1 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -163,6 +163,8 @@ solution_t mip_solver_t::run_solver() // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = timer_.remaining_time(); + branch_and_bound_settings.node_limit = context.settings.node_limit; + branch_and_bound_settings.reliability_branching = context.settings.reliability_branching; branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; From 89dafc2cf0daad143c56d1c8bc0fcc212caaf84c Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 9 Jan 2026 10:45:51 -0800 Subject: [PATCH 25/45] More info on primal infeasibility and trial branching --- cpp/src/dual_simplex/branch_and_bound.cpp | 2 +- cpp/src/dual_simplex/cuts.cpp | 6 +- cpp/src/dual_simplex/phase2.cpp | 91 ++++++++++++++++++++++- cpp/src/dual_simplex/pseudo_costs.cpp | 23 ++++-- 4 files changed, 109 insertions(+), 13 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index d851d723c..3402c3c20 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1501,7 +1501,7 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( set_uninitialized_steepest_edge_norms(edge_norms); printf("Using crossover 
solution\n"); } else { - printf("Using dual simplex solution 1\n"); + printf("Using dual simplex solution 1: crossover status %d\n", crossover_status); root_status = root_status_future.get(); } } else { diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 7632397a3..2827dfbc1 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -945,7 +945,7 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t& settings, return -1; } in_basis[j] = k; - if (j < cuts.n) { C_B_nz += C_col_degree[j]; } else { printf("j >= cuts.n %d %d\n", j, cuts.n); } + // The cuts are on the original variables. So it is possible that + // a slack will be basic and thus not part of the cuts matrix + if (j < cuts.n) { C_B_nz += C_col_degree[j]; } } settings.log.debug("Done estimating C_B_nz\n"); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 88018b74e..413e4718c 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1729,6 +1729,74 @@ f_t dual_infeasibility(const lp_problem_t& lp, return sum_infeasible; } + +template +f_t primal_infeasibility_breakdown(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + const std::vector& x, + f_t& basic_infeas, + f_t& nonbasic_infeas, + f_t& basic_over) +{ + const i_t n = lp.num_cols; + f_t primal_inf = 0; + basic_infeas = 0.0; + basic_over = 0.0; + nonbasic_infeas = 0.0; + for (i_t j = 0; j < n; ++j) { + if (x[j] < lp.lower[j]) { + // x_j < l_j => -x_j > -l_j => -x_j + l_j > 0 + const f_t infeas = -x[j] + lp.lower[j]; + if (vstatus[j] == variable_status_t::BASIC) { + basic_infeas += infeas; + if (infeas > settings.primal_tol) { + basic_over += infeas; + } + } else { + nonbasic_infeas += infeas; + } + primal_inf += infeas; +#ifdef PRIMAL_INFEASIBLE_DEBUG + if (infeas > settings.primal_tol) { + settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n", + j, + infeas, + lp.lower[j], + x[j], + lp.upper[j], + static_cast(vstatus[j])); + } +#endif + } + if (x[j] > lp.upper[j]) { + // x_j > u_j => x_j - u_j > 0 + const f_t infeas = x[j] - lp.upper[j]; + if (vstatus[j] == variable_status_t::BASIC) { + basic_infeas += infeas; + if (infeas > settings.primal_tol) { + basic_over += infeas; + } + } else { + nonbasic_infeas += infeas; + } + primal_inf += infeas; +#ifdef PRIMAL_INFEASIBLE_DEBUG + if (infeas > settings.primal_tol) { + settings.log.printf("x %d infeas %e lo %e val %e up %e vstatus %d\n", + j, + infeas, + lp.lower[j], + x[j], + lp.upper[j], + static_cast(vstatus[j])); + } +#endif + } + } + return primal_inf; +} + template f_t primal_infeasibility(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -2015,6 +2083,7 @@ f_t amount_of_perturbation(const lp_problem_t& lp, const std::vector void prepare_optimality(i_t info, + f_t orig_primal_infeas, const lp_problem_t& lp, const simplex_solver_settings_t& settings, basis_update_mpf_t& ft, @@ -2037,6 +2106,7 @@ void prepare_optimality(i_t info, sol.objective = compute_objective(lp, sol.x); sol.user_objective = compute_user_objective(lp, sol.objective); f_t perturbation = phase2::amount_of_perturbation(lp, objective); + f_t orig_perturbation = perturbation; if (perturbation > 1e-6 && phase == 2) { // Try to remove perturbation std::vector unperturbed_y(m); @@ -2085,7 +2155,19 @@ void prepare_optimality(i_t info, if (primal_infeas > 10.0*settings.primal_tol) { - printf("Primal infeasibility %e. 
Info %d\n", primal_infeas, info); + f_t basic_infeas = 0.0; + f_t nonbasic_infeas = 0.0; + f_t basic_over = 0.0; + phase2::primal_infeasibility_breakdown(lp, settings, vstatus, x, basic_infeas, nonbasic_infeas, basic_over); + printf("Primal infeasibility %e/%e (Basic %e, Nonbasic %e, Basic over %e). Perturbation %e/%e. Info %d\n", + primal_infeas, + orig_primal_infeas, + basic_infeas, + nonbasic_infeas, + basic_over, + orig_perturbation, + perturbation, + info); } } @@ -2454,8 +2536,8 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #endif - //primal_infeasibility = phase2::compute_initial_primal_infeasibilities( - // lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); if (primal_infeasibility > settings.primal_tol) { const i_t nz = infeasibility_indices.size(); @@ -2474,6 +2556,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, } phase2::prepare_optimality(0, + primal_infeasibility, lp, settings, ft, @@ -2641,6 +2724,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, obj = phase2::compute_perturbed_objective(objective, x); if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { phase2::prepare_optimality(1, + primal_infeasibility, lp, settings, ft, @@ -2679,6 +2763,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if (primal_infeasibility <= settings.primal_tol && orig_dual_infeas <= settings.dual_tol) { phase2::prepare_optimality(2, + primal_infeasibility, lp, settings, ft, diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index d65944f5e..f391598b1 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -141,7 +141,8 @@ f_t trial_branching(const lp_problem_t& original_lp, const std::vector& edge_norms, i_t branch_var, f_t branch_var_lower, - f_t branch_var_upper) + f_t branch_var_upper, + i_t& iter) { lp_problem_t child_problem = original_lp; child_problem.lower[branch_var] = branch_var_lower; @@ -152,12 +153,11 @@ f_t trial_branching(const lp_problem_t& original_lp, f_t lp_start_time = tic(); child_settings.iteration_limit = 200; lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); - i_t iter = 0; std::vector vstatus = root_vstatus; std::vector child_edge_norms = edge_norms; dual::status_t status = dual_phase2( 2, 0, lp_start_time, child_problem, child_settings, vstatus, solution, iter, child_edge_norms); - printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, status, compute_objective(child_problem, solution.x)); + //printf("Trial branching on variable %d. Lo: %e Up: %e. Iter %d. Status %d. 
Obj %e\n", branch_var, child_problem.lower[branch_var], child_problem.upper[branch_var], iter, status, compute_objective(child_problem, solution.x)); if (status == dual::status_t::OPTIMAL || status == dual::status_t::ITERATION_LIMIT || status == dual::status_t::CUTOFF) { return compute_objective(child_problem, solution.x); @@ -373,6 +373,9 @@ i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t::reliable_variable_selection(const lp_problem_t::reliable_variable_selection(const lp_problem_t::reliable_variable_selection(const lp_problem_t Date: Tue, 13 Jan 2026 15:25:46 -0800 Subject: [PATCH 26/45] Add aggregation for MIR cuts --- cpp/src/dual_simplex/branch_and_bound.cpp | 12 +- cpp/src/dual_simplex/cuts.cpp | 751 +++++++++++++++++++++- cpp/src/dual_simplex/cuts.hpp | 87 +++ cpp/src/dual_simplex/sparse_matrix.cpp | 6 +- cpp/src/dual_simplex/sparse_matrix.hpp | 2 +- 5 files changed, 825 insertions(+), 33 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 971ec234b..23e512e1b 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1799,9 +1799,19 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut num_knapsack_cuts++; } } + print_cut_types(cut_types, settings_); + printf("Cut pool size: %d\n", cut_pool.pool_size()); - cuts_to_add.check_matrix(); + if (cuts_to_add.check_matrix() != 0) { + printf("Bad cuts matrix\n"); + for (i_t i = 0; i < static_cast(cut_types.size()); ++i) + { + printf("row %d cut type %d\n", i, cut_types[i]); + } + exit(-1); + } + #ifdef PRINT_CUTS csc_matrix_t cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]); cuts_to_add.to_compressed_col(cuts_to_add_col); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 4fd5a8299..e02531eca 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -26,6 +26,24 @@ void cut_pool_t::add_cut(cut_type_t cut_type, const sparse_vector_t index(original_vars_, 0); + for (i_t p = 0; p < cut.i.size(); p++) + { + const i_t j = cut.i[p]; + if (index[j] != 0) + { + printf("Repeated index %d in cut of size %ld\n", j, cut.i.size()); + for (i_t k = 0; k < cut.i.size(); k++) + { + printf("i %d val %e\n", cut.i[k], cut.x[k]); + } + exit(1); + } + index[j] = 1; + } +#endif + sparse_vector_t cut_squeezed; cut.squeeze(cut_squeezed); cut_storage_.append_row(cut_squeezed); @@ -97,7 +115,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) f_t violation; cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]); cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 
0.0 : weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; - //settings_.log.printf("Cut %d type %d distance %e violation %e orthogonality %e score %e\n", i, static_cast(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); + //settings_.log.printf("Cut %d type %d distance %+e violation %+e orthogonality %e score %e\n", i, static_cast(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); } std::vector sorted_indices(cut_storage_.m); @@ -590,13 +608,75 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& slack_map[i] = slack; } + // Compute initial scores for all rows + std::vector score(lp.num_rows, 0.0); for (i_t i = 0; i < lp.num_rows; i++) { + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + + const i_t row_nz = row_end - row_start; + i_t num_integer_in_row = 0; + for (i_t p = row_start; p < row_end; p++) + { + const i_t j = Arow.j[p]; + if (var_types[j] == variable_type_t::INTEGER) + { + num_integer_in_row++; + } + } + + if (num_integer_in_row == 0) + { + score[i] = 0.0; + + } else { + f_t nz_score = lp.num_cols - row_nz; + + const i_t slack = slack_map[i]; + const f_t slack_value = xstar[slack]; + + f_t slack_score = -std::log10(1e-16 + std::abs(slack_value)); + + const f_t nz_weight = 1.0; + const f_t slack_weight = 1.0; + + score[i] = nz_weight * nz_score + slack_weight * slack_score; + } + } + + // Sort the rows by score + std::vector sorted_indices(lp.num_rows); + std::iota(sorted_indices.begin(), sorted_indices.end(), 0); + std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { + return score[a] > score[b]; + }); + + // These data structures are used to track the rows that have been aggregated + // The invariant is that aggregated_rows is empty and aggregated_mark is all zeros + // at the beginning of each iteration of the for loop below + std::vector aggregated_rows; + std::vector aggregated_mark(lp.num_rows, 0); + + const i_t max_cuts = std::min(lp.num_rows, 1000); + for (i_t h = 0; h < max_cuts; h++) { + // Get the row with the highest score + const i_t i = sorted_indices[0]; + const f_t max_score = score[i]; + + const i_t row_nz = Arow.row_start[i+1] - Arow.row_start[i]; + const i_t slack = slack_map[i]; + const f_t slack_value = xstar[slack]; + + //printf("MIR %d/%d. 
row %d nz %d slack %e score %e\n", h, max_cuts, i, row_nz, slack_value, max_score); + + if (max_score <= 0.0) { + break; + } + sparse_vector_t inequality(Arow, i); f_t inequality_rhs = lp.rhs[i]; - const i_t row_start = Arow.row_start[i]; - const i_t row_end = Arow.row_start[i + 1]; - i_t slack = slack_map[i]; + // Remove the slack from the equality to get an inequality for (i_t k = 0; k < inequality.i.size(); k++) { @@ -609,39 +689,294 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& inequality_rhs *= -1; inequality.negate(); + // Transform the relaxation solution + std::vector transformed_xstar; + mir.relaxation_to_nonnegative(lp, xstar, transformed_xstar); + + sparse_vector_t cut(lp.num_cols, 0); f_t cut_rhs; - i_t mir_status = - mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs); bool add_cut = false; - const f_t min_cut_distance = 1e-4; - if (mir_status == 0) { - if (cut.i.size() == 0) { - continue; + i_t num_aggregated = 0; + const i_t max_aggregated = 6; + + while (!add_cut && num_aggregated < max_aggregated) { + //printf("\t add_cut %d num_aggregated %d nz %ld\n", static_cast(add_cut), num_aggregated, inequality.i.size()); + + sparse_vector_t transformed_inequality = inequality; + f_t transformed_rhs = inequality_rhs; + + mir.to_nonnegative(lp, transformed_inequality, transformed_rhs); +#if 0 + for (i_t k = 0; k < transformed_inequality.i.size(); k++) + { + printf("transformed inequality: i %d x %e\n", transformed_inequality.i[k], transformed_inequality.x[k]); } - mir.substitute_slacks(lp, Arow, cut, cut_rhs); - if (cut.i.size() == 0) { - continue; +#endif + std::vector> transformed_cuts; + std::vector transformed_cut_rhs; + std::vector transformed_violations; + + // Generate cut for delta = 1 + { + sparse_vector_t cut_1(lp.num_cols, 0); + f_t cut_1_rhs; + mir.generate_cut_nonnegative(transformed_inequality, transformed_rhs, var_types, cut_1, cut_1_rhs); + f_t cut_1_violation = mir.compute_violation(cut_1, cut_1_rhs, transformed_xstar); + if (cut_1_violation > 1e-6) + { + //printf("Cut 1: Found violation of %e\n", cut_1_violation); + transformed_cuts.push_back(cut_1); + transformed_cut_rhs.push_back(cut_1_rhs); + transformed_violations.push_back(cut_1_violation); + } else { + //printf("Cut 1: No violation %e\n", cut_1_violation); + } + } + + // Generate a cut for delta = max { |a_j|, j in I} + { + f_t max_coeff = 0.0; + for (i_t k = 0; k < transformed_inequality.i.size(); k++) + { + const i_t j = transformed_inequality.i[k]; + if (var_types[j] == variable_type_t::INTEGER) + { + const f_t abs_aj = std::abs(transformed_inequality.x[k]); + if (abs_aj > max_coeff) + { + max_coeff = abs_aj; + } + } + } + //printf("Cut 2 max_coeff %e size %ld\n", max_coeff, transformed_inequality.i.size()); + + if (max_coeff > 1e-6 && max_coeff != 1.0) + { + + sparse_vector_t scaled_inequality = transformed_inequality; + const i_t nz = transformed_inequality.i.size(); + for (i_t k = 0; k < nz; k++) + { + scaled_inequality.x[k] /= max_coeff; + } + const f_t scaled_rhs = transformed_rhs / max_coeff; + sparse_vector_t cut_2(lp.num_cols, 0); + f_t cut_2_rhs; + mir.generate_cut_nonnegative(scaled_inequality, scaled_rhs, var_types, cut_2, cut_2_rhs); + f_t cut_2_violation = mir.compute_violation(cut_2, cut_2_rhs, transformed_xstar); + if (cut_2_violation > 1e-6) + { + //printf("Cut 2: Found violation of %e\n", cut_2_violation); + transformed_cuts.push_back(cut_2); + transformed_cut_rhs.push_back(cut_2_rhs); + 
transformed_violations.push_back(cut_2_violation); + } + else { + //printf("Cut 2: no violation %e\n", cut_2_violation); + } + + } + } + + if (!transformed_violations.empty()) { + std::vector permuted(transformed_violations.size()); + std::iota(permuted.begin(), permuted.end(), 0); + std::sort(permuted.begin(), permuted.end(), [&](i_t i, i_t j) { + return transformed_violations[i] > transformed_violations[j]; + }); + + // Get the biggest violation + const i_t best_index = permuted[0]; + //printf("\tBest index %d\n", best_index); + f_t max_viol = transformed_violations[best_index]; + cut = transformed_cuts[best_index]; + cut_rhs = transformed_cut_rhs[best_index]; + + if (max_viol > 1e-6) { +#if 0 + // Divide by 1/2*violation, 1/4*violation, 1/8*violation + sparse_vector_t tmp_cut = best_cut; + for (i_t k = 0; k < tmp_cut.i.size(); k++) + { + tmp_cut.x[k] /= (0.5 * max_viol); + } + f_t tmp_cut_rhs = best_cut_rhs / (0.5 * max_viol); + f_t tmp_viol = mir.compute_violations(tmp_cut, tmp_cut_rhs, transformed_xstar); + + if (tmp_viol > max_viol) + { + max_viol = tmp_viol; + best_cut = tmp_cut; + + } +#endif + + // Transform back to the original variables + mir.to_original(lp, cut, cut_rhs); + mir.remove_small_coefficients(lp.lower, lp.upper, cut, cut_rhs); + mir.substitute_slacks(lp, Arow, cut, cut_rhs); + f_t viol = mir.compute_violation(cut, cut_rhs, xstar); + //printf("after slacks and small coeff. Violation %e\n", viol); + add_cut = true; + } + } + +#if 0 + add_cut = generate_single_mir_cut( + lp, settings, Arow, var_types, xstar, inequality, inequality_rhs, mir, cut, cut_rhs); +#endif + if (add_cut) { + printf("\t adding cut - agg %d\n", num_aggregated); + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); + break; + } else { + // Perform aggregation to try and find a cut + + // Find all the continuous variables in the inequality + i_t num_continuous = 0; + f_t max_off_bound = 0.0; + i_t max_off_bound_var = -1; + for (i_t p = 0; p < inequality.i.size(); p++) { + const i_t j = inequality.i[p]; + if (var_types[j] == variable_type_t::CONTINUOUS) { + num_continuous++; + + const f_t off_lower = lp.lower[j] > -inf ? xstar[j] - lp.lower[j] : std::abs(xstar[j]); + const f_t off_upper = lp.upper[j] < inf ? 
lp.upper[j] - xstar[j] : std::abs(xstar[j]); + const f_t off_bound = std::max(off_lower, off_upper); + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j+1]; + const i_t col_len = col_end - col_start; + if (off_bound > max_off_bound && col_len > 1) { + max_off_bound = off_bound; + max_off_bound_var = j; + } + } + } + //printf("\tnum_continuous %d max_off_bound %e var %d\n", num_continuous, max_off_bound, max_off_bound_var); + + if (num_continuous == 0 || max_off_bound < 1e-6) { + break; + } + + // The variable that is farthest from its bound is used as a pivot + if (max_off_bound_var > 0) { + const i_t col_start = lp.A.col_start[max_off_bound_var]; + const i_t col_end = lp.A.col_start[max_off_bound_var + 1]; + const i_t col_len = col_end - col_start; + if (col_len > 1) { + std::vector potential_rows; + potential_rows.reserve(col_len); + + const f_t threshold = 1e-4; + for (i_t q = col_start; q < col_end; q++) { + const i_t i = lp.A.i[q]; + const f_t val = lp.A.x[q]; + // Can't use rows that have already been aggregated + if (std::abs(val) > threshold && aggregated_mark[i] == 0) { potential_rows.push_back(i); } + } + + if (!potential_rows.empty()) { + std::sort(potential_rows.begin(), potential_rows.end(), [&](i_t a, i_t b) { + return score[a] > score[b]; + }); + + const i_t pivot_row = potential_rows[0]; + + sparse_vector_t pivot_row_inequality(Arow, pivot_row); + f_t pivot_row_rhs = lp.rhs[pivot_row]; + //printf("\tCombining with %d\n", pivot_row); + mir.combine_rows(lp, Arow, max_off_bound_var, pivot_row_inequality, pivot_row_rhs, inequality, inequality_rhs); + aggregated_rows.push_back(pivot_row); + aggregated_mark[pivot_row] = 1; + } else { + //printf("\tno potential rows to aggregate\n"); + break; + } + } else { + printf("Bad col len\n"); + exit(1); + } + } + num_aggregated++; // Always increase so the loop terminates } - // Check that the cut is violated - // The cut is of the form cut'*x >= cut_rhs - // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar - f_t dot = cut.dot(xstar); - f_t cut_norm = cut.norm2_squared(); - if (dot < cut_rhs && cut_norm > 0.0) { - // Cut is violated. Compute it's distance - f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm); - if (cut_distance > min_cut_distance) { - add_cut = true; - } - } } + if (add_cut) { - cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); + // We were successful in generating a cut. + + // Set the score of the aggregated rows to zero + for (i_t row : aggregated_rows) { + score[row] = 0.0; + } + + // Clear the aggregated mark + for (i_t row : aggregated_rows) { + aggregated_mark[row] = 0; + } + // Clear the aggregated rows + aggregated_rows.clear(); } + + // Set the score of the current row to zero + score[i] = 0.0; + + // Re-sort the rows by score + // It's possible this could be made more efficient by storing the rows in a data structure + // that allows us to: + // 1. Get the row with the best score + // 2. Get the row with a nonzero in column j that has the best score + // 3. Remove the rows that have been aggregated + // 4. 
Remove the current row + std::iota(sorted_indices.begin(), sorted_indices.end(), 0); + std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { + return score[a] > score[b]; + }); } } +template +bool cut_generation_t::generate_single_mir_cut( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + const std::vector& var_types, + const std::vector& xstar, + const sparse_vector_t& inequality, + f_t inequality_rhs, + mixed_integer_rounding_cut_t& mir, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + i_t mir_status = + mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs); + bool add_cut = false; + const f_t min_cut_distance = 1e-4; + if (mir_status == 0) { + if (cut.i.size() == 0) { + return false; + } + mir.substitute_slacks(lp, Arow, cut, cut_rhs); + if (cut.i.size() == 0) { + return false; + } + // Check that the cut is violated + // The cut is of the form cut'*x >= cut_rhs + // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar + f_t dot = cut.dot(xstar); + f_t cut_norm = cut.norm2_squared(); + if (dot < cut_rhs && cut_norm > 0.0) { + // Cut is violated. Compute it's distance + f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm); + if (cut_distance > min_cut_distance) { + add_cut = true; + } + } + } + return add_cut; +} + template void cut_generation_t::generate_gomory_cuts( @@ -925,10 +1260,13 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t::initialize(const lp_problem_t -inf) { has_lower_[j] = 1; } + if (lj > -inf && lj != 0.0) { + has_lower_[j] = 1; + bound_info_[j] = -1; + } } #if 0 @@ -977,6 +1320,230 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t +void mixed_integer_rounding_cut_t::to_nonnegative(const lp_problem_t& lp, + sparse_vector_t& inequality, + f_t& rhs) +{ + const i_t nz = inequality.i.size(); + for (i_t k = 0; k < nz; k++) + { + const i_t j = inequality.i[k]; + const f_t aj = inequality.x[k]; + if (bound_info_[j] == -1) + { + // v_j = x_j - l_j, v_j >= 0 + // x_j = v_j + l_j + // sum_{k != j} a_k x_j + a_j x_j <= beta + // sum_{k != j} a_k x_j + a_j (v_j + l_j) <= beta + // sum_{k != j} a_k x_j + a_j v_j <= beta - a_j l_j + const f_t lj = lp.lower[j]; + rhs -= aj * lj; + } + else if (bound_info_[j] == 1) + { + // w_j = u_j - x_j, w_j >= 0 + // x_j = u_j - w_j + // sum_{k != j} a_k x_k + a_j x_j <= beta + // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= beta + // sum_{k != j} a_k x_k - a_j w_j <= beta - a_j u_j + const f_t uj = lp.upper[j]; + inequality.x[k] *= -1.0; + rhs -= aj * uj; + } + } +} + +template +void mixed_integer_rounding_cut_t::relaxation_to_nonnegative( + const lp_problem_t& lp, + const std::vector& xstar, + std::vector& xstar_nonnegative) +{ + xstar_nonnegative = xstar; + const i_t n = lp.num_cols; + for (i_t j = 0; j < n; ++j) + { + if (bound_info_[j] == -1) + { + // v_j = x_j - l_j + const f_t lj = lp.lower[j]; + xstar_nonnegative[j] -= lj; + } else if (bound_info_[j] == 1) + { + // w_j = u_j - x_j + const f_t uj = lp.upper[j]; + xstar_nonnegative[j] = uj - xstar_nonnegative[j]; + } + } +} + + +template +void mixed_integer_rounding_cut_t::to_original(const lp_problem_t& lp, + sparse_vector_t& inequality, + f_t& rhs) +{ + const i_t nz = inequality.i.size(); + for (i_t k = 0; k < nz; k++) + { + const i_t j = inequality.i[k]; + const f_t dj = inequality.x[k]; + if (bound_info_[j] == -1) + { + // v_j = x_j - l_j, v_j >= 0 + // sum_{k != j} d_k x_k + d_j v_j >= beta + // sum_{k != 
j} d_k x_k + d_j (x_j - l_j) >= beta
+      // sum_{k != j} d_k x_k + d_j x_j >= beta + d_j l_j
+      const f_t lj = lp.lower[j];
+      rhs += dj * lj;
+    } else if (bound_info_[j] == 1)
+    {
+      // w_j = u_j - x_j, w_j >= 0
+      // sum_{k != j} d_k x_k + d_j w_j >= beta
+      // sum_{k != j} d_k x_k + d_j (u_j - x_j) >= beta
+      // sum_{k != j} d_k x_k - d_j x_j >= beta - d_j u_j
+      const f_t uj = lp.upper[j];
+      inequality.x[k] *= -1.0;
+      rhs -= dj * uj;
+    }
+  }
+}
+
+template
+void mixed_integer_rounding_cut_t::remove_small_coefficients(
+  const std::vector& lower_bounds,
+  const std::vector& upper_bounds,
+  sparse_vector_t& cut,
+  f_t& cut_rhs)
+{
+  const i_t nz = cut.i.size();
+  i_t removed  = 0;
+  for (i_t k = 0; k < cut.i.size(); k++) {
+    const i_t j = cut.i[k];
+
+    // Check for small coefficients
+    const f_t aj = cut.x[k];
+    if (std::abs(aj) < 1e-6) {
+      if (aj >= 0.0 && upper_bounds[j] < inf) {
+        // Move this to the right-hand side
+        cut_rhs -= aj * upper_bounds[j];
+        cut.x[k] = 0.0;
+        removed++;
+      } else if (aj <= 0.0 && lower_bounds[j] > -inf) {
+        cut_rhs += aj * lower_bounds[j];
+        cut.x[k] = 0.0;
+        removed++;
+        continue;
+      } else {
+      }
+    }
+  }
+
+  if (removed > 0)
+  {
+    sparse_vector_t new_cut(cut.n, 0);
+    cut.squeeze(new_cut);
+    cut = new_cut;
+  }
+}
+
+template
+i_t mixed_integer_rounding_cut_t::generate_cut_nonnegative(
+  const sparse_vector_t& a,
+  f_t beta,
+  const std::vector& var_types,
+  sparse_vector_t& cut,
+  f_t& cut_rhs)
+{
+
+  auto f = [](f_t q_1, f_t q_2) -> f_t {
+    f_t q_1_hat = q_1 - std::floor(q_1);
+    f_t q_2_hat = q_2 - std::floor(q_2);
+    return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1);
+  };
+
+  auto h = [](f_t q) -> f_t { return std::max(q, 0.0); };
+
+  std::vector cut_indices;
+  cut_indices.reserve(a.i.size());
+  f_t R = (beta - std::floor(beta)) * std::ceil(beta);
+
+  for (i_t k = 0; k < a.i.size(); k++) {
+    const i_t jj = a.i[k];
+    f_t aj       = a.x[k];
+    if (var_types[jj] == variable_type_t::INTEGER) {
+      x_workspace_[jj] += f(aj, beta);
+      if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+        x_mark_[jj] = 1;
+        cut_indices.push_back(jj);
+      }
+    } else {
+      x_workspace_[jj] += h(aj);
+      if (!x_mark_[jj] && x_workspace_[jj] != 0.0) {
+        x_mark_[jj] = 1;
+        cut_indices.push_back(jj);
+      }
+    }
+  }
+
+  cut.i.reserve(cut_indices.size());
+  cut.x.reserve(cut_indices.size());
+  cut.i.clear();
+  cut.x.clear();
+  for (i_t k = 0; k < cut_indices.size(); k++) {
+    const i_t j = cut_indices[k];
+    cut.i.push_back(j);
+    cut.x.push_back(x_workspace_[j]);
+    //printf("cut i %d x %e n %d\n", j, x_workspace_[j], static_cast(var_types.size()));
+  }
+
+  // Clear the workspace
+  for (i_t jj : cut_indices) {
+    x_workspace_[jj] = 0.0;
+    x_mark_[jj] = 0;
+  }
+
+
+#if 1
+  for (i_t j = 0; j < x_workspace_.size(); j++) {
+    if (x_workspace_[j] != 0.0) {
+      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
+      exit(1);
+    }
+    if (x_mark_[j] != 0) {
+      printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]);
+      exit(1);
+    }
+  }
+#endif
+
+  // The new cut is: g'*x >= R
+  // But we want to have it in the form h'*x <= b
+  cut.sort();
+
+  cut_rhs = R;
+
+  // Check for repeated indices
+  std::vector check(num_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++)
+  {
+    if (check[cut.i[p]] != 0)
+    {
+      printf("repeated index in generated cut\n");
+      exit(1);
+    }
+    check[cut.i[p]] = 1;
+  }
+
+  if (cut.i.size() == 0) {
+    //settings_.log.printf("MIR: No coefficients in cut\n");
+    return -1;
+  }
+
+  return 0;
+}
+
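  // A worked example of the formulas above (hypothetical data): for the base
  // inequality 2.5*y0 + 0.3*y1 + 1.2*x2 >= 3.4 with y0, y1 integer, x2
  // continuous, and all variables nonnegative,
  //   R = (3.4 - floor(3.4)) * ceil(3.4) = 0.4 * 4 = 1.6,
  //   f(2.5, 3.4) = min(0.5, 0.4) + 0.4 * floor(2.5) = 1.2,
  //   f(0.3, 3.4) = min(0.3, 0.4) + 0.4 * floor(0.3) = 0.3,
  //   h(1.2) = 1.2,
  // so the generated cut is 1.2*y0 + 0.3*y1 + 1.2*x2 >= 1.6.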
 template
 i_t mixed_integer_rounding_cut_t::generate_cut(
   const sparse_vector_t& a,
@@ -987,7 +1554,7 @@ i_t mixed_integer_rounding_cut_t::generate_cut(
   sparse_vector_t& cut,
   f_t& cut_rhs)
 {
-#if 0
+#if 1
   for (i_t j = 0; j < x_workspace_.size(); j++) {
     if (x_workspace_[j] != 0.0) {
       printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
@@ -1118,6 +1685,8 @@ i_t mixed_integer_rounding_cut_t::generate_cut(
 
   cut.i.reserve(cut_indices.size());
   cut.x.reserve(cut_indices.size());
+  cut.i.clear();
+  cut.x.clear();
   for (i_t k = 0; k < cut_indices.size(); k++) {
     const i_t jj = cut_indices[k];
 
@@ -1145,7 +1714,7 @@ i_t mixed_integer_rounding_cut_t::generate_cut(
   }
 
-#if 0
+#if 1
   for (i_t j = 0; j < x_workspace_.size(); j++) {
     if (x_workspace_[j] != 0.0) {
      printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]);
@@ -1164,6 +1733,18 @@ i_t mixed_integer_rounding_cut_t::generate_cut(
 
   cut_rhs = R;
 
+  // Check for repeated indices
+  std::vector check(num_vars_, 0);
+  for (i_t p = 0; p < cut.i.size(); p++)
+  {
+    if (check[cut.i[p]] != 0)
+    {
+      printf("repeated index in generated cut\n");
+      exit(1);
+    }
+    check[cut.i[p]] = 1;
+  }
+
   if (cut.i.size() == 0) {
     //settings_.log.printf("MIR: No coefficients in cut\n");
     return -1;
@@ -1318,6 +1899,118 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_
 #endif
 }
 
+template
+f_t mixed_integer_rounding_cut_t::compute_violation(const sparse_vector_t& cut,
+                                                    f_t cut_rhs,
+                                                    const std::vector& xstar)
+{
+  f_t dot           = cut.dot(xstar);
+  f_t cut_violation = cut_rhs - dot;
+  return cut_violation;
+}
+
+template
+void mixed_integer_rounding_cut_t::combine_rows(const lp_problem_t& lp,
+                                                csr_matrix_t& Arow,
+                                                i_t xj,
+                                                const sparse_vector_t& pivot_row,
+                                                f_t pivot_row_rhs,
+                                                sparse_vector_t& inequality,
+                                                f_t& inequality_rhs)
+{
+
+#if 1
+  for (i_t k = 0; k < x_workspace_.size(); k++) {
+    if (x_workspace_[k] != 0.0) {
+      printf("Dirty x_workspace_[%d] = %e\n", k, x_workspace_[k]);
+      exit(1);
+    }
+    if (x_mark_[k] != 0) {
+      printf("Dirty x_mark_[%d] = %d\n", k, x_mark_[k]);
+      exit(1);
+    }
+  }
+#endif
+
+  indices_.clear();
+  indices_.reserve(pivot_row.i.size() + inequality.i.size());
+
+  // Find the coefficient associated with variable xj in the pivot row
+  f_t a_l_j = 0.0;
+  for (i_t k = 0; k < pivot_row.i.size(); k++) {
+    const i_t j = pivot_row.i[k];
+    if (j == xj) {
+      a_l_j = pivot_row.x[k];
+      break;
+    }
+  }
+
+  if (a_l_j == 0)
+  {
+    return;
+  }
+
+  f_t a_i_j = 0.0;
+
+  i_t nz = 0;
+  // Store the inequality in the workspace
+  // and save the coefficient associated with variable xj
+  for (i_t k = 0; k < inequality.i.size(); k++) {
+    const i_t j = inequality.i[k];
+    if (j != xj) {
+      x_workspace_[j] = inequality.x[k];
+      x_mark_[j] = 1;
+      indices_.push_back(j);
+      nz++;
+    } else {
+      a_i_j = inequality.x[k];
+    }
+  }
+
+  f_t pivot_value = a_i_j / a_l_j;
+  // Adjust the rhs of the inequality
+  inequality_rhs -= pivot_value * pivot_row_rhs;
+
+  // Adjust the coefficients of the inequality
+  // based on the nonzeros in the pivot row
+  for (i_t k = 0; k < pivot_row.i.size(); k++) {
+    const i_t j = pivot_row.i[k];
+    if (j != xj) {
+      x_workspace_[j] -= pivot_value * pivot_row.x[k];
+      if (!x_mark_[j]) {
+        x_mark_[j] = 1;
+        indices_.push_back(j);
+        nz++;
+      }
+    }
+  }
+
+  // Store the new inequality
+  inequality.i.resize(nz);
+  inequality.x.resize(nz);
+  for (i_t k = 0; k < nz; k++) {
+    inequality.i[k] = indices_[k];
+    inequality.x[k] = x_workspace_[indices_[k]];
+  }
+
+  // Check for repeated indices
+  std::vector check(num_vars_, 0);
+  for (i_t k = 0; k < inequality.i.size(); k++)
+  {
+    if 
(check[inequality.i[k]] == 1) { + printf("repeated index\n"); + } + check[inequality.i[k]] = 1; + } + + // Clear the workspace + for (i_t j : indices_) { + x_workspace_[j] = 0.0; + x_mark_[j] = 0; + } + indices_.clear(); +} + template i_t add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index e7014e546..838ad753c 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -137,10 +137,13 @@ class knapsack_generation_t { const std::vector& get_knapsack_constraints() const { return knapsack_constraints_; } private: + // Generate a heuristic solution to the 0-1 knapsack problem f_t greedy_knapsack_problem(const std::vector& values, const std::vector& weights, f_t rhs, std::vector& solution); + + // Solve a 0-1 knapsack problem using dynamic programming f_t solve_knapsack_problem(const std::vector& values, const std::vector& weights, f_t rhs, @@ -150,6 +153,10 @@ class knapsack_generation_t { std::vector knapsack_constraints_; }; +// Forward declaration +template +class mixed_integer_rounding_cut_t; + template class cut_generation_t { public: @@ -174,6 +181,7 @@ class cut_generation_t { const std::vector& nonbasic_list); private: + // Generate all mixed integer gomory cuts void generate_gomory_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, @@ -184,6 +192,7 @@ class cut_generation_t { const std::vector& basic_list, const std::vector& nonbasic_list); + // Generate all mixed integer rounding cuts void generate_mir_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, @@ -191,12 +200,29 @@ class cut_generation_t { const std::vector& var_types, const std::vector& xstar); + // Generate all knapsack cuts void generate_knapsack_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, const std::vector& xstar); + + + // Generate a single MIR cut + bool generate_single_mir_cut(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + const std::vector& var_types, + const std::vector& xstar, + const sparse_vector_t& inequality, + f_t inequality_rhs, + mixed_integer_rounding_cut_t& mir, + sparse_vector_t& cut, + f_t& cut_rhs); + + + cut_pool_t& cut_pool_; knapsack_generation_t knapsack_generation_; }; @@ -252,10 +278,59 @@ class mixed_integer_rounding_cut_t { { } + // We call initalize each cut pass + // it resizes the arrays void initialize(const lp_problem_t& lp, const std::vector& new_slacks, const std::vector& xstar); + + // Convert an inequality of the form: sum_j a_j x_j >= beta + // with l_j <= x_j <= u_j into the form: + // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j + // + sum_{j in U} d_j w_j >= delta, + // where v_j = x_j - l_j for j in L + // and w_j = u_j - x_j for j in Us + void to_nonnegative(const lp_problem_t& lp, + sparse_vector_t& inequality, + f_t& rhs); + + void relaxation_to_nonnegative(const lp_problem_t& lp, + const std::vector& xstar, + std::vector& xstar_nonnegative); + + // Convert an inequality of the form: + // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j + // + sum_{j in U} d_j w_j >= delta + // where v_j = x_j - l_j for j in L + // and w_j = u_j - x_j for j in U + // back to an inequality on the original variables + // sum_j a_j x_j >= beta + void to_original(const lp_problem_t&lp, + sparse_vector_t& inequality, + f_t& 
rhs); + + // Given a cut of the form sum_j d_j x_j >= beta + // with l_j <= x_j <= u_j, try to remove coefficients d_j + // with | d_j | < epsilon + void remove_small_coefficients(const std::vector& lower_bounds, + const std::vector& upper_bounds, + sparse_vector_t& cut, + f_t& cut_rhs); + + + // Given an inequality sum_j a_j x_j >= beta, x_j >= 0, x_j in Z, j in I + // generate an MIR cut of the form sum_j d_j x_j >= delta + i_t generate_cut_nonnegative(const sparse_vector_t& a, + f_t beta, + const std::vector& var_types, + sparse_vector_t& cut, + f_t& cut_rhs); + + f_t compute_violation(const sparse_vector_t& cut, + f_t cut_rhs, + const std::vector& xstar); + i_t generate_cut(const sparse_vector_t& a, f_t beta, const std::vector& upper_bounds, @@ -269,6 +344,16 @@ class mixed_integer_rounding_cut_t { sparse_vector_t& cut, f_t& cut_rhs); + // Combine the pivot row with the inequality to eliminate the variable j + // The new inequality is returned in inequality and inequality_rhs + void combine_rows(const lp_problem_t& lp, + csr_matrix_t& Arow, + i_t j, + const sparse_vector_t& pivot_row, + f_t pivot_row_rhs, + sparse_vector_t& inequality, + f_t& inequality_rhs); + private: i_t num_vars_; const simplex_solver_settings_t& settings_; @@ -278,6 +363,8 @@ class mixed_integer_rounding_cut_t { std::vector has_upper_; std::vector is_slack_; std::vector slack_rows_; + std::vector indices_; + std::vector bound_info_; bool needs_complement_; }; diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 07d44f2e9..f717fc352 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -632,7 +632,7 @@ size_t csc_matrix_t::hash() const } template -void csr_matrix_t::check_matrix(std::string matrix_name) const +i_t csr_matrix_t::check_matrix(std::string matrix_name) const { std::vector col_marker(this->n, -1); for (i_t i = 0; i < this->m; ++i) { @@ -640,13 +640,15 @@ void csr_matrix_t::check_matrix(std::string matrix_name) const const i_t row_end = this->row_start[i + 1]; for (i_t p = row_start; p < row_end; ++p) { const i_t j = this->j[p]; - if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); } + if (j < 0 || j >= this->n) { printf("CSR Error: column index %d not in range [0, %d)\n", j, this->n); return -1;} if (col_marker[j] == i) { printf("CSR Error (%s) : repeated column index %d in row %d\n", matrix_name.c_str(), j, i); + return -1; } col_marker[j] = i; } } + return 0; } // x <- x + alpha * A(:, j) diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 7be647270..ccf69dbe6 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -158,7 +158,7 @@ class csr_matrix_t { i_t append_row(const sparse_vector_t& c); // Ensures no repeated column indices within a row - void check_matrix(std::string matrix_name = "") const; + i_t check_matrix(std::string matrix_name = "") const; bool is_diagonal() const { From fb85947ad4d3e78b367a2f604a4b100acf2584b1 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 14 Jan 2026 14:11:54 -0800 Subject: [PATCH 27/45] Fix bug in knapsack cuts --- cpp/src/dual_simplex/branch_and_bound.cpp | 9 ++++++- cpp/src/dual_simplex/cuts.cpp | 29 +++++++++++++++++------ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 23e512e1b..25be253bc 100644 --- 
a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1750,6 +1750,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); upper_bound_ = root_objective_; mutex_upper_.unlock(); + if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) { + settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); + settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); + settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts); + settings_.log.printf("Cut pool size : %d\n", cut_pool_size); + settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]); + } // We should be done here uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); solution.objective = incumbent_.objective; @@ -1811,7 +1818,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } exit(-1); } - + #ifdef PRINT_CUTS csc_matrix_t cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]); cuts_to_add.to_compressed_col(cuts_to_add_col); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index e02531eca..bd5e86033 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -228,7 +228,8 @@ knapsack_generation_t::knapsack_generation_t( for (i_t i = 0; i < lp.num_rows; i++) { const i_t row_start = Arow.row_start[i]; const i_t row_end = Arow.row_start[i + 1]; - if (row_end - row_start < 3) { continue; } + const i_t row_len = row_end - row_start; + if (row_len < 3) { continue; } bool is_knapsack = true; f_t sum_pos = 0.0; //printf("i %d ", i); @@ -255,9 +256,14 @@ knapsack_generation_t::knapsack_generation_t( if (is_knapsack) { const f_t beta = lp.rhs[i]; - printf("Knapsack constraint %d beta %e sum_pos %e\n", i, beta, sum_pos); if (std::abs(beta - std::round(beta)) <= settings.integer_tol) { - if (beta >= 0.0 && beta <= sum_pos) { + if (beta > 0.0 && beta <= sum_pos && std::abs(sum_pos / (row_len - 1) - beta) > 1e-3) { + printf("Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n", + i, + row_len, + beta, + sum_pos, + sum_pos / (row_len - 1)); knapsack_constraints_.push_back(i); } } @@ -322,7 +328,7 @@ i_t knapsack_generation_t::generate_knapsack_cuts( for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { const i_t j = knapsack_inequality.i[k]; if (!is_slack_[j]) { - const f_t vj = 1.0 - xstar[j]; + const f_t vj = std::min(1.0, std::max(0.0,1.0 - xstar[j])); objective_constant += vj; values[h] = vj; weights[h] = knapsack_inequality.x[k]; @@ -357,6 +363,7 @@ i_t knapsack_generation_t::generate_knapsack_cuts( const i_t j = knapsack_inequality.i[k]; if (!is_slack_[j]) { if (solution[h] == 0.0) { + //printf("x%d in cover. 
relaxation %e\n", j, xstar[j]); cut.i.push_back(j); cut.x.push_back(-1.0); } @@ -374,7 +381,15 @@ i_t knapsack_generation_t::generate_knapsack_cuts( f_t violation = dot - cut_rhs; printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); - if (violation <= tol) { return -1; } + if (violation >= -tol) { return -1; } + +#ifdef PRINT_KNAPSACK_CUT + printf("knapsack cut (cover %d): \n", cover_size); + for (i_t k = 0; k < cut.i.size(); k++) { + printf("x%d coeff %g value %g\n", cut.i[k], -cut.x[k], xstar[cut.i[k]]); + } + printf("cut_rhs %g\n", -cut_rhs); +#endif return 0; } @@ -1035,7 +1050,7 @@ void cut_generation_t::generate_gomory_cuts( f_t dot = cut_A.dot(xstar); f_t cut_norm = cut_A.norm2_squared(); if (dot >= cut_A_rhs) { - settings.log.printf("Cut %d is not violated. Skipping\n", i); + //settings.log.printf("Cut %d is not violated. Skipping\n", i); continue; } cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm); @@ -1069,7 +1084,7 @@ void cut_generation_t::generate_gomory_cuts( f_t dot = cut_B.dot(xstar); f_t cut_norm = cut_B.norm2_squared(); if (dot >= cut_B_rhs) { - settings.log.printf("Cut %d is not violated. Skipping\n", i); + //settings.log.printf("Cut %d is not violated. Skipping\n", i); continue; } cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm); From 62606f82ed15100e8c4837bdf8a08d9b629e087e Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 15 Jan 2026 18:41:30 -0800 Subject: [PATCH 28/45] Generate CG cuts. In prep for adding strong-CG cuts --- cpp/src/dual_simplex/branch_and_bound.cpp | 7 +- cpp/src/dual_simplex/cuts.cpp | 479 ++++++++++++++++------ cpp/src/dual_simplex/cuts.hpp | 35 +- 3 files changed, 401 insertions(+), 120 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 25be253bc..9cc0e6c78 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1736,6 +1736,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_gomory_cuts = 0; i_t num_mir_cuts = 0; i_t num_knapsack_cuts = 0; + i_t num_cg_cuts = 0; i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { @@ -1754,6 +1755,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts); + settings_.log.printf("CG cuts : %d\n", num_cg_cuts); settings_.log.printf("Cut pool size : %d\n", cut_pool_size); settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]); } @@ -1804,6 +1806,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut num_mir_cuts++; } else if (cut_types[k] == cut_type_t::KNAPSACK) { num_knapsack_cuts++; + } else if (cut_types[k] == cut_type_t::CHVATAL_GOMORY) { + num_cg_cuts++; } } print_cut_types(cut_types, settings_); @@ -1975,10 +1979,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } } - if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) { + if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts + num_cg_cuts > 0) { settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts); + settings_.log.printf("CG cuts : %d\n", 
num_cg_cuts); settings_.log.printf("Cut pool size : %d\n", cut_pool_size); settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]); } diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index bd5e86033..040a52461 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -8,12 +8,15 @@ #include #include +#include -namespace cuopt::linear_programming::dual_simplex { +namespace cuopt::linear_programming::dual_simplex { template -void cut_pool_t::add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs) +void cut_pool_t::add_cut(cut_type_t cut_type, + const sparse_vector_t& cut, + f_t rhs) { // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool @@ -26,49 +29,39 @@ void cut_pool_t::add_cut(cut_type_t cut_type, const sparse_vector_t index(original_vars_, 0); - for (i_t p = 0; p < cut.i.size(); p++) - { - const i_t j = cut.i[p]; - if (index[j] != 0) - { - printf("Repeated index %d in cut of size %ld\n", j, cut.i.size()); - for (i_t k = 0; k < cut.i.size(); k++) - { - printf("i %d val %e\n", cut.i[k], cut.x[k]); - } - exit(1); - } - index[j] = 1; - } -#endif - sparse_vector_t cut_squeezed; cut.squeeze(cut_squeezed); + if (cut_squeezed.i.size() == 0) { + settings_.log.printf("Cut has no coefficients\n"); + return; + } cut_storage_.append_row(cut_squeezed); - //settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1); +#ifdef PRINT_ADD_CUTS + settings_.log.printf("Added cut %d to pool\n", cut_storage_.m - 1); +#endif rhs_storage_.push_back(rhs); cut_type_.push_back(cut_type); cut_age_.push_back(0); } - template -f_t cut_pool_t::cut_distance(i_t row, const std::vector& x, f_t& cut_violation, f_t &cut_norm) +f_t cut_pool_t::cut_distance(i_t row, + const std::vector& x, + f_t& cut_violation, + f_t& cut_norm) { const i_t row_start = cut_storage_.row_start[row]; - const i_t row_end = cut_storage_.row_start[row + 1]; - f_t cut_x = 0.0; - f_t dot = 0.0; + const i_t row_end = cut_storage_.row_start[row + 1]; + f_t cut_x = 0.0; + f_t dot = 0.0; for (i_t p = row_start; p < row_end; p++) { - const i_t j = cut_storage_.j[p]; + const i_t j = cut_storage_.j[p]; const f_t cut_coeff = cut_storage_.x[p]; cut_x += cut_coeff * x[j]; dot += cut_coeff * cut_coeff; } - cut_violation = rhs_storage_[row] - cut_x; - cut_norm = std::sqrt(dot); + cut_violation = rhs_storage_[row] - cut_x; + cut_norm = std::sqrt(dot); const f_t distance = cut_violation / cut_norm; return distance; } @@ -84,17 +77,21 @@ f_t cut_pool_t::cut_density(i_t row) } template -f_t cut_pool_t::cut_orthogonality(i_t i, i_t j) +f_t cut_pool_t::cut_orthogonality(i_t i, i_t j) { const i_t i_start = cut_storage_.row_start[i]; - const i_t i_end = cut_storage_.row_start[i + 1]; - const i_t i_nz = i_end - i_start; + const i_t i_end = cut_storage_.row_start[i + 1]; + const i_t i_nz = i_end - i_start; const i_t j_start = cut_storage_.row_start[j]; - const i_t j_end = cut_storage_.row_start[j + 1]; - const i_t j_nz = j_end - j_start; + const i_t j_end = cut_storage_.row_start[j + 1]; + const i_t j_nz = j_end - j_start; - f_t dot = sparse_dot(cut_storage_.j.data() + i_start, cut_storage_.x.data() + i_start, i_nz, - cut_storage_.j.data() + j_start, cut_storage_.x.data() + j_start, j_nz); + f_t dot = sparse_dot(cut_storage_.j.data() + i_start, + cut_storage_.x.data() + i_start, + i_nz, + cut_storage_.j.data() + j_start, + cut_storage_.x.data() + j_start, + 
j_nz); f_t norm_i = cut_norms_[i]; f_t norm_j = cut_norms_[j]; @@ -114,14 +111,19 @@ void cut_pool_t::score_cuts(std::vector& x_relax) for (i_t i = 0; i < cut_storage_.m; i++) { f_t violation; cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]); - cut_scores_[i] = cut_distances_[i] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; - //settings_.log.printf("Cut %d type %d distance %+e violation %+e orthogonality %e score %e\n", i, static_cast(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], cut_scores_[i]); + cut_scores_[i] = + cut_distances_[i] <= min_cut_distance + ? 0.0 + : weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; + settings_.log.printf("Cut %d type %d distance %+e violation %+e orthogonality %e score %.16e\n", + i, static_cast(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], + cut_scores_[i]); } std::vector sorted_indices(cut_storage_.m); std::iota(sorted_indices.begin(), sorted_indices.end(), 0); std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { - return cut_scores_[a] > cut_scores_[b]; + return cut_scores_[a] > cut_scores_[b] || (cut_scores_[a] == cut_scores_[b] && cut_type_[a] > cut_type_[b]); }); std::vector indices; @@ -145,19 +147,22 @@ void cut_pool_t::score_cuts(std::vector& x_relax) if (cut_age_[i] > 0) { settings_.log.printf("Adding cut with age %d\n", cut_age_[i]); } - //settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); + settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); best_cuts_.push_back(i); scored_cuts_++; // Recompute the orthogonality for the remaining cuts for (i_t k = 1; k < sorted_indices.size(); k++) { - const i_t j = sorted_indices[k]; + const i_t j = sorted_indices[k]; cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j)); if (cut_orthogonality_[j] >= min_orthogonality) { indices.push_back(j); - cut_scores_[j] = cut_distances_[j] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; - //settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]); + cut_scores_[j] = + cut_distances_[j] <= min_cut_distance + ? 
0.0 + : weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; + // settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]); } } @@ -182,16 +187,21 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std:: best_cuts.x.clear(); best_cuts.row_start.reserve(scored_cuts_ + 1); best_cuts.row_start.push_back(0); + best_rhs.clear(); + best_rhs.reserve(scored_cuts_); + best_cut_types.clear(); + best_cut_types.reserve(scored_cuts_); for (i_t i: best_cuts_) { sparse_vector_t cut(cut_storage_, i); cut.negate(); best_cuts.append_row(cut); - //settings_.log.printf("Best cuts nz %d\n", best_cuts.row_start[best_cuts.m]); best_rhs.push_back(-rhs_storage_[i]); best_cut_types.push_back(cut_type_[i]); } + age_cuts(); + return static_cast(best_cuts_.size()); } @@ -230,14 +240,12 @@ knapsack_generation_t::knapsack_generation_t( const i_t row_end = Arow.row_start[i + 1]; const i_t row_len = row_end - row_start; if (row_len < 3) { continue; } - bool is_knapsack = true; - f_t sum_pos = 0.0; - //printf("i %d ", i); + bool is_knapsack = true; + f_t sum_pos = 0.0; for (i_t p = row_start; p < row_end; p++) { const i_t j = Arow.j[p]; if (is_slack_[j]) { continue; } const f_t aj = Arow.x[p]; - //printf(" j %d (%e < %e) aj %e\n", j, lp.lower[j], lp.upper[j], aj); if (std::abs(aj - std::round(aj)) > settings.integer_tol) { is_knapsack = false; break; @@ -252,13 +260,13 @@ knapsack_generation_t::knapsack_generation_t( } sum_pos += aj; } - // printf("sum_pos %e\n", sum_pos); if (is_knapsack) { const f_t beta = lp.rhs[i]; if (std::abs(beta - std::round(beta)) <= settings.integer_tol) { if (beta > 0.0 && beta <= sum_pos && std::abs(sum_pos / (row_len - 1) - beta) > 1e-3) { - printf("Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n", + printf( + "Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n", i, row_len, beta, @@ -292,31 +300,31 @@ i_t knapsack_generation_t::generate_knapsack_cuts( // Remove the slacks from the inequality f_t seperation_rhs = 0.0; - printf(" Knapsack : "); + settings.log.printf(" Knapsack : "); for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { const i_t j = knapsack_inequality.i[k]; if (is_slack_[j]) { knapsack_inequality.x[k] = 0.0; } else { - printf(" %g x%d +", knapsack_inequality.x[k], j); + settings.log.printf(" %g x%d +", knapsack_inequality.x[k], j); seperation_rhs += knapsack_inequality.x[k]; } } - printf(" <= %g\n", knapsack_rhs); + settings.log.printf(" <= %g\n", knapsack_rhs); seperation_rhs -= (knapsack_rhs + 1); - printf("\t"); + settings.log.printf("\t"); for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { const i_t j = knapsack_inequality.i[k]; if (!is_slack_[j]) { if (std::abs(xstar[j]) > 1e-3) { - printf("x_relax[%d]= %g ", j, xstar[j]); + settings.log.printf("x_relax[%d]= %g ", j, xstar[j]); } } } - printf("\n"); + settings.log.printf("\n"); - printf("seperation_rhs %g\n", seperation_rhs); + settings.log.printf("seperation_rhs %g\n", seperation_rhs); if (seperation_rhs <= 0.0) { return -1; } std::vector values; @@ -338,13 +346,13 @@ i_t knapsack_generation_t::generate_knapsack_cuts( std::vector solution; solution.resize(knapsack_inequality.i.size() - 1); - printf("Calling solve_knapsack_problem\n"); + settings.log.printf("Calling solve_knapsack_problem\n"); f_t objective = solve_knapsack_problem(values, weights, seperation_rhs, solution); if (objective != objective) { return -1; } - printf("objective %e objective_constant %e\n", objective, objective_constant); 
+ settings.log.printf("objective %e objective_constant %e\n", objective, objective_constant); f_t seperation_value = -objective + objective_constant; - printf("seperation_value %e\n", seperation_value); + settings.log.printf("seperation_value %e\n", seperation_value); const f_t tol = 1e-6; if (seperation_value >= 1.0 - tol) { return -1; } @@ -379,7 +387,7 @@ i_t knapsack_generation_t::generate_knapsack_cuts( // Verify the cut is violated f_t dot = cut.dot(xstar); f_t violation = dot - cut_rhs; - printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); + settings.log.printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); if (violation >= -tol) { return -1; } @@ -610,15 +618,12 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& mixed_integer_rounding_cut_t mir(lp.num_cols, settings); mir.initialize(lp, new_slacks, xstar); - std::vector slack_map(lp.num_rows); + std::vector slack_map(lp.num_rows, -1); for (i_t slack : new_slacks) { const i_t col_start = lp.A.col_start[slack]; const i_t col_end = lp.A.col_start[slack + 1]; const i_t col_len = col_end - col_start; - if (col_len != 1) { - printf("Generate MIR cuts: Slack %d has %d nzs in column\n", slack, col_len); - exit(1); - } + assert(col_len == 1); const i_t i = lp.A.i[col_start]; slack_map[i] = slack; } @@ -722,12 +727,6 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& f_t transformed_rhs = inequality_rhs; mir.to_nonnegative(lp, transformed_inequality, transformed_rhs); -#if 0 - for (i_t k = 0; k < transformed_inequality.i.size(); k++) - { - printf("transformed inequality: i %d x %e\n", transformed_inequality.i[k], transformed_inequality.x[k]); - } -#endif std::vector> transformed_cuts; std::vector transformed_cut_rhs; std::vector transformed_violations; @@ -809,24 +808,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& cut_rhs = transformed_cut_rhs[best_index]; if (max_viol > 1e-6) { -#if 0 - // Divide by 1/2*violation, 1/4*violation, 1/8*violation - sparse_vector_t tmp_cut = best_cut; - for (i_t k = 0; k < tmp_cut.i.size(); k++) - { - tmp_cut.x[k] /= (0.5 * max_viol); - } - f_t tmp_cut_rhs = best_cut_rhs / (0.5 * max_viol); - f_t tmp_viol = mir.compute_violations(tmp_cut, tmp_cut_rhs, transformed_xstar); - - if (tmp_viol > max_viol) - { - max_viol = tmp_viol; - best_cut = tmp_cut; - - } -#endif - + // TODO: Divide by 1/2*violation, 1/4*violation, 1/8*violation // Transform back to the original variables mir.to_original(lp, cut, cut_rhs); mir.remove_small_coefficients(lp.lower, lp.upper, cut, cut_rhs); @@ -837,10 +819,6 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } } -#if 0 - add_cut = generate_single_mir_cut( - lp, settings, Arow, var_types, xstar, inequality, inequality_rhs, mir, cut, cut_rhs); -#endif if (add_cut) { printf("\t adding cut - agg %d\n", num_aggregated); cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); @@ -902,7 +880,13 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& sparse_vector_t pivot_row_inequality(Arow, pivot_row); f_t pivot_row_rhs = lp.rhs[pivot_row]; //printf("\tCombining with %d\n", pivot_row); - mir.combine_rows(lp, Arow, max_off_bound_var, pivot_row_inequality, pivot_row_rhs, inequality, inequality_rhs); + mir.combine_rows(lp, + Arow, + max_off_bound_var, + pivot_row_inequality, + pivot_row_rhs, + inequality, + inequality_rhs); aggregated_rows.push_back(pivot_row); aggregated_mark[pivot_row] = 1; } else { @@ -1007,6 +991,7 @@ void cut_generation_t::generate_gomory_cuts( { 
mixed_integer_gomory_base_inequality_t gomory(lp, basis_update, nonbasic_list); mixed_integer_rounding_cut_t mir(lp.num_cols, settings); + strong_cg_cut_t cg(lp, var_types, xstar); mir.initialize(lp, new_slacks, xstar); @@ -1029,6 +1014,70 @@ void cut_generation_t::generate_gomory_cuts( inequality, inequality_rhs); if (gomory_status == 0) { + // Generate a CG cut + if (1) + { + + sparse_vector_t cg_inequality = inequality; + f_t cg_inequality_rhs = inequality_rhs; + printf("CG inequality with slacks nz %ld\n", cg_inequality.i.size()); + for (i_t k = 0; k < cg_inequality.i.size(); k++) { + printf("%e %c%d ", cg_inequality.x[k], var_types[cg_inequality.i[k]] == variable_type_t::CONTINUOUS ? 'x' : 'y', cg_inequality.i[k]); + } + printf("CG inequality rhs %e\n", cg_inequality_rhs); + printf("\n"); + + // Try to remove continuous variables from the inequality + // and transform integer variables to be nonnegative + i_t cg_status = cg.remove_continuous_variables_integers_nonnegative( + lp, settings, var_types, cg_inequality, cg_inequality_rhs); + + if (cg_status != 0) { + // Try negating the equality and see if that helps + cg_inequality = inequality; + cg_inequality.negate(); + cg_inequality_rhs = -inequality_rhs; + cg_status = cg.remove_continuous_variables_integers_nonnegative( + lp, settings, var_types, cg_inequality, cg_inequality_rhs); + } + + if (cg_status == 0) { + // We have an inequality with no continuous variables + + // Generate a CG cut + sparse_vector_t cg_cut(lp.num_cols, 0); + f_t cg_cut_rhs; + cg.generate_strong_cg_cut_integer_only( + settings, var_types, cg_inequality, cg_inequality_rhs, cg_cut, cg_cut_rhs); + + + // Convert the CG cut back to the original variables + cg.to_original_integer_variables(lp, cg_cut, cg_cut_rhs); + + // Check for violation + f_t dot = cg_cut.dot(xstar); + // If the cut is violated we will have: sum_j a_j xstar_j > rhs + f_t violation = dot - cg_cut_rhs; + printf("CG violation %e nz %ld\n", violation, cg_cut.i.size()); + if (violation > 0.0) { + + + // Substitute out the slack variables + //mir.substitute_slacks(lp, Arow, cg_cut, cg_cut_rhs); + + // The CG cut is in the form: sum_j a_j x_j <= rhs + // The cut pool wants the cut in the form: sum_j a_j x_j >= rhs + + cg_cut.negate(); + cg_cut_rhs *= -1.0; + printf("Adding CG cut nz %ld\n", cg_cut.i.size()); + cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); + } + } else { + printf("CG status %d\n", cg_status); + } + } + // Given the base inequality, generate a MIR cut sparse_vector_t cut_A(lp.num_cols, 0); f_t cut_A_rhs; @@ -1050,13 +1099,11 @@ void cut_generation_t::generate_gomory_cuts( f_t dot = cut_A.dot(xstar); f_t cut_norm = cut_A.norm2_squared(); if (dot >= cut_A_rhs) { - //settings.log.printf("Cut %d is not violated. Skipping\n", i); continue; } cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm); A_valid = true; } - //cut_pool_.add_cut(lp.num_cols, cut, cut_rhs); } // Negate the base inequality @@ -1084,20 +1131,16 @@ void cut_generation_t::generate_gomory_cuts( f_t dot = cut_B.dot(xstar); f_t cut_norm = cut_B.norm2_squared(); if (dot >= cut_B_rhs) { - //settings.log.printf("Cut %d is not violated. 
Skipping\n", i); continue; } cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm); B_valid = true; } - // cut_pool_.add_cut(lp.num_cols, cut_B, cut_B_rhs); } if ((cut_A_distance > cut_B_distance) && A_valid) { - //printf("Adding Gomory cut A: nz %d distance %e valid %d\n", cut_A.i.size(), cut_A_distance, A_valid); cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs); } else if (B_valid) { - //printf("Adding Gomory cut B: nz %d distance %e valid %d\n", cut_B.i.size(), cut_B_distance, B_valid); cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs); } } @@ -1148,13 +1191,15 @@ i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( b_transpose_multiply(lp, basic_list, u_bar_dense, BTu_bar); for (i_t k = 0; k < lp.num_rows; k++) { if (k == i) { - if (std::abs(BTu_bar[k] - 1.0) > 1e-6) { - settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); + settings.log.printf("BTu_bar %d error %e\n", k, std::abs(BTu_bar[k] - 1.0)); + if (std::abs(BTu_bar[k] - 1.0) > 1e-10) { + settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); exit(1); } } else { - if (std::abs(BTu_bar[k]) > 1e-6) { - settings_.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); + settings.log.printf("BTu_bar %d error %e\n", k, std::abs(BTu_bar[k])); + if (std::abs(BTu_bar[k]) > 1e-10) { + settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); exit(1); } } @@ -1182,12 +1227,26 @@ i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( } } } + // TODO: abar has lots of small coefficients. It would be good to drop them. + // But we need to be careful not to accidently create a base (in)equality + // that cuts off an integer solution. - sparse_vector_t a_bar(lp.num_cols, abar_indices.size() + 1); + i_t small_coeff = 0; + const f_t drop_tol = 1e-12; + sparse_vector_t a_bar(lp.num_cols, 0) ; + a_bar.i.reserve(abar_indices.size() + 1); + a_bar.x.reserve(abar_indices.size() + 1); for (i_t k = 0; k < abar_indices.size(); k++) { const i_t jj = abar_indices[k]; - a_bar.i[k] = jj; - a_bar.x[k] = x_workspace_[jj]; + if (1 && std::abs(x_workspace_[jj]) < drop_tol) { + small_coeff++; + } else { + a_bar.i.push_back(jj); + a_bar.x.push_back(x_workspace_[jj]); + } + } + if (small_coeff > 0) { + settings.log.printf("Small coeff dropped %d\n", small_coeff); } // Clear the workspace @@ -1200,8 +1259,8 @@ i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( // We should now have the base inequality // x_j + a_bar^T x_N >= b_bar_i // We add x_j into a_bar so that everything is in a single sparse_vector_t - a_bar.i[a_bar.i.size() - 1] = j; - a_bar.x[a_bar.x.size() - 1] = 1.0; + a_bar.i.push_back(j); + a_bar.x.push_back(1.0); #ifdef CHECK_A_BAR_DENSE_DOT std::vector a_bar_dense(lp.num_cols); @@ -1288,17 +1347,13 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t::generate_cut_nonnegative( const i_t j = cut_indices[k]; cut.i.push_back(j); cut.x.push_back(x_workspace_[j]); - //printf("cut i %d x %e n %d\n", j, x_workspace_[j], static_cast(var_types.size())); } // Clear the workspace @@ -1520,7 +1574,7 @@ i_t mixed_integer_rounding_cut_t::generate_cut_nonnegative( } -#if 1 +#ifdef CHECK_WORKSPACE for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); @@ -1548,7 +1602,7 @@ i_t mixed_integer_rounding_cut_t::generate_cut_nonnegative( printf("repeated index in generated cut\n"); exit(1); } - check[cut.i[p]] == 1; + 
check[cut.i[p]] = 1; } if (cut.i.size() == 0) { @@ -2026,6 +2080,195 @@ void mixed_integer_rounding_cut_t::combine_rows(const lp_problem_t +strong_cg_cut_t::strong_cg_cut_t(const lp_problem_t& lp, + const std::vector& var_types, + const std::vector& xstar) + : transformed_variables_(lp.num_cols, 0) +{ + // Determine the substition for the integer variables + for (i_t j = 0; j < lp.num_cols; j++) { + if (var_types[j] == variable_type_t::INTEGER) { + const f_t l_j = lp.lower[j]; + const f_t u_j = lp.upper[j]; + if (l_j != 0.0) { + // We need to transform the variable + // Check the distance to each bound + const f_t dist_to_lower = std::max(0.0, xstar[j] - l_j); + const f_t dist_to_upper = std::max(0.0, u_j - xstar[j]); + if (dist_to_upper >= dist_to_lower || u_j >= inf) { + // We are closer to the lower bound. + transformed_variables_[j] = -1; + } else if (u_j < inf) { + // We are closer to the finite upper bound + transformed_variables_[j] = 1; + } + } + } + } +} + +template +i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + sparse_vector_t& inequality, + f_t& inequality_rhs) +{ + + // Count the number of continuous variables in the inequality + i_t num_continuous = 0; + const i_t nz = inequality.i.size(); + for (i_t k = 0; k < nz; k++) { + const i_t j = inequality.i[k]; + if (var_types[j] == variable_type_t::CONTINUOUS) { + num_continuous++; + } + } + + printf("num_continuous %d\n", num_continuous); + // We assume the inequality is of the form sum_j a_j x_j <= rhs + + for (i_t k = 0; k < nz; k++) { + const i_t j = inequality.i[k]; + const f_t l_j = lp.lower[j]; + const f_t u_j = lp.upper[j]; + const f_t a_j = inequality.x[k]; + if (var_types[j] == variable_type_t::CONTINUOUS) { + if (a_j == 0.0) { + continue; + } + + if (a_j > 0.0 && l_j > -inf) { + // v_j = x_j - l_j >= 0 + // x_j = v_j + l_j + // sum_{k != j} a_k x_k + a_j x_j <= rhs + // sum_{k != j} a_k x_k + a_j (v_j + l_j) <= rhs + // sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j + inequality_rhs -= a_j * l_j; + transformed_variables_[j] = -1; + + // We now have a_j * v_j with a_j, v_j >= 0 + // So we have sum_{k != j} a_k x_k <= sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j + // So we can now drop the continuous variable v_j + inequality.x[k] = 0.0; + + } else if (a_j < 0.0 && u_j < inf) { + // w_j = u_j - x_j >= 0 + // x_j = u_j - w_j + // sum_{k != j} a_k x_k + a_j x_j <= rhs + // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= rhs + // sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j + inequality_rhs -= a_j * u_j; + transformed_variables_[j] = 1; + + // We now have a_j * w_j with a_j, w_j >= 0 + // So we have sum_{k != j} a_k x_k <= sum_{k != j} a_k x_k + a_j w_j <= rhs - a_j u_j + // So we can now drop the continuous variable w_j + inequality.x[k] = 0.0; + } else { + // We can't keep the coefficient of the continuous variable positive + // This means we can't eliminate the continuous variable + printf("x%d ak: %e lo: %e up: %e\n", j, a_j, l_j, u_j); + return -1; + } + } else { + // The variable is integer. We just need to ensure it is nonnegative + if (transformed_variables_[j] == -1) { + // We are closer to the lower bound. 
+ // v_j = x_j - l_j >= 0 + // x_j = v_j + l_j + // sum_{k != j} a_k x_k + a_j x_j <= rhs + // sum_{k != j} a_k x_k + a_j (v_j + l_j) <= rhs + // sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j + inequality_rhs -= a_j * l_j; + } else if (transformed_variables_[j] == 1) { + // We are closer to the finite upper bound + // w_j = u_j - x_j >= 0 + // x_j = u_j - w_j + // sum_{k != j} a_k x_k + a_j x_j <= rhs + // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= rhs + // sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j + inequality_rhs -= a_j * u_j; + inequality.x[k] *= -1.0; + } + } + } + + // Squeeze out the zero coefficents + sparse_vector_t new_inequality(inequality.n, 0); + inequality.squeeze(new_inequality); + inequality = new_inequality; + return 0; +} + +template +void strong_cg_cut_t::to_original_integer_variables( + const lp_problem_t& lp, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + // We expect a cut of the form sum_j a_j y_j <= rhs + // where y_j >= 0 is a transformed variable + // We need to convert it back into a cut on the original variables + + for (i_t k = 0; k < cut.i.size(); k++) { + const i_t j = cut.i[k]; + const f_t a_j = cut.x[k]; + if (transformed_variables_[j] == -1) { + // sum_{k != j} a_k x_k + a_j v_j <= rhs + // v_j = x_j - l_j >= 0, + // sum_{k != j} a_k x_k + a_j (x_j - l_j) <= rhs + // sum_{k != j} a_k x_k + a_j x_j <= rhs + a_j l_j + cut_rhs += a_j * lp.lower[j]; + } else if (transformed_variables_[j] == 1) { + // sum_{k != j} a_k x_k + a_j w_j <= rhs + // w_j = u_j - x_j >= 0 + // sum_{k != j} a_k x_k + a_j (u_j - x_j) <= rhs + // sum_{k != j} a_k x_k - a_j x_j <= rhs - a_j u_j + cut_rhs -= a_j * lp.upper[j]; + cut.x[k] *= -1.0; + } + } +} + +template +i_t strong_cg_cut_t::generate_strong_cg_cut_integer_only( + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const sparse_vector_t& inequality, + f_t inequality_rhs, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + // We expect an inequality of the form sum_j a_j x_j <= rhs + // where all the variables x_j are integer and nonnegative + + // We then apply the CG cut: + // sum_j floor(a_j) x_j <= floor(rhs) + cut.i.reserve(inequality.i.size()); + cut.x.reserve(inequality.i.size()); + cut.i.clear(); + cut.x.clear(); + + for (i_t k = 0; k < inequality.i.size(); k++) { + const i_t j = inequality.i[k]; + const f_t a_j = inequality.x[k]; + if (var_types[j] == variable_type_t::INTEGER) { + cut.i.push_back(j); + cut.x.push_back(std::floor(a_j)); + } else { + return -1; + } + } + + cut_rhs = std::floor(inequality_rhs); + + cut.sort(); + return 0; +} + template i_t add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index 838ad753c..a6b49d33c 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -22,6 +22,7 @@ enum cut_type_t : int8_t { MIXED_INTEGER_GOMORY = 0, MIXED_INTEGER_ROUNDING = 1, KNAPSACK = 2, + CHVATAL_GOMORY = 3 }; template @@ -29,6 +30,7 @@ void print_cut_types(const std::vector& cut_types, const simplex_sol i_t num_gomory_cuts = 0; i_t num_mir_cuts = 0; i_t num_knapsack_cuts = 0; + i_t num_cg_cuts = 0; for (i_t i = 0; i < cut_types.size(); i++) { if (cut_types[i] == cut_type_t::MIXED_INTEGER_GOMORY) { num_gomory_cuts++; @@ -36,9 +38,11 @@ void print_cut_types(const std::vector& cut_types, const simplex_sol num_mir_cuts++; } else if (cut_types[i] == cut_type_t::KNAPSACK) { num_knapsack_cuts++; + } else if (cut_types[i] == cut_type_t::CHVATAL_GOMORY) { + num_cg_cuts++; 
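To make the new CHVATAL_GOMORY path concrete, here is a minimal, self-contained walk-through of the transformation that strong_cg_cut_t performs, on invented numbers (an illustration, not code from the patch): continuous terms with a favorable sign and bound are relaxed away, the remaining integer variable is shifted to be nonnegative, coefficients and right-hand side are rounded down, and the shift is undone before the cut is returned.

    #include <cmath>
    #include <cstdio>

    // Toy instance (made up): 1.7*y0 <= 7.7 after the continuous terms have been
    // dropped by remove_continuous_variables_integers_nonnegative(), with y0
    // integer and lower bound 1.
    int main()
    {
      double a = 1.7, rhs = 7.7, lower = 1.0;
      rhs -= a * lower;                  // shift v0 = y0 - 1 >= 0:  1.7*v0 <= 6.0
      double cut_a   = std::floor(a);    // 1.0  (rounding down is valid: v0 is integer and >= 0)
      double cut_rhs = std::floor(rhs);  // 6.0  -> CG cut  v0 <= 6
      cut_rhs += cut_a * lower;          // undo the shift: y0 <= 7
      std::printf("CG cut: %g * y0 <= %g\n", cut_a, cut_rhs);
      // The cut pool stores cuts in ">=" form, so generate_strong_cg_cut negates
      // both sides before the caller passes the cut to add_cut.
      return 0;
    }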
} } - settings.log.printf("Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d\n", num_gomory_cuts, num_mir_cuts, num_knapsack_cuts); + settings.log.printf("Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d CG cuts: %d\n", num_gomory_cuts, num_mir_cuts, num_knapsack_cuts, num_cg_cuts); } @@ -368,6 +372,35 @@ class mixed_integer_rounding_cut_t { bool needs_complement_; }; +template +class strong_cg_cut_t { + public: + strong_cg_cut_t(const lp_problem_t& lp, + const std::vector& var_types, + const std::vector& xstar); + + i_t remove_continuous_variables_integers_nonnegative( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + sparse_vector_t& inequality, + f_t& inequality_rhs); + + void to_original_integer_variables(const lp_problem_t& lp, + sparse_vector_t& cut, + f_t& cut_rhs); + + i_t generate_strong_cg_cut_integer_only(const simplex_solver_settings_t& settings, + const std::vector& var_types, + const sparse_vector_t& inequality, + f_t inequality_rhs, + sparse_vector_t& cut, + f_t& cut_rhs); + + private: + std::vector transformed_variables_; +}; + template i_t add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, From bfaf95cc3d34e2afd9bd5032165c424d972eee1f Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 16 Jan 2026 13:40:09 -0800 Subject: [PATCH 29/45] Call CG cut generation from aggregation. Refactor/clean up --- cpp/src/dual_simplex/branch_and_bound.cpp | 170 ++----- cpp/src/dual_simplex/cuts.cpp | 565 +++++++++++++--------- cpp/src/dual_simplex/cuts.hpp | 60 ++- 3 files changed, 412 insertions(+), 383 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 9cc0e6c78..99086a41f 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -228,10 +228,6 @@ branch_and_bound_t::branch_and_bound_t( { exploration_stats_.start_time = tic(); dualize_info_t dualize_info; -#ifdef PRINT_A - settings_.log.printf("A"); - original_problem_.A.print_matrix(); -#endif convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); @@ -241,25 +237,26 @@ branch_and_bound_t::branch_and_bound_t( num_integer_variables_++; } } - printf("num_integer_variables %d\n", num_integer_variables_); // Check slack - printf("slacks size %ld m %d\n", new_slacks_.size(), original_lp_.num_rows); +#ifdef CHECK_SLACKS + assert(new_slacks_.size() == original_lp_.num_rows); for (i_t slack : new_slacks_) { const i_t col_start = original_lp_.A.col_start[slack]; const i_t col_end = original_lp_.A.col_start[slack + 1]; const i_t col_len = col_end - col_start; if (col_len != 1) { - printf("Slack %d has %d nzs\n", slack, col_len); - exit(1); + settings_.log.printf("Slack %d has %d nzs\n", slack, col_len); + assert(col_len == 1); } const i_t i = original_lp_.A.i[col_start]; const f_t x = original_lp_.A.x[col_start]; if (std::abs(x) != 1.0) { - printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x); - exit(1); + settings_.log.printf("Slack %d row %d has non-unit coefficient %e\n", slack, i, x); + assert(std::abs(x) == 1.0); } } +#endif mutex_upper_.lock(); upper_bound_ = inf; @@ -1484,12 +1481,15 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( } } if (basic_list.size() != original_lp_.num_rows) { - printf("basic_list size %d != m %d\n", basic_list.size(), original_lp_.num_rows); - exit(1); + settings_.log.printf( + "basic_list 
size %d != m %d\n", basic_list.size(), original_lp_.num_rows); + assert(basic_list.size() == original_lp_.num_rows); } if (nonbasic_list.size() != original_lp_.num_cols - original_lp_.num_rows) { - printf("nonbasic_list size %d != n - m %d\n", nonbasic_list.size(), original_lp_.num_cols - original_lp_.num_rows); - exit(1); + settings_.log.printf("nonbasic_list size %d != n - m %d\n", + nonbasic_list.size(), + original_lp_.num_cols - original_lp_.num_rows); + assert(nonbasic_list.size() == original_lp_.num_cols - original_lp_.num_rows); } root_crossover_settings.max_cut_passes = 3; // Populate the basis_update from the crossover vstatus @@ -1504,13 +1504,13 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( // Set the edge norms to a default value edge_norms.resize(original_lp_.num_cols, -1.0); set_uninitialized_steepest_edge_norms(edge_norms); - printf("Using crossover solution\n"); + settings_.log.printf("Using crossover solution\n"); } else { - printf("Using dual simplex solution 1: crossover status %d\n", crossover_status); + settings_.log.printf("Using dual simplex solution\n"); root_status = root_status_future.get(); } } else { - printf("Using dual simplex solution\n"); + settings_.log.printf("Using dual simplex solution\n"); root_status = root_status_future.get(); } return root_status; @@ -1646,91 +1646,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::vector saved_solution; #if 1 - printf("Trying to open solution.dat\n"); - FILE* fid = NULL; - fid = fopen("solution.dat", "r"); - if (fid != NULL) - { - i_t n_solution_dat; - i_t count = fscanf(fid, "%d\n", &n_solution_dat); - printf("Solution.dat variables %d =? %d =? %ld count %d\n", n_solution_dat, original_lp_.num_cols, solution.x.size(), count); - bool good = true; - if (count == 1 && n_solution_dat == original_lp_.num_cols) - { - printf("Opened solution.dat with %d number of variables\n", n_solution_dat); - saved_solution.resize(n_solution_dat); - for (i_t j = 0; j < n_solution_dat; j++) - { - count = fscanf(fid, "%lf", &saved_solution[j]); - if (count != 1) - { - printf("bad read solution.dat: j %d count %d\n", j, count); - good = false; - break; - } - } - } else { - good = false; - } - fclose(fid); - - if (!good) - { - saved_solution.resize(0); - printf("Solution.dat is bad\n"); - } - else - { - printf("Read solution file\n"); - - auto hash_combine_f = [](size_t seed, f_t x) { - seed ^= std::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; - }; - size_t seed = original_lp_.num_cols; - for (i_t j = 0; j < original_lp_.num_cols; ++j) - { - seed = hash_combine_f(seed, saved_solution[j]); - } - printf("Saved solution hash: %20x\n", seed); - - FILE* fid = NULL; - fid = fopen("solution.dat.2", "w"); - if (fid != NULL) { - printf("Writing solution.dat.2\n"); - i_t n = original_lp_.num_cols; - size_t seed = n; - fprintf(fid, "%d\n", n); - for (i_t j = 0; j < n; ++j) { - fprintf(fid, "%.17g\n", saved_solution[j]); - } - fclose(fid); - } - - // Compute || A * x - b ||_inf - std::vector residual = original_lp_.rhs; - matrix_vector_multiply(original_lp_.A, 1.0, saved_solution, -1.0, residual); - printf("Saved solution: || A*x - b ||_inf %e\n", vector_norm_inf(residual)); - f_t infeas = 0; - for (i_t j = 0; j < original_lp_.num_cols; j++) { - if (saved_solution[j] < original_lp_.lower[j] - 1e-6) { - f_t curr_infeas = (original_lp_.lower[j] - saved_solution[j]); - infeas += curr_infeas; - printf( - "j: %d saved solution %e lower %e\n", j, saved_solution[j], original_lp_.lower[j]); - } - if 
(saved_solution[j] > original_lp_.upper[j] + 1e-6) { - f_t curr_infeas = (saved_solution[j] - original_lp_.upper[j]); - infeas += curr_infeas; - printf( - "j %d saved solution %e upper %e\n", j, saved_solution[j], original_lp_.upper[j]); - } - } - printf("Bound infeasibility %e\n", infeas); - } - } else { - printf("Could not open solution.dat\n"); - } + read_saved_solution_for_cut_verification(original_lp_, settings_, saved_solution); #endif i_t num_gomory_cuts = 0; @@ -1740,13 +1656,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { -#ifdef PRINT_SOLUTION - for (i_t j = 0; j < original_lp_.num_cols; j++) { - if (var_types_[j] == variable_type_t::INTEGER) { - settings_.log.printf("Variable %d type %d val %e\n", j, var_types_[j], root_relax_soln_.x[j]); - } - } -#endif mutex_upper_.lock(); incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); upper_bound_ = root_objective_; @@ -1810,36 +1719,22 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut num_cg_cuts++; } } - print_cut_types(cut_types, settings_); + cut_pool.print_cutpool_types(); + print_cut_types("In LP ", cut_types, settings_); printf("Cut pool size: %d\n", cut_pool.pool_size()); - +#ifdef CHECK_CUT_MATRIX if (cuts_to_add.check_matrix() != 0) { - printf("Bad cuts matrix\n"); + settings_.log.printf("Bad cuts matrix\n"); for (i_t i = 0; i < static_cast(cut_types.size()); ++i) { - printf("row %d cut type %d\n", i, cut_types[i]); + settings_.log.printf("row %d cut type %d\n", i, cut_types[i]); } - exit(-1); - } - -#ifdef PRINT_CUTS - csc_matrix_t cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]); - cuts_to_add.to_compressed_col(cuts_to_add_col); - cuts_to_add_col.print_matrix(); - for (i_t i = 0; i < cut_rhs.size(); i++) { - printf("cut_rhs[%d] = %g\n", i, cut_rhs[i]); - } -#endif - -#if 0 - f_t min_cut_violation = minimum_violation(cuts_to_add, cut_rhs, root_relax_soln_.x); - if (min_cut_violation < 1e-6) { - settings_.log.printf("Min cut violation %e\n", min_cut_violation); + return mip_status_t::NUMERICAL; } #endif - // Check against saved solution +#if 1 if (saved_solution.size() > 0) { csc_matrix_t cuts_to_add_col(cuts_to_add.m, cuts_to_add.n, cuts_to_add.row_start[cuts_to_add.m]); cuts_to_add.to_compressed_col(cuts_to_add_col); @@ -1849,11 +1744,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut //printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]); if (Cx[k] > cut_rhs[k] + 1e-6) { printf("Cut %d is violated by saved solution. 
Cx %e cut_rhs %e\n", k, Cx[k], cut_rhs[k]); - exit(1); + return mip_status_t::NUMERICAL; } } } - +#endif cut_pool_size = cut_pool.pool_size(); // Resolve the LP with the new cuts @@ -1879,7 +1774,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut mutex_original_lp_.unlock(); if (add_cuts_status != 0) { settings_.log.printf("Failed to add cuts\n"); - exit(1); + return mip_status_t::NUMERICAL; } // Try to do bound strengthening @@ -1892,15 +1787,14 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut original_lp_.A.check_matrix(); #endif original_lp_.A.to_compressed_row(Arow); -#if 1 + bounds_strengthening_t node_presolve(original_lp_, Arow, row_sense, var_types_); bool feasible = node_presolve.bounds_strengthening(original_lp_.lower, original_lp_.upper, settings_); if (!feasible) { settings_.log.printf("Bound strengthening failed\n"); - exit(1); + return mip_status_t::NUMERICAL; } -#endif // Adjust the solution root_relax_soln_.x.resize(original_lp_.num_cols, 0.0); @@ -1934,7 +1828,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (cut_status != dual::status_t::OPTIMAL) { settings_.log.printf("Cut status %d\n", cut_status); - exit(1); + return mip_status_t::NUMERICAL; } local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); @@ -1960,7 +1854,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // TODO: Get upper bound from heuristics f_t upper_bound = get_upper_bound(); - f_t obj = num_fractional != 0 ? get_upper_bound() : compute_user_objective(original_lp_, root_objective_); + f_t obj = num_fractional != 0 ? get_upper_bound() : root_objective_; f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, root_objective_); std::string gap = num_fractional != 0 ? user_mip_gap(user_obj, user_lower) : "0.0%"; diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 040a52461..f5dac6bb6 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -108,6 +108,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) cut_norms_.resize(cut_storage_.m, 0.0); cut_orthogonality_.resize(cut_storage_.m, 1); cut_scores_.resize(cut_storage_.m, 0.0); + const bool verbose = false; for (i_t i = 0; i < cut_storage_.m; i++) { f_t violation; cut_distances_[i] = cut_distance(i, x_relax, violation, cut_norms_[i]); @@ -115,9 +116,16 @@ void cut_pool_t::score_cuts(std::vector& x_relax) cut_distances_[i] <= min_cut_distance ? 0.0 : weight_distance * cut_distances_[i] + weight_orthogonality * cut_orthogonality_[i]; - settings_.log.printf("Cut %d type %d distance %+e violation %+e orthogonality %e score %.16e\n", - i, static_cast(cut_type_[i]), cut_distances_[i], violation, cut_orthogonality_[i], - cut_scores_[i]); + if (verbose) { + settings_.log.printf( + "Cut %d type %d distance %+e violation %+e orthogonality %e score %.16e\n", + i, + static_cast(cut_type_[i]), + cut_distances_[i], + violation, + cut_orthogonality_[i], + cut_scores_[i]); + } } std::vector sorted_indices(cut_storage_.m); @@ -147,7 +155,9 @@ void cut_pool_t::score_cuts(std::vector& x_relax) if (cut_age_[i] > 0) { settings_.log.printf("Adding cut with age %d\n", cut_age_[i]); } - settings_.log.printf("Scored cuts %d. Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); + if (verbose) { + settings_.log.printf("Scored cuts %d. 
Adding cut %d score %e\n", scored_cuts_, i, cut_scores_[i]); + } best_cuts_.push_back(i); scored_cuts_++; @@ -158,27 +168,27 @@ void cut_pool_t::score_cuts(std::vector& x_relax) cut_orthogonality_[j] = std::min(cut_orthogonality_[j], cut_orthogonality(i, j)); if (cut_orthogonality_[j] >= min_orthogonality) { indices.push_back(j); - cut_scores_[j] = - cut_distances_[j] <= min_cut_distance - ? 0.0 - : weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; - // settings_.log.printf("Recomputed cut %d score %e\n", j, cut_scores_[j]); + if (cut_distances_[j] <= min_cut_distance) { + cut_scores_[j] = 0.0; // Ignore cuts under the minimum distance threshold + } else { + cut_scores_[j] = weight_distance * cut_distances_[j] + weight_orthogonality * cut_orthogonality_[j]; + } } } sorted_indices = indices; indices.clear(); - //settings_.log.printf("Sorting %d cuts\n", sorted_indices.size()); std::sort(sorted_indices.begin(), sorted_indices.end(), [&](i_t a, i_t b) { return cut_scores_[a] > cut_scores_[b]; }); - //settings_.log.printf("\t Sorted indicies %d\n", sorted_indices.size()); } } template -i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std::vector& best_rhs, std::vector& best_cut_types) +i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, + std::vector& best_rhs, + std::vector& best_cut_types) { best_cuts.m = 0; best_cuts.n = original_vars_; @@ -205,7 +215,6 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, std:: return static_cast(best_cuts_.size()); } - template void cut_pool_t::age_cuts() { @@ -228,6 +237,7 @@ knapsack_generation_t::knapsack_generation_t( const std::vector& new_slacks, const std::vector& var_types) { + const bool verbose = false; knapsack_constraints_.reserve(lp.num_rows); is_slack_.resize(lp.num_cols, 0); @@ -265,13 +275,15 @@ knapsack_generation_t::knapsack_generation_t( const f_t beta = lp.rhs[i]; if (std::abs(beta - std::round(beta)) <= settings.integer_tol) { if (beta > 0.0 && beta <= sum_pos && std::abs(sum_pos / (row_len - 1) - beta) > 1e-3) { - printf( - "Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n", - i, - row_len, - beta, - sum_pos, - sum_pos / (row_len - 1)); + if (verbose) { + printf( + "Knapsack constraint %d row len %d beta %e sum_pos %e sum_pos / (row_len - 1) %e\n", + i, + row_len, + beta, + sum_pos, + sum_pos / (row_len - 1)); + } knapsack_constraints_.push_back(i); } } @@ -294,37 +306,44 @@ i_t knapsack_generation_t::generate_knapsack_cuts( sparse_vector_t& cut, f_t& cut_rhs) { + const bool verbose = false; // Get the row associated with the knapsack constraint sparse_vector_t knapsack_inequality(Arow, knapsack_row); f_t knapsack_rhs = lp.rhs[knapsack_row]; // Remove the slacks from the inequality f_t seperation_rhs = 0.0; - settings.log.printf(" Knapsack : "); + if (verbose) { + settings.log.printf(" Knapsack : "); + } for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { const i_t j = knapsack_inequality.i[k]; if (is_slack_[j]) { knapsack_inequality.x[k] = 0.0; } else { - settings.log.printf(" %g x%d +", knapsack_inequality.x[k], j); + if (verbose) { + settings.log.printf(" %g x%d +", knapsack_inequality.x[k], j); + } seperation_rhs += knapsack_inequality.x[k]; } } - settings.log.printf(" <= %g\n", knapsack_rhs); + if (verbose) { + settings.log.printf(" <= %g\n", knapsack_rhs); + } seperation_rhs -= (knapsack_rhs + 1); - settings.log.printf("\t"); - for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { - const i_t j = knapsack_inequality.i[k]; - if 
(!is_slack_[j]) { - if (std::abs(xstar[j]) > 1e-3) { - settings.log.printf("x_relax[%d]= %g ", j, xstar[j]); - } + if (verbose) { + settings.log.printf("\t"); + for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { + const i_t j = knapsack_inequality.i[k]; + if (!is_slack_[j]) { + if (std::abs(xstar[j]) > 1e-3) { settings.log.printf("x_relax[%d]= %g ", j, xstar[j]); } + } } - } - settings.log.printf("\n"); + settings.log.printf("\n"); - settings.log.printf("seperation_rhs %g\n", seperation_rhs); + settings.log.printf("seperation_rhs %g\n", seperation_rhs); + } if (seperation_rhs <= 0.0) { return -1; } std::vector values; @@ -346,13 +365,18 @@ i_t knapsack_generation_t::generate_knapsack_cuts( std::vector solution; solution.resize(knapsack_inequality.i.size() - 1); - settings.log.printf("Calling solve_knapsack_problem\n"); + if (verbose) { + settings.log.printf("Calling solve_knapsack_problem\n"); + } f_t objective = solve_knapsack_problem(values, weights, seperation_rhs, solution); if (objective != objective) { return -1; } - settings.log.printf("objective %e objective_constant %e\n", objective, objective_constant); - + if (verbose) { + settings.log.printf("objective %e objective_constant %e\n", objective, objective_constant); + } f_t seperation_value = -objective + objective_constant; - settings.log.printf("seperation_value %e\n", seperation_value); + if (verbose) { + settings.log.printf("seperation_value %e\n", seperation_value); + } const f_t tol = 1e-6; if (seperation_value >= 1.0 - tol) { return -1; } @@ -387,7 +411,9 @@ i_t knapsack_generation_t::generate_knapsack_cuts( // Verify the cut is violated f_t dot = cut.dot(xstar); f_t violation = dot - cut_rhs; - settings.log.printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); + if (verbose) { + settings.log.printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); + } if (violation >= -tol) { return -1; } @@ -486,7 +512,11 @@ f_t knapsack_generation_t::solve_knapsack_problem(const std::vector::solve_knapsack_problem(const std::vector(n); if (scale <= 0.0) { return std::numeric_limits::quiet_NaN(); } - printf("scale %g epsilon %g vmax %g n %d\n", scale, epsilon, vmax, n); + if (verbose) { + printf("scale %g epsilon %g vmax %g n %d\n", scale, epsilon, vmax, n); + } for (i_t i = 0; i < n; ++i) { scaled_values[i] = static_cast(std::floor(values[i] / scale)); - //printf("scaled_values[%d] %d values[%d] %g\n", i, scaled_values[i], i, values[i]); } } i_t sum_value = std::accumulate(scaled_values.begin(), scaled_values.end(), 0); const i_t INT_INF = std::numeric_limits::max() / 2; - printf("sum value %d\n", sum_value); + if (verbose) { + printf("sum value %d\n", sum_value); + } const i_t max_size = 10000; if (sum_value <= 0.0 || sum_value >= max_size) { - printf("sum value %d is negative or too large using greedy solution\n", sum_value); + if (verbose) { + printf("sum value %d is negative or too large using greedy solution\n", sum_value); + } return greedy_knapsack_problem(values, weights, rhs, solution); } @@ -519,11 +554,10 @@ f_t knapsack_generation_t::solve_knapsack_problem(const std::vector dp(n + 1, sum_value + 1, INT_INF); dense_matrix_t take(n + 1, sum_value + 1, 0); dp(0, 0) = 0; - printf("start dp\n"); // 4. 
Dynamic programming - for (int j = 1; j <= n; ++j) { - for (int v = 0; v <= sum_value; ++v) { + for (i_t j = 1; j <= n; ++j) { + for (i_t v = 0; v <= sum_value; ++v) { // Do not take item i-1 dp(j, v) = dp(j - 1, v); @@ -570,13 +604,12 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& nonbasic_list) { - // Generate Gomory Cuts + // Generate Gomory and CG Cuts generate_gomory_cuts( lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); // Generate Knapsack cuts generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); - //settings.log.printf("Generated Knapsack cuts\n"); // Generate MIR cuts generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); @@ -598,10 +631,7 @@ void cut_generation_t::generate_knapsack_cuts( i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts( lp, settings, Arow, new_slacks, var_types, xstar, knapsack_row, cut, cut_rhs); if (knapsack_status == 0) { - settings.log.printf("Adding Knapsack cut %d\n", knapsack_row); cut_pool_.add_cut(cut_type_t::KNAPSACK, cut, cut_rhs); - } else { - settings.log.printf("Knapsack cut %d is not violated. Skipping\n", knapsack_row); } } } @@ -615,8 +645,8 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& const std::vector& var_types, const std::vector& xstar) { - mixed_integer_rounding_cut_t mir(lp.num_cols, settings); - mir.initialize(lp, new_slacks, xstar); + mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); + strong_cg_cut_t cg(lp, var_types, xstar); std::vector slack_map(lp.num_rows, -1); for (i_t slack : new_slacks) { @@ -704,6 +734,20 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& if (j == slack) { inequality.x[k] = 0.0; } } + { + // Try to generate a CG cut + sparse_vector_t cg_inequality = inequality; + f_t cg_inequality_rhs = inequality_rhs; + sparse_vector_t cg_cut(lp.num_cols, 0); + f_t cg_cut_rhs; + i_t cg_status = cg.generate_strong_cg_cut( + lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); + if (cg_status == 0) { + printf("Adding CG cut nz %ld\n", cg_cut.i.size()); + cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); + } + } + // inequaility'*x <= inequality_rhs // But for MIR we need: inequality'*x >= inequality_rhs inequality_rhs *= -1; @@ -820,7 +864,9 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } if (add_cut) { - printf("\t adding cut - agg %d\n", num_aggregated); + if (num_aggregated > 0) { + settings.log.printf("MIR cut with aggregation %d\n", num_aggregated); + } cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); break; } else { @@ -894,8 +940,8 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& break; } } else { - printf("Bad col len\n"); - exit(1); + settings.log.printf("Bad col len\n"); + assert(col_len > 1); } } num_aggregated++; // Always increase so the loop terminates @@ -989,12 +1035,10 @@ void cut_generation_t::generate_gomory_cuts( const std::vector& basic_list, const std::vector& nonbasic_list) { - mixed_integer_gomory_base_inequality_t gomory(lp, basis_update, nonbasic_list); - mixed_integer_rounding_cut_t mir(lp.num_cols, settings); + tableau_equality_t tableau(lp, basis_update, nonbasic_list); + mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); strong_cg_cut_t cg(lp, var_types, xstar); - mir.initialize(lp, new_slacks, xstar); - for (i_t i = 0; i < lp.num_rows; i++) { sparse_vector_t 
inequality(lp.num_cols, 0); f_t inequality_rhs; @@ -1002,79 +1046,31 @@ void cut_generation_t::generate_gomory_cuts( if (var_types[j] != variable_type_t::INTEGER) { continue; } const f_t x_j = xstar[j]; if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { continue; } - i_t gomory_status = gomory.generate_base_inequality(lp, - settings, - Arow, - var_types, - basis_update, - xstar, - basic_list, - nonbasic_list, - i, - inequality, - inequality_rhs); - if (gomory_status == 0) { + i_t tableau_status = tableau.generate_base_equality(lp, + settings, + Arow, + var_types, + basis_update, + xstar, + basic_list, + nonbasic_list, + i, + inequality, + inequality_rhs); + if (tableau_status == 0) { // Generate a CG cut if (1) { - + // Try to generate a CG cut sparse_vector_t cg_inequality = inequality; - f_t cg_inequality_rhs = inequality_rhs; - printf("CG inequality with slacks nz %ld\n", cg_inequality.i.size()); - for (i_t k = 0; k < cg_inequality.i.size(); k++) { - printf("%e %c%d ", cg_inequality.x[k], var_types[cg_inequality.i[k]] == variable_type_t::CONTINUOUS ? 'x' : 'y', cg_inequality.i[k]); - } - printf("CG inequality rhs %e\n", cg_inequality_rhs); - printf("\n"); - - // Try to remove continuous variables from the inequality - // and transform integer variables to be nonnegative - i_t cg_status = cg.remove_continuous_variables_integers_nonnegative( - lp, settings, var_types, cg_inequality, cg_inequality_rhs); - - if (cg_status != 0) { - // Try negating the equality and see if that helps - cg_inequality = inequality; - cg_inequality.negate(); - cg_inequality_rhs = -inequality_rhs; - cg_status = cg.remove_continuous_variables_integers_nonnegative( - lp, settings, var_types, cg_inequality, cg_inequality_rhs); - } - + f_t cg_inequality_rhs = inequality_rhs; + sparse_vector_t cg_cut(lp.num_cols, 0); + f_t cg_cut_rhs; + i_t cg_status = cg.generate_strong_cg_cut( + lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); if (cg_status == 0) { - // We have an inequality with no continuous variables - - // Generate a CG cut - sparse_vector_t cg_cut(lp.num_cols, 0); - f_t cg_cut_rhs; - cg.generate_strong_cg_cut_integer_only( - settings, var_types, cg_inequality, cg_inequality_rhs, cg_cut, cg_cut_rhs); - - - // Convert the CG cut back to the original variables - cg.to_original_integer_variables(lp, cg_cut, cg_cut_rhs); - - // Check for violation - f_t dot = cg_cut.dot(xstar); - // If the cut is violated we will have: sum_j a_j xstar_j > rhs - f_t violation = dot - cg_cut_rhs; - printf("CG violation %e nz %ld\n", violation, cg_cut.i.size()); - if (violation > 0.0) { - - - // Substitute out the slack variables - //mir.substitute_slacks(lp, Arow, cg_cut, cg_cut_rhs); - - // The CG cut is in the form: sum_j a_j x_j <= rhs - // The cut pool wants the cut in the form: sum_j a_j x_j >= rhs - - cg_cut.negate(); - cg_cut_rhs *= -1.0; - printf("Adding CG cut nz %ld\n", cg_cut.i.size()); - cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); - } - } else { - printf("CG status %d\n", cg_status); + printf("Adding CG cut nz %ld\n", cg_cut.i.size()); + cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); } } @@ -1148,7 +1144,7 @@ void cut_generation_t::generate_gomory_cuts( } template -i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( +i_t tableau_equality_t::generate_base_equality( const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, @@ -1194,13 +1190,13 @@ i_t 
mixed_integer_gomory_base_inequality_t::generate_base_inequality( settings.log.printf("BTu_bar %d error %e\n", k, std::abs(BTu_bar[k] - 1.0)); if (std::abs(BTu_bar[k] - 1.0) > 1e-10) { settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); - exit(1); + assert(false); } } else { settings.log.printf("BTu_bar %d error %e\n", k, std::abs(BTu_bar[k])); if (std::abs(BTu_bar[k]) > 1e-10) { settings.log.printf("BTu_bar[%d] = %e i %d\n", k, BTu_bar[k], i); - exit(1); + assert(false); } } } @@ -1270,7 +1266,7 @@ i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( if (std::abs(a_bar_dense_dot - b_bar[i]) > 1e-6) { settings_.log.printf("a_bar_dense_dot = %e b_bar[%d] = %e\n", a_bar_dense_dot, i, b_bar[i]); settings_.log.printf("x_j %e b_bar_i %e\n", x_j, b_bar[i]); - exit(1); + assert(false); } #endif @@ -1321,37 +1317,31 @@ i_t mixed_integer_gomory_base_inequality_t::generate_base_inequality( } template -void mixed_integer_rounding_cut_t::initialize(const lp_problem_t& lp, - const std::vector& new_slacks, - const std::vector& xstar) +mixed_integer_rounding_cut_t::mixed_integer_rounding_cut_t( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& new_slacks, + const std::vector& xstar) + : num_vars_(lp.num_cols), + settings_(settings), + x_workspace_(num_vars_, 0.0), + x_mark_(num_vars_, 0), + has_lower_(num_vars_, 0), + has_upper_(num_vars_, 0), + is_slack_(num_vars_, 0), + slack_rows_(num_vars_, 0), + bound_info_(num_vars_, 0) { - - if (lp.num_cols != num_vars_) { - num_vars_ = lp.num_cols; - x_workspace_.resize(num_vars_, 0.0); - x_mark_.resize(num_vars_, 0); - has_lower_.resize(num_vars_, 0); - has_upper_.resize(num_vars_, 0); - } - - - is_slack_.clear(); - is_slack_.resize(num_vars_, 0); - slack_rows_.clear(); - slack_rows_.resize(num_vars_, 0); - bound_info_.clear(); - bound_info_.resize(num_vars_, 0); - for (i_t j : new_slacks) { - is_slack_[j] = 1; + is_slack_[j] = 1; const i_t col_start = lp.A.col_start[j]; - const i_t i = lp.A.i[col_start]; - slack_rows_[j] = i; + const i_t i = lp.A.i[col_start]; + slack_rows_[j] = i; assert(std::abs(lp.A.x[col_start]) == 1.0); } needs_complement_ = false; - for (i_t j = 0; j < lp.num_cols; j++) { + for (i_t j = 0; j < num_vars_; j++) { if (lp.lower[j] < 0) { settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]); } @@ -1361,39 +1351,26 @@ void mixed_integer_rounding_cut_t::initialize(const lp_problem_t -inf && lj != 0.0) { - has_lower_[j] = 1; + has_lower_[j] = 1; bound_info_[j] = -1; } } - -#if 0 - for (i_t j = 0; j < x_workspace_.size(); j++) { - if (x_workspace_[j] != 0.0) { - printf("Initialize: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - exit(1); - } - if (x_mark_[j] != 0) { - printf("Initialize: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - exit(1); - } - } -#endif } template void mixed_integer_rounding_cut_t::to_nonnegative(const lp_problem_t& lp, - sparse_vector_t& inequality, - f_t& rhs) + sparse_vector_t& inequality, + f_t& rhs) { const i_t nz = inequality.i.size(); for (i_t k = 0; k < nz; k++) @@ -1578,11 +1555,11 @@ i_t mixed_integer_rounding_cut_t::generate_cut_nonnegative( for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - exit(1); + assert(x_workspace_[j] == 0.0); } if (x_mark_[j] != 0) { printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - exit(1); + assert(x_mark_[j] == 0); } } #endif @@ -1593,6 +1570,7 @@ i_t 
mixed_integer_rounding_cut_t::generate_cut_nonnegative( cut_rhs = R; +#ifdef CHECK_REPEATED_INDICES // Check for repeated indicies std::vector check(num_vars_, 0); for (i_t p = 0; p < cut.i.size(); p++) @@ -1600,13 +1578,13 @@ i_t mixed_integer_rounding_cut_t::generate_cut_nonnegative( if (check[cut.i[p]] != 0) { printf("repeated index in generated cut\n"); - exit(1); + assert(check[cut.i[p]] == 0); } check[cut.i[p]] = 1; } +#endif if (cut.i.size() == 0) { - //settings_.log.printf("MIR: No coefficients in cut\n"); return -1; } @@ -1623,17 +1601,17 @@ i_t mixed_integer_rounding_cut_t::generate_cut( sparse_vector_t& cut, f_t& cut_rhs) { -#if 1 +#ifdef CHECK_WORKSPACE for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); printf("num_vars_ %d\n", num_vars_); printf("x_workspace_.size() %ld\n", x_workspace_.size()); - exit(1); + assert(x_workspace_[j] == 0.0); } if (x_mark_[j] != 0) { printf("Before generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - exit(1); + assert(x_mark_[j] == 0); } } #endif @@ -1783,15 +1761,15 @@ i_t mixed_integer_rounding_cut_t::generate_cut( } -#if 1 +#ifdef CHECK_WORKSPACE for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - exit(1); + assert(x_workspace_[j] == 0.0); } if (x_mark_[j] != 0) { printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - exit(1); + assert(x_mark_[j] == 0); } } #endif @@ -1802,6 +1780,7 @@ i_t mixed_integer_rounding_cut_t::generate_cut( cut_rhs = R; +#ifdef CHECK_REPEATED_INDICES // Check for repeated indicies std::vector check(num_vars_, 0); for (i_t p = 0; p < cut.i.size(); p++) @@ -1809,13 +1788,13 @@ i_t mixed_integer_rounding_cut_t::generate_cut( if (check[cut.i[p]] != 0) { printf("repeated index in generated cut\n"); - exit(1); + assert(check[cut.i[p]] == 0); } - check[cut.i[p]] == 1; + check[cut.i[p]] = 1; } +#endif if (cut.i.size() == 0) { - //settings_.log.printf("MIR: No coefficients in cut\n"); return -1; } @@ -1835,15 +1814,15 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ std::vector cut_indices; cut_indices.reserve(cut.i.size()); -#if 0 +#ifdef CHECK_WORKSPACE for (i_t j = 0; j < x_workspace_.size(); j++) { if (x_workspace_[j] != 0.0) { printf("Begin Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - exit(1); + assert(x_workspace_[j] == 0.0); } if (x_mark_[j] != 0) { printf("Begin Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - exit(1); + assert(x_mark_[j] == 0); } } #endif @@ -1860,12 +1839,12 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ const i_t slack_len = slack_end - slack_start; if (slack_len != 1) { printf("Slack %d has %d nzs in colum\n", j, slack_len); - exit(1); + assert(slack_len == 1); } const f_t alpha = lp.A.x[slack_start]; if (std::abs(alpha) != 1.0) { printf("Slack %d has non-unit coefficient %e\n", j, alpha); - exit(1); + assert(std::abs(alpha) == 1.0); } // Do the substitution @@ -1901,7 +1880,7 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ const f_t aij = Arow.x[q]; if (std::abs(aij)!= 1.0) { printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j); - exit(1); + assert(std::abs(aij) == 1.0); } } } @@ -1954,15 +1933,15 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ } -#if 0 +#ifdef CHECK_WORKSPACE for (i_t j = 0; j < x_workspace_.size(); j++) { 
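      // These CHECK_WORKSPACE loops (run at entry to and exit from the cut routines)
      // verify the scatter/clear discipline: a sparse row is scattered into the dense
      // x_workspace_ / x_mark_ arrays, the touched indices are remembered, and only
      // those entries are zeroed before returning, so the dense workspaces can be
      // reused across calls without an O(n) reset. The checks assert every entry is
      // back at zero whenever a cut routine starts or finishes.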
if (x_workspace_[j] != 0.0) { printf("End Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - exit(1); + assert(x_workspace_[j] == 0.0); } if (x_mark_[j] != 0) { printf("End Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - exit(1); + assert(x_mark_[j] == 0); } } #endif @@ -1988,15 +1967,15 @@ void mixed_integer_rounding_cut_t::combine_rows(const lp_problem_t::combine_rows(const lp_problem_t check(num_vars_, 0); for (i_t k = 0; k < inequality.i.size(); k++) { if (check[inequality.i[k]] == 1) { printf("repeated index\n"); + assert(check[inequality.i[k]] == 0); } check[inequality.i[k]] = 1; } +#endif // Clear the workspace for (i_t j : indices_) { @@ -2116,7 +2098,7 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( sparse_vector_t& inequality, f_t& inequality_rhs) { - + const bool verbose = false; // Count the number of continuous variables in the inequality i_t num_continuous = 0; const i_t nz = inequality.i.size(); @@ -2127,7 +2109,9 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( } } - printf("num_continuous %d\n", num_continuous); + if (verbose) { + settings.log.printf("num_continuous %d\n", num_continuous); + } // We assume the inequality is of the form sum_j a_j x_j <= rhs for (i_t k = 0; k < nz; k++) { @@ -2170,7 +2154,9 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( } else { // We can't keep the coefficient of the continuous variable positive // This means we can't eliminate the continuous variable - printf("x%d ak: %e lo: %e up: %e\n", j, a_j, l_j, u_j); + if (verbose) { + settings.log.printf("x%d ak: %e lo: %e up: %e\n", j, a_j, l_j, u_j); + } return -1; } } else { @@ -2269,6 +2255,76 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_integer_only( return 0; } +template +i_t strong_cg_cut_t::generate_strong_cg_cut( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const sparse_vector_t& inequality, + const f_t inequality_rhs, + const std::vector& xstar, + sparse_vector_t& cut, + f_t& cut_rhs) +{ +#ifdef PRINT_INEQUALITY_INFO + for (i_t k = 0; k < inequality.i.size(); k++) { + printf("%e %c%d ", + inequality.x[k], + var_types[inequality.i[k]] == variable_type_t::CONTINUOUS ? 
'x' : 'y', + inequality.i[k]); + } + printf("CG inequality rhs %e\n", inequality_rhs); +#endif + // Try to remove continuous variables from the inequality + // and transform integer variables to be nonnegative + + // Copy the inequality since remove continuous variables will modify it + sparse_vector_t cg_inequality = inequality; + f_t cg_inequality_rhs = inequality_rhs; + i_t status = remove_continuous_variables_integers_nonnegative( + lp, settings, var_types, cg_inequality, cg_inequality_rhs); + + if (status != 0) { + // Try negating the equality and see if that helps + cg_inequality = inequality; + cg_inequality.negate(); + cg_inequality_rhs = -inequality_rhs; + + status = remove_continuous_variables_integers_nonnegative( + lp, settings, var_types, cg_inequality, cg_inequality_rhs); + } + + if (status == 0) { + // We have an inequality with no continuous variables + + // Generate a CG cut + generate_strong_cg_cut_integer_only( + settings, var_types, cg_inequality, cg_inequality_rhs, cut, cut_rhs); + + // Convert the CG cut back to the original variables + to_original_integer_variables(lp, cut, cut_rhs); + + // Check for violation + f_t dot = cut.dot(xstar); + // If the cut is violated we will have: sum_j a_j xstar_j > rhs + f_t violation = dot - cut_rhs; + const f_t min_violation_threshold = 1e-6; + if (violation > min_violation_threshold) { + printf("CG violation %e nz %ld\n", violation, cut.i.size()); + // Note that no slacks are currently present. Since slacks are currently treated as continuous. + // However, this may change. We may need to substitute out the slacks here + + + // The CG cut is in the form: sum_j a_j x_j <= rhs + // The cut pool wants the cut in the form: sum_j a_j x_j >= rhs + cut.negate(); + cut_rhs *= -1.0; + return 0; + } + } + return -1; +} + template i_t add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, @@ -2297,14 +2353,14 @@ i_t add_cuts(const simplex_solver_settings_t& settings, add(Btest, B, 1.0, -1.0, Diff); const f_t err = Diff.norm1(); settings.log.printf("Before || B - L*U || %e\n", err); - if (err > 1e-6) { exit(1); } + assert(err <= 1e-6); } #endif const i_t p = cuts.m; if (cut_rhs.size() != static_cast(p)) { settings.log.printf("cut_rhs must have the same number of rows as cuts\n"); - return -1; + assert(cut_rhs.size() == static_cast(p)); } settings.log.debug("Number of cuts %d\n", p); settings.log.debug("Original lp rows %d\n", lp.num_rows); @@ -2316,7 +2372,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, i_t append_status = new_A_row.append_rows(cuts); if (append_status != 0) { settings.log.printf("append_rows error: %d\n", append_status); - exit(1); + assert(append_status == 0); } csc_matrix_t new_A_col(lp.num_rows + p, lp.num_cols, 1); @@ -2353,8 +2409,8 @@ i_t add_cuts(const simplex_solver_settings_t& settings, const i_t col_end = lp.A.col_start[slack + 1]; const i_t col_len = col_end - col_start; if (col_len != 1) { - printf("Add cuts: Slack %d has %d nzs in column\n", slack, col_len); - exit(1); + settings.log.printf("Add cuts: Slack %d has %d nzs in column\n", slack, col_len); + assert(col_len == 1); } } @@ -2392,7 +2448,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, if (j < 0 || j >= old_cols) { settings.log.printf( "basic_list[%d] = %d is out of bounds %d old_cols %d\n", k, j, j, old_cols); - return -1; + assert(j >= 0 && j < old_cols); } in_basis[j] = k; // The cuts are on the original variables. 
So it is possible that @@ -2420,15 +2476,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, if (nz != C_B_nz) { settings.log.printf("Add cuts: predicted nz %d actual nz %d\n", C_B_nz, nz); - for (i_t i = 0; i < p; i++) { - const i_t row_start = cuts.row_start[i]; - const i_t row_end = cuts.row_start[i + 1]; - for (i_t q = row_start; q < row_end; q++) { - const i_t j = cuts.j[q]; - printf("C(%d, %d) = %e\n", i, j, C_B.x[q]); - } - } - return -1; + assert(nz == C_B_nz); } settings.log.debug("C_B rows %d cols %d nz %d\n", C_B.m, C_B.n, nz); @@ -2456,7 +2504,7 @@ i_t add_cuts(const simplex_solver_settings_t& settings, if (err > 1e-6) { settings.log.printf("Diff matrix\n"); // Diff.print_matrix(); - exit(1); + assert(err <= 1e-6); } #endif // Adjust the vstatus @@ -2498,7 +2546,7 @@ void remove_cuts(lp_problem_t& lp, const i_t col_len = col_end - col_start; if (col_len != 1) { printf("Remove cuts: Slack %d has %d nzs in column\n", j, col_len); - exit(1); + assert(col_len == 1); } } @@ -2613,11 +2661,82 @@ void remove_cuts(lp_problem_t& lp, } } +template +void read_saved_solution_for_cut_verification(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + std::vector& saved_solution) +{ + settings.log.printf("Trying to open solution.dat\n"); + FILE* fid = NULL; + fid = fopen("solution.dat", "r"); + if (fid != NULL) { + i_t n_solution_dat; + i_t count = fscanf(fid, "%d\n", &n_solution_dat); + settings.log.printf("Solution.dat variables %d =? %d =? count %d\n", + n_solution_dat, + lp.num_cols, + count); + bool good = true; + if (count == 1 && n_solution_dat == lp.num_cols) { + settings.log.printf("Opened solution.dat with %d number of variables\n", n_solution_dat); + saved_solution.resize(n_solution_dat); + for (i_t j = 0; j < n_solution_dat; j++) { + count = fscanf(fid, "%lf", &saved_solution[j]); + if (count != 1) { + settings.log.printf("bad read solution.dat: j %d count %d\n", j, count); + good = false; + break; + } + } + } else { + good = false; + } + fclose(fid); + + if (!good) { + saved_solution.resize(0); + settings.log.printf("Solution.dat is bad\n"); + } else { + settings.log.printf("Read solution file\n"); + + auto hash_combine_f = [](size_t seed, f_t x) { + seed ^= std::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + }; + size_t seed = lp.num_cols; + for (i_t j = 0; j < lp.num_cols; ++j) { + seed = hash_combine_f(seed, saved_solution[j]); + } + settings.log.printf("Saved solution hash: %20x\n", seed); + + // Compute || A * x - b ||_inf + std::vector residual = lp.rhs; + matrix_vector_multiply(lp.A, 1.0, saved_solution, -1.0, residual); + settings.log.printf("Saved solution: || A*x - b ||_inf %e\n", vector_norm_inf(residual)); + f_t infeas = 0; + for (i_t j = 0; j < lp.num_cols; j++) { + if (saved_solution[j] < lp.lower[j] - 1e-6) { + f_t curr_infeas = (lp.lower[j] - saved_solution[j]); + infeas += curr_infeas; + settings.log.printf("j: %d saved solution %e lower %e\n", j, saved_solution[j], lp.lower[j]); + } + if (saved_solution[j] > lp.upper[j] + 1e-6) { + f_t curr_infeas = (saved_solution[j] - lp.upper[j]); + infeas += curr_infeas; + settings.log.printf("j %d saved solution %e upper %e\n", j, saved_solution[j], lp.upper[j]); + } + } + settings.log.printf("Bound infeasibility %e\n", infeas); + } + } else { + settings.log.printf("Could not open solution.dat\n"); + } +} #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class cut_pool_t; template class cut_generation_t; -template class mixed_integer_gomory_base_inequality_t; +template class 
tableau_equality_t; template class mixed_integer_rounding_cut_t; template @@ -2647,6 +2766,12 @@ void remove_cuts(lp_problem_t& lp, std::vector& basic_list, std::vector& nonbasic_list, basis_update_mpf_t& basis_update); + +template +void read_saved_solution_for_cut_verification( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + std::vector& saved_solution); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index a6b49d33c..a9e79aa23 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -26,11 +26,14 @@ enum cut_type_t : int8_t { }; template -void print_cut_types(const std::vector& cut_types, const simplex_solver_settings_t& settings) { - i_t num_gomory_cuts = 0; - i_t num_mir_cuts = 0; +void print_cut_types(const std::string& prefix, + const std::vector& cut_types, + const simplex_solver_settings_t& settings) +{ + i_t num_gomory_cuts = 0; + i_t num_mir_cuts = 0; i_t num_knapsack_cuts = 0; - i_t num_cg_cuts = 0; + i_t num_cg_cuts = 0; for (i_t i = 0; i < cut_types.size(); i++) { if (cut_types[i] == cut_type_t::MIXED_INTEGER_GOMORY) { num_gomory_cuts++; @@ -42,9 +45,18 @@ void print_cut_types(const std::vector& cut_types, const simplex_sol num_cg_cuts++; } } - settings.log.printf("Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d CG cuts: %d\n", num_gomory_cuts, num_mir_cuts, num_knapsack_cuts, num_cg_cuts); + settings.log.printf("%s: Gomory cuts: %d, MIR cuts: %d, Knapsack cuts: %d CG cuts: %d\n", + prefix.c_str(), + num_gomory_cuts, + num_mir_cuts, + num_knapsack_cuts, + num_cg_cuts); } +template +void read_saved_solution_for_cut_verification(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + std::vector& saved_solution); template f_t minimum_violation(const csr_matrix_t& C, @@ -97,6 +109,8 @@ class cut_pool_t { i_t pool_size() const { return cut_storage_.m; } + void print_cutpool_types() { print_cut_types("In cut pool", cut_type_, settings_); } + private: f_t cut_distance(i_t row, const std::vector& x, f_t& cut_violation, f_t &cut_norm); f_t cut_density(i_t row); @@ -232,9 +246,9 @@ class cut_generation_t { }; template -class mixed_integer_gomory_base_inequality_t { +class tableau_equality_t { public: - mixed_integer_gomory_base_inequality_t(const lp_problem_t& lp, + tableau_equality_t(const lp_problem_t& lp, basis_update_mpf_t& basis_update, const std::vector nonbasic_list) : b_bar_(lp.num_rows, 0.0), @@ -249,7 +263,7 @@ class mixed_integer_gomory_base_inequality_t { } // Generates the base inequalities: C*x == d that will be turned into cuts - i_t generate_base_inequality(const lp_problem_t& lp, + i_t generate_base_equality(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, const std::vector& var_types, @@ -271,23 +285,10 @@ class mixed_integer_gomory_base_inequality_t { template class mixed_integer_rounding_cut_t { public: - mixed_integer_rounding_cut_t(i_t num_vars, const simplex_solver_settings_t& settings) - : num_vars_(num_vars), - settings_(settings), - x_workspace_(num_vars, 0.0), - x_mark_(num_vars, 0), - has_lower_(num_vars, 0), - has_upper_(num_vars, 0), - needs_complement_(false) - { - } - - // We call initalize each cut pass - // it resizes the arrays - void initialize(const lp_problem_t& lp, - const std::vector& new_slacks, - const std::vector& xstar); - + mixed_integer_rounding_cut_t(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& new_slacks, 
+ const std::vector& xstar); // Convert an inequality of the form: sum_j a_j x_j >= beta // with l_j <= x_j <= u_j into the form: @@ -379,6 +380,15 @@ class strong_cg_cut_t { const std::vector& var_types, const std::vector& xstar); + i_t generate_strong_cg_cut(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const sparse_vector_t& inequality, + const f_t inequality_rhs, + const std::vector& xstar, + sparse_vector_t& cut, + f_t& cut_rhs); + i_t remove_continuous_variables_integers_nonnegative( const lp_problem_t& lp, const simplex_solver_settings_t& settings, From be4e181cee418cf8d1aa66da85b1ffc124b8b9e5 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 15:49:50 -0800 Subject: [PATCH 30/45] Add strong cg cuts. Clean up logs. Add timing info --- cpp/src/dual_simplex/branch_and_bound.cpp | 59 ++++-- cpp/src/dual_simplex/cuts.cpp | 233 +++++++++++++++------- cpp/src/dual_simplex/cuts.hpp | 27 ++- cpp/src/mip/solver.cu | 2 +- 4 files changed, 223 insertions(+), 98 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 99086a41f..6a19fb56a 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -191,7 +191,7 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) } else { constexpr int BUFFER_LEN = 32; char buffer[BUFFER_LEN]; - snprintf(buffer, BUFFER_LEN - 1, "%4.1f%%", user_mip_gap * 100); + snprintf(buffer, BUFFER_LEN - 1, "%5.1f%%", user_mip_gap * 100); return std::string(buffer); } } @@ -227,6 +227,11 @@ branch_and_bound_t::branch_and_bound_t( solver_status_(mip_exploration_status_t::UNSET) { exploration_stats_.start_time = tic(); +#ifdef PRINT_CONSTRAINT_MATRIX + settings_.log.printf("A"); + original_problem_.A.print_matrix(); +#endif + dualize_info_t dualize_info; convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); @@ -299,7 +304,6 @@ i_t branch_and_bound_t::get_heap_size() template void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) { - printf("Finding reduced cost fixings\n"); mutex_original_lp_.lock(); std::vector reduced_costs = root_relax_soln_.z; std::vector lower_bounds = original_lp_.lower; @@ -351,7 +355,9 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) } } - printf("Reduced costs: Found %d improved bounds and %d fixed variables (%.1f%%)\n", num_improved, num_fixed, 100.0*static_cast(num_fixed)/static_cast(num_integer_variables_)); + if (num_fixed > 0) { + printf("Reduced costs: Found %d improved bounds and %d fixed variables (%.1f%%)\n", num_improved, num_fixed, 100.0*static_cast(num_fixed)/static_cast(num_integer_variables_)); + } if (num_improved > 0) { lp_problem_t new_lp = original_lp_; @@ -363,12 +369,14 @@ void branch_and_bound_t::find_reduced_cost_fixings(f_t upper_bound) bounds_strengthening_t node_presolve(new_lp, Arow, row_sense, var_types_); bool feasible = node_presolve.bounds_strengthening(new_lp.lower, new_lp.upper, settings_); - num_improved = 0; + i_t bnd_num_improved = 0; for (i_t j = 0; j < original_lp_.num_cols; j++) { - if (new_lp.lower[j] > original_lp_.lower[j]) { num_improved++; } - if (new_lp.upper[j] < original_lp_.upper[j]) { num_improved++; } + if (new_lp.lower[j] > original_lp_.lower[j]) { bnd_num_improved++; } + if (new_lp.upper[j] < original_lp_.upper[j]) { bnd_num_improved++; } + } + if (bnd_num_improved != 
num_improved) { + printf("Bound strengthening: Found %d improved bounds\n", bnd_num_improved); } - printf("Bound strengthening: Found %d improved bounds\n", num_improved); } mutex_original_lp_.unlock(); @@ -426,7 +434,7 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu std::string gap = user_mip_gap(user_obj, user_lower); settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", + "H %+13.6e %+10.6e %s %9.2f\n", user_obj, user_lower, gap.c_str(), @@ -742,13 +750,31 @@ node_solve_info_t branch_and_bound_t::solve_node( const f_t upper_bound = get_upper_bound(); if (node_ptr->depth > num_integer_variables_) { - printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables_); + std::vector branched_variables(original_lp_.num_cols, 0); + std::vector branched_lower(original_lp_.num_cols, std::numeric_limits::quiet_NaN()); + std::vector branched_upper(original_lp_.num_cols, std::numeric_limits::quiet_NaN()); mip_node_t* parent = node_ptr->parent; while (parent != nullptr) { - printf("Parent depth %d\n", parent->depth); - printf("Parent branch var %d dir %d lower %e upper %e\n", parent->branch_var, parent->branch_dir, parent->branch_var_lower, parent->branch_var_upper); + if (original_lp_.lower[parent->branch_var] != 0.0 || original_lp_.upper[parent->branch_var] != 1.0) { + break; + } + if (branched_variables[parent->branch_var] == 1) { + printf( + "Variable %d already branched. Previous lower %e upper %e. Current lower %e upper %e.\n", + parent->branch_var, + branched_lower[parent->branch_var], + branched_upper[parent->branch_var], + parent->branch_var_lower, + parent->branch_var_upper); + } + branched_variables[parent->branch_var] = 1; + branched_lower[parent->branch_var] = parent->branch_var_lower; + branched_upper[parent->branch_var] = parent->branch_var_upper; parent = parent->parent; } + if (parent == nullptr) { + printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables_); + } } lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); @@ -1694,8 +1720,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut #endif // Generate cuts and add them to the cut pool + f_t cut_start_time = tic(); cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); - + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings_.log.printf("Cut generation time %.2f seconds\n", cut_generation_time); + } // Score the cuts cut_pool.score_cuts(root_relax_soln_.x); // Get the best cuts from the cut pool @@ -1743,8 +1773,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut for (i_t k = 0; k < num_cuts; k++) { //printf("Cx[%d] = %e cut_rhs[%d] = %e\n", k, Cx[k], k, cut_rhs[k]); if (Cx[k] > cut_rhs[k] + 1e-6) { - printf("Cut %d is violated by saved solution. Cx %e cut_rhs %e\n", k, Cx[k], cut_rhs[k]); - return mip_status_t::NUMERICAL; + printf("Cut %d is violated by saved solution. Cx %e cut_rhs %e Diff: %e\n", k, Cx[k], cut_rhs[k], Cx[k] - cut_rhs[k]); } } } @@ -1860,7 +1889,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut std::string gap = num_fractional != 0 ? 
user_mip_gap(user_obj, user_lower) : "0.0%"; - settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", 0, 0, user_obj, diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index f5dac6bb6..139b43242 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -7,7 +7,7 @@ #include #include - +#include #include @@ -605,14 +605,29 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, const std::vector& nonbasic_list) { // Generate Gomory and CG Cuts + f_t cut_start_time = tic(); generate_gomory_cuts( lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.printf("Gomory and CG cut generation time %.2f seconds\n", cut_generation_time); + } // Generate Knapsack cuts + cut_start_time = tic(); generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.printf("Knapsack cut generation time %.2f seconds\n", cut_generation_time); + } - // Generate MIR cuts + // Generate MIR and CG cuts + cut_start_time = tic(); generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.printf("MIR and CG cut generation time %.2f seconds\n", cut_generation_time); + } } template @@ -645,6 +660,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& const std::vector& var_types, const std::vector& xstar) { + f_t mir_start_time = tic(); mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); strong_cg_cut_t cg(lp, var_types, xstar); @@ -666,12 +682,15 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& const i_t row_nz = row_end - row_start; i_t num_integer_in_row = 0; + i_t num_continuous_in_row = 0; for (i_t p = row_start; p < row_end; p++) { const i_t j = Arow.j[p]; if (var_types[j] == variable_type_t::INTEGER) { num_integer_in_row++; + } else { + num_continuous_in_row++; } } @@ -725,29 +744,35 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& sparse_vector_t inequality(Arow, i); f_t inequality_rhs = lp.rhs[i]; - - - - // Remove the slack from the equality to get an inequality - for (i_t k = 0; k < inequality.i.size(); k++) { - const i_t j = inequality.i[k]; - if (j == slack) { inequality.x[k] = 0.0; } - } - + const bool generate_cg_cut = true; + f_t fractional_part_rhs = fractional_part(inequality_rhs); + if (generate_cg_cut && fractional_part_rhs > 1e-6 && fractional_part_rhs < (1-1e-6)) { // Try to generate a CG cut + //printf("Trying to generate a CG cut from row %d\n", i); sparse_vector_t cg_inequality = inequality; f_t cg_inequality_rhs = inequality_rhs; + if (fractional_part(inequality_rhs) < 0.5) { + // Multiply by -1 to force the fractional part to be greater than 0.5 + cg_inequality_rhs *= -1; + cg_inequality.negate(); + } sparse_vector_t cg_cut(lp.num_cols, 0); f_t cg_cut_rhs; i_t cg_status = cg.generate_strong_cg_cut( lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); if (cg_status == 0) { - printf("Adding CG cut nz %ld\n", cg_cut.i.size()); + printf("Adding CG cut nz %ld status %d row %d rhs %e inequality nz %d\n", cg_cut.i.size(), cg_status, i, cg_inequality_rhs, cg_inequality.i.size()); 
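+        // Illustrative sketch (row data invented for exposition, not from any model):
+        // with lp.rhs[i] = 7.3, fractional_part(7.3) = 0.3 lies strictly inside
+        // (1e-6, 1 - 1e-6), so the gate above attempts a CG cut; because 0.3 < 0.5 the
+        // row is negated first, giving fractional_part(-7.3) = -7.3 - floor(-7.3) = 0.7 > 0.5.
+        // generate_strong_cg_cut returns the cut already flipped into the ">=" form the
+        // cut pool expects, so it can be added directly below.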
cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); } } + // Remove the slack from the equality to get an inequality + for (i_t k = 0; k < inequality.i.size(); k++) { + const i_t j = inequality.i[k]; + if (j == slack) { inequality.x[k] = 0.0; } + } + // inequaility'*x <= inequality_rhs // But for MIR we need: inequality'*x >= inequality_rhs inequality_rhs *= -1; @@ -904,6 +929,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& const i_t col_start = lp.A.col_start[max_off_bound_var]; const i_t col_end = lp.A.col_start[max_off_bound_var + 1]; const i_t col_len = col_end - col_start; + const i_t max_potential_rows = 10; if (col_len > 1) { std::vector potential_rows; potential_rows.reserve(col_len); @@ -914,6 +940,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& const f_t val = lp.A.x[q]; // Can't use rows that have already been aggregated if (std::abs(val) > threshold && aggregated_mark[i] == 0) { potential_rows.push_back(i); } + if (potential_rows.size() >= max_potential_rows) { break; } } if (!potential_rows.empty()) { @@ -981,47 +1008,6 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } } -template -bool cut_generation_t::generate_single_mir_cut( - const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - const std::vector& var_types, - const std::vector& xstar, - const sparse_vector_t& inequality, - f_t inequality_rhs, - mixed_integer_rounding_cut_t& mir, - sparse_vector_t& cut, - f_t& cut_rhs) -{ - i_t mir_status = - mir.generate_cut(inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut, cut_rhs); - bool add_cut = false; - const f_t min_cut_distance = 1e-4; - if (mir_status == 0) { - if (cut.i.size() == 0) { - return false; - } - mir.substitute_slacks(lp, Arow, cut, cut_rhs); - if (cut.i.size() == 0) { - return false; - } - // Check that the cut is violated - // The cut is of the form cut'*x >= cut_rhs - // We need that cut'*xstar < cut_rhs for the cut to be violated by the current relaxation solution xstar - f_t dot = cut.dot(xstar); - f_t cut_norm = cut.norm2_squared(); - if (dot < cut_rhs && cut_norm > 0.0) { - // Cut is violated. 
Compute it's distance - f_t cut_distance = (cut_rhs - dot) / std::sqrt(cut_norm); - if (cut_distance > min_cut_distance) { - add_cut = true; - } - } - } - return add_cut; -} - template void cut_generation_t::generate_gomory_cuts( @@ -1059,11 +1045,17 @@ void cut_generation_t::generate_gomory_cuts( inequality_rhs); if (tableau_status == 0) { // Generate a CG cut - if (1) + const bool generate_cg_cut = false; + if (generate_cg_cut) { // Try to generate a CG cut sparse_vector_t cg_inequality = inequality; f_t cg_inequality_rhs = inequality_rhs; + if (fractional_part(inequality_rhs) < 0.5) { + // Multiply by -1 to force the fractional part to be greater than 0.5 + cg_inequality_rhs *= -1; + cg_inequality.negate(); + } sparse_vector_t cg_cut(lp.num_cols, 0); f_t cg_cut_rhs; i_t cg_status = cg.generate_strong_cg_cut( @@ -1229,19 +1221,21 @@ i_t tableau_equality_t::generate_base_equality( i_t small_coeff = 0; const f_t drop_tol = 1e-12; + const bool drop_coefficients = true; sparse_vector_t a_bar(lp.num_cols, 0) ; a_bar.i.reserve(abar_indices.size() + 1); a_bar.x.reserve(abar_indices.size() + 1); for (i_t k = 0; k < abar_indices.size(); k++) { const i_t jj = abar_indices[k]; - if (1 && std::abs(x_workspace_[jj]) < drop_tol) { + if (drop_coefficients && std::abs(x_workspace_[jj]) < drop_tol) { small_coeff++; } else { a_bar.i.push_back(jj); a_bar.x.push_back(x_workspace_[jj]); } } - if (small_coeff > 0) { + const bool verbose = false; + if (verbose && small_coeff > 0) { settings.log.printf("Small coeff dropped %d\n", small_coeff); } @@ -2238,20 +2232,122 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_integer_only( cut.i.clear(); cut.x.clear(); - for (i_t k = 0; k < inequality.i.size(); k++) { - const i_t j = inequality.i[k]; - const f_t a_j = inequality.x[k]; - if (var_types[j] == variable_type_t::INTEGER) { - cut.i.push_back(j); - cut.x.push_back(std::floor(a_j)); - } else { - return -1; + f_t a_0 = inequality_rhs; + f_t f_a_0 = fractional_part(a_0); + + if (f_a_0 == 0.0) { + // f(a_0) == 0.0 so we do a weak CG cut + cut.i.reserve(inequality.i.size()); + cut.x.reserve(inequality.i.size()); + cut.i.clear(); + cut.x.clear(); + for (i_t k = 0; k < inequality.i.size(); k++) { + const i_t j = inequality.i[k]; + const f_t a_j = inequality.x[k]; + if (var_types[j] == variable_type_t::INTEGER) { + cut.i.push_back(j); + cut.x.push_back(std::floor(a_j)); + } else { + return -1; + } } + cut_rhs = std::floor(inequality_rhs); + } else { + return generate_strong_cg_cut_helper( + inequality.i, inequality.x, inequality_rhs, var_types, cut, cut_rhs); } + return 0; +} - cut_rhs = std::floor(inequality_rhs); +template +i_t strong_cg_cut_t::generate_strong_cg_cut_helper( + const std::vector& indicies, + const std::vector& coefficients, + f_t rhs, + const std::vector& var_types, + sparse_vector_t& cut, + f_t& cut_rhs) +{ + const bool verbose = false; + const i_t nz = indicies.size(); + const f_t f_a_0 = fractional_part(rhs); + // We will try to generat a strong CG cut. 
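+  // Illustrative example (numbers chosen only for exposition): if f(a_0) = 0.6, the
+  // integer k selected below is 1 (since 1/2 <= 0.6 < 1/1) and alpha = 1 - 0.6 = 0.4.
+  // An integer coefficient with f(a_j) = 0.9 > f(a_0) gets p = ceil(1 * (0.9 - 0.6) / 0.4) = 1,
+  // so its cut coefficient is (k + 1) * floor(a_j) + p = 2 * floor(a_j) + 1, and the
+  // right-hand side becomes (k + 1) * floor(a_0) = 2 * floor(a_0).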
+ // Find the unique integer k such that + // 1/(k+1) <= f(a_0) < 1/k + f_t k_upper = 1.0 / f_a_0; + i_t k = static_cast(std::floor(k_upper)); + if (k_upper - static_cast(k) < 1e-6) { + k--; + if (verbose) { + printf("Decreased k to %d\n", k); + } + } - cut.sort(); + const f_t alpha = 1.0 - f_a_0; + f_t lower = 1.0 / static_cast(k + 1); + f_t upper = 1.0 / static_cast(k); + if (verbose) { + printf("f_a_0 %e lower %e upper %e alpha %e\n", f_a_0, lower, upper, alpha); + } + if (f_a_0 >= lower && f_a_0 < upper) { + cut.i.reserve(nz); + cut.x.reserve(nz); + cut.i.clear(); + cut.x.clear(); + for (i_t q = 0; q < nz; q++) { + const i_t j = indicies[q]; + const f_t a_j = coefficients[q]; + if (var_types[j] == variable_type_t::INTEGER) { + const f_t f_a_j = fractional_part(a_j); + if (f_a_j <= f_a_0) { + cut.i.push_back(j); + cut.x.push_back((k + 1.0) * std::floor(a_j)); + if (verbose) { + printf("j %d a_j %e f_a_j %e k %d\n", j, a_j, f_a_j, k); + } + } else { + // Need to compute the p such that + // f(a_0) + (p-1)/k * alpha < f(a_j) <= f(a_0) + p/k * alpha + const f_t value = static_cast(k) * (f_a_j - f_a_0) / alpha; + i_t p = static_cast(std::ceil(value)); + if (fractional_part(value) < 1e-12) { + printf("Warning: p %d value %.16e is close to an integer\n", p, value, p + 1); + } + if (verbose) { + printf("j %d a_j %e f_a_j %e p %d value %.16e\n", j, a_j, f_a_j, p, value); + } + if (f_a_0 + static_cast(p - 1) / static_cast(k) * alpha < f_a_j && + f_a_j <= f_a_0 + static_cast(p) / static_cast(k) * alpha) { + cut.i.push_back(j); + cut.x.push_back((k + 1.0) * std::floor(a_j) + p); + } else { + printf("Error: p %d f_a_0 %e f_a_j %e alpha %e value %.16e\n", p, f_a_0, f_a_j, alpha, value); + return -1; + } + } + } else { + return -1; + } + } + } else { + printf("Error: k %d lower %e f(a_0) %e upper %e\n", k, lower, f_a_0, upper); + return -1; + } + cut_rhs = (k + 1.0) * std::floor(rhs); + if (verbose) { + printf("Generated strong CG cut: k %d f_a_0 %e cut_rhs %e\n", k, f_a_0, cut_rhs); + for (i_t q = 0; q < cut.i.size(); q++) { + if (cut.x[q] != 0.0) { + printf("%.16e x%d ", cut.x[q], cut.i[q]); + } + } + printf("\n"); + printf("Original inequality rhs %e nz %d\n", rhs, coefficients.size()); + for (i_t q = 0; q < nz; q++) { + printf("%e x%d ", coefficients[q], indicies[q]); + } + printf("\n"); + } return 0; } @@ -2298,8 +2394,11 @@ i_t strong_cg_cut_t::generate_strong_cg_cut( // We have an inequality with no continuous variables // Generate a CG cut - generate_strong_cg_cut_integer_only( + status = generate_strong_cg_cut_integer_only( settings, var_types, cg_inequality, cg_inequality_rhs, cut, cut_rhs); + if (status != 0) { + return -1; + } // Convert the CG cut back to the original variables to_original_integer_variables(lp, cut, cut_rhs); diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index a9e79aa23..a914e189c 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -53,6 +53,10 @@ void print_cut_types(const std::string& prefix, num_cg_cuts); } +template +f_t fractional_part(f_t a) { return a - std::floor(a); } + + template void read_saved_solution_for_cut_verification(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -226,21 +230,6 @@ class cut_generation_t { const std::vector& var_types, const std::vector& xstar); - - // Generate a single MIR cut - bool generate_single_mir_cut(const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - const std::vector& var_types, - const std::vector& xstar, 
- const sparse_vector_t& inequality, - f_t inequality_rhs, - mixed_integer_rounding_cut_t& mir, - sparse_vector_t& cut, - f_t& cut_rhs); - - - cut_pool_t& cut_pool_; knapsack_generation_t knapsack_generation_; }; @@ -408,6 +397,14 @@ class strong_cg_cut_t { f_t& cut_rhs); private: + + i_t generate_strong_cg_cut_helper(const std::vector& indicies, + const std::vector& coefficients, + f_t rhs, + const std::vector& var_types, + sparse_vector_t& cut, + f_t& cut_rhs); + std::vector transformed_variables_; }; diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index b6ffd04f1..12330666d 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -164,7 +164,7 @@ solution_t mip_solver_t::run_solver() // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = timer_.remaining_time(); branch_and_bound_settings.node_limit = context.settings.node_limit; - branch_and_bound_settings.reliability_branching = context.settings.reliability_branching; + branch_and_bound_settings.reliability_branching = context.settings.reliability_branching; branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; From 377ffb1efd4ed8dde00e3c635a23739ce11c772c Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 15:57:49 -0800 Subject: [PATCH 31/45] Clean up disabling of MIP presolve in solver.cu. Set default max_cut_passes=10 --- cpp/src/dual_simplex/simplex_solver_settings.hpp | 2 +- cpp/src/mip/solver.cu | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 7dbf0e1cc..c31a6bbac 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -72,7 +72,7 @@ struct simplex_solver_settings_t { num_threads(omp_get_max_threads() - 1), num_bfs_threads(std::min(num_threads / 4, 1)), num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), - max_cut_passes(10), + max_cut_passes(0), random_seed(0), inside_mip(0), reliability_branching(-1), diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 12330666d..92d548c2f 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -109,8 +109,8 @@ solution_t mip_solver_t::run_solver() diversity_manager_t dm(context); dm.timer = timer_; - //bool presolve_success = dm.run_presolve(timer_.remaining_time()); - bool presolve_success = true; + const bool run_presolve = context.settings.presolve; + bool presolve_success = run_presolve ? 
dm.run_presolve(timer_.remaining_time()) : true; if (!presolve_success) { CUOPT_LOG_INFO("Problem proven infeasible in presolve"); solution_t sol(*context.problem_ptr); @@ -118,7 +118,7 @@ solution_t mip_solver_t::run_solver() context.problem_ptr->post_process_solution(sol); return sol; } - if (0 && context.problem_ptr->empty) { + if (run_presolve && context.problem_ptr->empty) { CUOPT_LOG_INFO("Problem full reduced in presolve"); solution_t sol(*context.problem_ptr); sol.set_problem_fully_reduced(); @@ -127,7 +127,7 @@ solution_t mip_solver_t::run_solver() } // if the problem was reduced to a LP: run concurrent LP - if (0 && context.problem_ptr->n_integer_vars == 0) { + if (run_presolve && context.problem_ptr->n_integer_vars == 0) { CUOPT_LOG_INFO("Problem reduced to a LP, running concurrent LP"); pdlp_solver_settings_t settings{}; settings.time_limit = timer_.remaining_time(); @@ -164,7 +164,7 @@ solution_t mip_solver_t::run_solver() // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = timer_.remaining_time(); branch_and_bound_settings.node_limit = context.settings.node_limit; - branch_and_bound_settings.reliability_branching = context.settings.reliability_branching; + branch_and_bound_settings.reliability_branching = context.settings.reliability_branching; branch_and_bound_settings.print_presolve_stats = false; branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; @@ -237,9 +237,6 @@ solution_t mip_solver_t::run_solver() std::ref(branch_and_bound_solution)); } - //auto bb_status = branch_and_bound_status_future.get(); - //CUOPT_LOG_INFO("BB status: %d", bb_status); - // Start the primal heuristics auto sol = dm.run_solver(); if (!context.settings.heuristics_only) { From 425f2893b2388eba806143602bfc3beb3e7df929 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 16:06:45 -0800 Subject: [PATCH 32/45] More fixes to enable cut_passes=0 --- .../mip/solver_settings.hpp | 2 +- cpp/src/dual_simplex/branch_and_bound.cpp | 24 +++++++++++++++++++ cpp/src/math_optimization/solver_settings.cu | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 65a4d4bd0..6da848b40 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -83,7 +83,7 @@ class mip_solver_settings_t { i_t reliability_branching = -1; bool heuristics_only = false; i_t num_cpu_threads = -1; // -1 means use default number of threads in branch and bound - i_t max_cut_passes = 10; // number of cut passes to make + i_t max_cut_passes = 0; // number of cut passes to make i_t num_gpus = 1; bool log_to_console = true; std::string log_file; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 6a19fb56a..91bcfc2c3 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1655,6 +1655,30 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); + if (num_fractional == 0) { + mutex_upper_.lock(); + incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); + upper_bound_ = root_objective_; + mutex_upper_.unlock(); + // We 
should be done here + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = root_objective_; + solution.nodes_explored = 0; + solution.simplex_iterations = root_relax_soln_.iterations; + settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", + compute_user_objective(original_lp_, root_objective_), + toc(exploration_stats_.start_time)); + + if (settings_.solution_callback != nullptr) { + settings_.solution_callback(solution.x, solution.objective); + } + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + return mip_status_t::OPTIMAL; + } + csr_matrix_t Arow(1, 1, 1); original_lp_.A.to_compressed_row(Arow); diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 8ae1fa51b..0198e119d 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -87,7 +87,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_DUALIZE, &pdlp_settings.dualize, -1, 1, -1}, {CUOPT_ORDERING, &pdlp_settings.ordering, -1, 1, -1}, {CUOPT_BARRIER_DUAL_INITIAL_POINT, &pdlp_settings.barrier_dual_initial_point, -1, 1, -1}, - {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits::max(), 10}, + {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits::max(), 0}, {CUOPT_MIP_NODE_LIMIT, &mip_settings.node_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1}, {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, From 98a8c574c7768615536b781b6dbede2d7bfd78ff Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 16:11:41 -0800 Subject: [PATCH 33/45] Re-enable RINS --- cpp/src/mip/diversity/diversity_manager.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 823f14681..483ffeb68 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -470,7 +470,7 @@ solution_t diversity_manager_t::run_solver() run_fj_alone(sol); return sol; } - //rins.enable(); + rins.enable(); generate_solution(timer.remaining_time(), false); if (timer.check_time_limit()) { From a2b845d6e6f760a191ba34cb5a8a39679cdd6f5b Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 16:14:40 -0800 Subject: [PATCH 34/45] Re-enable sub-MIP --- cpp/src/mip/diversity/recombiners/sub_mip.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 0000ccd29..771c4781a 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -86,7 +86,7 @@ class sub_mip_recombiner_t : public recombiner_t { trivial_presolve(fixed_problem); fixed_problem.check_problem_representation(true); // brute force rounding threshold is 8 - const bool run_sub_mip = 0 && fixed_problem.n_integer_vars > 8; + const bool run_sub_mip = fixed_problem.n_integer_vars > 8; dual_simplex::mip_status_t branch_and_bound_status = dual_simplex::mip_status_t::UNSET; dual_simplex::mip_solution_t branch_and_bound_solution(1); if (run_sub_mip) { @@ -105,6 +105,7 @@ class 
sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.num_threads = 2; branch_and_bound_settings.num_bfs_threads = 1; branch_and_bound_settings.num_diving_threads = 1; + branch_and_bound_settings.max_cut_passes = 0; branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); From 94a191fb1db68883ac21a8dd5cd8d6d4124319c4 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 16:20:02 -0800 Subject: [PATCH 35/45] Swap exit for assert in basis_updates --- cpp/src/dual_simplex/basis_updates.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 115c00744..3e2bbb6dc 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1274,7 +1274,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts new_L.col_start[m + cuts_basic.m] = L_nz; if (L_nz != predicted_nz) { printf("L_nz %d predicted_nz %d\n", L_nz, predicted_nz); - exit(1); + assert(L_nz == predicted_nz); } L0_ = new_L; From 1f091436b8434809294060461b01e65e64ed252e Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 20 Jan 2026 17:39:44 -0800 Subject: [PATCH 36/45] Merge in main. Make B&B time consecutive. Print Papilo hash. Print cuOpt presolve time. --- .github/workflows/build.yaml | 2 +- .github/workflows/build_images.yaml | 5 +- .github/workflows/nightly.yaml | 4 +- .github/workflows/pr.yaml | 34 +- .github/workflows/test.yaml | 34 +- .../trigger-breaking-change-alert.yaml | 2 +- README.md | 7 +- .../linear_programming/cuopt/run_mip.cpp | 10 +- .../linear_programming/utils/get_datasets.py | 4 +- build.sh | 36 +- ci/build_wheel.sh | 28 +- ci/build_wheel_cuopt.sh | 6 +- ci/docker/Dockerfile | 19 +- ci/docker/test_image.sh | 4 +- .../all_cuda-129_arch-aarch64.yaml | 1 + .../all_cuda-129_arch-x86_64.yaml | 1 + ...64.yaml => all_cuda-131_arch-aarch64.yaml} | 5 +- ..._64.yaml => all_cuda-131_arch-x86_64.yaml} | 5 +- cpp/CMakeLists.txt | 65 +- cpp/include/cuopt/error.hpp | 4 +- .../utilities/internals.hpp | 3 +- cpp/include/cuopt/routing/cython/cython.hpp | 7 +- cpp/libmps_parser/CMakeLists.txt | 8 +- cpp/libmps_parser/tests/CMakeLists.txt | 5 +- cpp/src/dual_simplex/CMakeLists.txt | 3 +- cpp/src/dual_simplex/barrier.cu | 1586 +++++++---------- cpp/src/dual_simplex/barrier.hpp | 27 +- cpp/src/dual_simplex/basis_solves.cpp | 2 +- cpp/src/dual_simplex/basis_solves.hpp | 2 +- cpp/src/dual_simplex/basis_updates.cpp | 2 +- cpp/src/dual_simplex/basis_updates.hpp | 2 +- cpp/src/dual_simplex/bounds_strengthening.cpp | 6 +- cpp/src/dual_simplex/branch_and_bound.cpp | 1007 ++++++----- cpp/src/dual_simplex/branch_and_bound.hpp | 173 +- cpp/src/dual_simplex/crossover.cpp | 2 +- cpp/src/dual_simplex/cusparse_view.cu | 7 +- cpp/src/dual_simplex/cuts.cpp | 2 +- cpp/src/dual_simplex/diving_heuristics.cpp | 306 ++++ cpp/src/dual_simplex/diving_heuristics.hpp | 58 + cpp/src/dual_simplex/diving_queue.hpp | 73 - cpp/src/dual_simplex/iterative_refinement.hpp | 251 ++- cpp/src/dual_simplex/logger.hpp | 12 +- cpp/src/dual_simplex/mip_node.hpp | 35 +- cpp/src/dual_simplex/node_queue.hpp | 162 ++ cpp/src/dual_simplex/phase2.cpp | 2 +- cpp/src/dual_simplex/presolve.cpp | 7 +- cpp/src/dual_simplex/primal.cpp | 2 +- cpp/src/dual_simplex/pseudo_costs.cpp | 58 +- cpp/src/dual_simplex/pseudo_costs.hpp | 15 +- .../dual_simplex/simplex_solver_settings.hpp | 29 +- cpp/src/dual_simplex/solve.cpp | 4 +- 
cpp/src/dual_simplex/sparse_cholesky.cuh | 6 +- .../initial_scaling.cu | 8 +- .../optimization_problem.cu | 118 +- cpp/src/linear_programming/pdhg.cu | 14 +- cpp/src/linear_programming/pdlp.cu | 20 +- .../restart_strategy/pdlp_restart_strategy.cu | 6 +- .../weighted_average_solution.cu | 6 +- .../adaptive_step_size_strategy.cu | 4 +- .../convergence_information.cu | 10 +- .../infeasibility_information.cu | 4 +- cpp/src/linear_programming/translate.hpp | 27 +- .../utilities/cython_solve.cu | 59 +- cpp/src/linear_programming/utils.cuh | 8 +- cpp/src/mip/diversity/diversity_config.hpp | 6 +- cpp/src/mip/diversity/diversity_manager.cu | 33 +- cpp/src/mip/diversity/lns/rins.cu | 33 +- .../recombiners/bound_prop_recombiner.cuh | 6 +- cpp/src/mip/diversity/recombiners/sub_mip.cuh | 20 +- cpp/src/mip/local_search/local_search.cu | 8 +- .../rounding/simple_rounding_kernels.cuh | 3 +- .../conditional_bound_strengthening.cu | 24 +- cpp/src/mip/presolve/gf2_presolve.hpp | 6 +- cpp/src/mip/presolve/lb_probing_cache.cu | 11 +- .../load_balanced_partition_helpers.cuh | 4 +- cpp/src/mip/presolve/multi_probe.cu | 9 +- cpp/src/mip/presolve/probing_cache.cu | 486 ++++- cpp/src/mip/presolve/probing_cache.cuh | 4 +- cpp/src/mip/presolve/third_party_presolve.cpp | 13 +- cpp/src/mip/presolve/trivial_presolve.cu | 6 +- cpp/src/mip/presolve/trivial_presolve.cuh | 36 +- .../mip/presolve/trivial_presolve_helpers.cuh | 5 +- cpp/src/mip/problem/presolve_data.cuh | 19 +- cpp/src/mip/problem/problem.cu | 282 ++- cpp/src/mip/problem/problem.cuh | 11 +- cpp/src/mip/problem/problem_helpers.cuh | 57 +- cpp/src/mip/solution/solution.cu | 9 +- cpp/src/mip/solve.cu | 7 +- cpp/src/mip/solver.cu | 20 +- cpp/src/mip/utilities/cpu_worker_thread.cuh | 8 +- .../mip/utilities/work_unit_ordered_queue.cuh | 136 ++ cpp/src/routing/adapters/adapted_sol.cuh | 16 +- .../routing/adapters/assignment_adapter.cuh | 12 +- cpp/src/routing/adapters/solution_adapter.cuh | 11 +- cpp/src/routing/assignment.cu | 17 +- cpp/src/routing/crossovers/ox_graph.hpp | 14 +- cpp/src/routing/crossovers/ox_recombiner.cuh | 23 +- cpp/src/routing/cuda_graph.cuh | 6 +- cpp/src/routing/fleet_info.hpp | 43 +- cpp/src/routing/fleet_order_constraints.hpp | 8 +- cpp/src/routing/ges_solver.cu | 7 +- .../local_search/cycle_finder/cycle.hpp | 8 +- .../local_search/cycle_finder/cycle_finder.cu | 7 +- .../local_search/cycle_finder/cycle_graph.hpp | 12 +- cpp/src/routing/local_search/local_search.cu | 6 +- cpp/src/routing/local_search/sliding_tsp.cu | 6 +- .../routing/local_search/sliding_window.cu | 7 +- cpp/src/routing/local_search/two_opt.cu | 7 +- .../routing/local_search/vrp/vrp_execute.cu | 6 +- cpp/src/routing/order_info.hpp | 14 +- cpp/src/routing/problem/problem.cu | 26 +- cpp/src/routing/solution/pool_allocator.cuh | 28 +- cpp/src/routing/solution/solution.cu | 4 +- cpp/src/routing/utilities/cython.cu | 73 +- cpp/src/utilities/copy_helpers.hpp | 41 +- cpp/src/utilities/cuda_helpers.cuh | 25 +- cpp/src/utilities/driver_helpers.cuh | 12 +- cpp/src/utilities/omp_helpers.hpp | 4 +- cpp/src/utilities/timer.hpp | 29 + cpp/src/utilities/tsan_suppressions.txt | 6 + .../distance_engine/waypoint_matrix_test.cpp | 8 +- cpp/tests/examples/routing/CMakeLists.txt | 4 +- .../c_api_tests/c_api_test.c | 3 +- .../unit_tests/solver_settings_test.cu | 40 +- .../utilities/pdlp_test_utilities.cuh | 7 +- cpp/tests/mip/elim_var_remap_test.cu | 6 +- cpp/tests/mip/load_balancing_test.cu | 23 +- cpp/tests/mip/mip_utils.cuh | 6 +- cpp/tests/mip/multi_probe_test.cu | 47 +- 
cpp/tests/mip/presolve_test.cu | 4 +- cpp/tests/mip/problem_test.cu | 20 +- cpp/tests/mip/unit_test.cu | 4 +- cpp/tests/qp/CMakeLists.txt | 3 +- cpp/tests/qp/unit_tests/no_constraints.cu | 66 + cpp/tests/qp/unit_tests/two_variable_test.cu | 4 +- cpp/tests/routing/CMakeLists.txt | 3 +- cpp/tests/routing/level0/l0_ges_test.cu | 6 +- .../level0/l0_objective_function_test.cu | 4 +- cpp/tests/routing/level0/l0_routing_test.cu | 4 +- .../routing/level0/l0_vehicle_order_match.cu | 4 +- .../routing/level0/l0_vehicle_types_test.cu | 4 +- cpp/tests/routing/routing_test.cuh | 6 +- cpp/tests/routing/unit_tests/batch_tsp.cu | 89 + .../routing/unit_tests/vehicle_order_match.cu | 16 +- .../cuopt_service_data/lpmip_configs.json | 18 +- datasets/get_test_data.sh | 87 +- .../download_pdlp_test_dataset.sh | 83 +- datasets/mip/download_miplib_test_dataset.sh | 81 +- dependencies.yaml | 7 +- docs/cuopt/source/cuopt-c/index.rst | 6 +- docs/cuopt/source/cuopt-c/lp-milp/index.rst | 15 - .../{lp-milp => lp-qp-milp}/examples/Makefile | 0 .../examples/milp_mps_example.c | 0 .../examples/mip_sample.mps | 0 .../examples/mps_file_example.c | 0 .../examples/sample.mps | 0 .../examples/simple_lp_example.c | 0 .../examples/simple_milp_example.c | 0 .../examples/simple_qp_example.c | 3 +- .../cuopt/source/cuopt-c/lp-qp-milp/index.rst | 15 + .../lp-qp-example.rst} | 7 +- .../lp-qp-milp-c-api.rst} | 8 +- .../{lp-milp => lp-qp-milp}/milp-examples.rst | 2 +- docs/cuopt/source/cuopt-c/quick-start.rst | 2 +- docs/cuopt/source/cuopt-cli/quick-start.rst | 2 +- docs/cuopt/source/cuopt-python/index.rst | 8 +- .../source/cuopt-python/lp-milp/index.rst | 14 - .../expressions_constraints_example.py | 0 .../examples/incumbent_solutions_example.py | 0 .../examples/pdlp_warmstart_example.py | 0 .../examples/production_planning_example.py | 0 .../examples/simple_lp_example.py | 0 .../examples/simple_milp_example.py | 0 .../examples/simple_qp_example.py | 10 +- .../source/cuopt-python/lp-qp-milp/index.rst | 14 + .../lp-qp-milp-api.rst} | 8 +- .../lp-qp-milp-examples.rst} | 12 +- .../cuopt/source/cuopt-python/quick-start.rst | 2 +- .../cuopt-server/examples/lp-examples.rst | 2 +- .../source/cuopt-server/server-api/index.rst | 2 +- docs/cuopt/source/index.rst | 2 +- .../{lp-features.rst => lp-qp-features.rst} | 2 +- ...p-settings.rst => lp-qp-milp-settings.rst} | 6 +- docs/cuopt/source/transition.rst | 2 +- docs/cuopt/source/versions1.json | 14 +- .../waypoint_matrix_wrapper.pyx | 3 +- python/cuopt/cuopt/routing/__init__.py | 4 +- python/cuopt/cuopt/routing/utils_wrapper.pyx | 19 +- .../cuopt/cuopt/routing/vehicle_routing.pxd | 8 +- python/cuopt/cuopt/routing/vehicle_routing.py | 51 +- .../cuopt/routing/vehicle_routing_wrapper.pyx | 128 +- .../cuopt/tests/routing/test_batch_solve.py | 67 + python/cuopt/pyproject.toml | 1 + .../cuopt_sh_client/cuopt_self_host_client.py | 11 +- .../thin_client_solver_settings.py | 14 +- .../cuopt_server/tests/test_lp.py | 22 +- .../linear_programming/data_definition.py | 76 +- .../utils/linear_programming/solver.py | 76 +- 198 files changed, 4835 insertions(+), 2624 deletions(-) rename conda/environments/{all_cuda-130_arch-aarch64.yaml => all_cuda-131_arch-aarch64.yaml} (95%) rename conda/environments/{all_cuda-130_arch-x86_64.yaml => all_cuda-131_arch-x86_64.yaml} (95%) create mode 100644 cpp/src/dual_simplex/diving_heuristics.cpp create mode 100644 cpp/src/dual_simplex/diving_heuristics.hpp delete mode 100644 cpp/src/dual_simplex/diving_queue.hpp create mode 100644 cpp/src/dual_simplex/node_queue.hpp create 
mode 100644 cpp/src/mip/utilities/work_unit_ordered_queue.cuh create mode 100644 cpp/src/utilities/tsan_suppressions.txt create mode 100644 cpp/tests/qp/unit_tests/no_constraints.cu create mode 100644 cpp/tests/routing/unit_tests/batch_tsp.cu delete mode 100644 docs/cuopt/source/cuopt-c/lp-milp/index.rst rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/Makefile (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/milp_mps_example.c (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/mip_sample.mps (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/mps_file_example.c (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/sample.mps (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/simple_lp_example.c (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/simple_milp_example.c (100%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/examples/simple_qp_example.c (97%) create mode 100644 docs/cuopt/source/cuopt-c/lp-qp-milp/index.rst rename docs/cuopt/source/cuopt-c/{lp-milp/lp-example.rst => lp-qp-milp/lp-qp-example.rst} (97%) rename docs/cuopt/source/cuopt-c/{lp-milp/lp-milp-c-api.rst => lp-qp-milp/lp-qp-milp-c-api.rst} (97%) rename docs/cuopt/source/cuopt-c/{lp-milp => lp-qp-milp}/milp-examples.rst (98%) delete mode 100644 docs/cuopt/source/cuopt-python/lp-milp/index.rst rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/expressions_constraints_example.py (100%) rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/incumbent_solutions_example.py (100%) rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/pdlp_warmstart_example.py (100%) rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/production_planning_example.py (100%) rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/simple_lp_example.py (100%) rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/simple_milp_example.py (100%) rename docs/cuopt/source/cuopt-python/{lp-milp => lp-qp-milp}/examples/simple_qp_example.py (82%) create mode 100644 docs/cuopt/source/cuopt-python/lp-qp-milp/index.rst rename docs/cuopt/source/cuopt-python/{lp-milp/lp-milp-api.rst => lp-qp-milp/lp-qp-milp-api.rst} (95%) rename docs/cuopt/source/cuopt-python/{lp-milp/lp-milp-examples.rst => lp-qp-milp/lp-qp-milp-examples.rst} (92%) rename docs/cuopt/source/{lp-features.rst => lp-qp-features.rst} (99%) rename docs/cuopt/source/{lp-milp-settings.rst => lp-qp-milp-settings.rst} (98%) create mode 100644 python/cuopt/cuopt/tests/routing/test_batch_solve.py diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 66df454bd..e4d7cfdd4 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 name: build diff --git a/.github/workflows/build_images.yaml b/.github/workflows/build_images.yaml index 6647bb546..aaf97fc88 100644 --- a/.github/workflows/build_images.yaml +++ b/.github/workflows/build_images.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. # SPDX-License-Identifier: Apache-2.0 name: Build and push image variant @@ -55,9 +55,10 @@ jobs: cp ./LICENSE ./ci/docker/context/LICENSE cp ./VERSION ./ci/docker/context/VERSION cp ./thirdparty/THIRD_PARTY_LICENSES ./ci/docker/context/THIRD_PARTY_LICENSES - - name: Copy Commit SHA + - name: Copy Commit SHA and commit time run: | git rev-parse HEAD > ./ci/docker/context/COMMIT_SHA + git log -n1 --pretty='%ct' > ./ci/docker/context/COMMIT_TIME - name: Login to NGC uses: docker/login-action@v3 with: diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml index db9afda97..de7e03b14 100644 --- a/.github/workflows/nightly.yaml +++ b/.github/workflows/nightly.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. All rights reserved. # SPDX-License-Identifier: Apache-2.0 name: Trigger Nightly cuOpt Pipeline @@ -17,7 +17,7 @@ jobs: matrix: cuopt_branch: - "main" - - "release/25.12" + - "release/26.02" steps: - uses: actions/checkout@v4 - name: Trigger Pipeline diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4996f3315..a0da1c8fc 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 name: pr @@ -186,13 +186,19 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: needs: [conda-cpp-build, changed-files, compute-matrix-filters] - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request script: ci/test_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-python-build: needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit @@ -203,7 +209,6 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: needs: [conda-python-build, changed-files, compute-matrix-filters] - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python with: @@ -211,6 +216,13 @@ jobs: build_type: pull-request script: ci/test_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} docs-build: needs: conda-python-build secrets: inherit @@ -258,13 +270,19 @@ jobs: matrix_filter: ${{ 
needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request script: ci/test_wheel_cuopt.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-build-cuopt-server: needs: [checks, compute-matrix-filters] secrets: inherit @@ -292,13 +310,19 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server with: build_type: pull-request script: ci/test_wheel_cuopt_server.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} test-self-hosted-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] secrets: inherit diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f4c81eb97..0dc4b3cfe 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 name: test @@ -27,7 +27,6 @@ on: jobs: conda-cpp-tests: - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main with: build_type: ${{ inputs.build_type }} @@ -35,8 +34,14 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_cpp.sh + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-python-tests: - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main with: run_codecov: false @@ -45,8 +50,14 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_python.sh + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt: - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} @@ -54,8 +65,14 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cuopt.sh + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} wheel-tests-cuopt-server: - secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main with: build_type: ${{ inputs.build_type }} @@ -63,6 +80,13 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cuopt_server.sh + secrets: + script-env-secret-1-key: CUOPT_DATASET_S3_URI + script-env-secret-1-value: ${{ secrets.CUOPT_DATASET_S3_URI }} + script-env-secret-2-key: CUOPT_AWS_ACCESS_KEY_ID + script-env-secret-2-value: ${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }} + script-env-secret-3-key: CUOPT_AWS_SECRET_ACCESS_KEY + script-env-secret-3-value: ${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }} conda-notebook-tests: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main diff --git a/.github/workflows/trigger-breaking-change-alert.yaml b/.github/workflows/trigger-breaking-change-alert.yaml index 529f11cfa..57b178740 100644 --- a/.github/workflows/trigger-breaking-change-alert.yaml +++ b/.github/workflows/trigger-breaking-change-alert.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 name: Trigger Breaking Change Notifications diff --git a/README.md b/README.md index eab460eb7..f497d6b67 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ -NVIDIA® cuOpt™ is a GPU-accelerated optimization engine that excels in mixed integer linear programming (MILP), linear programming (LP), and vehicle routing problems (VRP). It enables near real-time solutions for large-scale challenges with millions of variables and constraints, offering +NVIDIA® cuOpt™ is a GPU-accelerated optimization engine that excels in mixed integer linear programming (MILP), linear programming (LP), quadratic programming (QP) and vehicle routing problems (VRP). It enables near real-time solutions for large-scale challenges with millions of variables and constraints, offering easy integration into existing solvers and seamless deployment across hybrid and multi-cloud environments. The core engine is written in C++ and wrapped with a C API, Python API and Server API. @@ -25,12 +25,13 @@ cuOpt supports the following APIs: - C API support - Linear Programming (LP) - Mixed Integer Linear Programming (MILP) + - Quadratic Programming (QP) - C++ API support - cuOpt is written in C++ and includes a native C++ API. However, we do not provide documentation for the C++ API at this time. We anticipate that the C++ API will change significantly in the future. Use it at your own risk. - Python support - Routing (TSP, VRP, and PDP) - - Linear Programming (LP) and Mixed Integer Linear Programming (MILP) - - cuOpt includes a Python API that is used as the backend of the cuOpt server. However, we do not provide documentation for the Python API at this time. We suggest using cuOpt server to access cuOpt via Python. We anticipate that the Python API will change significantly in the future. Use it at your own risk. + - Linear Programming (LP), Mixed Integer Linear Programming (MILP) and Quadratic Programming (QP) + - Algebraic modeling Python API allows users to easily build constraints and objectives - Server support - Linear Programming (LP) - Mixed Integer Linear Programming (MILP) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 6013dcaf5..213e38e5e 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -231,10 +231,16 @@ int run_single_file(std::string file_path, } std::stringstream ss; int decimal_places = 2; + double mip_gap = solution.get_mip_gap(); + int is_optimal = solution.get_termination_status() == + cuopt::linear_programming::mip_termination_status_t::Optimal + ? 
1 + : 0; ss << std::fixed << std::setprecision(decimal_places) << base_filename << "," << sol_found << "," << obj_val << "," << benchmark_info.objective_of_initial_population << "," << benchmark_info.last_improvement_of_best_feasible << "," - << benchmark_info.last_improvement_after_recombination << "\n"; + << benchmark_info.last_improvement_after_recombination << "," << mip_gap << "," << is_optimal + << "\n"; write_to_output_file(out_dir, base_filename, device, n_gpus, batch_id, ss.str()); CUOPT_LOG_INFO("Results written to the file %s", base_filename.c_str()); return sol_found; diff --git a/benchmarks/linear_programming/utils/get_datasets.py b/benchmarks/linear_programming/utils/get_datasets.py index 357fc3264..29d23e57d 100644 --- a/benchmarks/linear_programming/utils/get_datasets.py +++ b/benchmarks/linear_programming/utils/get_datasets.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import os @@ -663,7 +663,7 @@ def extract(file, dir, type): raise Exception(f"Unknown file extension found for extraction {file}") # download emps and compile # Disable emps for now - if type == "netlib" and False: + if type == "netlib": url = MittelmannInstances["emps"] file = os.path.join(dir, "emps.c") download(url, file) diff --git a/build.sh b/build.sh index e129ee4ef..1ee8e87fc 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -15,7 +15,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -32,7 +32,9 @@ HELP="$0 [ ...] [ ...] -g - build for debug -a - Enable assertion (by default in debug mode) -b - Build with benchmark settings - -fsanitize - Build with sanitizer + -fsanitize - Build with AddressSanitizer and UndefinedBehaviorSanitizer + -tsan - Build with ThreadSanitizer (cannot be used with -fsanitize or -msan) + -msan - Build with MemorySanitizer (cannot be used with -fsanitize or -tsan) -n - no install step --no-fetch-rapids - don't fetch rapids dependencies -l= - log level. Options are: TRACE | DEBUG | INFO | WARN | ERROR | CRITICAL | OFF. 
Default=INFO @@ -76,6 +78,8 @@ BUILD_ALL_GPU_ARCH=0 BUILD_CI_ONLY=0 BUILD_LP_ONLY=0 BUILD_SANITIZER=0 +BUILD_TSAN=0 +BUILD_MSAN=0 SKIP_C_PYTHON_ADAPTERS=0 SKIP_TESTS_BUILD=0 SKIP_ROUTING_BUILD=0 @@ -230,6 +234,12 @@ fi if hasArg -fsanitize; then BUILD_SANITIZER=1 fi +if hasArg -tsan; then + BUILD_TSAN=1 +fi +if hasArg -msan; then + BUILD_MSAN=1 +fi if hasArg --skip-c-python-adapters; then SKIP_C_PYTHON_ADAPTERS=1 fi @@ -298,6 +308,24 @@ if [ ${BUILD_LP_ONLY} -eq 1 ] && [ ${SKIP_C_PYTHON_ADAPTERS} -eq 0 ]; then exit 1 fi +if [ ${BUILD_SANITIZER} -eq 1 ] && [ ${BUILD_TSAN} -eq 1 ]; then + echo "ERROR: -fsanitize and -tsan cannot be used together" + echo "AddressSanitizer and ThreadSanitizer are mutually exclusive" + exit 1 +fi + +if [ ${BUILD_SANITIZER} -eq 1 ] && [ ${BUILD_MSAN} -eq 1 ]; then + echo "ERROR: -fsanitize and -msan cannot be used together" + echo "AddressSanitizer and MemorySanitizer are mutually exclusive" + exit 1 +fi + +if [ ${BUILD_TSAN} -eq 1 ] && [ ${BUILD_MSAN} -eq 1 ]; then + echo "ERROR: -tsan and -msan cannot be used together" + echo "ThreadSanitizer and MemorySanitizer are mutually exclusive" + exit 1 +fi + if [ ${BUILD_ALL_GPU_ARCH} -eq 1 ]; then CUOPT_CMAKE_CUDA_ARCHITECTURES="RAPIDS" echo "Building for *ALL* supported GPU architectures..." @@ -344,6 +372,8 @@ if buildAll || hasArg libcuopt; then -DFETCH_RAPIDS=${FETCH_RAPIDS} \ -DBUILD_LP_ONLY=${BUILD_LP_ONLY} \ -DBUILD_SANITIZER=${BUILD_SANITIZER} \ + -DBUILD_TSAN=${BUILD_TSAN} \ + -DBUILD_MSAN=${BUILD_MSAN} \ -DSKIP_C_PYTHON_ADAPTERS=${SKIP_C_PYTHON_ADAPTERS} \ -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \ -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \ diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 959dce38e..41a71cef9 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -21,12 +21,28 @@ sccache --zero-stats rapids-logger "Building '${package_name}' wheel" +RAPIDS_PIP_WHEEL_ARGS=( + -w dist + -v + --no-deps + --disable-pip-version-check + --extra-index-url=https://pypi.nvidia.com +) + +# Only use --build-constraint when build isolation is enabled. +# +# Passing '--build-constraint' and '--no-build-isolation` together results in an error from 'pip', +# but we want to keep environment variable PIP_CONSTRAINT set unconditionally. +# PIP_NO_BUILD_ISOLATION=0 means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/573 +if [[ "${PIP_NO_BUILD_ISOLATION:-}" != "0" ]]; then + RAPIDS_PIP_WHEEL_ARGS+=(--build-constraint="${PIP_CONSTRAINT}") +fi + +# unset PIP_CONSTRAINT (set by rapids-init-pip)... it doesn't affect builds as of pip 25.3, and +# results in an error from 'pip wheel' when set and --build-constraint is also passed +unset PIP_CONSTRAINT rapids-pip-retry wheel \ - -w dist \ - -v \ - --no-deps \ - --disable-pip-version-check \ - --extra-index-url=https://pypi.nvidia.com \ + "${RAPIDS_PIP_WHEEL_ARGS[@]}" \ . sccache --show-adv-stats diff --git a/ci/build_wheel_cuopt.sh b/ci/build_wheel_cuopt.sh index 61c39d53a..0ba991e57 100755 --- a/ci/build_wheel_cuopt.sh +++ b/ci/build_wheel_cuopt.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -24,8 +24,8 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")" # Download the libcuopt and cuopt-mps-parser wheel built in the previous step and make it # available for pip to find. # -# Using env variable PIP_CONSTRAINT (initialized by 'rapids-init-pip') is necessary to ensure the constraints -# are used when creating the isolated build environment. +# env variable 'PIP_CONSTRAINT' is set up by rapids-init-pip. It constrains all subsequent +# 'pip install', 'pip download', etc. calls (except those used in 'pip wheel', handled separately in build scripts) RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen "${RAPIDS_CUDA_VERSION}")" CUOPT_MPS_PARSER_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="cuopt_mps_parser" rapids-download-wheels-from-github python) LIBCUOPT_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libcuopt_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-github cpp) diff --git a/ci/docker/Dockerfile b/ci/docker/Dockerfile index 8564f8d5d..1d49a4c04 100644 --- a/ci/docker/Dockerfile +++ b/ci/docker/Dockerfile @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1.2 -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 ARG CUDA_VER=unset @@ -9,6 +9,11 @@ ARG LINUX_VER=unset FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${LINUX_VER} AS cuda-libs +# CUDA headers (e.g., cuda_fp16.h) are not present in `runtime`/`base` images. +# Create a lightweight `devel` stage solely to copy `/usr/local/cuda/include` +# into the final image for CuPy/NVRTC runtime compilation. +FROM nvidia/cuda:${CUDA_VER}-devel-ubuntu${LINUX_VER} AS cuda-headers + # Install cuOpt FROM nvidia/cuda:${CUDA_VER}-base-ubuntu${LINUX_VER} AS python-env @@ -25,9 +30,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends build-essential wget \ unzip \ gcc \ + gnupg2 \ python${PYTHON_SHORT_VER} \ python${PYTHON_SHORT_VER}-dev \ python${PYTHON_SHORT_VER}-venv \ + && apt-get install -y --no-install-recommends --only-upgrade gnupg2 \ && rm -rf /var/lib/apt/lists/* && \ python${PYTHON_SHORT_VER} -m ensurepip --upgrade && \ python${PYTHON_SHORT_VER} -m pip install --upgrade pip @@ -49,12 +56,13 @@ RUN \ --extra-index-url https://pypi.nvidia.com \ --extra-index-url https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ --no-cache-dir \ + "pyyaml" \ "cuopt-server-${cuda_suffix}==${CUOPT_VER}" \ "cuopt-sh-client==${CUOPT_VER}" && \ python -m pip list -# Remove gcc to save space, gcc was required for building psutils -RUN apt-get purge -y gcc && rm -rf /var/lib/apt/lists/* +# Remove build-only deps to save space and reduce CVE surface area +RUN apt-get purge -y gcc gnupg2 && rm -rf /var/lib/apt/lists/* FROM install-env AS cuopt-final @@ -95,12 +103,15 @@ RUN mkdir -p /opt/cuopt && \ WORKDIR /opt/cuopt # Copy all static files in a single layer -COPY ./LICENSE ./VERSION ./THIRD_PARTY_LICENSES ./COMMIT_SHA /opt/cuopt/ +COPY ./LICENSE ./VERSION ./THIRD_PARTY_LICENSES ./COMMIT_SHA ./COMMIT_TIME /opt/cuopt/ # Copy CUDA libraries from the cuda-libs stage COPY --from=cuda-libs /usr/local/cuda/lib64/libnvrtc* /usr/local/cuda/lib64/ COPY --from=cuda-libs /usr/local/cuda/lib64/libnvJitLink* /usr/local/cuda/lib64/ +# Copy CUDA headers needed for runtime compilation (e.g., CuPy NVRTC). 
+COPY --from=cuda-headers /usr/local/cuda/include/ /usr/local/cuda/include/ + # Use the flexible entrypoint ENTRYPOINT ["/opt/cuopt/entrypoint.sh"] CMD ["python", "-m", "cuopt_server.cuopt_service"] diff --git a/ci/docker/test_image.sh b/ci/docker/test_image.sh index 4eb4d114a..3e11c16d0 100644 --- a/ci/docker/test_image.sh +++ b/ci/docker/test_image.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -24,6 +24,8 @@ ln -sf "$(pwd)" /opt/cuopt/cuopt chmod -R a+w "$(pwd)" cat > /opt/cuopt/test.sh <=3.10,<3.14 +- pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 55b3d3dfb..0eaa7000a 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -55,6 +55,7 @@ dependencies: - pytest-cov - pytest<9.0 - python>=3.10,<3.14 +- pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml similarity index 95% rename from conda/environments/all_cuda-130_arch-aarch64.yaml rename to conda/environments/all_cuda-131_arch-aarch64.yaml index 938c0cce8..fb23f887a 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-python>=13.0.1,<14.0 - cuda-sanitizer-api -- cuda-version=13.0 +- cuda-version=13.1 - cudf==26.2.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler @@ -55,6 +55,7 @@ dependencies: - pytest-cov - pytest<9.0 - python>=3.10,<3.14 +- pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests @@ -75,4 +76,4 @@ dependencies: - nvidia-sphinx-theme - swagger-plugin-for-sphinx - veroviz -name: all_cuda-130_arch-aarch64 +name: all_cuda-131_arch-aarch64 diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml similarity index 95% rename from conda/environments/all_cuda-130_arch-x86_64.yaml rename to conda/environments/all_cuda-131_arch-x86_64.yaml index 03030108d..501729acd 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-nvtx-dev - cuda-python>=13.0.1,<14.0 - cuda-sanitizer-api -- cuda-version=13.0 +- cuda-version=13.1 - cudf==26.2.*,>=0.0.0a0 - cupy>=13.6.0 - cxx-compiler @@ -55,6 +55,7 @@ dependencies: - pytest-cov - pytest<9.0 - python>=3.10,<3.14 +- pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 - requests @@ -75,4 +76,4 @@ dependencies: - nvidia-sphinx-theme - swagger-plugin-for-sphinx - veroviz -name: all_cuda-130_arch-x86_64 +name: all_cuda-131_arch-x86_64 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 35f7b7e08..db87c3fd5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -29,6 +29,9 @@ project( LANGUAGES CXX CUDA C ) +# Disable C++20 module scanning as the codebase doesn't use modules +set(CMAKE_CXX_SCAN_FOR_MODULES OFF CACHE BOOL "Disable C++20 module scanning") + rapids_cmake_write_version_file(include/cuopt/version_config.hpp) # 
################################################################################################## # - build type ------------------------------------------------------------------------------------ @@ -79,10 +82,33 @@ endif(CMAKE_COMPILER_IS_GNUCXX) # 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0' # 2. (Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0 if(BUILD_SANITIZER) - list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g -Wno-error=maybe-uninitialized) + list(APPEND CUOPT_CXX_FLAGS -fsanitize=address,undefined -fno-omit-frame-pointer -g) + if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + list(APPEND CUOPT_CXX_FLAGS -Wno-error=maybe-uninitialized) + endif() add_link_options(-fsanitize=address,undefined) endif(BUILD_SANITIZER) +# To use ThreadSanitizer: +# 1. Build with clang and the -tsan flag +# 2. Run the binary with env var set: OMP_TOOL_LIBRARIES=/usr/lib/llvm-17/lib/libarcher.so ARCHER_OPTIONS='verbose=1' TSAN_OPTIONS='suppresions=cpp/utilities/tsan_suppressions.txt:ignore_noninstrumented_modules=1:halt_on_error=1' +# Replace with local llvm install path. libarcher.so must be presetn +if(BUILD_TSAN) + message(STATUS "Building with ThreadSanitizer enabled") + list(APPEND CUOPT_CXX_FLAGS -fsanitize=thread -fno-omit-frame-pointer -g) + add_link_options(-fsanitize=thread) +endif(BUILD_TSAN) + +# To use MemorySanitizer: +# 1. Build with clang and the -msan flag (MemorySanitizer requires clang) +# 2. Run the binary with env var set: MSAN_OPTIONS='halt_on_error=1' +# Note: MemorySanitizer requires all code (including libraries) to be instrumented for accurate results +if(BUILD_MSAN) + message(STATUS "Building with MemorySanitizer enabled") + list(APPEND CUOPT_CXX_FLAGS -fsanitize=memory -fno-omit-frame-pointer -g -fsanitize-memory-track-origins=1) + add_link_options(-fsanitize=memory) +endif(BUILD_MSAN) + if(DEFINE_ASSERT) add_definitions(-DASSERT_MODE) endif(DEFINE_ASSERT) @@ -116,7 +142,11 @@ if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.9) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -static-global-template-stub=false") endif() list(APPEND CUOPT_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xcompiler=-Werror --default-stream=per-thread) -list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall -Wno-error=non-template-friend) +if("${CMAKE_CUDA_HOST_COMPILER}" MATCHES "clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall) +else() + list(APPEND CUOPT_CUDA_FLAGS -Xcompiler=-Wall -Wno-error=non-template-friend) +endif() list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=-compress-all) if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.9 AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0) list(APPEND CUOPT_CUDA_FLAGS -Xfatbin=--compress-level=3) @@ -216,11 +246,12 @@ set_target_properties(cuopt INSTALL_RPATH "\$ORIGIN" # set target compile options - CXX_STANDARD 17 + CXX_STANDARD 20 CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 + CUDA_STANDARD 20 CUDA_STANDARD_REQUIRED ON INTERFACE_POSITION_INDEPENDENT_CODE ON + CXX_SCAN_FOR_MODULES OFF ) target_compile_definitions(cuopt PUBLIC "CUOPT_LOG_ACTIVE_LEVEL=RAPIDS_LOGGER_LOG_LEVEL_${LIBCUOPT_LOGGING_LEVEL}") @@ -419,6 +450,14 @@ endif() if(NOT BUILD_LP_ONLY) add_executable(cuopt_cli cuopt_cli.cpp) + +set_target_properties(cuopt_cli + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + 
CXX_SCAN_FOR_MODULES OFF +) + target_compile_options(cuopt_cli PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" "$<$:${CUOPT_CUDA_FLAGS}>" @@ -459,6 +498,14 @@ endif() option(BUILD_MIP_BENCHMARKS "Build MIP benchmarks" OFF) if(BUILD_MIP_BENCHMARKS AND NOT BUILD_LP_ONLY) add_executable(solve_MIP ../benchmarks/linear_programming/cuopt/run_mip.cpp) + + set_target_properties(solve_MIP + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_SCAN_FOR_MODULES OFF + ) + target_compile_options(solve_MIP PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" "$<$:${CUOPT_CUDA_FLAGS}>" @@ -483,6 +530,16 @@ endif() option(BUILD_LP_BENCHMARKS "Build LP benchmarks" OFF) if(BUILD_LP_BENCHMARKS) add_executable(solve_LP ../benchmarks/linear_programming/cuopt/run_pdlp.cu) + + set_target_properties(solve_LP + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 20 + CUDA_STANDARD_REQUIRED ON + CXX_SCAN_FOR_MODULES OFF + ) + target_compile_options(solve_LP PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" "$<$:${CUOPT_CUDA_FLAGS}>" diff --git a/cpp/include/cuopt/error.hpp b/cpp/include/cuopt/error.hpp index b6086245d..9dd547adb 100644 --- a/cpp/include/cuopt/error.hpp +++ b/cpp/include/cuopt/error.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -33,8 +33,6 @@ enum class error_type_t { */ struct logic_error : public std::logic_error { - explicit logic_error() = default; - logic_error(const logic_error& exception) = default; // Move constructor diff --git a/cpp/include/cuopt/linear_programming/utilities/internals.hpp b/cpp/include/cuopt/linear_programming/utilities/internals.hpp index 84c96a716..90d856b23 100644 --- a/cpp/include/cuopt/linear_programming/utilities/internals.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/internals.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -62,6 +62,7 @@ namespace linear_programming { class base_solution_t { public: + virtual ~base_solution_t() = default; virtual bool is_mip() const = 0; }; diff --git a/cpp/include/cuopt/routing/cython/cython.hpp b/cpp/include/cuopt/routing/cython/cython.hpp index 75898ece7..93fdedc78 100644 --- a/cpp/include/cuopt/routing/cython/cython.hpp +++ b/cpp/include/cuopt/routing/cython/cython.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -16,6 +16,7 @@ #include #include +#include namespace cuopt { namespace cython { @@ -82,6 +83,10 @@ struct dataset_ret_t { std::unique_ptr call_solve(routing::data_model_view_t*, routing::solver_settings_t*); +// Wrapper for batch solve to expose the API to cython. +std::vector> call_batch_solve( + std::vector*>, routing::solver_settings_t*); + // Wrapper for dataset to expose the API to cython. 
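
The `virtual ~base_solution_t() = default;` added to internals.hpp above matters because solutions are handled through base-class pointers; without a virtual destructor, destroying a derived solution via `base_solution_t*` is undefined behavior. A minimal, self-contained sketch with hypothetical stand-in types (not the cuOpt classes) showing the pattern:

  #include <memory>

  struct base_solution {                      // stand-in for base_solution_t
    virtual ~base_solution() = default;       // without this, deleting through the base pointer below is UB
    virtual bool is_mip() const = 0;
  };

  struct lp_solution : base_solution {        // hypothetical derived type
    bool is_mip() const override { return false; }
  };

  int main() {
    std::unique_ptr<base_solution> s = std::make_unique<lp_solution>();
    return s->is_mip() ? 1 : 0;               // s is destroyed through the base pointer here
  }
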
std::unique_ptr call_generate_dataset( raft::handle_t const& handle, routing::generator::dataset_params_t const& params); diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index 9c96cc911..4fe497157 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -16,6 +16,9 @@ project( LANGUAGES CXX ) +# Disable C++20 module scanning as the codebase doesn't use modules +set(CMAKE_CXX_SCAN_FOR_MODULES OFF CACHE BOOL "Disable C++20 module scanning") + # Write the version header rapids_cmake_write_version_file(include/mps_parser/version_config.hpp) @@ -79,9 +82,10 @@ set_target_properties(mps_parser INSTALL_RPATH "\$ORIGIN" # set target compile options - CXX_STANDARD 17 + CXX_STANDARD 20 CXX_STANDARD_REQUIRED ON INTERFACE_POSITION_INDEPENDENT_CODE ON + CXX_SCAN_FOR_MODULES OFF ) target_compile_options(mps_parser diff --git a/cpp/libmps_parser/tests/CMakeLists.txt b/cpp/libmps_parser/tests/CMakeLists.txt index 21fc02eb4..b39a09607 100644 --- a/cpp/libmps_parser/tests/CMakeLists.txt +++ b/cpp/libmps_parser/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -13,9 +13,10 @@ function(ConfigureTest CMAKE_TEST_NAME) set_target_properties(${CMAKE_TEST_NAME} PROPERTIES # set target compile options - CXX_STANDARD 17 + CXX_STANDARD 20 CXX_STANDARD_REQUIRED ON INTERFACE_POSITION_INDEPENDENT_CODE ON + CXX_SCAN_FOR_MODULES OFF ) target_include_directories(${CMAKE_TEST_NAME} diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index 4528a9b67..76312c49d 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -32,6 +32,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/triangle_solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector_math.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pinned_host_allocator.cu + ${CMAKE_CURRENT_SOURCE_DIR}/diving_heuristics.cpp ) # Uncomment to enable debug info diff --git a/cpp/src/dual_simplex/barrier.cu b/cpp/src/dual_simplex/barrier.cu index 8c0a32fad..482993600 100644 --- a/cpp/src/dual_simplex/barrier.cu +++ b/cpp/src/dual_simplex/barrier.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -42,7 +42,47 @@ namespace cuopt::linear_programming::dual_simplex { -auto constexpr use_gpu = true; +// non-template wrappers to work around clang compiler bug +[[maybe_unused]] static void pairwise_multiply( + float* a, float* b, float* out, int size, rmm::cuda_stream_view stream) +{ + cub::DeviceTransform::Transform( + cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream.value()); +} + +[[maybe_unused]] static void pairwise_multiply( + double* a, double* b, double* out, int size, rmm::cuda_stream_view stream) +{ + cub::DeviceTransform::Transform( + cuda::std::make_tuple(a, b), out, size, cuda::std::multiplies<>{}, stream.value()); +} + +[[maybe_unused]] static void axpy( + float alpha, float* x, float beta, float* y, float* out, int size, rmm::cuda_stream_view stream) +{ + cub::DeviceTransform::Transform( + cuda::std::make_tuple(x, y), + out, + size, + [alpha, beta] __host__ __device__(float a, float b) { return alpha * a + beta * b; }, + stream.value()); +} + +[[maybe_unused]] static void axpy(double alpha, + double* x, + double beta, + double* y, + double* out, + int size, + rmm::cuda_stream_view stream) +{ + cub::DeviceTransform::Transform( + cuda::std::make_tuple(x, y), + out, + size, + [alpha, beta] __host__ __device__(double a, double b) { return alpha * a + beta * b; }, + stream.value()); +} template class iteration_data_t { @@ -76,7 +116,7 @@ class iteration_data_t { AD(lp.num_cols, lp.num_rows, 0), AT(lp.num_rows, lp.num_cols, 0), ADAT(lp.num_rows, lp.num_rows, 0), - augmented(lp.num_cols + lp.num_rows, lp.num_cols + lp.num_rows, 0), + // augmented(lp.num_cols + lp.num_rows, lp.num_cols + lp.num_rows, 0), A_dense(lp.num_rows, 0), AD_dense(0, 0), H(0, 0), @@ -109,6 +149,8 @@ class iteration_data_t { device_AD(lp.num_cols, lp.num_rows, 0, lp.handle_ptr->get_stream()), device_A(lp.num_cols, lp.num_rows, 0, lp.handle_ptr->get_stream()), device_ADAT(lp.num_rows, lp.num_rows, 0, lp.handle_ptr->get_stream()), + device_augmented( + lp.num_cols + lp.num_rows, lp.num_cols + lp.num_rows, 0, lp.handle_ptr->get_stream()), d_original_A_values(0, lp.handle_ptr->get_stream()), device_A_x_values(0, lp.handle_ptr->get_stream()), d_inv_diag_prime(0, lp.handle_ptr->get_stream()), @@ -116,6 +158,7 @@ class iteration_data_t { d_num_flag(lp.handle_ptr->get_stream()), d_inv_diag(lp.num_cols, lp.handle_ptr->get_stream()), d_cols_to_remove(0, lp.handle_ptr->get_stream()), + d_augmented_diagonal_indices_(0, lp.handle_ptr->get_stream()), use_augmented(false), has_factorization(false), num_factorizations(0), @@ -212,21 +255,18 @@ class iteration_data_t { } // Allocating GPU flag data for Form ADAT - if (use_gpu) { - raft::common::nvtx::range fun_scope("Barrier: GPU Flag memory allocation"); + RAFT_CUDA_TRY(cub::DeviceSelect::Flagged( + nullptr, + flag_buffer_size, + d_inv_diag_prime.data(), // Not the actual input but just to allcoate the memory + thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), + d_inv_diag_prime.data(), + d_num_flag.data(), + inv_diag.size(), + stream_view_)); + + d_flag_buffer.resize(flag_buffer_size, stream_view_); - cub::DeviceSelect::Flagged( - nullptr, - flag_buffer_size, - d_inv_diag_prime.data(), // Not the actual input but just to allcoate the memory - thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), - d_inv_diag_prime.data(), - d_num_flag.data(), - inv_diag.size(), - stream_view_); - - d_flag_buffer.resize(flag_buffer_size, 
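
The non-template pairwise_multiply/axpy helpers introduced above wrap cub::DeviceTransform::Transform so that the templated lambda never has to be instantiated at the call site (the clang workaround mentioned in the comment). Purely as a hypothetical usage fragment (d_x, d_y, d_out, and n are illustrative names, not from the patch), mirroring how the helpers are invoked later in this file:

  // Assumes a valid CUDA context; the element-wise operations run asynchronously on `stream`.
  rmm::cuda_stream_view stream = rmm::cuda_stream_default;
  int n = 1024;
  rmm::device_uvector<double> d_x(n, stream), d_y(n, stream), d_out(n, stream);
  // ... fill d_x and d_y ...
  pairwise_multiply(d_x.data(), d_y.data(), d_out.data(), n, stream);  // d_out[i] = d_x[i] * d_y[i]
  axpy(2.0, d_x.data(), 1.0, d_y.data(), d_y.data(), n, stream);       // d_y[i] = 2*d_x[i] + d_y[i], in place as used below
  stream.synchronize();                                                 // wait before reading results on the host
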
stream_view_); - } // Create the upper bounds vector n_upper_bounds = 0; for (i_t j = 0; j < lp.num_cols; j++) { @@ -265,6 +305,7 @@ class iteration_data_t { settings.log.printf("Density estimator time : %.2fs\n", column_density_time); if ((settings.augmented != 0) && (n_dense_columns > 50 || n_dense_rows > 10 || + lp.A.m == 0 /* handle case with no constraints */ || (max_row_nz > 5000 && estimated_nz_AAT > 1e10) || settings.augmented == 1)) { use_augmented = true; n_dense_columns = 0; @@ -275,6 +316,7 @@ class iteration_data_t { n_dense_columns = 0; use_augmented = !Q_diagonal; } + if (use_augmented) { settings.log.printf("Linear system : augmented\n"); } else { @@ -301,10 +343,8 @@ class iteration_data_t { inv_diag.set_scalar(1.0); if (use_augmented) { diag.multiply_scalar(-1.0); } if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { diag.inverse(inv_diag); } - if (use_gpu) { - // TMP diag and inv_diag should directly created and filled on the GPU - raft::copy(d_inv_diag.data(), inv_diag.data(), inv_diag.size(), stream_view_); - } + // TMP diag and inv_diag should directly created and filled on the GPU + raft::copy(d_inv_diag.data(), inv_diag.data(), inv_diag.size(), stream_view_); inv_sqrt_diag.set_scalar(1.0); if (n_upper_bounds > 0 || (has_Q && !use_augmented)) { inv_diag.sqrt(inv_sqrt_diag); } @@ -339,24 +379,20 @@ class iteration_data_t { } } original_A_values = AD.x; - if (use_gpu) { - d_original_A_values.resize(original_A_values.size(), handle_ptr->get_stream()); - raft::copy(d_original_A_values.data(), AD.x.data(), AD.x.size(), handle_ptr->get_stream()); - } + d_original_A_values.resize(original_A_values.size(), handle_ptr->get_stream()); + raft::copy(d_original_A_values.data(), AD.x.data(), AD.x.size(), handle_ptr->get_stream()); AD.transpose(AT); - if (use_gpu) { - device_AD.copy(AD, handle_ptr->get_stream()); - // For efficient scaling of AD col we form the col index array - device_AD.form_col_index(handle_ptr->get_stream()); - device_A_x_values.resize(original_A_values.size(), handle_ptr->get_stream()); - raft::copy( - device_A_x_values.data(), device_AD.x.data(), device_AD.x.size(), handle_ptr->get_stream()); - csr_matrix_t host_A_CSR(1, 1, 1); // Sizes will be set by to_compressed_row() - AD.to_compressed_row(host_A_CSR); - device_A.copy(host_A_CSR, lp.handle_ptr->get_stream()); - RAFT_CHECK_CUDA(handle_ptr->get_stream()); - } + device_AD.copy(AD, handle_ptr->get_stream()); + // For efficient scaling of AD col we form the col index array + device_AD.form_col_index(handle_ptr->get_stream()); + device_A_x_values.resize(original_A_values.size(), handle_ptr->get_stream()); + raft::copy( + device_A_x_values.data(), device_AD.x.data(), device_AD.x.size(), handle_ptr->get_stream()); + csr_matrix_t host_A_CSR(1, 1, 1); // Sizes will be set by to_compressed_row() + AD.to_compressed_row(host_A_CSR); + device_A.copy(host_A_CSR, lp.handle_ptr->get_stream()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } i_t factorization_size = use_augmented ? 
lp.num_rows + lp.num_cols : lp.num_rows; @@ -370,15 +406,11 @@ class iteration_data_t { // Build the sparsity pattern of the augmented system form_augmented(true); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - symbolic_status = chol->analyze(augmented); + symbolic_status = chol->analyze(device_augmented); } else { form_adat(true); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { return; } - if (use_gpu) { - symbolic_status = chol->analyze(device_ADAT); - } else { - symbolic_status = chol->analyze(ADAT); - } + symbolic_status = chol->analyze(device_ADAT); } } @@ -392,64 +424,79 @@ class iteration_data_t { const f_t dual_perturb = 0.0; const f_t primal_perturb = 1e-6; if (first_call) { - augmented_diagonal_indices.resize(n + m, -1); i_t new_nnz = 2 * nnzA + n + m + nnzQ; - augmented.reallocate(2 * nnzA + n + m + nnzQ); + csr_matrix_t augmented_CSR(n + m, n + m, new_nnz); + std::vector augmented_diagonal_indices(n + m, -1); i_t q = 0; i_t off_diag_Qnz = 0; - for (i_t j = 0; j < n; j++) { - cuopt_assert(std::isfinite(diag[j]), "diag[j] is not finite"); - augmented.col_start[j] = q; + + for (i_t i = 0; i < n; i++) { + augmented_CSR.row_start[i] = q; if (nnzQ == 0) { - augmented_diagonal_indices[j] = q; - augmented.i[q] = j; - augmented.x[q++] = -diag[j] - dual_perturb; + augmented_diagonal_indices[i] = q; + augmented_CSR.j[q] = i; + augmented_CSR.x[q++] = -diag[i] - dual_perturb; } else { - const i_t q_col_beg = Q.col_start[j]; - const i_t q_col_end = Q.col_start[j + 1]; + // Q is symmetric + const i_t q_col_beg = Q.col_start[i]; + const i_t q_col_end = Q.col_start[i + 1]; bool has_diagonal = false; for (i_t p = q_col_beg; p < q_col_end; ++p) { - augmented.i[q] = Q.i[p]; - if (Q.i[p] == j) { + augmented_CSR.j[q] = Q.i[p]; + if (Q.i[p] == i) { has_diagonal = true; - augmented_diagonal_indices[j] = q; - augmented.x[q++] = -Q.x[p] - diag[j] - dual_perturb; + augmented_diagonal_indices[i] = q; + augmented_CSR.x[q++] = -Q.x[p] - diag[i] - dual_perturb; } else { off_diag_Qnz++; - augmented.x[q++] = -Q.x[p]; + augmented_CSR.x[q++] = -Q.x[p]; } } if (!has_diagonal) { - augmented_diagonal_indices[j] = q; - augmented.i[q] = j; - augmented.x[q++] = -diag[j] - dual_perturb; + augmented_diagonal_indices[i] = q; + augmented_CSR.j[q] = i; + augmented_CSR.x[q++] = -diag[i] - dual_perturb; } } - const i_t col_beg = A.col_start[j]; - const i_t col_end = A.col_start[j + 1]; + // AT block, we can use A in csc directly + const i_t col_beg = A.col_start[i]; + const i_t col_end = A.col_start[i + 1]; for (i_t p = col_beg; p < col_end; ++p) { - augmented.i[q] = n + A.i[p]; - augmented.x[q++] = A.x[p]; + augmented_CSR.j[q] = A.i[p] + n; + augmented_CSR.x[q++] = A.x[p]; } } - settings_.log.debug("augmented nz %d predicted %d\n", q, off_diag_Qnz + nnzA + n); + for (i_t k = n; k < n + m; ++k) { - augmented.col_start[k] = q; - const i_t l = k - n; - const i_t col_beg = AT.col_start[l]; - const i_t col_end = AT.col_start[l + 1]; + // A block, we can use AT in csc directly + augmented_CSR.row_start[k] = q; + const i_t l = k - n; + const i_t col_beg = AT.col_start[l]; + const i_t col_end = AT.col_start[l + 1]; for (i_t p = col_beg; p < col_end; ++p) { - augmented.i[q] = AT.i[p]; - augmented.x[q++] = AT.x[p]; + augmented_CSR.j[q] = AT.i[p]; + augmented_CSR.x[q++] = AT.x[p]; } augmented_diagonal_indices[k] = q; - augmented.i[q] = k; - augmented.x[q++] = primal_perturb; + augmented_CSR.j[q] = k; + augmented_CSR.x[q++] = primal_perturb; } - 
augmented.col_start[n + m] = q; + augmented_CSR.row_start[n + m] = q; + augmented_CSR.nz_max = q; + augmented_CSR.j.resize(q); + augmented_CSR.x.resize(q); + settings_.log.debug("augmented nz %d predicted %d\n", q, off_diag_Qnz + nnzA + n); cuopt_assert(q == 2 * nnzA + n + m + off_diag_Qnz, "augmented nnz != predicted"); cuopt_assert(A.col_start[n] == AT.col_start[m], "A nz != AT nz"); + device_augmented.copy(augmented_CSR, handle_ptr->get_stream()); + d_augmented_diagonal_indices_.resize(augmented_diagonal_indices.size(), + handle_ptr->get_stream()); + raft::copy(d_augmented_diagonal_indices_.data(), + augmented_diagonal_indices.data(), + augmented_diagonal_indices.size(), + handle_ptr->get_stream()); + handle_ptr->sync_stream(); #ifdef CHECK_SYMMETRY csc_matrix_t augmented_transpose(1, 1, 1); augmented.transpose(augmented_transpose); @@ -464,16 +511,29 @@ class iteration_data_t { cuopt_assert(error.norm1() <= 1e-2, "|| Aug - Aug^T ||_1 > 1e-2"); #endif } else { - for (i_t j = 0; j < n; ++j) { - f_t q_diag = nnzQ > 0 ? Qdiag[j] : 0.0; + thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), + thrust::make_counting_iterator(0), + i_t(n), + [span_x = cuopt::make_span(device_augmented.x), + span_diag_indices = cuopt::make_span(d_augmented_diagonal_indices_), + span_q_diag = cuopt::make_span(d_Q_diag_), + span_diag = cuopt::make_span(d_diag_), + dual_perturb_value = dual_perturb] __device__(i_t j) { + f_t q_diag = span_q_diag.size() > 0 ? span_q_diag[j] : 0.0; + span_x[span_diag_indices[j]] = + -q_diag - span_diag[j] - dual_perturb_value; + }); - const i_t p = augmented_diagonal_indices[j]; - augmented.x[p] = -q_diag - diag[j] - dual_perturb; - } - for (i_t j = n; j < n + m; ++j) { - const i_t p = augmented_diagonal_indices[j]; - augmented.x[p] = primal_perturb; - } + RAFT_CHECK_CUDA(handle_ptr->get_stream()); + thrust::for_each_n(rmm::exec_policy(handle_ptr->get_stream()), + thrust::make_counting_iterator(n), + i_t(m), + [span_x = cuopt::make_span(device_augmented.x), + span_diag_indices = cuopt::make_span(d_augmented_diagonal_indices_), + primal_perturb_value = primal_perturb] __device__(i_t j) { + span_x[span_diag_indices[j]] = primal_perturb_value; + }); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); } } @@ -484,99 +544,67 @@ class iteration_data_t { float64_t start_form_adat = tic(); const i_t m = AD.m; - if (use_gpu) { - // TODO do we really need this copy? 
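
For readers following the form_augmented rewrite above: the CSR matrix it assembles (and that augmented_multiply, further below, applies block by block) is the symmetric quasi-definite system

  K = [ -(Q + D) - dual_perturb * I      A^T                ]
      [            A                     primal_perturb * I ]

where D is the barrier diagonal `diag`, and dual_perturb = 0.0 and primal_perturb = 1e-6 as set at the top of form_augmented. Rows 0..n-1 carry the -(Q + D) block together with A^T (built from A in CSC), and rows n..n+m-1 carry A (built from AT in CSC) plus the perturbed (2,2) block.
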
(it's ok since gpu to gpu) - raft::copy(device_AD.x.data(), - d_original_A_values.data(), - d_original_A_values.size(), - handle_ptr->get_stream()); - if (n_dense_columns > 0) { - // Adjust inv_diag - d_inv_diag_prime.resize(AD.n, stream_view_); - // Copy If - cub::DeviceSelect::Flagged( - d_flag_buffer.data(), - flag_buffer_size, - d_inv_diag.data(), - thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), - d_inv_diag_prime.data(), - d_num_flag.data(), - d_inv_diag.size(), - stream_view_); - } else { - d_inv_diag_prime.resize(inv_diag.size(), stream_view_); - raft::copy(d_inv_diag_prime.data(), d_inv_diag.data(), inv_diag.size(), stream_view_); - } + raft::copy(device_AD.x.data(), + d_original_A_values.data(), + d_original_A_values.size(), + handle_ptr->get_stream()); + if (n_dense_columns > 0) { + // Adjust inv_diag + d_inv_diag_prime.resize(AD.n, stream_view_); + // Copy If + cub::DeviceSelect::Flagged( + d_flag_buffer.data(), + flag_buffer_size, + d_inv_diag.data(), + thrust::make_transform_iterator(d_cols_to_remove.data(), cuda::std::logical_not{}), + d_inv_diag_prime.data(), + d_num_flag.data(), + d_inv_diag.size(), + stream_view_); + RAFT_CHECK_CUDA(stream_view_); + } else { + d_inv_diag_prime.resize(inv_diag.size(), stream_view_); + raft::copy(d_inv_diag_prime.data(), d_inv_diag.data(), inv_diag.size(), stream_view_); + } - cuopt_assert(static_cast(d_inv_diag_prime.size()) == AD.n, - "inv_diag_prime.size() != AD.n"); + cuopt_assert(static_cast(d_inv_diag_prime.size()) == AD.n, + "inv_diag_prime.size() != AD.n"); - thrust::for_each_n(rmm::exec_policy(stream_view_), - thrust::make_counting_iterator(0), - i_t(device_AD.x.size()), - [span_x = cuopt::make_span(device_AD.x), - span_scale = cuopt::make_span(d_inv_diag_prime), - span_col_ind = cuopt::make_span(device_AD.col_index)] __device__(i_t i) { - span_x[i] *= span_scale[span_col_ind[i]]; - }); - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } - if (first_call) { - try { - initialize_cusparse_data( - handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); - } catch (const raft::cuda_error& e) { - settings_.log.printf("Error in initialize_cusparse_data: %s\n", e.what()); - return; - } + thrust::for_each_n(rmm::exec_policy(stream_view_), + thrust::make_counting_iterator(0), + i_t(device_AD.x.size()), + [span_x = cuopt::make_span(device_AD.x), + span_scale = cuopt::make_span(d_inv_diag_prime), + span_col_ind = cuopt::make_span(device_AD.col_index)] __device__(i_t i) { + span_x[i] *= span_scale[span_col_ind[i]]; + }); + RAFT_CHECK_CUDA(stream_view_); + if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } + if (first_call) { + try { + initialize_cusparse_data( + handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); + } catch (const raft::cuda_error& e) { + settings_.log.printf("Error in initialize_cusparse_data: %s\n", e.what()); + return; } - if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } - - multiply_kernels(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); - handle_ptr->sync_stream(); - - auto adat_nnz = device_ADAT.row_start.element(device_ADAT.m, handle_ptr->get_stream()); - float64_t adat_time = toc(start_form_adat); + } + if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { return; } - if (num_factorizations == 0) { - settings_.log.printf("ADAT time : %.2fs\n", adat_time); - settings_.log.printf("ADAT nonzeros : %.2e\n", - 
static_cast(adat_nnz)); - settings_.log.printf( - "ADAT density : %.2f\n", - static_cast(adat_nnz) / - (static_cast(device_ADAT.m) * static_cast(device_ADAT.m))); - } - } else { - // Restore the columns of AD to A - AD.x = original_A_values; - std::vector inv_diag_prime; - if (n_dense_columns > 0) { - // Adjust inv_diag - inv_diag_prime.resize(AD.n); - const i_t n = A.n; + multiply_kernels(handle_ptr, device_A, device_AD, device_ADAT, cusparse_info); + handle_ptr->sync_stream(); - i_t new_j = 0; - for (i_t j = 0; j < n; j++) { - if (cols_to_remove[j]) { continue; } - inv_diag_prime[new_j++] = inv_diag[j]; - } - } else { - inv_diag_prime = copy(inv_diag); - } + auto adat_nnz = device_ADAT.row_start.element(device_ADAT.m, handle_ptr->get_stream()); + float64_t adat_time = toc(start_form_adat); - cuopt_assert(static_cast(inv_diag_prime.size()) == AD.n, - "inv_diag_prime.size() != AD.n"); - AD.scale_columns(inv_diag_prime); - multiply(AD, AT, ADAT); - - float64_t adat_time = toc(start_form_adat); - if (num_factorizations == 0) { - settings_.log.printf("ADAT time %.2fs\n", adat_time); - settings_.log.printf("ADAT nonzeros %e density %.2f\n", - static_cast(ADAT.col_start[m]), - static_cast(ADAT.col_start[m]) / - (static_cast(m) * static_cast(m))); - } + if (num_factorizations == 0) { + settings_.log.printf("ADAT time : %.2fs\n", adat_time); + settings_.log.printf("ADAT nonzeros : %.2e\n", + static_cast(adat_nnz)); + settings_.log.printf( + "ADAT density : %.2f\n", + static_cast(adat_nnz) / + (static_cast(device_ADAT.m) * static_cast(device_ADAT.m))); } } @@ -993,12 +1021,7 @@ class iteration_data_t { dense_vector_t dual_res = z_tilde; dual_res.axpy(-1.0, lp.objective, 1.0); - if (use_gpu) { - cusparse_view.transpose_spmv(1.0, solution.y, 1.0, dual_res); - - } else { - matrix_transpose_vector_multiply(lp.A, 1.0, solution.y, 1.0, dual_res); - } + cusparse_view.transpose_spmv(1.0, solution.y, 1.0, dual_res); f_t dual_residual_norm = vector_norm_inf(dual_res, stream_view_); #ifdef PRINT_INFO settings_.log.printf("Solution Dual residual: %e\n", dual_residual_norm); @@ -1328,7 +1351,8 @@ class iteration_data_t { u.data(), u.size(), cuda::std::multiplies<>{}, - stream_view_); + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); // y = alpha * A * w + beta * v = alpha * A * Dinv * A^T * y + beta * v cusparse_view.spmv(alpha, cusparse_u, beta, cusparse_v); @@ -1377,26 +1401,22 @@ class iteration_data_t { // y <- alpha * Augmented * x + beta * y void augmented_multiply(f_t alpha, - const dense_vector_t& x, + const rmm::device_uvector& x, f_t beta, - dense_vector_t& y) + rmm::device_uvector& y) { - const i_t m = A.m; - const i_t n = A.n; - dense_vector_t x1 = x.head(n); - dense_vector_t x2 = x.tail(m); - dense_vector_t y1 = y.head(n); - dense_vector_t y2 = y.tail(m); + const i_t m = A.m; + const i_t n = A.n; rmm::device_uvector d_x1(n, handle_ptr->get_stream()); rmm::device_uvector d_x2(m, handle_ptr->get_stream()); rmm::device_uvector d_y1(n, handle_ptr->get_stream()); rmm::device_uvector d_y2(m, handle_ptr->get_stream()); - raft::copy(d_x1.data(), x1.data(), n, handle_ptr->get_stream()); - raft::copy(d_x2.data(), x2.data(), m, handle_ptr->get_stream()); - raft::copy(d_y1.data(), y1.data(), n, handle_ptr->get_stream()); - raft::copy(d_y2.data(), y2.data(), m, handle_ptr->get_stream()); + raft::copy(d_x1.data(), x.data(), n, handle_ptr->get_stream()); + raft::copy(d_x2.data(), x.data() + n, m, handle_ptr->get_stream()); + raft::copy(d_y1.data(), y.data(), n, handle_ptr->get_stream()); + 
raft::copy(d_y2.data(), y.data() + n, m, handle_ptr->get_stream()); // y1 <- alpha ( -D * x_1 + A^T x_2) + beta * y1 @@ -1404,12 +1424,7 @@ class iteration_data_t { // diag.pairwise_product(x1, r1); // r1 <- D * x_1 - thrust::transform(handle_ptr->get_thrust_policy(), - d_x1.data(), - d_x1.data() + n, - d_diag_.data(), - d_r1.data(), - thrust::multiplies()); + pairwise_multiply(d_x1.data(), d_diag_.data(), d_r1.data(), n, stream_view_); // r1 <- Q x1 + D x1 if (Q.n > 0) { @@ -1419,12 +1434,7 @@ class iteration_data_t { // y1 <- - alpha * r1 + beta * y1 // y1.axpy(-alpha, r1, beta); - thrust::transform(handle_ptr->get_thrust_policy(), - d_r1.data(), - d_r1.data() + n, - d_y1.data(), - d_y1.data(), - axpy_op{-alpha, beta}); + axpy(-alpha, d_r1.data(), beta, d_y1.data(), d_y1.data(), n, stream_view_); // matrix_transpose_vector_multiply(A, alpha, x2, 1.0, y1); cusparse_view_.transpose_spmv(alpha, d_x2, 1.0, d_y1); @@ -1437,6 +1447,20 @@ class iteration_data_t { handle_ptr->sync_stream(); } + void augmented_multiply(f_t alpha, + const dense_vector_t& x, + f_t beta, + dense_vector_t& y) + { + rmm::device_uvector d_x(x.size(), handle_ptr->get_stream()); + raft::copy(d_x.data(), x.data(), x.size(), handle_ptr->get_stream()); + rmm::device_uvector d_y(y.size(), handle_ptr->get_stream()); + raft::copy(d_y.data(), y.data(), y.size(), handle_ptr->get_stream()); + augmented_multiply(alpha, d_x, beta, d_y); + raft::copy(y.data(), d_y.data(), y.size(), handle_ptr->get_stream()); + handle_ptr->sync_stream(); + } + raft::handle_t const* handle_ptr; i_t n_upper_bounds; pinned_dense_vector_t upper_bounds; @@ -1471,7 +1495,9 @@ class iteration_data_t { csc_matrix_t AD; csc_matrix_t AT; csc_matrix_t ADAT; - csc_matrix_t augmented; + // csc_matrix_t augmented; + device_csr_matrix_t device_augmented; + device_csr_matrix_t device_ADAT; device_csr_matrix_t device_A; device_csc_matrix_t device_AD; @@ -1497,7 +1523,7 @@ class iteration_data_t { const csc_matrix_t& Q; std::vector Qdiag; bool Q_diagonal; - std::vector augmented_diagonal_indices; + rmm::device_uvector d_augmented_diagonal_indices_; bool indefinite_Q; cusparse_view_t cusparse_Q_view_; @@ -1709,17 +1735,13 @@ int barrier_solver_t::initial_point(iteration_data_t& data) // Perform a numerical factorization i_t status; if (use_augmented) { - status = data.chol->factorize(data.augmented); + status = data.chol->factorize(data.device_augmented); #ifdef CHOLESKY_DEBUG_CHECK cholesky_debug_check(data, lp, use_augmented); #endif } else { - if (use_gpu) { - status = data.chol->factorize(data.device_ADAT); - } else { - status = data.chol->factorize(data.ADAT); - } + status = data.chol->factorize(data.device_ADAT); } if (status == -2) { return -2; } if (status != 0) { @@ -1752,13 +1774,13 @@ int barrier_solver_t::initial_point(iteration_data_t& data) op_t(iteration_data_t& data) : data_(data) {} iteration_data_t& data_; void a_multiply(f_t alpha, - const dense_vector_t& x, + const rmm::device_uvector& x, f_t beta, - dense_vector_t& y) const + rmm::device_uvector& y) const { data_.augmented_multiply(alpha, x, beta, y); } - void solve(const dense_vector_t& b, dense_vector_t& x) const + void solve(rmm::device_uvector& b, rmm::device_uvector& x) const { data_.chol->solve(b, x); } @@ -1773,11 +1795,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) } } else { // rhs_x <- A * Dinv * F * u - b - if (use_gpu) { - data.cusparse_view_.spmv(1.0, DinvFu, -1.0, rhs_x); - } else { - matrix_vector_multiply(lp.A, 1.0, DinvFu, -1.0, rhs_x); - } + 
data.cusparse_view_.spmv(1.0, DinvFu, -1.0, rhs_x); #ifdef PRINT_INFO settings.log.printf("||DinvFu|| = %e\n", vector_norm2(DinvFu)); #endif @@ -1804,12 +1822,9 @@ int barrier_solver_t::initial_point(iteration_data_t& data) // x = Dinv*(F*u - A'*q) // Fu <- -1.0 * A' * q + 1.0 * Fu - if (use_gpu) { - data.cusparse_view_.transpose_spmv(-1.0, q, 1.0, Fu); - data.handle_ptr->get_stream().synchronize(); - } else { - matrix_transpose_vector_multiply(lp.A, -1.0, q, 1.0, Fu); - } + data.cusparse_view_.transpose_spmv(-1.0, q, 1.0, Fu); + data.handle_ptr->get_stream().synchronize(); + // x <- Dinv * (F*u - A'*q) data.inv_diag.pairwise_product(Fu, data.x); } @@ -1824,12 +1839,8 @@ int barrier_solver_t::initial_point(iteration_data_t& data) // Verify A*x = b data.primal_residual = lp.rhs; - if (use_gpu) { - data.cusparse_view_.spmv(1.0, data.x, -1.0, data.primal_residual); - data.handle_ptr->get_stream().synchronize(); - } else { - matrix_vector_multiply(lp.A, 1.0, data.x, -1.0, data.primal_residual); - } + data.cusparse_view_.spmv(1.0, data.x, -1.0, data.primal_residual); + data.handle_ptr->get_stream().synchronize(); #ifdef PRINT_INFO settings.log.printf("||b - A * x||: %.16e\n", vector_norm2(data.primal_residual)); #endif @@ -1902,13 +1913,6 @@ int barrier_solver_t::initial_point(iteration_data_t& data) for (i_t k = 0; k < lp.num_rows; k++) { data.y[k] = py[lp.num_cols + k]; } - dense_vector_t full_res = dual_rhs; - matrix_vector_multiply(data.augmented, 1.0, py, -1.0, full_res); - settings.log.printf("|| Aug (x y) - b || %e\n", vector_norm_inf(full_res)); - - dense_vector_t res1(lp.num_rows); - matrix_vector_multiply(lp.A, -1.0, data.z, 0.0, res1); - settings.log.printf("|| A p || %e\n", vector_norm2(res1)); // v = -E'*z data.gather_upper_bounds(data.z, data.v); @@ -1922,11 +1926,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) dense_vector_t Dinvc(lp.num_cols); data.inv_diag.pairwise_product(lp.objective, Dinvc); // rhs = 1.0 * A * Dinv * c - if (use_gpu) { - data.cusparse_view_.spmv(1.0, Dinvc, 0.0, rhs); - } else { - matrix_vector_multiply(lp.A, 1.0, Dinvc, 0.0, rhs); - } + data.cusparse_view_.spmv(1.0, Dinvc, 0.0, rhs); // Solve A*Dinv*A'*q = A*Dinv*c // data.chol->solve(rhs, data.y); @@ -1935,11 +1935,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) // z = Dinv*(c - A'*y) dense_vector_t cmATy = data.c; - if (use_gpu) { - data.cusparse_view_.transpose_spmv(-1.0, data.y, 1.0, cmATy); - } else { - matrix_transpose_vector_multiply(lp.A, -1.0, data.y, 1.0, cmATy); - } + data.cusparse_view_.transpose_spmv(-1.0, data.y, 1.0, cmATy); // z <- Dinv * (c - A'*y) data.inv_diag.pairwise_product(cmATy, data.z); @@ -1953,11 +1949,7 @@ int barrier_solver_t::initial_point(iteration_data_t& data) // Verify A'*y + z - E*v - Q*x = c data.z.pairwise_subtract(data.c, data.dual_residual); if (data.Q.n > 0) { matrix_vector_multiply(data.Q, -1.0, data.x, 1.0, data.dual_residual); } - if (use_gpu) { - data.cusparse_view_.transpose_spmv(1.0, data.y, 1.0, data.dual_residual); - } else { - matrix_transpose_vector_multiply(lp.A, 1.0, data.y, 1.0, data.dual_residual); - } + data.cusparse_view_.transpose_spmv(1.0, data.y, 1.0, data.dual_residual); if (data.n_upper_bounds > 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; @@ -2023,7 +2015,8 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector data.d_bound_residual_.data(), data.d_upper_bounds_.size(), [] HD(f_t upper_j, f_t w_k, f_t x_j) { return upper_j - w_k - x_j; }, - stream_view_); + 
stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); } // Compute dual_residual = c - A'*y - z + E*v + Q*x @@ -2038,8 +2031,8 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector data.d_dual_residual_.data(), data.d_dual_residual_.size(), cuda::std::minus<>{}, - stream_view_); - + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); // Compute dual_residual = c - A'*y - z + E*v auto cusparse_d_y = data.cusparse_view_.create_vector(d_y); auto descr_dual_residual = data.cusparse_view_.create_vector(data.d_dual_residual_); @@ -2053,7 +2046,8 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector thrust::make_permutation_iterator(data.d_dual_residual_.data(), data.d_upper_bounds_.data()), data.d_upper_bounds_.size(), [] HD(f_t dual_residual_j, f_t v_k) { return dual_residual_j + v_k; }, - stream_view_); + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); } // Compute complementarity_xz_residual = x.*z @@ -2061,14 +2055,15 @@ void barrier_solver_t::gpu_compute_residuals(const rmm::device_uvector data.d_complementarity_xz_residual_.data(), data.d_complementarity_xz_residual_.size(), cuda::std::multiplies<>{}, - stream_view_); - + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); // Compute complementarity_wv_residual = w.*v cub::DeviceTransform::Transform(cuda::std::make_tuple(d_w.data(), d_v.data()), data.d_complementarity_wv_residual_.data(), data.d_complementarity_wv_residual_.size(), cuda::std::multiplies<>{}, - stream_view_); + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); raft::copy(data.complementarity_wv_residual.data(), data.d_complementarity_wv_residual_.data(), data.d_complementarity_wv_residual_.size(), @@ -2106,11 +2101,7 @@ void barrier_solver_t::compute_residuals(const dense_vector_t 0) { @@ -2122,11 +2113,7 @@ void barrier_solver_t::compute_residuals(const dense_vector_t 0) { for (i_t k = 0; k < data.n_upper_bounds; k++) { i_t j = data.upper_bounds[k]; @@ -2166,51 +2153,6 @@ void barrier_solver_t::gpu_compute_residual_norms(const rmm::device_uv device_vector_norm_inf(data.d_complementarity_wv_rhs_, stream_view_)); } -template -void barrier_solver_t::cpu_compute_residual_norms(const dense_vector_t& w, - const dense_vector_t& x, - const dense_vector_t& y, - const dense_vector_t& v, - const dense_vector_t& z, - iteration_data_t& data, - f_t& primal_residual_norm, - f_t& dual_residual_norm, - f_t& complementarity_residual_norm) -{ - raft::common::nvtx::range fun_scope("Barrier: CPU compute_residual_norms"); - - compute_residuals(w, x, y, v, z, data); - primal_residual_norm = std::max(vector_norm_inf(data.primal_residual, stream_view_), - vector_norm_inf(data.bound_residual, stream_view_)); - dual_residual_norm = vector_norm_inf(data.dual_residual, stream_view_); - complementarity_residual_norm = - std::max(vector_norm_inf(data.complementarity_xz_residual, stream_view_), - vector_norm_inf(data.complementarity_wv_residual, stream_view_)); -} - -template -template -f_t barrier_solver_t::max_step_to_boundary(const dense_vector_t& x, - const dense_vector_t& dx, - i_t& index) const -{ - float64_t max_step = 1.0; - index = -1; - for (i_t i = 0; i < static_cast(x.size()); i++) { - // x_i + alpha * dx_i >= 0, x_i >= 0, alpha >= 0 - // We only need to worry about the case where dx_i < 0 - // alpha * dx_i >= -x_i => alpha <= -x_i / dx_i - if (dx[i] < 0.0) { - const f_t ratio = -x[i] / dx[i]; - if (ratio < max_step) { - max_step = ratio; - index = i; - } - } - } - return max_step; -} - template f_t 
barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t& data, const rmm::device_uvector& x, @@ -2231,16 +2173,6 @@ f_t barrier_solver_t::gpu_max_step_to_boundary(iteration_data_t -template -f_t barrier_solver_t::max_step_to_boundary( - const dense_vector_t& x, - const dense_vector_t& dx) const -{ - i_t index; - return max_step_to_boundary(x, dx, index); -} - template i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t& data, pinned_dense_vector_t& dw, @@ -2312,8 +2244,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t{}, - stream_view_); - + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); // diag = z ./ x + E * (v ./ w) * E' if (data.n_upper_bounds > 0) { @@ -2325,7 +2257,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_tfactorize(data.augmented); + status = data.chol->factorize(data.device_augmented); #ifdef CHOLESKY_DEBUG_CHECK cholesky_debug_check(data, lp, use_augmented); @@ -2368,11 +2301,7 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_tfactorize(data.device_ADAT); - } else { - status = data.chol->factorize(data.ADAT); - } + status = data.chol->factorize(data.device_ADAT); } data.has_factorization = true; data.num_factorizations++; @@ -2405,8 +2334,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t r1(lp.num_cols); - raft::copy(r1.data(), data.d_r1_.data(), data.d_r1_.size(), stream_view_); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); - dense_vector_t augmented_rhs(lp.num_cols + lp.num_rows); - for (i_t k = 0; k < lp.num_cols; k++) { - augmented_rhs[k] = r1[k]; - } - for (i_t k = 0; k < lp.num_rows; k++) { - augmented_rhs[k + lp.num_cols] = data.primal_rhs[k]; - } - dense_vector_t augmented_soln(lp.num_cols + lp.num_rows); - data.chol->solve(augmented_rhs, augmented_soln); + rmm::device_uvector d_augmented_rhs(lp.num_cols + lp.num_rows, stream_view_); + raft::copy(d_augmented_rhs.data(), data.d_r1_.data(), lp.num_cols, stream_view_); + raft::copy( + d_augmented_rhs.data() + lp.num_cols, data.primal_rhs.data(), lp.num_rows, stream_view_); + rmm::device_uvector d_augmented_soln(lp.num_cols + lp.num_rows, stream_view_); + data.chol->solve(d_augmented_rhs, d_augmented_soln); struct op_t { op_t(iteration_data_t& data) : data_(data) {} iteration_data_t& data_; + void a_multiply(f_t alpha, - const dense_vector_t& x, + const rmm::device_uvector& x, f_t beta, - dense_vector_t& y) + rmm::device_uvector& y) { data_.augmented_multiply(alpha, x, beta, y); } - void solve(const dense_vector_t& b, dense_vector_t& x) const + + void solve(rmm::device_uvector& b, rmm::device_uvector& x) const { data_.chol->solve(b, x); } } op(data); - iterative_refinement(op, augmented_rhs, augmented_soln); - dense_vector_t augmented_residual = augmented_rhs; - matrix_vector_multiply(data.augmented, 1.0, augmented_soln, -1.0, augmented_residual); - f_t solve_err = vector_norm_inf(augmented_residual); + auto solve_err = iterative_refinement(op, d_augmented_rhs, d_augmented_soln); if (solve_err > 1e-1) { settings.log.printf("|| Aug (dx, dy) - aug_rhs || %e after IR\n", solve_err); } - for (i_t k = 0; k < lp.num_cols; k++) { - dx[k] = augmented_soln[k]; - } - for (i_t k = 0; k < lp.num_rows; k++) { - dy[k] = augmented_soln[k + lp.num_cols]; - } - 
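For context on the op_t wrapper and the call above: iterative_refinement is assumed here to take an operator exposing a_multiply (y <- alpha*A*x + beta*y with the augmented matrix) and solve (a back-solve with the existing Cholesky factors), and to return the residual norm that the caller compares against 1e-1. A minimal host-side sketch of that scheme, under those assumptions and not the actual helper, is:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Classical iterative refinement: solve against the current residual with the
// (inexact) factorization and add the correction. Op must provide
//   a_multiply(alpha, x, beta, y): y <- alpha * A * x + beta * y
//   solve(b, x):                   x <- approximate A^{-1} * b
template <typename Op>
double iterative_refinement_sketch(Op& op,
                                   const std::vector<double>& b,
                                   std::vector<double>& x,
                                   int max_iters = 5,
                                   double tol    = 1e-12)
{
  auto inf_norm = [](const std::vector<double>& v) {
    double n = 0.0;
    for (double vi : v) { n = std::max(n, std::abs(vi)); }
    return n;
  };
  std::vector<double> r(b.size());
  std::vector<double> dx(b.size());
  for (int it = 0; it < max_iters; ++it) {
    r = b;
    op.a_multiply(-1.0, x, 1.0, r);      // r <- b - A * x
    if (inf_norm(r) <= tol) { return inf_norm(r); }
    op.solve(r, dx);                     // approximate correction from the factorization
    for (std::size_t i = 0; i < x.size(); ++i) { x[i] += dx[i]; }
  }
  r = b;
  op.a_multiply(-1.0, x, 1.0, r);        // report the residual of the refined solution
  return inf_norm(r);
}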
raft::copy(data.d_dx_.data(), dx.data(), data.d_dx_.size(), stream_view_); - raft::copy(data.d_dy_.data(), dy.data(), data.d_dy_.size(), stream_view_); + raft::copy(data.d_dx_.data(), d_augmented_soln.data(), lp.num_cols, stream_view_); + raft::copy(data.d_dy_.data(), d_augmented_soln.data() + lp.num_cols, lp.num_rows, stream_view_); + raft::copy(dx.data(), data.d_dx_.data(), lp.num_cols, stream_view_); + raft::copy(dy.data(), data.d_dy_.data(), lp.num_rows, stream_view_); RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); // TMP should only be init once data.cusparse_dy_ = data.cusparse_view_.create_vector(data.d_dy_); - - dense_vector_t res = data.primal_rhs; - matrix_vector_multiply(lp.A, 1.0, dx, -1.0, res); - f_t prim_err = vector_norm_inf(res); - if (prim_err > 1e-1) { settings.log.printf("|| A * dx - r_p || %e\n", prim_err); } - - dense_vector_t res1(lp.num_cols); - data.diag.pairwise_product(dx, res1); - if (data.Q.n > 0) { matrix_vector_multiply(data.Q, 1.0, dx, 1.0, res1); } - - res1.axpy(-1.0, r1, -1.0); - matrix_transpose_vector_multiply(lp.A, 1.0, dy, 1.0, res1); - f_t res1_err = vector_norm_inf(res1); - if (res1_err > 1e-1) { - settings.log.printf("|| A'*dy - r_1 - D dx || %e", vector_norm_inf(res1)); - } - - dense_vector_t res2(lp.num_cols + lp.num_rows); - for (i_t k = 0; k < lp.num_cols; k++) { - res2[k] = r1[k]; - } - for (i_t k = 0; k < lp.num_rows; k++) { - res2[k + lp.num_cols] = data.primal_rhs[k]; - } - dense_vector_t dxdy(lp.num_cols + lp.num_rows); - for (i_t k = 0; k < lp.num_cols; k++) { - dxdy[k] = dx[k]; - } - for (i_t k = 0; k < lp.num_rows; k++) { - dxdy[k + lp.num_cols] = dy[k]; - } - data.augmented_multiply(1.0, dxdy, -1.0, res2); - f_t res2_err = vector_norm_inf(res2); - if (res2_err > 1e-1) { settings.log.printf("|| Aug_0 (dx, dy) - aug_rhs || %e\n", res2_err); } } else { { raft::common::nvtx::range fun_scope("Barrier: Solve A D^{-1} A^T dy = h"); @@ -2582,8 +2466,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t x_residual = data.primal_rhs; - if (use_gpu) { - data.cusparse_view_.spmv(1.0, dx, -1.0, x_residual); - } else { - matrix_vector_multiply(lp.A, 1.0, dx, -1.0, x_residual); - } + data.cusparse_view_.spmv(1.0, dx, -1.0, x_residual); const f_t x_residual_norm = vector_norm_inf(x_residual, stream_view_); max_residual = std::max(max_residual, x_residual_norm); if (x_residual_norm > 1e-2) { @@ -2765,8 +2646,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t(data.d_xz_residual_, stream_view_); max_residual = std::max(max_residual, xz_residual_norm); @@ -2808,8 +2690,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t(d_dv_residual, stream_view_); max_residual = std::max(max_residual, dv_residual_norm); if (dv_residual_norm > 1e-2) { @@ -2869,8 +2751,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t(data.d_dual_residual_, stream_view_); max_residual = std::max(max_residual, dual_residual_norm); @@ -2890,8 +2772,8 @@ i_t barrier_solver_t::gpu_compute_search_direction(iteration_data_t::gpu_compute_search_direction(iteration_data_t(data.d_dw_residual_, stream_view_); max_residual = std::max(max_residual, dw_residual_norm); @@ -2931,8 +2813,8 @@ i_t 
barrier_solver_t::gpu_compute_search_direction(iteration_data_t(data.d_wv_residual_, stream_view_); max_residual = std::max(max_residual, wv_residual_norm); @@ -2953,40 +2835,31 @@ void barrier_solver_t::compute_affine_rhs(iteration_data_t& data.bound_rhs = data.bound_residual; data.dual_rhs = data.dual_residual; - if (use_gpu) { - raft::copy(data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_residual_.data(), - data.d_complementarity_xz_residual_.size(), - stream_view_); - raft::copy(data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_residual_.data(), - data.d_complementarity_wv_residual_.size(), - stream_view_); - - // x.*z -> -x .* z - cub::DeviceTransform::Transform( - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [] HD(f_t xz_rhs) { return -xz_rhs; }, - stream_view_); - - // w.*v -> -w .* v - cub::DeviceTransform::Transform( - data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_rhs_.size(), - [] HD(f_t wv_rhs) { return -wv_rhs; }, - stream_view_); - } else { - data.complementarity_xz_rhs = data.complementarity_xz_residual; - data.complementarity_wv_rhs = data.complementarity_wv_residual; + raft::copy(data.d_complementarity_xz_rhs_.data(), + data.d_complementarity_xz_residual_.data(), + data.d_complementarity_xz_residual_.size(), + stream_view_); + raft::copy(data.d_complementarity_wv_rhs_.data(), + data.d_complementarity_wv_residual_.data(), + data.d_complementarity_wv_residual_.size(), + stream_view_); - // x.*z -> -x .* z - data.complementarity_xz_rhs.multiply_scalar(-1.0); - // w.*v -> -w .* v - data.complementarity_wv_rhs.multiply_scalar(-1.0); - } + // x.*z -> -x .* z + cub::DeviceTransform::Transform( + data.d_complementarity_xz_rhs_.data(), + data.d_complementarity_xz_rhs_.data(), + data.d_complementarity_xz_rhs_.size(), + [] HD(f_t xz_rhs) { return -xz_rhs; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + // w.*v -> -w .* v + cub::DeviceTransform::Transform( + data.d_complementarity_wv_rhs_.data(), + data.d_complementarity_wv_rhs_.data(), + data.d_complementarity_wv_rhs_.size(), + [] HD(f_t wv_rhs) { return -wv_rhs; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); } template @@ -2996,101 +2869,74 @@ void barrier_solver_t::compute_target_mu( raft::common::nvtx::range fun_scope("Barrier: compute_target_mu"); f_t complementarity_aff_sum = 0.0; - if (!use_gpu) { - f_t step_primal_aff = std::min(max_step_to_boundary(data.w, data.dw_aff), - max_step_to_boundary(data.x, data.dx_aff)); - f_t step_dual_aff = std::min(max_step_to_boundary(data.v, data.dv_aff), - max_step_to_boundary(data.z, data.dz_aff)); - - if (data.Q.n > 0) { - step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); - } - - // w_aff = w + step_primal_aff * dw_aff - // x_aff = x + step_primal_aff * dx_aff - // v_aff = v + step_dual_aff * dv_aff - // z_aff = z + step_dual_aff * dz_aff - dense_vector_t w_aff = data.w; - dense_vector_t x_aff = data.x; - dense_vector_t v_aff = data.v; - dense_vector_t z_aff = data.z; - w_aff.axpy(step_primal_aff, data.dw_aff, 1.0); - x_aff.axpy(step_primal_aff, data.dx_aff, 1.0); - v_aff.axpy(step_dual_aff, data.dv_aff, 1.0); - z_aff.axpy(step_dual_aff, data.dz_aff, 1.0); - - dense_vector_t complementarity_xz_aff(lp.num_cols); - dense_vector_t complementarity_wv_aff(data.n_upper_bounds); - x_aff.pairwise_product(z_aff, complementarity_xz_aff); - w_aff.pairwise_product(v_aff, 
complementarity_wv_aff); - - complementarity_aff_sum = complementarity_xz_aff.sum() + complementarity_wv_aff.sum(); + // TMP no copy and data should always be on the GPU + data.d_dw_aff_.resize(data.dw_aff.size(), stream_view_); + data.d_dx_aff_.resize(data.dx_aff.size(), stream_view_); + data.d_dv_aff_.resize(data.dv_aff.size(), stream_view_); + data.d_dz_aff_.resize(data.dz_aff.size(), stream_view_); + + raft::copy(data.d_dw_aff_.data(), data.dw_aff.data(), data.dw_aff.size(), stream_view_); + raft::copy(data.d_dx_aff_.data(), data.dx_aff.data(), data.dx_aff.size(), stream_view_); + raft::copy(data.d_dv_aff_.data(), data.dv_aff.data(), data.dv_aff.size(), stream_view_); + raft::copy(data.d_dz_aff_.data(), data.dz_aff.data(), data.dz_aff.size(), stream_view_); + + f_t step_primal_aff = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_aff_), + gpu_max_step_to_boundary(data, data.d_x_, data.d_dx_aff_)); + f_t step_dual_aff = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_aff_), + gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_aff_)); + + if (data.Q.n > 0) { step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); } + + // Compute complementarity_xz_aff_sum = sum(x_aff * z_aff), + // where x_aff = x + step_primal_aff * dx_aff and z_aff = z + step_dual_aff * dz_aff + // Here the update of x_aff and z_aff are done temporarily and sum of their products is + // computed without storing intermediate results. + f_t complementarity_xz_aff_sum = data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator( + data.d_x_.data(), data.d_z_.data(), data.d_dx_aff_.data(), data.d_dz_aff_.data()), + cuda::std::plus{}, + [step_primal_aff, step_dual_aff] HD(const thrust::tuple t) { + const f_t x = thrust::get<0>(t); + const f_t z = thrust::get<1>(t); + const f_t dx_aff = thrust::get<2>(t); + const f_t dz_aff = thrust::get<3>(t); + + const f_t x_aff = x + step_primal_aff * dx_aff; + const f_t z_aff = z + step_dual_aff * dz_aff; + + const f_t complementarity_xz_aff = x_aff * z_aff; + + return complementarity_xz_aff; + }, + f_t(0), + data.d_x_.size(), + stream_view_); - } else { - // TMP no copy and data should always be on the GPU - data.d_dw_aff_.resize(data.dw_aff.size(), stream_view_); - data.d_dx_aff_.resize(data.dx_aff.size(), stream_view_); - data.d_dv_aff_.resize(data.dv_aff.size(), stream_view_); - data.d_dz_aff_.resize(data.dz_aff.size(), stream_view_); - - raft::copy(data.d_dw_aff_.data(), data.dw_aff.data(), data.dw_aff.size(), stream_view_); - raft::copy(data.d_dx_aff_.data(), data.dx_aff.data(), data.dx_aff.size(), stream_view_); - raft::copy(data.d_dv_aff_.data(), data.dv_aff.data(), data.dv_aff.size(), stream_view_); - raft::copy(data.d_dz_aff_.data(), data.dz_aff.data(), data.dz_aff.size(), stream_view_); - - f_t step_primal_aff = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_aff_), - gpu_max_step_to_boundary(data, data.d_x_, data.d_dx_aff_)); - f_t step_dual_aff = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_aff_), - gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_aff_)); + // Here the update of w_aff and v_aff are done temporarily and sum of their products is + // computed without storing intermediate results. 
+ f_t complementarity_wv_aff_sum = data.transform_reduce_helper_.transform_reduce( + thrust::make_zip_iterator( + data.d_w_.data(), data.d_v_.data(), data.d_dw_aff_.data(), data.d_dv_aff_.data()), + cuda::std::plus{}, + [step_primal_aff, step_dual_aff] HD(const thrust::tuple t) { + const f_t w = thrust::get<0>(t); + const f_t v = thrust::get<1>(t); + const f_t dw_aff = thrust::get<2>(t); + const f_t dv_aff = thrust::get<3>(t); - if (data.Q.n > 0) { - step_primal_aff = step_dual_aff = std::min(step_primal_aff, step_dual_aff); - } + const f_t w_aff = w + step_primal_aff * dw_aff; + const f_t v_aff = v + step_dual_aff * dv_aff; - f_t complementarity_xz_aff_sum = data.transform_reduce_helper_.transform_reduce( - thrust::make_zip_iterator( - data.d_x_.data(), data.d_z_.data(), data.d_dx_aff_.data(), data.d_dz_aff_.data()), - cuda::std::plus{}, - [step_primal_aff, step_dual_aff] HD(const thrust::tuple t) { - const f_t x = thrust::get<0>(t); - const f_t z = thrust::get<1>(t); - const f_t dx_aff = thrust::get<2>(t); - const f_t dz_aff = thrust::get<3>(t); + const f_t complementarity_wv_aff = w_aff * v_aff; - const f_t x_aff = x + step_primal_aff * dx_aff; - const f_t z_aff = z + step_dual_aff * dz_aff; - - const f_t complementarity_xz_aff = x_aff * z_aff; + return complementarity_wv_aff; + }, + f_t(0), + data.d_w_.size(), + stream_view_); - return complementarity_xz_aff; - }, - f_t(0), - data.d_x_.size(), - stream_view_); - - f_t complementarity_wv_aff_sum = data.transform_reduce_helper_.transform_reduce( - thrust::make_zip_iterator( - data.d_w_.data(), data.d_v_.data(), data.d_dw_aff_.data(), data.d_dv_aff_.data()), - cuda::std::plus{}, - [step_primal_aff, step_dual_aff] HD(const thrust::tuple t) { - const f_t w = thrust::get<0>(t); - const f_t v = thrust::get<1>(t); - const f_t dw_aff = thrust::get<2>(t); - const f_t dv_aff = thrust::get<3>(t); - - const f_t w_aff = w + step_primal_aff * dw_aff; - const f_t v_aff = v + step_dual_aff * dv_aff; - - const f_t complementarity_wv_aff = w_aff * v_aff; - - return complementarity_wv_aff; - }, - f_t(0), - data.d_w_.size(), - stream_view_); + complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; - complementarity_aff_sum = complementarity_xz_aff_sum + complementarity_wv_aff_sum; - } mu_aff = (complementarity_aff_sum) / (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); sigma = std::max(0.0, std::min(1.0, std::pow(mu_aff / mu, 3.0))); @@ -3102,31 +2948,20 @@ void barrier_solver_t::compute_cc_rhs(iteration_data_t& data { raft::common::nvtx::range fun_scope("Barrier: compute_cc_rhs"); - if (use_gpu) { - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_dx_aff_.data(), data.d_dz_aff_.data()), - data.d_complementarity_xz_rhs_.data(), - data.d_complementarity_xz_rhs_.size(), - [new_mu] HD(f_t dx_aff, f_t dz_aff) { return -(dx_aff * dz_aff) + new_mu; }, - stream_view_); - - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), - data.d_complementarity_wv_rhs_.data(), - data.d_complementarity_wv_rhs_.size(), - [new_mu] HD(f_t dw_aff, f_t dv_aff) { return -(dw_aff * dv_aff) + new_mu; }, - stream_view_); - } else { - // complementarity_xz_rhs = -dx_aff .* dz_aff + sigma * mu - data.dx_aff.pairwise_product(data.dz_aff, data.complementarity_xz_rhs); - data.complementarity_xz_rhs.multiply_scalar(-1.0); - data.complementarity_xz_rhs.add_scalar(new_mu); - - // complementarity_wv_rhs = -dw_aff .* dv_aff + sigma * mu - data.dw_aff.pairwise_product(data.dv_aff, 
data.complementarity_wv_rhs); - data.complementarity_wv_rhs.multiply_scalar(-1.0); - data.complementarity_wv_rhs.add_scalar(new_mu); - } + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_dx_aff_.data(), data.d_dz_aff_.data()), + data.d_complementarity_xz_rhs_.data(), + data.d_complementarity_xz_rhs_.size(), + [new_mu] HD(f_t dx_aff, f_t dz_aff) { return -(dx_aff * dz_aff) + new_mu; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_dw_aff_.data(), data.d_dv_aff_.data()), + data.d_complementarity_wv_rhs_.data(), + data.d_complementarity_wv_rhs_.size(), + [new_mu] HD(f_t dw_aff, f_t dv_aff) { return -(dw_aff * dv_aff) + new_mu; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); // TMP should be CPU to 0 if CPU and GPU to 0 if GPU data.primal_rhs.set_scalar(0.0); @@ -3138,79 +2973,63 @@ template void barrier_solver_t::compute_final_direction(iteration_data_t& data) { raft::common::nvtx::range fun_scope("Barrier: compute_final_direction"); - if (use_gpu) { - raft::common::nvtx::range fun_scope("Barrier: GPU vector operations"); - // TODO Nicolas: Redundant copies - data.d_y_.resize(data.y.size(), stream_view_); - data.d_dy_aff_.resize(data.dy_aff.size(), stream_view_); - raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream_view_); - raft::copy(data.d_dy_aff_.data(), data.dy_aff.data(), data.dy_aff.size(), stream_view_); + // TODO Nicolas: Redundant copies + data.d_y_.resize(data.y.size(), stream_view_); + data.d_dy_aff_.resize(data.dy_aff.size(), stream_view_); + raft::copy(data.d_y_.data(), data.y.data(), data.y.size(), stream_view_); + raft::copy(data.d_dy_aff_.data(), data.dy_aff.data(), data.dy_aff.size(), stream_view_); #ifdef FINITE_CHECK - for (i_t i = 0; i < (int)data.y.size(); i++) { - cuopt_assert(std::isfinite(data.y[i]), "data.d_y_[i] is not finite"); - } + for (i_t i = 0; i < (int)data.y.size(); i++) { + cuopt_assert(std::isfinite(data.y[i]), "data.d_y_[i] is not finite"); + } - for (i_t i = 0; i < (int)data.dy_aff.size(); i++) { - cuopt_assert(std::isfinite(data.dy_aff[i]), "data.dy_aff_[i] is not finite"); - } + for (i_t i = 0; i < (int)data.dy_aff.size(); i++) { + cuopt_assert(std::isfinite(data.dy_aff[i]), "data.dy_aff_[i] is not finite"); + } #endif - // dw = dw_aff + dw_cc - // dx = dx_aff + dx_cc - // dy = dy_aff + dy_cc - // dv = dv_aff + dv_cc - // dz = dz_aff + dz_cc - // Note: dw_cc - dz_cc are stored in dw - dz - - // Transforms are grouped according to vector sizes. 
- assert(data.d_dw_.size() == data.d_dv_.size()); - assert(data.d_dx_.size() == data.d_dz_.size()); - assert(data.d_dw_aff_.size() == data.d_dv_aff_.size()); - assert(data.d_dx_aff_.size() == data.d_dz_aff_.size()); - assert(data.d_dy_aff_.size() == data.d_dy_.size()); - - cub::DeviceTransform::Transform( - cuda::std::make_tuple( - data.d_dw_aff_.data(), data.d_dv_aff_.data(), data.d_dw_.data(), data.d_dv_.data()), - thrust::make_zip_iterator(data.d_dw_.data(), data.d_dv_.data()), - data.d_dw_.size(), - [] HD(f_t dw_aff, f_t dv_aff, f_t dw, f_t dv) -> thrust::tuple { - return {dw + dw_aff, dv + dv_aff}; - }, - stream_view_); - - cub::DeviceTransform::Transform( - cuda::std::make_tuple( - data.d_dx_aff_.data(), data.d_dz_aff_.data(), data.d_dx_.data(), data.d_dz_.data()), - thrust::make_zip_iterator(data.d_dx_.data(), data.d_dz_.data()), - data.d_dx_.size(), - [] HD(f_t dx_aff, f_t dz_aff, f_t dx, f_t dz) -> thrust::tuple { - return {dx + dx_aff, dz + dz_aff}; - }, - stream_view_); - - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_dy_aff_.data(), data.d_dy_.data()), - data.d_dy_.data(), - data.d_dy_.size(), - [] HD(f_t dy_aff, f_t dy) { return dy + dy_aff; }, - stream_view_); - - } else { - raft::common::nvtx::range fun_scope("Barrier: CPU vector operations"); - // dw = dw_aff + dw_cc - // dx = dx_aff + dx_cc - // dy = dy_aff + dy_cc - // dv = dv_aff + dv_cc - // dz = dz_aff + dz_cc - // Note: dw_cc - dz_cc are stored in dw - dz - data.dw.axpy(1.0, data.dw_aff, 1.0); - data.dx.axpy(1.0, data.dx_aff, 1.0); - data.dy.axpy(1.0, data.dy_aff, 1.0); - data.dv.axpy(1.0, data.dv_aff, 1.0); - data.dz.axpy(1.0, data.dz_aff, 1.0); - } + // dw = dw_aff + dw_cc + // dx = dx_aff + dx_cc + // dy = dy_aff + dy_cc + // dv = dv_aff + dv_cc + // dz = dz_aff + dz_cc + // Note: dw_cc - dz_cc are stored in dw - dz + + // Transforms are grouped according to vector sizes. 
+ assert(data.d_dw_.size() == data.d_dv_.size()); + assert(data.d_dx_.size() == data.d_dz_.size()); + assert(data.d_dw_aff_.size() == data.d_dv_aff_.size()); + assert(data.d_dx_aff_.size() == data.d_dz_aff_.size()); + assert(data.d_dy_aff_.size() == data.d_dy_.size()); + + cub::DeviceTransform::Transform( + cuda::std::make_tuple( + data.d_dw_aff_.data(), data.d_dv_aff_.data(), data.d_dw_.data(), data.d_dv_.data()), + thrust::make_zip_iterator(data.d_dw_.data(), data.d_dv_.data()), + data.d_dw_.size(), + [] HD(f_t dw_aff, f_t dv_aff, f_t dw, f_t dv) -> thrust::tuple { + return {dw + dw_aff, dv + dv_aff}; + }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple( + data.d_dx_aff_.data(), data.d_dz_aff_.data(), data.d_dx_.data(), data.d_dz_.data()), + thrust::make_zip_iterator(data.d_dx_.data(), data.d_dz_.data()), + data.d_dx_.size(), + [] HD(f_t dx_aff, f_t dz_aff, f_t dx, f_t dz) -> thrust::tuple { + return {dx + dx_aff, dz + dz_aff}; + }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_dy_aff_.data(), data.d_dy_.data()), + data.d_dy_.data(), + data.d_dy_.size(), + [] HD(f_t dy_aff, f_t dy) { return dy + dy_aff; }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); } template @@ -3222,17 +3041,10 @@ void barrier_solver_t::compute_primal_dual_step_length(iteration_data_ raft::common::nvtx::range fun_scope("Barrier: compute_primal_dual_step_length"); f_t max_step_primal = 0.0; f_t max_step_dual = 0.0; - if (use_gpu) { - max_step_primal = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_), - gpu_max_step_to_boundary(data, data.d_x_, data.d_dx_)); - max_step_dual = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_), - gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_)); - } else { - max_step_primal = - std::min(max_step_to_boundary(data.w, data.dw), max_step_to_boundary(data.x, data.dx)); - max_step_dual = - std::min(max_step_to_boundary(data.v, data.dv), max_step_to_boundary(data.z, data.dz)); - } + max_step_primal = std::min(gpu_max_step_to_boundary(data, data.d_w_, data.d_dw_), + gpu_max_step_to_boundary(data, data.d_x_, data.d_dx_)); + max_step_dual = std::min(gpu_max_step_to_boundary(data, data.d_v_, data.d_dv_), + gpu_max_step_to_boundary(data, data.d_z_, data.d_dz_)); step_primal = step_scale * max_step_primal; step_dual = step_scale * max_step_dual; @@ -3248,86 +3060,61 @@ void barrier_solver_t::compute_next_iterate(iteration_data_t { raft::common::nvtx::range fun_scope("Barrier: compute_next_iterate"); - if (use_gpu) { - cub::DeviceTransform::Transform( - cuda::std::make_tuple( - data.d_w_.data(), data.d_v_.data(), data.d_dw_.data(), data.d_dv_.data()), - thrust::make_zip_iterator(data.d_w_.data(), data.d_v_.data()), - data.d_dw_.size(), - [step_primal, step_dual] HD(f_t w, f_t v, f_t dw, f_t dv) -> thrust::tuple { - return {w + step_primal * dw, v + step_dual * dv}; - }, - stream_view_); - - cub::DeviceTransform::Transform( - cuda::std::make_tuple( - data.d_x_.data(), data.d_z_.data(), data.d_dx_.data(), data.d_dz_.data()), - thrust::make_zip_iterator(data.d_x_.data(), data.d_z_.data()), - data.d_dx_.size(), - [step_primal, step_dual] HD(f_t x, f_t z, f_t dx, f_t dz) -> thrust::tuple { - return {x + step_primal * dx, z + step_dual * dz}; - }, - stream_view_); - - cub::DeviceTransform::Transform( - cuda::std::make_tuple(data.d_y_.data(), data.d_dy_.data()), - data.d_y_.data(), - data.d_y_.size(), - [step_dual] 
HD(f_t y, f_t dy) { return y + step_dual * dy; }, - stream_view_); - - // Do not handle free variables for quadratic problems - i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; - if (num_free_variables > 0 && data.Q.n == 0) { - auto d_free_variable_pairs = device_copy(presolve_info.free_variable_pairs, stream_view_); - thrust::for_each(rmm::exec_policy(stream_view_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_free_variables), - [span_free_variable_pairs = cuopt::make_span(d_free_variable_pairs), - span_x = cuopt::make_span(data.d_x_), - my_step_scale = step_scale] __device__(i_t i) { - // Not coalesced - i_t k = 2 * i; - i_t u = span_free_variable_pairs[k]; - i_t v = span_free_variable_pairs[k + 1]; - f_t u_val = span_x[u]; - f_t v_val = span_x[v]; - f_t min_val = cuda::std::min(u_val, v_val); - f_t eta = my_step_scale * min_val; - span_x[u] -= eta; - span_x[v] -= eta; - }); - } - - raft::copy(data.w.data(), data.d_w_.data(), data.d_w_.size(), stream_view_); - raft::copy(data.x.data(), data.d_x_.data(), data.d_x_.size(), stream_view_); - raft::copy(data.y.data(), data.d_y_.data(), data.d_y_.size(), stream_view_); - raft::copy(data.v.data(), data.d_v_.data(), data.d_v_.size(), stream_view_); - raft::copy(data.z.data(), data.d_z_.data(), data.d_z_.size(), stream_view_); - // Sync to make sure all host variable are done copying - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); - } else { - data.w.axpy(step_primal, data.dw, 1.0); - data.x.axpy(step_primal, data.dx, 1.0); - data.y.axpy(step_dual, data.dy, 1.0); - data.v.axpy(step_dual, data.dv, 1.0); - data.z.axpy(step_dual, data.dz, 1.0); - - // Handle free variables - i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; - if (num_free_variables > 0 && data.Q.n == 0) { - for (i_t k = 0; k < 2 * num_free_variables; k += 2) { - i_t u = presolve_info.free_variable_pairs[k]; - i_t v = presolve_info.free_variable_pairs[k + 1]; - f_t u_val = data.x[u]; - f_t v_val = data.x[v]; - f_t min_val = std::min(u_val, v_val); - f_t eta = step_scale * min_val; - data.x[u] -= eta; - data.x[v] -= eta; - } - } + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_w_.data(), data.d_v_.data(), data.d_dw_.data(), data.d_dv_.data()), + thrust::make_zip_iterator(data.d_w_.data(), data.d_v_.data()), + data.d_dw_.size(), + [step_primal, step_dual] HD(f_t w, f_t v, f_t dw, f_t dv) -> thrust::tuple { + return {w + step_primal * dw, v + step_dual * dv}; + }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_x_.data(), data.d_z_.data(), data.d_dx_.data(), data.d_dz_.data()), + thrust::make_zip_iterator(data.d_x_.data(), data.d_z_.data()), + data.d_dx_.size(), + [step_primal, step_dual] HD(f_t x, f_t z, f_t dx, f_t dz) -> thrust::tuple { + return {x + step_primal * dx, z + step_dual * dz}; + }, + stream_view_.value()); + RAFT_CHECK_CUDA(stream_view_); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(data.d_y_.data(), data.d_dy_.data()), + data.d_y_.data(), + data.d_y_.size(), + [step_dual] HD(f_t y, f_t dy) { return y + step_dual * dy; }, + stream_view_); + RAFT_CHECK_CUDA(stream_view_); + // Do not handle free variables for quadratic problems + i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; + if (num_free_variables > 0 && data.Q.n == 0) { + auto d_free_variable_pairs = device_copy(presolve_info.free_variable_pairs, stream_view_); + thrust::for_each(rmm::exec_policy(stream_view_), + 
thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_free_variables), + [span_free_variable_pairs = cuopt::make_span(d_free_variable_pairs), + span_x = cuopt::make_span(data.d_x_), + my_step_scale = step_scale] __device__(i_t i) { + // Not coalesced + i_t k = 2 * i; + i_t u = span_free_variable_pairs[k]; + i_t v = span_free_variable_pairs[k + 1]; + f_t u_val = span_x[u]; + f_t v_val = span_x[v]; + f_t min_val = cuda::std::min(u_val, v_val); + f_t eta = my_step_scale * min_val; + span_x[u] -= eta; + span_x[v] -= eta; + }); } + + raft::copy(data.w.data(), data.d_w_.data(), data.d_w_.size(), stream_view_); + raft::copy(data.x.data(), data.d_x_.data(), data.d_x_.size(), stream_view_); + raft::copy(data.y.data(), data.d_y_.data(), data.d_y_.size(), stream_view_); + raft::copy(data.v.data(), data.d_v_.data(), data.d_v_.size(), stream_view_); + raft::copy(data.z.data(), data.d_z_.data(), data.d_z_.size(), stream_view_); + // Sync to make sure all host variable are done copying + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); } template @@ -3337,27 +3124,15 @@ void barrier_solver_t::compute_residual_norms(iteration_data_t @@ -3365,18 +3140,13 @@ void barrier_solver_t::compute_mu(iteration_data_t& data, f_ { raft::common::nvtx::range fun_scope("Barrier: compute_mu"); - if (use_gpu) { - mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), - data.d_complementarity_xz_residual_.size(), - stream_view_) + - data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), - data.d_complementarity_wv_residual_.size(), - stream_view_)) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); - } else { - mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) / - (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); - } + mu = (data.sum_reduce_helper_.sum(data.d_complementarity_xz_residual_.begin(), + data.d_complementarity_xz_residual_.size(), + stream_view_) + + data.sum_reduce_helper_.sum(data.d_complementarity_wv_residual_.begin(), + data.d_complementarity_wv_residual_.size(), + stream_view_)) / + (static_cast(data.x.size()) + static_cast(data.n_upper_bounds)); } template @@ -3385,161 +3155,147 @@ void barrier_solver_t::compute_primal_dual_objective(iteration_data_t< f_t& dual_objective) { raft::common::nvtx::range fun_scope("Barrier: compute_primal_dual_objective"); - if (use_gpu) { - raft::copy(data.d_c_.data(), data.c.data(), data.c.size(), stream_view_); - auto d_b = device_copy(data.b, stream_view_); - auto d_restrict_u = device_copy(data.restrict_u_, stream_view_); - rmm::device_scalar d_cx(stream_view_); - rmm::device_scalar d_by(stream_view_); - rmm::device_scalar d_uv(stream_view_); - + raft::copy(data.d_c_.data(), data.c.data(), data.c.size(), stream_view_); + auto d_b = device_copy(data.b, stream_view_); + auto d_restrict_u = device_copy(data.restrict_u_, stream_view_); + rmm::device_scalar d_cx(stream_view_); + rmm::device_scalar d_by(stream_view_); + rmm::device_scalar d_uv(stream_view_); + + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_c_.size(), + data.d_c_.data(), + 1, + data.d_x_.data(), + 1, + d_cx.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + d_b.size(), + d_b.data(), + 1, + data.d_y_.data(), + 1, + d_by.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + d_restrict_u.size(), + d_restrict_u.data(), 
+ 1, + data.d_v_.data(), + 1, + d_uv.data(), + stream_view_)); + f_t quad_objective = 0.0; + if (data.Q.n > 0) { + auto cusparse_d_x = data.cusparse_view_.create_vector(data.d_x_); + auto cusparse_Qx = data.cusparse_view_.create_vector(data.d_Qx_); + data.cusparse_Q_view_.spmv(1.0, cusparse_d_x, 0.0, cusparse_Qx); + rmm::device_scalar d_xQx(stream_view_); RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_c_.size(), - data.d_c_.data(), + data.d_Qx_.size(), + data.d_Qx_.data(), 1, data.d_x_.data(), 1, - d_cx.data(), + d_xQx.data(), stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - d_b.size(), - d_b.data(), - 1, - data.d_y_.data(), - 1, - d_by.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - d_restrict_u.size(), - d_restrict_u.data(), - 1, - data.d_v_.data(), - 1, - d_uv.data(), - stream_view_)); - f_t quad_objective = 0.0; - if (data.Q.n > 0) { - auto cusparse_d_x = data.cusparse_view_.create_vector(data.d_x_); - auto cusparse_Qx = data.cusparse_view_.create_vector(data.d_Qx_); - data.cusparse_Q_view_.spmv(1.0, cusparse_d_x, 0.0, cusparse_Qx); - rmm::device_scalar d_xQx(stream_view_); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_Qx_.size(), - data.d_Qx_.data(), - 1, - data.d_x_.data(), - 1, - d_xQx.data(), - stream_view_)); - quad_objective = 0.5 * d_xQx.value(stream_view_); - } - - primal_objective = d_cx.value(stream_view_) + quad_objective; - dual_objective = d_by.value(stream_view_) - d_uv.value(stream_view_) - quad_objective; + quad_objective = 0.5 * d_xQx.value(stream_view_); + } + + primal_objective = d_cx.value(stream_view_) + quad_objective; + dual_objective = d_by.value(stream_view_) - d_uv.value(stream_view_) - quad_objective; #ifdef CHECK_OBJECTIVE_GAP - rmm::device_scalar d_xz(stream_view_); - rmm::device_scalar d_wv(stream_view_); - rmm::device_scalar d_rdx(stream_view_); - rmm::device_scalar d_rpy(stream_view_); - rmm::device_scalar d_rwv(stream_view_); - rmm::device_scalar d_p(stream_view_); - rmm::device_scalar d_y(stream_view_); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_x_.size(), - data.d_x_.data(), - 1, - data.d_z_.data(), - 1, - d_xz.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_w_.size(), - data.d_w_.data(), - 1, - data.d_v_.data(), - 1, - d_wv.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_x_.size(), - data.d_x_.data(), - 1, - data.d_dual_residual_.data(), - 1, - d_rdx.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_y_.size(), - data.d_y_.data(), - 1, - data.d_primal_residual_.data(), - 1, - d_rpy.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_bound_residual_.size(), - data.d_bound_residual_.data(), - 1, - data.d_v_.data(), - 1, - d_rwv.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_primal_residual_.size(), - data.d_primal_residual_.data(), - 1, - data.d_primal_residual_.data(), - 1, - d_p.data(), - stream_view_)); - RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), - data.d_y_.size(), - data.d_y_.data(), - 1, - 
data.d_y_.data(), - 1, - d_y.data(), - stream_view_)); - f_t xz = d_xz.value(stream_view_); - f_t wv = d_wv.value(stream_view_); - f_t rdx = d_rdx.value(stream_view_); - f_t rpy = d_rpy.value(stream_view_); - f_t rwv = d_rwv.value(stream_view_); - f_t p = d_p.value(stream_view_); - f_t y = d_y.value(stream_view_); - - stream_view_.synchronize(); - - f_t objective_gap_1 = primal_objective - dual_objective; - f_t objective_gap_2 = xz + wv + rdx - rpy + rwv; - - settings.log.printf("Objective gap 1: %.2e, Objective gap 2: %.2e Diff: %.2e\n", - objective_gap_1, - objective_gap_2, - std::abs(objective_gap_1 - objective_gap_2)); - settings.log.printf( - "Objective - Complementarity: %.2e, rdx: %.2e, rpy: %.2e, rwv: %.2e, p: %.2e, y: %.2e\n", - std::abs(objective_gap_1 - (xz + wv)), - rdx, - rpy, - rwv, - p, - y); + rmm::device_scalar d_xz(stream_view_); + rmm::device_scalar d_wv(stream_view_); + rmm::device_scalar d_rdx(stream_view_); + rmm::device_scalar d_rpy(stream_view_); + rmm::device_scalar d_rwv(stream_view_); + rmm::device_scalar d_p(stream_view_); + rmm::device_scalar d_y(stream_view_); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_x_.size(), + data.d_x_.data(), + 1, + data.d_z_.data(), + 1, + d_xz.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_w_.size(), + data.d_w_.data(), + 1, + data.d_v_.data(), + 1, + d_wv.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_x_.size(), + data.d_x_.data(), + 1, + data.d_dual_residual_.data(), + 1, + d_rdx.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_y_.size(), + data.d_y_.data(), + 1, + data.d_primal_residual_.data(), + 1, + d_rpy.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_bound_residual_.size(), + data.d_bound_residual_.data(), + 1, + data.d_v_.data(), + 1, + d_rwv.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_primal_residual_.size(), + data.d_primal_residual_.data(), + 1, + data.d_primal_residual_.data(), + 1, + d_p.data(), + stream_view_)); + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(lp.handle_ptr->get_cublas_handle(), + data.d_y_.size(), + data.d_y_.data(), + 1, + data.d_y_.data(), + 1, + d_y.data(), + stream_view_)); + f_t xz = d_xz.value(stream_view_); + f_t wv = d_wv.value(stream_view_); + f_t rdx = d_rdx.value(stream_view_); + f_t rpy = d_rpy.value(stream_view_); + f_t rwv = d_rwv.value(stream_view_); + f_t p = d_p.value(stream_view_); + f_t y = d_y.value(stream_view_); + + stream_view_.synchronize(); + + f_t objective_gap_1 = primal_objective - dual_objective; + f_t objective_gap_2 = xz + wv + rdx - rpy + rwv; + + settings.log.printf("Objective gap 1: %.2e, Objective gap 2: %.2e Diff: %.2e\n", + objective_gap_1, + objective_gap_2, + std::abs(objective_gap_1 - objective_gap_2)); + settings.log.printf( + "Objective - Complementarity: %.2e, rdx: %.2e, rpy: %.2e, rwv: %.2e, p: %.2e, y: %.2e\n", + std::abs(objective_gap_1 - (xz + wv)), + rdx, + rpy, + rwv, + p, + y); #endif - } else { - f_t quad_objective = 0.0; - if (data.Q.n > 0) { - dense_vector_t x_host(data.Q.n); - std::copy(data.x.begin(), data.x.begin() + data.Q.n, x_host.begin()); - dense_vector_t Qx(data.Q.n); - matrix_vector_multiply(data.Q, 1.0, x_host, 0.0, Qx); - 
quad_objective = 0.5 * x_host.inner_product(Qx); - } - primal_objective = data.c.inner_product(data.x) + quad_objective; - dual_objective = - data.b.inner_product(data.y) - data.restrict_u_.inner_product(data.v) - quad_objective; - } } template @@ -3813,7 +3569,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, return lp_status_t::CONCURRENT_LIMIT; } // Sync to make sure all the async copies to host done inside are finished - if (use_gpu) RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); if (status < 0) { return check_for_suboptimal_solution(options, @@ -3852,7 +3608,7 @@ lp_status_t barrier_solver_t::solve(f_t start_time, return lp_status_t::CONCURRENT_LIMIT; } // Sync to make sure all the async copies to host done inside are finished - if (use_gpu) RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); if (status < 0) { return check_for_suboptimal_solution(options, data, diff --git a/cpp/src/dual_simplex/barrier.hpp b/cpp/src/dual_simplex/barrier.hpp index 84cc7516e..d8ca748b6 100644 --- a/cpp/src/dual_simplex/barrier.hpp +++ b/cpp/src/dual_simplex/barrier.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -59,14 +59,6 @@ class barrier_solver_t { const dense_vector_t& v, const dense_vector_t& z, iteration_data_t& data); - template - f_t max_step_to_boundary(const dense_vector_t& x, - const dense_vector_t& dx, - i_t& index) const; - template - f_t max_step_to_boundary(const dense_vector_t& x, - const dense_vector_t& dx) const; - void compute_primal_dual_step_length(iteration_data_t& data, f_t step_scale, f_t& step_primal, @@ -81,16 +73,6 @@ class barrier_solver_t { f_t& primal_objective, f_t& dual_objective); - void cpu_compute_residual_norms(const dense_vector_t& w, - const dense_vector_t& x, - const dense_vector_t& y, - const dense_vector_t& v, - const dense_vector_t& z, - iteration_data_t& data, - f_t& primal_residual_norm, - f_t& dual_residual_norm, - f_t& complementarity_residual_norm); - // To be able to directly pass lambdas to transform functions public: void compute_next_iterate(iteration_data_t& data, @@ -128,13 +110,6 @@ class barrier_solver_t { pinned_dense_vector_t& dv, pinned_dense_vector_t& dz, f_t& max_residual); - i_t compute_search_direction(iteration_data_t& data, - pinned_dense_vector_t& dw, - pinned_dense_vector_t& dx, - pinned_dense_vector_t& dy, - pinned_dense_vector_t& dv, - pinned_dense_vector_t& dz, - f_t& max_residual); private: lp_status_t check_for_suboptimal_solution(const barrier_solver_settings_t& options, diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index 3080f269d..f5cd54053 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/basis_solves.hpp b/cpp/src/dual_simplex/basis_solves.hpp index 0745806a6..295bedccd 100644 --- a/cpp/src/dual_simplex/basis_solves.hpp +++ b/cpp/src/dual_simplex/basis_solves.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 3e2bbb6dc..ddc8fb5a8 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index b69bcfc2a..8eca3ba8a 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/bounds_strengthening.cpp b/cpp/src/dual_simplex/bounds_strengthening.cpp index f1bf52c1e..4114e7e09 100644 --- a/cpp/src/dual_simplex/bounds_strengthening.cpp +++ b/cpp/src/dual_simplex/bounds_strengthening.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -154,7 +154,7 @@ bool bounds_strengthening_t::bounds_strengthening( bool is_infeasible = check_infeasibility(min_a, max_a, cnst_lb, cnst_ub, settings.primal_tol); if (is_infeasible) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible constraint %d, cnst_lb %e, cnst_ub %e, min_a %e, max_a %e\n", iter, i, @@ -211,7 +211,7 @@ bool bounds_strengthening_t::bounds_strengthening( new_ub = std::min(new_ub, upper_bounds[k]); if (new_lb > new_ub + 1e-6) { - settings.log.printf( + settings.log.debug( "Iter:: %d, Infeasible variable after update %d, %e > %e\n", iter, k, new_lb, new_ub); return false; } diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 91bcfc2c3..59f94c9a9 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -187,7 +187,7 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) { const f_t user_mip_gap = relative_gap(obj_value, lower_bound); if (user_mip_gap == std::numeric_limits::infinity()) { - return " - "; + return " - "; } else { constexpr int BUFFER_LEN = 32; char buffer[BUFFER_LEN]; @@ -196,37 +196,50 @@ std::string user_mip_gap(f_t obj_value, f_t lower_bound) } } -inline const char* feasible_solution_symbol(thread_type_t type) +#ifdef SHOW_DIVING_TYPE +inline char feasible_solution_symbol(bnb_worker_type_t type) { switch (type) { - case thread_type_t::EXPLORATION: return "B"; - case thread_type_t::DIVING: return "D"; - default: return "U"; + case bnb_worker_type_t::BEST_FIRST: return 'B'; + case bnb_worker_type_t::COEFFICIENT_DIVING: return 'C'; + case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'L'; + case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'P'; + case bnb_worker_type_t::GUIDED_DIVING: return 'G'; + default: return 'U'; } } - -inline bool has_children(node_solve_info_t status) +#else +inline char feasible_solution_symbol(bnb_worker_type_t type) { - return status == node_solve_info_t::UP_CHILD_FIRST || - status == node_solve_info_t::DOWN_CHILD_FIRST; + switch (type) { + case bnb_worker_type_t::BEST_FIRST: return 'B'; + case bnb_worker_type_t::COEFFICIENT_DIVING: return 'D'; + case bnb_worker_type_t::LINE_SEARCH_DIVING: return 'D'; + case bnb_worker_type_t::PSEUDOCOST_DIVING: return 'D'; + case bnb_worker_type_t::GUIDED_DIVING: return 'D'; + default: return 'U'; + } } +#endif } // namespace template branch_and_bound_t::branch_and_bound_t( const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings) + const simplex_solver_settings_t& solver_settings, + f_t start_time) : original_problem_(user_problem), settings_(solver_settings), original_lp_(user_problem.handle_ptr, 1, 1, 1), + Arow_(1, 1, 0), incumbent_(1), root_relax_soln_(1, 1), root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_exploration_status_t::UNSET) + solver_status_(mip_status_t::UNSET) { - exploration_stats_.start_time = tic(); + exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX settings_.log.printf("A"); original_problem_.A.print_matrix(); @@ -268,37 +281,61 @@ branch_and_bound_t::branch_and_bound_t( mutex_upper_.unlock(); } -template -f_t branch_and_bound_t::get_upper_bound() -{ - mutex_upper_.lock(); - const f_t upper_bound = upper_bound_; - mutex_upper_.unlock(); - return upper_bound; -} - template f_t branch_and_bound_t::get_lower_bound() { - f_t lower_bound = lower_bound_ceiling_.load(); - mutex_heap_.lock(); - if (heap_.size() > 0) { lower_bound = std::min(heap_.top()->lower_bound, lower_bound); } - mutex_heap_.unlock(); + f_t lower_bound = lower_bound_ceiling_.load(); + f_t heap_lower_bound = node_queue_.get_lower_bound(); + lower_bound = std::min(heap_lower_bound, lower_bound); for (i_t i = 0; i < local_lower_bounds_.size(); ++i) { lower_bound = std::min(local_lower_bounds_[i].load(), lower_bound); } - return lower_bound; + return std::isfinite(lower_bound) ? 
lower_bound : -inf; +} + +template +void branch_and_bound_t::report_heuristic(f_t obj) +{ + if (is_running) { + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); + std::string user_gap = user_mip_gap(user_obj, user_lower); + + settings_.log.printf( + "H %+13.6e %+10.6e %s %9.2f\n", + user_obj, + user_lower, + user_gap.c_str(), + toc(exploration_stats_.start_time)); + } else { + settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", + compute_user_objective(original_lp_, obj), + toc(exploration_stats_.start_time)); + } } template -i_t branch_and_bound_t::get_heap_size() +void branch_and_bound_t::report(char symbol, f_t obj, f_t lower_bound, i_t node_depth, i_t node_int_infeas) { - mutex_heap_.lock(); - i_t size = heap_.size(); - mutex_heap_.unlock(); - return size; + i_t nodes_explored = exploration_stats_.nodes_explored; + i_t nodes_unexplored = exploration_stats_.nodes_unexplored; + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = compute_user_objective(original_lp_, lower_bound); + f_t iter_node = exploration_stats_.total_lp_iters / nodes_explored; + std::string user_gap = user_mip_gap(user_obj, user_lower); + settings_.log.printf("%c %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", + symbol, + nodes_explored, + nodes_unexplored, + user_obj, + user_lower, + node_int_infeas, + node_depth, + iter_node, + user_gap.c_str(), + toc(exploration_stats_.start_time)); } template @@ -427,27 +464,7 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu mutex_upper_.unlock(); } - if (is_feasible) { - if (solver_status_ == mip_exploration_status_t::RUNNING) { - f_t user_obj = compute_user_objective(original_lp_, obj); - f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string gap = user_mip_gap(user_obj, user_lower); - - settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", - user_obj, - user_lower, - gap.c_str(), - toc(exploration_stats_.start_time)); - - find_reduced_cost_fixings(obj); - } else { - settings_.log.printf("New solution from primal heuristics. Objective %+.6e. 
Time %.2f\n", - compute_user_objective(original_lp_, obj), - toc(exploration_stats_.start_time)); - } - } - + if (is_feasible) { report_heuristic(obj); } if (attempt_repair) { mutex_repair_.lock(); repair_queue_.push_back(crushed_solution); @@ -543,17 +560,7 @@ void branch_and_bound_t::repair_heuristic_solutions() if (repaired_obj < upper_bound_) { upper_bound_ = repaired_obj; incumbent_.set_incumbent_solution(repaired_obj, repaired_solution); - - f_t obj = compute_user_objective(original_lp_, repaired_obj); - f_t lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string user_gap = user_mip_gap(obj, lower); - - settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", - obj, - lower, - user_gap.c_str(), - toc(exploration_stats_.start_time)); + report_heuristic(repaired_obj); if (settings_.solution_callback != nullptr) { std::vector original_x; @@ -561,7 +568,7 @@ void branch_and_bound_t::repair_heuristic_solutions() settings_.solution_callback(original_x, repaired_obj); } - find_reduced_cost_fixings(obj); + find_reduced_cost_fixings(repaired_obj); } mutex_upper_.unlock(); @@ -571,30 +578,24 @@ void branch_and_bound_t::repair_heuristic_solutions() } template -mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t& solution, - f_t lower_bound) +void branch_and_bound_t::set_final_solution(mip_solution_t& solution, + f_t lower_bound) { - mip_status_t mip_status = mip_status_t::UNSET; - - if (solver_status_ == mip_exploration_status_t::NUMERICAL) { + if (solver_status_ == mip_status_t::NUMERICAL) { settings_.log.printf("Numerical issue encountered. Stopping the solver...\n"); - mip_status = mip_status_t::NUMERICAL; } - if (solver_status_ == mip_exploration_status_t::TIME_LIMIT) { + if (solver_status_ == mip_status_t::TIME_LIMIT) { settings_.log.printf("Time limit reached. Stopping the solver...\n"); - mip_status = mip_status_t::TIME_LIMIT; } - if (solver_status_ == mip_exploration_status_t::NODE_LIMIT) { + if (solver_status_ == mip_status_t::NODE_LIMIT) { settings_.log.printf("Node limit reached. Stopping the solver...\n"); - mip_status = mip_status_t::NODE_LIMIT; } - f_t upper_bound = get_upper_bound(); - f_t gap = upper_bound - lower_bound; - f_t obj = compute_user_objective(original_lp_, upper_bound); + f_t gap = upper_bound_ - lower_bound; + f_t obj = compute_user_objective(original_lp_, upper_bound_.load()); f_t user_bound = compute_user_objective(original_lp_, lower_bound); - f_t gap_rel = user_relative_gap(original_lp_, upper_bound, lower_bound); + f_t gap_rel = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); bool is_maximization = original_lp_.obj_scale < 0.0; settings_.log.printf("Explored %d nodes in %.2fs.\n", @@ -607,7 +608,7 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t::set_final_solution(mip_solution_t 0 && exploration_stats_.nodes_unexplored == 0 && - upper_bound == inf) { - settings_.log.printf("Integer infeasible.\n"); - mip_status = mip_status_t::INFEASIBLE; + upper_bound_ == inf) { + settings_.log.printf("Integer infeasible. 
(set final solution)\n"); + solver_status_ = mip_status_t::INFEASIBLE; if (settings_.heuristic_preemption_callback != nullptr) { settings_.heuristic_preemption_callback(); } } } - if (upper_bound != inf) { + if (upper_bound_ != inf) { assert(incumbent_.has_incumbent); uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); } @@ -666,41 +667,25 @@ mip_status_t branch_and_bound_t::set_final_solution(mip_solution_t void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - thread_type_t thread_type) + bnb_worker_type_t thread_type) { - bool send_solution = false; - i_t nodes_explored = exploration_stats_.nodes_explored; - i_t nodes_unexplored = exploration_stats_.nodes_unexplored; + bool send_solution = false; + + settings_.log.debug("%c found a feasible solution with obj=%.10e.\n", + feasible_solution_symbol(thread_type), + compute_user_objective(original_lp_, leaf_objective)); mutex_upper_.lock(); if (leaf_objective < upper_bound_) { incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); - upper_bound_ = leaf_objective; - f_t lower_bound = get_lower_bound(); - f_t obj = compute_user_objective(original_lp_, upper_bound_); - f_t lower = compute_user_objective(original_lp_, lower_bound); - settings_.log.printf("%s%10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", - feasible_solution_symbol(thread_type), - nodes_explored, - nodes_unexplored, - obj, - lower, - 0, - leaf_depth, - nodes_explored > 0 ? exploration_stats_.total_lp_iters / nodes_explored : 0, - user_mip_gap(obj, lower).c_str(), - toc(exploration_stats_.start_time)); - - find_reduced_cost_fixings(upper_bound_); - + upper_bound_ = leaf_objective; + report(feasible_solution_symbol(thread_type), leaf_objective, get_lower_bound(), leaf_depth, 0); send_solution = true; } @@ -712,16 +697,18 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, mutex_upper_.unlock(); } -template -rounding_direction_t branch_and_bound_t::child_selection(mip_node_t* node_ptr) +// Martin's criteria for the preferred rounding direction (see [1]) +// [1] A. Martin, “Integer Programs with Block Structure,” +// Technische Universit¨at Berlin, Berlin, 1999. Accessed: Aug. 08, 2025. +// [Online]. 
Available: https://opus4.kobv.de/opus4-zib/frontdoor/index/index/docId/391 +template +rounding_direction_t martin_criteria(f_t val, f_t root_val) { - const i_t branch_var = node_ptr->get_down_child()->branch_var; - const f_t branch_var_val = node_ptr->get_down_child()->fractional_val; - const f_t down_val = std::floor(root_relax_soln_.x[branch_var]); - const f_t up_val = std::ceil(root_relax_soln_.x[branch_var]); - const f_t down_dist = branch_var_val - down_val; - const f_t up_dist = up_val - branch_var_val; - constexpr f_t eps = 1e-6; + const f_t down_val = std::floor(root_val); + const f_t up_val = std::ceil(root_val); + const f_t down_dist = val - down_val; + const f_t up_dist = up_val - val; + constexpr f_t eps = 1e-6; if (down_dist < up_dist + eps) { return rounding_direction_t::DOWN; @@ -732,22 +719,68 @@ rounding_direction_t branch_and_bound_t::child_selection(mip_node_t -node_solve_info_t branch_and_bound_t::solve_node( +branch_variable_t branch_and_bound_t::variable_selection( + mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + bnb_worker_type_t type) +{ + logger_t log; + log.log = false; + i_t branch_var = -1; + rounding_direction_t round_dir = rounding_direction_t::NONE; + std::vector current_incumbent; + + // If there is no incumbent, use pseudocost diving instead of guided diving + if (upper_bound_ == inf && type == bnb_worker_type_t::GUIDED_DIVING) { + type = bnb_worker_type_t::PSEUDOCOST_DIVING; + } + + switch (type) { + case bnb_worker_type_t::BEST_FIRST: + branch_var = pc_.variable_selection(fractional, solution, log); + round_dir = martin_criteria(solution[branch_var], root_relax_soln_.x[branch_var]); + return {branch_var, round_dir}; + + case bnb_worker_type_t::COEFFICIENT_DIVING: + return coefficient_diving( + original_lp_, fractional, solution, var_up_locks_, var_down_locks_, log); + + case bnb_worker_type_t::LINE_SEARCH_DIVING: + return line_search_diving(fractional, solution, root_relax_soln_.x, log); + + case bnb_worker_type_t::PSEUDOCOST_DIVING: + return pseudocost_diving(pc_, fractional, solution, root_relax_soln_.x, log); + + case bnb_worker_type_t::GUIDED_DIVING: + mutex_upper_.lock(); + current_incumbent = incumbent_.x; + mutex_upper_.unlock(); + return guided_diving(pc_, fractional, solution, current_incumbent, log); + + default: + log.debug("Unknown variable selection method: %d\n", type); + return {-1, rounding_direction_t::NONE}; + } +} + +template +dual::status_t branch_and_bound_t::solve_node_lp( mip_node_t* node_ptr, - search_tree_t& search_tree, lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, + std::vector& leaf_edge_norms, basis_update_mpf_t& basis_factors, std::vector& basic_list, std::vector& nonbasic_list, bounds_strengthening_t& node_presolver, - thread_type_t thread_type, + bnb_worker_type_t thread_type, bool recompute_bounds_and_basis, const std::vector& root_lower, const std::vector& root_upper, + bnb_stats_t& stats, logger_t& log) { - const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; - const f_t upper_bound = get_upper_bound(); if (node_ptr->depth > num_integer_variables_) { std::vector branched_variables(original_lp_.num_cols, 0); @@ -776,36 +809,44 @@ node_solve_info_t branch_and_bound_t::solve_node( printf("Depth %d > num_integer_variables %d\n", node_ptr->depth, num_integer_variables_); } } - - lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); std::vector& leaf_vstatus = node_ptr->vstatus; assert(leaf_vstatus.size() == leaf_problem.num_cols); 
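+  // Diving workers (thread_type != BEST_FIRST) solve their node LPs under an iteration budget
+  // proportional to the LP iterations already spent by the best-first exploration (see the
+  // iteration_limit setup below); once the budget is exhausted the node solve returns
+  // ITERATION_LIMIT and the dive is abandoned.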
simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = upper_bound + settings_.dual_tol; + lp_settings.cut_off = upper_bound_ + settings_.dual_tol; lp_settings.inside_mip = 2; lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time); lp_settings.scale_columns = false; + if (thread_type != bnb_worker_type_t::BEST_FIRST) { + i_t bnb_lp_iters = exploration_stats_.total_lp_iters; + f_t factor = settings_.diving_settings.iteration_limit_factor; + i_t max_iter = factor * bnb_lp_iters; + lp_settings.iteration_limit = max_iter - stats.total_lp_iters; + if (lp_settings.iteration_limit <= 0) { return dual::status_t::ITERATION_LIMIT; } + } + #ifdef LOG_NODE_SIMPLEX lp_settings.set_log(true); std::stringstream ss; ss << "simplex-" << std::this_thread::get_id() << ".log"; std::string logname; ss >> logname; - lp_settings.set_log_filename(logname); - lp_settings.log.enable_log_to_file("a+"); + lp_settings.log.set_log_file(logname, "a"); lp_settings.log.log_to_console = false; lp_settings.log.printf( - "%s node id = %d, branch var = %d, fractional val = %f, variable lower bound = %f, variable " - "upper bound = %f\n", + "%scurrent node: id = %d, depth = %d, branch var = %d, branch dir = %s, fractional val = " + "%f, variable lower bound = %f, variable upper bound = %f, branch vstatus = %d\n\n", settings_.log.log_prefix.c_str(), node_ptr->node_id, + node_ptr->depth, node_ptr->branch_var, + node_ptr->branch_dir == rounding_direction_t::DOWN ? "DOWN" : "UP", node_ptr->fractional_val, node_ptr->branch_var_lower, - node_ptr->branch_var_upper); + node_ptr->branch_var_upper, + node_ptr->vstatus[node_ptr->branch_var]); #endif // Reset the bound_changed markers @@ -827,7 +868,7 @@ node_solve_info_t branch_and_bound_t::solve_node( node_presolver.bounds_strengthening(leaf_problem.lower, leaf_problem.upper, lp_settings); dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + if (feasible) { i_t node_iter = 0; @@ -862,26 +903,45 @@ node_solve_info_t branch_and_bound_t::solve_node( lp_status = convert_lp_status_to_dual_status(second_status); } - if (thread_type == thread_type_t::EXPLORATION) { - exploration_stats_.total_lp_solve_time += toc(lp_start_time); - exploration_stats_.total_lp_iters += node_iter; - } + stats.total_lp_solve_time += toc(lp_start_time); + stats.total_lp_iters += node_iter; } +#ifdef LOG_NODE_SIMPLEX + lp_settings.log.printf("\nLP status: %d\n\n", lp_status); +#endif + + return lp_status; +} + +template +std::pair branch_and_bound_t::update_tree( + mip_node_t* node_ptr, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, + std::vector& leaf_edge_norms, + bnb_worker_type_t thread_type, + dual::status_t lp_status, + logger_t& log) +{ + const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + std::vector& leaf_vstatus = node_ptr->vstatus; + if (lp_status == dual::status_t::DUAL_UNBOUNDED) { // Node was infeasible. Do not branch node_ptr->lower_bound = inf; search_tree.graphviz_node(log, node_ptr, "infeasible", 0.0); search_tree.update(node_ptr, node_status_t::INFEASIBLE); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::INFEASIBLE, rounding_direction_t::NONE}; } else if (lp_status == dual::status_t::CUTOFF) { // Node was cut off. 
Do not branch - node_ptr->lower_bound = upper_bound; + node_ptr->lower_bound = upper_bound_; f_t leaf_objective = compute_objective(leaf_problem, leaf_solution.x); search_tree.graphviz_node(log, node_ptr, "cut off", leaf_objective); search_tree.update(node_ptr, node_status_t::FATHOMED); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } else if (lp_status == dual::status_t::OPTIMAL) { // LP was feasible @@ -911,10 +971,12 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "lower bound", leaf_objective); pc_.update_pseudo_costs(node_ptr, leaf_objective); - if (settings_.node_processed_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); - settings_.node_processed_callback(original_x, leaf_objective); + if (thread_type == bnb_worker_type_t::BEST_FIRST) { + if (settings_.node_processed_callback != nullptr) { + std::vector original_x; + uncrush_primal_solution(original_problem_, original_lp_, leaf_solution.x, original_x); + settings_.node_processed_callback(original_x, leaf_objective); + } } if (leaf_num_fractional == 0) { @@ -922,50 +984,39 @@ node_solve_info_t branch_and_bound_t::solve_node( add_feasible_solution(leaf_objective, leaf_solution.x, node_ptr->depth, thread_type); search_tree.graphviz_node(log, node_ptr, "integer feasible", leaf_objective); search_tree.update(node_ptr, node_status_t::INTEGER_FEASIBLE); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::INTEGER_FEASIBLE, rounding_direction_t::NONE}; - } else if (leaf_objective <= upper_bound + abs_fathom_tol) { + } else if (leaf_objective <= upper_bound_ + abs_fathom_tol) { // Choose fractional variable to branch on + auto [branch_var, round_dir] = + variable_selection(node_ptr, leaf_fractional, leaf_solution.x, thread_type); - i_t branch_var = -1; - if (lp_settings.reliability_branching > 0) { - branch_var = pc_.reliable_variable_selection(leaf_problem, - lp_settings, - var_types_, - leaf_vstatus, - leaf_edge_norms, - leaf_fractional, - leaf_solution.x, - leaf_objective, - lp_settings.log); - } else { - branch_var = pc_.variable_selection(leaf_fractional, leaf_solution.x, lp_settings.log); + assert(leaf_vstatus.size() == leaf_problem.num_cols); + assert(branch_var >= 0); + assert(round_dir != rounding_direction_t::NONE); + + // Note that the exploration thread is the only one that can insert new nodes into the heap, + // and thus, we only need to calculate the objective estimate here (it is used for + // sorting the nodes for diving). 
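+      // As a rough sketch (the exact computation lives in pseudo_costs_t::obj_estimate and may
+      // differ), the classic pseudocost estimate from Achterberg (2007) is
+      //   estimate = node_lower_bound + sum over fractional j of min(psi_down_j * f_down_j, psi_up_j * f_up_j),
+      // where f_down_j = x_j - floor(x_j), f_up_j = ceil(x_j) - x_j, and psi_down_j / psi_up_j are
+      // the average objective degradations per unit of rounding recorded for variable j.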
+ if (thread_type == bnb_worker_type_t::BEST_FIRST) { + logger_t pc_log; + pc_log.log = false; + node_ptr->objective_estimate = + pc_.obj_estimate(leaf_fractional, leaf_solution.x, node_ptr->lower_bound, pc_log); } - assert(leaf_vstatus.size() == leaf_problem.num_cols); search_tree.branch( node_ptr, branch_var, leaf_solution.x[branch_var], leaf_num_fractional, leaf_vstatus, leaf_problem, log); search_tree.update(node_ptr, node_status_t::HAS_CHILDREN); - - rounding_direction_t round_dir = child_selection(node_ptr); - - if (round_dir == rounding_direction_t::UP) { - return node_solve_info_t::UP_CHILD_FIRST; - } else { - return node_solve_info_t::DOWN_CHILD_FIRST; - } + return {node_status_t::HAS_CHILDREN, round_dir}; } else { search_tree.graphviz_node(log, node_ptr, "fathomed", leaf_objective); search_tree.update(node_ptr, node_status_t::FATHOMED); - return node_solve_info_t::NO_CHILDREN; + return {node_status_t::FATHOMED, rounding_direction_t::NONE}; } - } else if (lp_status == dual::status_t::TIME_LIMIT) { - search_tree.graphviz_node(log, node_ptr, "timeout", 0.0); - return node_solve_info_t::TIME_LIMIT; - } else { - if (thread_type == thread_type_t::EXPLORATION) { + if (thread_type == bnb_worker_type_t::BEST_FIRST) { fetch_min(lower_bound_ceiling_, node_ptr->lower_bound); log.printf( "LP returned status %d on node %d. This indicates a numerical issue. The best bound is set " @@ -978,17 +1029,15 @@ node_solve_info_t branch_and_bound_t::solve_node( search_tree.graphviz_node(log, node_ptr, "numerical", 0.0); search_tree.update(node_ptr, node_status_t::NUMERICAL); - return node_solve_info_t::NUMERICAL; + return {node_status_t::NUMERICAL, rounding_direction_t::NONE}; } } template void branch_and_bound_t::exploration_ramp_up(mip_node_t* node, - search_tree_t* search_tree, - const csr_matrix_t& Arow, i_t initial_heap_size) { - if (solver_status_ != mip_exploration_status_t::RUNNING) { return; } + if (solver_status_ != mip_status_t::UNSET) { return; } // Note that we do not know which thread will execute the // `exploration_ramp_up` task, so we allow to any thread @@ -996,13 +1045,13 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod repair_heuristic_solutions(); f_t lower_bound = node->lower_bound; - f_t upper_bound = get_upper_bound(); + f_t upper_bound = upper_bound_; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); f_t abs_gap = upper_bound - lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree->graphviz_node(settings_.log, node, "cutoff", node->lower_bound); - search_tree->update(node, node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, node, "cutoff", node->lower_bound); + search_tree_.update(node, node_status_t::FATHOMED); --exploration_stats_.nodes_unexplored; return; } @@ -1018,23 +1067,7 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod bool should_report = should_report_.exchange(false); if (should_report) { - f_t obj = compute_user_objective(original_lp_, upper_bound); - f_t user_lower = compute_user_objective(original_lp_, root_objective_); - std::string gap_user = user_mip_gap(obj, user_lower); - i_t nodes_explored = exploration_stats_.nodes_explored; - i_t nodes_unexplored = exploration_stats_.nodes_unexplored; - - settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", - nodes_explored, - nodes_unexplored, - obj, - user_lower, - node->integer_infeasible, - node->depth, - nodes_explored > 0 ? 
exploration_stats_.total_lp_iters / nodes_explored : 0, - gap_user.c_str(), - now); - + report(' ', upper_bound, root_objective_, node->depth, node->integer_infeasible); exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); should_report_ = true; @@ -1042,84 +1075,94 @@ void branch_and_bound_t::exploration_ramp_up(mip_node_t* nod } if (now > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; + solver_status_ = mip_status_t::TIME_LIMIT; return; } // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); const i_t m = leaf_problem.num_rows; basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); std::vector basic_list(m); std::vector nonbasic_list; - node_solve_info_t status = solve_node(node, - *search_tree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::EXPLORATION, - true, - original_lp_.lower, - original_lp_.upper, - settings_.log); + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + dual::status_t lp_status = solve_node_lp(node, + leaf_problem, + leaf_solution, + leaf_edge_norms, + basis_factors, + basic_list, + nonbasic_list, + node_presolver, + bnb_worker_type_t::BEST_FIRST, + true, + original_lp_.lower, + original_lp_.upper, + exploration_stats_, + settings_.log); + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + return; + } ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + auto [node_status, round_dir] = update_tree(node, + search_tree_, + leaf_problem, + leaf_solution, + leaf_edge_norms, + bnb_worker_type_t::BEST_FIRST, + lp_status, + settings_.log); - } else if (has_children(status)) { + if (node_status == node_status_t::HAS_CHILDREN) { exploration_stats_.nodes_unexplored += 2; // If we haven't generated enough nodes to keep the threads busy, continue the ramp up phase if (exploration_stats_.nodes_unexplored < initial_heap_size) { #pragma omp task - exploration_ramp_up(node->get_down_child(), search_tree, Arow, initial_heap_size); + exploration_ramp_up(node->get_down_child(), initial_heap_size); #pragma omp task - exploration_ramp_up(node->get_up_child(), search_tree, Arow, initial_heap_size); + exploration_ramp_up(node->get_up_child(), initial_heap_size); } else { // We've generated enough nodes, push further nodes onto the heap - mutex_heap_.lock(); - heap_.push(node->get_down_child()); - heap_.push(node->get_up_child()); - mutex_heap_.unlock(); + node_queue_.push(node->get_down_child()); + node_queue_.push(node->get_up_child()); } } } template -void branch_and_bound_t::explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_factors, - std::vector& basic_list, - std::vector& nonbasic_list) +void branch_and_bound_t::plunge_from(i_t task_id, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& 
basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list) { bool recompute_bounds_and_basis = true; std::deque*> stack; stack.push_front(start_node); - while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { if (task_id == 0) { repair_heuristic_solutions(); } mip_node_t* node_ptr = stack.front(); stack.pop_front(); f_t lower_bound = node_ptr->lower_bound; - f_t upper_bound = get_upper_bound(); + f_t upper_bound = upper_bound_; f_t abs_gap = upper_bound - lower_bound; f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); @@ -1133,8 +1176,9 @@ void branch_and_bound_t::explore_subtree(i_t task_id, local_lower_bounds_[task_id] = lower_bound; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - search_tree.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); - search_tree.update(node_ptr, node_status_t::FATHOMED); + search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound); + search_tree_.update(node_ptr, node_status_t::FATHOMED); + recompute_bounds_and_basis = true; --exploration_stats_.nodes_unexplored; continue; } @@ -1149,59 +1193,61 @@ void branch_and_bound_t::explore_subtree(i_t task_id, abs_gap < 10 * settings_.absolute_mip_gap_tol) && time_since_last_log >= 1) || (time_since_last_log > 30) || now > settings_.time_limit) { - f_t obj = compute_user_objective(original_lp_, upper_bound); - f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); - std::string gap_user = user_mip_gap(obj, user_lower); - i_t nodes_explored = exploration_stats_.nodes_explored; - i_t nodes_unexplored = exploration_stats_.nodes_unexplored; - settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", - nodes_explored, - nodes_unexplored, - obj, - user_lower, - node_ptr->integer_infeasible, - node_ptr->depth, - nodes_explored > 0 ? 
exploration_stats_.total_lp_iters / nodes_explored : 0, - gap_user.c_str(), - now); + report(' ', upper_bound, get_lower_bound(), node_ptr->depth, node_ptr->integer_infeasible); exploration_stats_.last_log = tic(); exploration_stats_.nodes_since_last_log = 0; } } if (now > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + solver_status_ = mip_status_t::TIME_LIMIT; + break; } if (exploration_stats_.nodes_explored >= settings_.node_limit) { - solver_status_ = mip_exploration_status_t::NODE_LIMIT; - return; + solver_status_ = mip_status_t::NODE_LIMIT; + break; } - node_solve_info_t status = solve_node(node_ptr, - search_tree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::EXPLORATION, - recompute_bounds_and_basis, - original_lp_.lower, - original_lp_.upper, - settings_.log); - - recompute_bounds_and_basis = !has_children(status); + lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + dual::status_t lp_status = solve_node_lp(node_ptr, + leaf_problem, + leaf_solution, + leaf_edge_norms, + basis_factors, + basic_list, + nonbasic_list, + node_presolver, + bnb_worker_type_t::BEST_FIRST, + recompute_bounds_and_basis, + original_lp_.lower, + original_lp_.upper, + exploration_stats_, + settings_.log); + + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + break; + } ++exploration_stats_.nodes_since_last_log; ++exploration_stats_.nodes_explored; --exploration_stats_.nodes_unexplored; - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; + auto [node_status, round_dir] = update_tree(node_ptr, + search_tree_, + leaf_problem, + leaf_solution, + leaf_edge_norms, + bnb_worker_type_t::BEST_FIRST, + lp_status, + settings_.log); + + recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; - } else if (has_children(status)) { + if (node_status == node_status_t::HAS_CHILDREN) { // The stack should only contain the children of the current parent. // If the stack size is greater than 0, // we pop the current node from the stack and place it in the global heap, @@ -1209,33 +1255,12 @@ void branch_and_bound_t::explore_subtree(i_t task_id, if (stack.size() > 0) { mip_node_t* node = stack.back(); stack.pop_back(); - - // The order here matters. We want to create a copy of the node - // before adding to the global heap. Otherwise, - // some thread may consume the node (possibly fathoming it) - // before we had the chance to add to the diving queue. - // This lead to a SIGSEGV. Although, in this case, it - // would be better if we discard the node instead. 
- if (get_heap_size() > settings_.num_bfs_threads) { - std::vector lower = original_lp_.lower; - std::vector upper = original_lp_.upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - mutex_dive_queue_.lock(); - diving_queue_.emplace(node->detach_copy(), std::move(lower), std::move(upper)); - mutex_dive_queue_.unlock(); - } - - mutex_heap_.lock(); - heap_.push(node); - mutex_heap_.unlock(); + node_queue_.push(node); } exploration_stats_.nodes_unexplored += 2; - if (status == node_solve_info_t::UP_CHILD_FIRST) { + if (round_dir == rounding_direction_t::UP) { stack.push_front(node_ptr->get_down_child()); stack.push_front(node_ptr->get_up_child()); } else { @@ -1247,177 +1272,222 @@ } template -void branch_and_bound_t::best_first_thread(i_t task_id, - search_tree_t& search_tree, - const csr_matrix_t& Arow) +void branch_and_bound_t::best_first_thread(i_t task_id) { f_t lower_bound = -inf; - f_t upper_bound = inf; f_t abs_gap = inf; f_t rel_gap = inf; // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); const i_t m = leaf_problem.num_rows; basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); std::vector basic_list(m); std::vector nonbasic_list; - while (solver_status_ == mip_exploration_status_t::RUNNING && - abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - mip_node_t* start_node = nullptr; - + while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && + rel_gap > settings_.relative_mip_gap_tol && + (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { + // In the current implementation, we use the number of active subtrees to decide + // when to stop the execution. We need to increment the counter at the same + // time as we pop a node from the queue, to avoid some threads exiting + // the main loop thinking that the solver has already finished. + // This will not be needed in the master-worker model. + node_queue_.lock(); // If there is any node left in the heap, we pop the top node and explore it.
- mutex_heap_.lock(); - if (heap_.size() > 0) { - start_node = heap_.top(); - heap_.pop(); - active_subtrees_++; - } - mutex_heap_.unlock(); + std::optional*> start_node = node_queue_.pop_best_first(); + if (start_node.has_value()) { active_subtrees_++; }; + node_queue_.unlock(); - if (start_node != nullptr) { - if (get_upper_bound() < start_node->lower_bound) { + if (start_node.has_value()) { + if (upper_bound_ < start_node.value()->lower_bound) { // This node was put on the heap earlier but its lower bound is now greater than the // current upper bound - search_tree.graphviz_node(settings_.log, start_node, "cutoff", start_node->lower_bound); - search_tree.update(start_node, node_status_t::FATHOMED); + search_tree_.graphviz_node( + settings_.log, start_node.value(), "cutoff", start_node.value()->lower_bound); + search_tree_.update(start_node.value(), node_status_t::FATHOMED); active_subtrees_--; continue; } // Best-first search with plunging - explore_subtree(task_id, - start_node, - search_tree, - leaf_problem, - node_presolver, - basis_factors, - basic_list, - nonbasic_list); + plunge_from(task_id, + start_node.value(), + leaf_problem, + node_presolver, + basis_factors, + basic_list, + nonbasic_list); active_subtrees_--; } lower_bound = get_lower_bound(); - upper_bound = get_upper_bound(); - abs_gap = upper_bound - lower_bound; - rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + abs_gap = upper_bound_ - lower_bound; + rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); } + is_running = false; + // Check if it is the last thread that exited the loop and no // timeout or numerical error has happen. - if (solver_status_ == mip_exploration_status_t::RUNNING) { - if (active_subtrees_ == 0) { - solver_status_ = mip_exploration_status_t::COMPLETED; - } else { - local_lower_bounds_[task_id] = inf; - } + if (solver_status_ == mip_status_t::UNSET) { + if (active_subtrees_ > 0) { local_lower_bounds_[task_id] = inf; } } } template -void branch_and_bound_t::diving_thread(const csr_matrix_t& Arow) +void branch_and_bound_t::dive_from(mip_node_t& start_node, + const std::vector& start_lower, + const std::vector& start_upper, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_factors, + std::vector& basic_list, + std::vector& nonbasic_list, + bnb_worker_type_t diving_type) { logger_t log; log.log = false; + + const i_t diving_node_limit = settings_.diving_settings.node_limit; + const i_t diving_backtrack_limit = settings_.diving_settings.backtrack_limit; + bool recompute_bounds_and_basis = true; + search_tree_t dive_tree(std::move(start_node)); + std::deque*> stack; + stack.push_front(&dive_tree.root); + + bnb_stats_t dive_stats; + dive_stats.total_lp_iters = 0; + dive_stats.total_lp_solve_time = 0; + dive_stats.nodes_explored = 0; + dive_stats.nodes_unexplored = 1; + + while (stack.size() > 0 && solver_status_ == mip_status_t::UNSET && is_running) { + mip_node_t* node_ptr = stack.front(); + stack.pop_front(); + + f_t lower_bound = node_ptr->lower_bound; + f_t upper_bound = upper_bound_; + f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); + + if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { + recompute_bounds_and_basis = true; + continue; + } + + if (toc(exploration_stats_.start_time) > settings_.time_limit) { break; } + if (dive_stats.nodes_explored > diving_node_limit) { break; } + + lp_solution_t leaf_solution(leaf_problem.num_rows, 
leaf_problem.num_cols); + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; + dual::status_t lp_status = solve_node_lp(node_ptr, + leaf_problem, + leaf_solution, + leaf_edge_norms, + basis_factors, + basic_list, + nonbasic_list, + node_presolver, + diving_type, + recompute_bounds_and_basis, + start_lower, + start_upper, + dive_stats, + log); + + if (lp_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + break; + } else if (lp_status == dual::status_t::ITERATION_LIMIT) { + break; + } + + ++dive_stats.nodes_explored; + + auto [node_status, round_dir] = + update_tree(node_ptr, dive_tree, leaf_problem, leaf_solution, leaf_edge_norms, diving_type, lp_status, log); + recompute_bounds_and_basis = node_status != node_status_t::HAS_CHILDREN; + + if (node_status == node_status_t::HAS_CHILDREN) { + if (round_dir == rounding_direction_t::UP) { + stack.push_front(node_ptr->get_down_child()); + stack.push_front(node_ptr->get_up_child()); + } else { + stack.push_front(node_ptr->get_up_child()); + stack.push_front(node_ptr->get_down_child()); + } + } + + // Remove nodes that we can no longer backtrack to (i.e., from the current node, we can only + // backtrack to a node that is at most `diving_backtrack_limit` levels above the current node). + if (stack.size() > 1 && stack.front()->depth - stack.back()->depth > diving_backtrack_limit) { + stack.pop_back(); + } + } +} + +template +void branch_and_bound_t::diving_thread(bnb_worker_type_t diving_type) +{ // Make a copy of the original LP. We will modify its bounds at each leaf lp_problem_t leaf_problem = original_lp_; std::vector row_sense; - bounds_strengthening_t node_presolver(leaf_problem, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolver(leaf_problem, Arow_, row_sense, var_types_); const i_t m = leaf_problem.num_rows; basis_update_mpf_t basis_factors(m, settings_.refactor_frequency); std::vector basic_list(m); std::vector nonbasic_list; - while (solver_status_ == mip_exploration_status_t::RUNNING && - (active_subtrees_ > 0 || get_heap_size() > 0)) { - std::optional> start_node; + std::vector start_lower; + std::vector start_upper; + bool reset_starting_bounds = true; + + while (solver_status_ == mip_status_t::UNSET && is_running && + (active_subtrees_ > 0 || node_queue_.best_first_queue_size() > 0)) { + if (reset_starting_bounds) { + start_lower = original_lp_.lower; + start_upper = original_lp_.upper; + std::fill(node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); + reset_starting_bounds = false; + } - mutex_dive_queue_.lock(); - if (diving_queue_.size() > 0) { start_node = diving_queue_.pop(); } - mutex_dive_queue_.unlock(); + // In the current implementation, multiple threads can pop nodes + // from the queue, so we need to initialize the lower and upper bounds here + // to avoid another thread fathoming (i.e., deleting) the node before we can read + // the variable bounds from the tree. + // This will not be needed in the master-worker model.
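+    // `detach_copy` hands this worker its own copy of the node; `dive_from` then builds a local
+    // search tree from that copy, so the dive explores its own subtree instead of the shared
+    // `search_tree_`.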
+ node_queue_.lock(); + std::optional*> node_ptr = node_queue_.pop_diving(); + std::optional> start_node = std::nullopt; + + if (node_ptr.has_value()) { + node_ptr.value()->get_variable_bounds( + start_lower, start_upper, node_presolver.bounds_changed); + start_node = node_ptr.value()->detach_copy(); + } + node_queue_.unlock(); if (start_node.has_value()) { - if (get_upper_bound() < start_node->node.lower_bound) { continue; } - - bool recompute_bounds_and_basis = true; - i_t nodes_explored = 0; - search_tree_t subtree(std::move(start_node->node)); - std::deque*> stack; - stack.push_front(&subtree.root); - - while (stack.size() > 0 && solver_status_ == mip_exploration_status_t::RUNNING) { - mip_node_t* node_ptr = stack.front(); - stack.pop_front(); - f_t upper_bound = get_upper_bound(); - f_t rel_gap = user_relative_gap(original_lp_, upper_bound, node_ptr->lower_bound); - - if (node_ptr->lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { - recompute_bounds_and_basis = true; - continue; - } - - if (toc(exploration_stats_.start_time) > settings_.time_limit) { return; } - - if (nodes_explored >= 1000) { break; } - - node_solve_info_t status = solve_node(node_ptr, - subtree, - leaf_problem, - basis_factors, - basic_list, - nonbasic_list, - node_presolver, - thread_type_t::DIVING, - recompute_bounds_and_basis, - start_node->lower, - start_node->upper, - log); - - nodes_explored++; - - recompute_bounds_and_basis = !has_children(status); - - if (status == node_solve_info_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return; - - } else if (has_children(status)) { - if (status == node_solve_info_t::UP_CHILD_FIRST) { - stack.push_front(node_ptr->get_down_child()); - stack.push_front(node_ptr->get_up_child()); - } else { - stack.push_front(node_ptr->get_up_child()); - stack.push_front(node_ptr->get_down_child()); - } - } - - if (stack.size() > 1) { - // If the diving thread is consuming the nodes faster than the - // best first search, then we split the current subtree at the - // lowest possible point and move to the queue, so it can - // be picked by another thread. 
- if (std::lock_guard lock(mutex_dive_queue_); - diving_queue_.size() < min_diving_queue_size_) { - mip_node_t* new_node = stack.back(); - stack.pop_back(); - - std::vector lower = start_node->lower; - std::vector upper = start_node->upper; - std::fill( - node_presolver.bounds_changed.begin(), node_presolver.bounds_changed.end(), false); - new_node->get_variable_bounds(lower, upper, node_presolver.bounds_changed); - - diving_queue_.emplace(new_node->detach_copy(), std::move(lower), std::move(upper)); - } - } - } + reset_starting_bounds = true; + + if (upper_bound_ < start_node->lower_bound) { continue; } + bool is_feasible = node_presolver.bounds_strengthening(start_lower, start_upper, settings_); + if (!is_feasible) { continue; } + + dive_from(start_node.value(), + start_lower, + start_upper, + leaf_problem, + node_presolver, + basis_factors, + basic_list, + nonbasic_list, + diving_type); } } } @@ -1548,9 +1618,35 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut logger_t log; log.log = false; log.log_prefix = settings_.log.log_prefix; - solver_status_ = mip_exploration_status_t::UNSET; + solver_status_ = mip_status_t::UNSET; + is_running = false; exploration_stats_.nodes_unexplored = 0; exploration_stats_.nodes_explored = 0; + original_lp_.A.to_compressed_row(Arow_); + + std::vector diving_strategies; + diving_strategies.reserve(4); + + if (settings_.diving_settings.pseudocost_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::PSEUDOCOST_DIVING); + } + + if (settings_.diving_settings.line_search_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::LINE_SEARCH_DIVING); + } + + if (settings_.diving_settings.guided_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::GUIDED_DIVING); + } + + if (settings_.diving_settings.coefficient_diving != 0) { + diving_strategies.push_back(bnb_worker_type_t::COEFFICIENT_DIVING); + calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); + } + + if (diving_strategies.empty()) { + settings_.log.printf("Warning: All diving heuristics are disabled!\n"); + } printf("Branch and bound solve called\n"); @@ -1626,15 +1722,16 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (root_status == lp_status_t::TIME_LIMIT) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return set_final_solution(solution, -inf); + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, -inf); + return solver_status_; } assert(root_vstatus_.size() == original_lp_.num_cols); set_uninitialized_steepest_edge_norms(edge_norms_); root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); - local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; @@ -1679,10 +1776,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return mip_status_t::OPTIMAL; } - csr_matrix_t Arow(1, 1, 1); - original_lp_.A.to_compressed_row(Arow); - - solver_status_ = mip_exploration_status_t::RUNNING; + is_running = true; lower_bound_ceiling_ = inf; if (num_fractional != 0) { @@ -1692,7 +1786,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } cut_pool_t cut_pool(original_lp_.num_cols, settings_); - cut_generation_t cut_generation(cut_pool, original_lp_, settings_, Arow, new_slacks_, var_types_); + cut_generation_t cut_generation(cut_pool, original_lp_, settings_, Arow_, new_slacks_, var_types_); 
std::vector saved_solution; #if 1 @@ -1745,7 +1839,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut // Generate cuts and add them to the cut pool f_t cut_start_time = tic(); - cut_generation.generate_cuts(original_lp_, settings_, Arow, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); + cut_generation.generate_cuts(original_lp_, settings_, Arow_, new_slacks_, var_types_, basis_update, root_relax_soln_.x, basic_list, nonbasic_list); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings_.log.printf("Cut generation time %.2f seconds\n", cut_generation_time); @@ -1773,9 +1867,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut num_cg_cuts++; } } +#ifdef PRINT_CUT_INFO cut_pool.print_cutpool_types(); print_cut_types("In LP ", cut_types, settings_); printf("Cut pool size: %d\n", cut_pool.pool_size()); +#endif #ifdef CHECK_CUT_MATRIX if (cuts_to_add.check_matrix() != 0) { @@ -1839,9 +1935,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("Before A check\n"); original_lp_.A.check_matrix(); #endif - original_lp_.A.to_compressed_row(Arow); + original_lp_.A.to_compressed_row(Arow_); - bounds_strengthening_t node_presolve(original_lp_, Arow, row_sense, var_types_); + bounds_strengthening_t node_presolve(original_lp_, Arow_, row_sense, var_types_); bool feasible = node_presolve.bounds_strengthening(original_lp_.lower, original_lp_.upper, settings_); if (!feasible) { @@ -1884,12 +1980,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return mip_status_t::NUMERICAL; } - local_lower_bounds_.assign(settings_.num_bfs_threads, root_objective_); + local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); mutex_original_lp_.lock(); remove_cuts(original_lp_, settings_, - Arow, + Arow_, new_slacks_, original_rows, var_types_, @@ -1906,14 +2002,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); // TODO: Get upper bound from heuristics - f_t upper_bound = get_upper_bound(); - f_t obj = num_fractional != 0 ? get_upper_bound() : root_objective_; + f_t obj = num_fractional != 0 ? upper_bound_.load() : root_objective_; f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, root_objective_); std::string gap = num_fractional != 0 ? 
user_mip_gap(user_obj, user_lower) : "0.0%"; - settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", + settings_.log.printf(" %10d %10lu %+13.6e %+10.6e %6d %6d %7.1e %s %9.2f\n", 0, 0, user_obj, @@ -1961,8 +2056,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut pc_); if (toc(exploration_stats_.start_time) > settings_.time_limit) { - solver_status_ = mip_exploration_status_t::TIME_LIMIT; - return set_final_solution(solution, root_objective_); + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; } // Choose variable to branch on @@ -1981,53 +2077,58 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut settings_.log.printf("Exploring the B&B tree using %d threads (best-first = %d, diving = %d)\n", settings_.num_threads, - settings_.num_bfs_threads, - settings_.num_diving_threads); - + settings_.num_bfs_workers, + settings_.num_threads - settings_.num_bfs_workers); exploration_stats_.nodes_explored = 0; exploration_stats_.nodes_unexplored = 2; exploration_stats_.nodes_since_last_log = 0; exploration_stats_.last_log = tic(); active_subtrees_ = 0; - min_diving_queue_size_ = 4 * settings_.num_diving_threads; - solver_status_ = mip_exploration_status_t::RUNNING; lower_bound_ceiling_ = inf; should_report_ = true; + settings_.log.printf( + " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node | Gap " + "| Time |\n"); #pragma omp parallel num_threads(settings_.num_threads) { #pragma omp master { - auto down_child = search_tree_.root.get_down_child(); - auto up_child = search_tree_.root.get_up_child(); - i_t initial_size = 2 * settings_.num_threads; + auto down_child = search_tree_.root.get_down_child(); + auto up_child = search_tree_.root.get_up_child(); + i_t initial_size = 2 * settings_.num_threads; + const i_t num_strategies = diving_strategies.size(); +#pragma omp taskgroup + { #pragma omp task - exploration_ramp_up(down_child, &search_tree_, Arow, initial_size); + exploration_ramp_up(down_child, initial_size); #pragma omp task - exploration_ramp_up(up_child, &search_tree_, Arow, initial_size); - } - -#pragma omp barrier + exploration_ramp_up(up_child, initial_size); + } -#pragma omp master - { - for (i_t i = 0; i < settings_.num_bfs_threads; i++) { + for (i_t i = 0; i < settings_.num_bfs_workers; i++) { #pragma omp task - best_first_thread(i, search_tree_, Arow); + best_first_thread(i); } - for (i_t i = 0; i < settings_.num_diving_threads; i++) { + if (!diving_strategies.empty()) { + for (i_t k = 0; k < settings_.diving_settings.num_diving_workers; k++) { + const bnb_worker_type_t diving_type = diving_strategies[k % num_strategies]; #pragma omp task - diving_thread(Arow); + diving_thread(diving_type); + } } } } - f_t lower_bound = heap_.size() > 0 ? heap_.top()->lower_bound : search_tree_.root.lower_bound; - return set_final_solution(solution, lower_bound); + is_running = false; + f_t lower_bound = node_queue_.best_first_queue_size() > 0 ? node_queue_.get_lower_bound() + : search_tree_.root.lower_bound; + set_final_solution(solution, lower_bound); + return solver_status_; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 5cd35f263..4a46db1da 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -1,15 +1,16 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once -#include +#include #include #include +#include #include #include #include @@ -20,7 +21,6 @@ #include #include -#include #include namespace cuopt::linear_programming::dual_simplex { @@ -35,31 +35,17 @@ enum class mip_status_t { UNSET = 6, // The status is not set }; -enum class mip_exploration_status_t { - UNSET = 0, // The status is not set - TIME_LIMIT = 1, // The solver reached a time limit - NODE_LIMIT = 2, // The maximum number of nodes was reached (not implemented) - NUMERICAL = 3, // The solver encountered a numerical error - RUNNING = 4, // The solver is currently exploring the tree - COMPLETED = 5, // The solver finished exploring the tree -}; - -enum class node_solve_info_t { - NO_CHILDREN = 0, // The node does not produced children - UP_CHILD_FIRST = 1, // The up child should be explored first - DOWN_CHILD_FIRST = 2, // The down child should be explored first - TIME_LIMIT = 3, // The solver reached a time limit - ITERATION_LIMIT = 4, // The solver reached a iteration limit - NUMERICAL = 5 // The solver encounter a numerical error when solving the node -}; - -// Indicate the search and variable selection algorithms used by the thread (See [1]). +// Indicate the search and variable selection algorithms used by each thread +// in B&B (See [1]). // // [1] T. Achterberg, “Constraint Integer Programming,” PhD, Technischen Universität Berlin, // Berlin, 2007. doi: 10.14279/depositonce-1634. -enum class thread_type_t { - EXPLORATION = 0, // Best-First + Plunging. Pseudocost branching + Martin's criteria. - DIVING = 1, +enum class bnb_worker_type_t { + BEST_FIRST = 0, // Best-First + Plunging. + PSEUDOCOST_DIVING = 1, // Pseudocost diving (9.2.5) + LINE_SEARCH_DIVING = 2, // Line search diving (9.2.4) + GUIDED_DIVING = 3, // Guided diving (9.2.3). If no incumbent is found yet, use pseudocost diving. + COEFFICIENT_DIVING = 4 // Coefficient diving (9.2.1) }; template @@ -68,14 +54,25 @@ class bounds_strengthening_t; template void upper_bound_callback(f_t upper_bound); +template +struct bnb_stats_t { + f_t start_time = 0.0; + omp_atomic_t total_lp_solve_time = 0.0; + omp_atomic_t nodes_explored = 0; + omp_atomic_t nodes_unexplored = 0; + omp_atomic_t total_lp_iters = 0; + + // This should only be used by the main thread + omp_atomic_t last_log = 0.0; + omp_atomic_t nodes_since_last_log = 0; +}; + template class branch_and_bound_t { public: - template - using mip_node_heap_t = std::priority_queue, node_compare_t>; - branch_and_bound_t(const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings); + const simplex_solver_settings_t& solver_settings, + f_t start_time); // Set an initial guess based on the user_problem. This should be called before solve. 
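+  // Typical call order (sketch only; `problem`, `settings`, `start_time`, `guess`, and `solution`
+  // are placeholders for caller-provided objects):
+  //   branch_and_bound_t solver(problem, settings, start_time);
+  //   solver.set_initial_guess(guess);   // optional, must be called before solve()
+  //   mip_status_t status = solver.solve(solution);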
void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } @@ -109,9 +106,7 @@ class branch_and_bound_t { f_t& repaired_obj, std::vector& repaired_solution) const; - f_t get_upper_bound(); f_t get_lower_bound(); - i_t get_heap_size(); bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -136,11 +131,18 @@ class branch_and_bound_t { std::vector guess_; // LP relaxation + csr_matrix_t Arow_; lp_problem_t original_lp_; std::vector new_slacks_; std::vector var_types_; i_t num_integer_variables_; + // Variable locks (see definition 3.3 from T. Achterberg, “Constraint Integer Programming,” + // PhD, Technischen Universität Berlin, Berlin, 2007. doi: 10.14279/depositonce-1634). + // Here we assume that the constraints are in the form `Ax = b, l <= x <= u`. + std::vector var_up_locks_; + std::vector var_down_locks_; + // Local lower bounds for each thread std::vector> local_lower_bounds_; @@ -151,23 +153,13 @@ class branch_and_bound_t { omp_mutex_t mutex_upper_; // Global variable for upper bound - f_t upper_bound_; + omp_atomic_t upper_bound_; // Global variable for incumbent. The incumbent should be updated with the upper bound mip_solution_t incumbent_; // Structure with the general info of the solver. - struct stats_t { - f_t start_time = 0.0; - omp_atomic_t total_lp_solve_time = 0.0; - omp_atomic_t nodes_explored = 0; - omp_atomic_t nodes_unexplored = 0; - omp_atomic_t total_lp_iters = 0; - - // This should only be used by the main thread - omp_atomic_t last_log = 0.0; - omp_atomic_t nodes_since_last_log = 0; - } exploration_stats_; + bnb_stats_t exploration_stats_; // Mutex for repair omp_mutex_t mutex_repair_; @@ -187,9 +179,8 @@ class branch_and_bound_t { // Pseudocosts pseudo_costs_t pc_; - // Heap storing the nodes to be explored. - omp_mutex_t mutex_heap_; - mip_node_heap_t*> heap_; + // Heap storing the nodes waiting to be explored. + node_queue_t node_queue_; // Search tree search_tree_t search_tree_; @@ -197,13 +188,9 @@ class branch_and_bound_t { // Count the number of subtrees that are currently being explored. omp_atomic_t active_subtrees_; - // Queue for storing the promising node for performing dives. - omp_mutex_t mutex_dive_queue_; - diving_queue_t diving_queue_; - i_t min_diving_queue_size_; - // Global status of the solver. - omp_atomic_t solver_status_; + omp_atomic_t solver_status_; + omp_atomic_t is_running{false}; omp_atomic_t should_report_; @@ -211,62 +198,90 @@ class branch_and_bound_t { // its blocks the progression of the lower bound. omp_atomic_t lower_bound_ceiling_; + void report_heuristic(f_t obj); + void report(char symbol, f_t obj, f_t lower_bound, i_t node_depth, i_t node_int_infeas); + // Set the final solution. - mip_status_t set_final_solution(mip_solution_t& solution, f_t lower_bound); + void set_final_solution(mip_solution_t& solution, f_t lower_bound); // Update the incumbent solution with the new feasible solution // found during branch and bound. void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, i_t leaf_depth, - thread_type_t thread_type); + bnb_worker_type_t thread_type); // Repairs low-quality solutions from the heuristics, if it is applicable. void repair_heuristic_solutions(); // Ramp-up phase of the solver, where we greedily expand the tree until // there is enough unexplored nodes. 
This is done recursively using OpenMP tasks. - void exploration_ramp_up(mip_node_t* node, - search_tree_t* search_tree, - const csr_matrix_t& Arow, - i_t initial_heap_size); - - // Explore the search tree using the best-first search with plunging strategy. - void explore_subtree(i_t task_id, - mip_node_t* start_node, - search_tree_t& search_tree, - lp_problem_t& leaf_problem, - bounds_strengthening_t& node_presolver, - basis_update_mpf_t& basis_update, - std::vector& basic_list, - std::vector& nonbasic_list); + void exploration_ramp_up(mip_node_t* node, i_t initial_heap_size); + + // We use best-first to pick the `start_node` and then perform a depth-first search + // from this node (i.e., a plunge). It can only backtrack to a sibling node. + // Unexplored nodes in the subtree are inserted back into the global heap. + void plunge_from(i_t task_id, + mip_node_t* start_node, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list); // Each "main" thread pops a node from the global heap and then performs a plunge // (i.e., a shallow dive) into the subtree determined by the node. - void best_first_thread(i_t task_id, - search_tree_t& search_tree, - const csr_matrix_t& Arow); + void best_first_thread(i_t task_id); + + // Perform a deep dive into the subtree determined by the `start_node` in order + // to find integer feasible solutions. + void dive_from(mip_node_t& start_node, + const std::vector& start_lower, + const std::vector& start_upper, + lp_problem_t& leaf_problem, + bounds_strengthening_t& node_presolver, + basis_update_mpf_t& basis_update, + std::vector& basic_list, + std::vector& nonbasic_list, + bnb_worker_type_t diving_type); // Each diving thread pops the first node from the dive queue and then performs // a deep dive into the subtree determined by the node. - void diving_thread(const csr_matrix_t& Arow); + void diving_thread(bnb_worker_type_t diving_type); - // Solve the LP relaxation of a leaf node and update the tree. - node_solve_info_t solve_node(mip_node_t* node_ptr, - search_tree_t& search_tree, + // Solve the LP relaxation of a leaf node + dual::status_t solve_node_lp(mip_node_t* node_ptr, lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, + std::vector& leaf_edge_norms, basis_update_mpf_t& basis_factors, std::vector& basic_list, std::vector& nonbasic_list, bounds_strengthening_t& node_presolver, - thread_type_t thread_type, - bool recompute_basis_and_bounds, + bnb_worker_type_t thread_type, + bool recompute_bounds_and_basis, const std::vector& root_lower, const std::vector& root_upper, + bnb_stats_t& stats, logger_t& log); - // Sort the children based on the Martin's criteria. - rounding_direction_t child_selection(mip_node_t* node_ptr); + // Update the tree based on the LP relaxation. Returns the status + // of the node and, if appropriate, the preferred rounding direction + // when visiting the children. + std::pair update_tree(mip_node_t* node_ptr, + search_tree_t& search_tree, + lp_problem_t& leaf_problem, + lp_solution_t& leaf_solution, + std::vector& leaf_edge_norms, + bnb_worker_type_t thread_type, + dual::status_t lp_status, + logger_t& log); + + // Selects the variable to branch on.
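+  // BEST_FIRST workers use pseudocost branching and pick the rounding direction with Martin's
+  // criteria; the diving worker types dispatch to the corresponding heuristics in
+  // diving_heuristics.cpp (coefficient, line-search, pseudocost, and guided diving, the latter
+  // falling back to pseudocost diving while no incumbent is available).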
+ branch_variable_t variable_selection(mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + bnb_worker_type_t type); }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index a8937b70b..4115db3ba 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/cusparse_view.cu b/cpp/src/dual_simplex/cusparse_view.cu index a3ef0c6ab..a63ed6add 100644 --- a/cpp/src/dual_simplex/cusparse_view.cu +++ b/cpp/src/dual_simplex/cusparse_view.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -116,7 +116,10 @@ void my_cusparsespmv_preprocess(cusparseHandle_t handle, static cusparseSpMVAlg_t get_spmv_alg(int num_rows) { // The older version of ALG2 has a bug with single row matrices - if (num_rows == 1 && __CUDACC_VER_MAJOR__ < 13) { return CUSPARSE_SPMV_CSR_ALG1; } + if (num_rows == 1 && + (CUSPARSE_VER_MAJOR * 1000 + CUSPARSE_VER_MINOR * 100 + CUSPARSE_VER_PATCH < 12603)) { + return CUSPARSE_SPMV_CSR_ALG1; + } return CUSPARSE_SPMV_CSR_ALG2; } diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 139b43242..57431edfb 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -2311,7 +2311,7 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( const f_t value = static_cast(k) * (f_a_j - f_a_0) / alpha; i_t p = static_cast(std::ceil(value)); if (fractional_part(value) < 1e-12) { - printf("Warning: p %d value %.16e is close to an integer\n", p, value, p + 1); + //printf("Warning: p %d value %.16e is close to an integer\n", p, value, p + 1); } if (verbose) { printf("j %d a_j %e f_a_j %e p %d value %.16e\n", j, a_j, f_a_j, p, value); diff --git a/cpp/src/dual_simplex/diving_heuristics.cpp b/cpp/src/dual_simplex/diving_heuristics.cpp new file mode 100644 index 000000000..a56b4cce3 --- /dev/null +++ b/cpp/src/dual_simplex/diving_heuristics.cpp @@ -0,0 +1,306 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log) +{ + constexpr f_t eps = 1e-6; + i_t branch_var = -1; + f_t min_score = std::numeric_limits::max(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + + for (i_t j : fractional) { + f_t score = inf; + rounding_direction_t dir = rounding_direction_t::NONE; + + if (solution[j] < root_solution[j] - eps) { + f_t f = solution[j] - std::floor(solution[j]); + f_t d = root_solution[j] - solution[j]; + score = f / d; + dir = rounding_direction_t::DOWN; + + } else if (solution[j] > root_solution[j] + eps) { + f_t f = std::ceil(solution[j]) - solution[j]; + f_t d = solution[j] - root_solution[j]; + score = f / d; + dir = rounding_direction_t::UP; + } + + if (min_score > score) { + min_score = score; + branch_var = j; + round_dir = dir; + } + } + + // If the current solution is equal to the root solution, arbitrarily + // set the branch variable to the first fractional variable and round it down + if (round_dir == rounding_direction_t::NONE) { + branch_var = fractional[0]; + round_dir = rounding_direction_t::DOWN; + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Line search diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + min_score); + + return {branch_var, round_dir}; +} + +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log) +{ + std::lock_guard lock(pc.mutex); + i_t branch_var = -1; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + pc.initialized( + num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t j : fractional) { + rounding_direction_t dir = rounding_direction_t::NONE; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? 
pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + + f_t score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_down); + f_t score_up = std::sqrt(f_down) * (1 + pc_down) / (1 + pc_up); + f_t score = 0; + + if (solution[j] < root_solution[j] - 0.4) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (solution[j] > root_solution[j] + 0.4) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (f_down < 0.3) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else if (f_down > 0.7) { + score = score_up; + dir = rounding_direction_t::UP; + } else if (pc_down < pc_up + eps) { + score = score_down; + dir = rounding_direction_t::DOWN; + } else { + score = score_up; + dir = rounding_direction_t::UP; + } + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Pseudocost diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + max_score); + + return {branch_var, round_dir}; +} + +template +branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log) +{ + std::lock_guard lock(pc.mutex); + i_t branch_var = -1; + f_t max_score = std::numeric_limits::lowest(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + pc.initialized( + num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t down_dist = std::abs(incumbent[j] - std::floor(solution[j])); + f_t up_dist = std::abs(std::ceil(solution[j]) - incumbent[j]); + rounding_direction_t dir = + down_dist < up_dist + eps ? rounding_direction_t::DOWN : rounding_direction_t::UP; + + f_t pc_down = pc.pseudo_cost_num_down[j] != 0 + ? pc.pseudo_cost_sum_down[j] / pc.pseudo_cost_num_down[j] + : pseudo_cost_down_avg; + + f_t pc_up = pc.pseudo_cost_num_up[j] != 0 ? pc.pseudo_cost_sum_up[j] / pc.pseudo_cost_num_up[j] + : pseudo_cost_up_avg; + + f_t score1 = dir == rounding_direction_t::DOWN ? 5 * pc_down * f_down : 5 * pc_up * f_up; + f_t score2 = dir == rounding_direction_t::DOWN ? 
pc_up * f_up : pc_down * f_down; + f_t score = (score1 + score2) / 6; + + if (score > max_score) { + max_score = score; + branch_var = j; + round_dir = dir; + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug("Guided diving: selected var %d with val = %e, round dir = %d and score = %e\n", + branch_var, + solution[branch_var], + round_dir, + max_score); + + return {branch_var, round_dir}; +} + +template +void calculate_variable_locks(const lp_problem_t& lp_problem, + std::vector& up_locks, + std::vector& down_locks) +{ + constexpr f_t eps = 1E-6; + up_locks.assign(lp_problem.num_cols, 0); + down_locks.assign(lp_problem.num_cols, 0); + + for (i_t j = 0; j < lp_problem.num_cols; ++j) { + i_t start = lp_problem.A.col_start[j]; + i_t end = lp_problem.A.col_start[j + 1]; + + for (i_t p = start; p < end; ++p) { + f_t val = lp_problem.A.x[p]; + if (std::abs(val) > eps) { + up_locks[j]++; + down_locks[j]++; + } + } + } +} + +template +branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + const std::vector& up_locks, + const std::vector& down_locks, + logger_t& log) +{ + i_t branch_var = -1; + i_t min_locks = std::numeric_limits::max(); + rounding_direction_t round_dir = rounding_direction_t::NONE; + constexpr f_t eps = 1e-6; + + for (i_t j : fractional) { + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + i_t up_lock = up_locks[j]; + i_t down_lock = down_locks[j]; + f_t upper = lp_problem.upper[j]; + f_t lower = lp_problem.lower[j]; + if (std::isfinite(upper)) { up_lock++; } + if (std::isfinite(lower)) { down_lock++; } + i_t alpha = std::min(up_lock, down_lock); + + if (min_locks > alpha) { + min_locks = alpha; + branch_var = j; + + if (up_lock < down_lock) { + round_dir = rounding_direction_t::UP; + } else if (up_lock > down_lock) { + round_dir = rounding_direction_t::DOWN; + } else if (f_down < f_up + eps) { + round_dir = rounding_direction_t::DOWN; + } else { + round_dir = rounding_direction_t::UP; + } + } + } + + assert(round_dir != rounding_direction_t::NONE); + assert(branch_var >= 0); + + log.debug( + "Coefficient diving: selected var %d with val = %e, round dir = %d and min locks = %d\n", + branch_var, + solution[branch_var], + round_dir, + min_locks); + + return {branch_var, round_dir}; +} + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + +template void calculate_variable_locks(const lp_problem_t& lp_problem, + std::vector& up_locks, + std::vector& down_locks); + +template branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + const std::vector& up_locks, + const std::vector& down_locks, + logger_t& log); +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_heuristics.hpp b/cpp/src/dual_simplex/diving_heuristics.hpp new file mode 100644 index 000000000..3c6d77c04 
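The scoring used in pseudocost_diving above is easiest to see on a single variable. Below is a minimal standalone sketch with toy numbers (plain C++; `x_j`, `pc_dn`, and `pc_up` are hypothetical values, not cuOpt types or API) showing how the down/up scores trade the learned pseudocosts against the fractionality of the LP value.

#include <cmath>
#include <cstdio>

int main()
{
  // Toy data for one candidate variable (all values hypothetical).
  double x_j   = 3.7;  // fractional LP value
  double pc_dn = 0.8;  // average objective degradation per unit of down-rounding
  double pc_up = 2.5;  // average objective degradation per unit of up-rounding

  double f_dn = x_j - std::floor(x_j);  // 0.7
  double f_up = std::ceil(x_j) - x_j;   // 0.3

  // Same shape as the scores in pseudocost_diving(): the down score grows when the
  // up pseudocost is large relative to the down pseudocost, weighted by how much
  // fractionality would have to be pushed the other way.
  double score_down = std::sqrt(f_up) * (1 + pc_up) / (1 + pc_dn);
  double score_up   = std::sqrt(f_dn) * (1 + pc_dn) / (1 + pc_up);

  std::printf("score_down = %f, score_up = %f -> round %s\n",
              score_down, score_up, score_down > score_up ? "down" : "up");
  return 0;
}

With these numbers the cheaper down pseudocost wins (score_down is about 1.07 versus 0.43), which agrees with the patch's tie-break `pc_down < pc_up + eps` rounding down.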
--- /dev/null +++ b/cpp/src/dual_simplex/diving_heuristics.hpp @@ -0,0 +1,58 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +struct branch_variable_t { + i_t variable; + rounding_direction_t direction; +}; + +template +branch_variable_t line_search_diving(const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template +branch_variable_t pseudocost_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& root_solution, + logger_t& log); + +template +branch_variable_t guided_diving(pseudo_costs_t& pc, + const std::vector& fractional, + const std::vector& solution, + const std::vector& incumbent, + logger_t& log); + +// Calculate the variable locks assuming that the constraints +// has the following format: `Ax = b`. +template +void calculate_variable_locks(const lp_problem_t& lp_problem, + std::vector& up_locks, + std::vector& down_locks); + +template +branch_variable_t coefficient_diving(const lp_problem_t& lp_problem, + const std::vector& fractional, + const std::vector& solution, + const std::vector& up_locks, + const std::vector& down_locks, + logger_t& log); + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/diving_queue.hpp b/cpp/src/dual_simplex/diving_queue.hpp deleted file mode 100644 index f7035109e..000000000 --- a/cpp/src/dual_simplex/diving_queue.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - */ - -#pragma once - -#include -#include -#include - -#include - -namespace cuopt::linear_programming::dual_simplex { - -template -struct diving_root_t { - mip_node_t node; - std::vector lower; - std::vector upper; - - diving_root_t(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - : node(std::move(node)), lower(std::move(lower)), upper(std::move(upper)) - { - } - - friend bool operator>(const diving_root_t& a, const diving_root_t& b) - { - return a.node.lower_bound > b.node.lower_bound; - } -}; - -// A min-heap for storing the starting nodes for the dives. -// This has a maximum size of 1024, such that the container -// will discard the least promising node if the queue is full. 
-template -class diving_queue_t { - private: - std::vector> buffer; - static constexpr i_t max_size_ = 1024; - - public: - diving_queue_t() { buffer.reserve(max_size_); } - - void push(diving_root_t&& node) - { - buffer.push_back(std::move(node)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - void emplace(mip_node_t&& node, std::vector&& lower, std::vector&& upper) - { - buffer.emplace_back(std::move(node), std::move(lower), std::move(upper)); - std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); - if (buffer.size() > max_size() - 1) { buffer.pop_back(); } - } - - diving_root_t pop() - { - std::pop_heap(buffer.begin(), buffer.end(), std::greater<>()); - diving_root_t node = std::move(buffer.back()); - buffer.pop_back(); - return node; - } - - i_t size() const { return buffer.size(); } - constexpr i_t max_size() const { return max_size_; } - const diving_root_t& top() const { return buffer.front(); } - void clear() { buffer.clear(); } -}; - -} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/iterative_refinement.hpp b/cpp/src/dual_simplex/iterative_refinement.hpp index 9f6f90325..001e68a39 100644 --- a/cpp/src/dual_simplex/iterative_refinement.hpp +++ b/cpp/src/dual_simplex/iterative_refinement.hpp @@ -1,52 +1,127 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "dual_simplex/dense_vector.hpp" #include "dual_simplex/simplex_solver_settings.hpp" #include "dual_simplex/types.hpp" #include "dual_simplex/vector_math.hpp" -#include -#include -#include - namespace cuopt::linear_programming::dual_simplex { +// Functors for device operations (defined at namespace scope to avoid CUDA lambda restrictions) +template +struct scale_op { + T scale; + __host__ __device__ T operator()(T val) const { return val * scale; } +}; + +template +struct multiply_op { + __host__ __device__ T operator()(T a, T b) const { return a * b; } +}; + +template +struct axpy_op { + T alpha; + __host__ __device__ T operator()(T x, T y) const { return x + alpha * y; } +}; + +template +struct subtract_scaled_op { + T scale; + __host__ __device__ T operator()(T a, T b) const { return a - scale * b; } +}; + +template +f_t vector_norm_inf(const rmm::device_uvector& x) +{ + auto begin = x.data(); + auto end = x.data() + x.size(); + auto max_abs = thrust::transform_reduce( + rmm::exec_policy(x.stream()), + begin, + end, + [] __host__ __device__(f_t val) { return abs(val); }, + static_cast(0), + thrust::maximum{}); + RAFT_CHECK_CUDA(x.stream()); + return max_abs; +} + +template +f_t vector_norm2(const rmm::device_uvector& x) +{ + auto begin = x.data(); + auto end = x.data() + x.size(); + auto sum_of_squares = thrust::transform_reduce( + rmm::exec_policy(x.stream()), + begin, + end, + [] __host__ __device__(f_t val) { return val * val; }, + f_t(0), + thrust::plus{}); + RAFT_CHECK_CUDA(x.stream()); + return std::sqrt(sum_of_squares); +} + template -void iterative_refinement_simple(T& op, - const dense_vector_t& b, - dense_vector_t& x) +f_t iterative_refinement_simple(T& op, + const rmm::device_uvector& b, + 
rmm::device_uvector& x) { - dense_vector_t x_sav = x; - dense_vector_t r = b; + rmm::device_uvector x_sav(x, x.stream()); + const bool show_iterative_refinement_info = false; + // r = b - Ax + rmm::device_uvector r(b, b.stream()); op.a_multiply(-1.0, x, 1.0, r); - f_t error = vector_norm_inf(r); + f_t error = vector_norm_inf(r); if (show_iterative_refinement_info) { CUOPT_LOG_INFO( - "Iterative refinement. Initial error %e || x || %.16e", error, vector_norm2(x)); + "Iterative refinement. Initial error %e || x || %.16e", error, vector_norm2(x)); } - dense_vector_t delta_x(x.size()); + rmm::device_uvector delta_x(x.size(), op.data_.handle_ptr->get_stream()); i_t iter = 0; while (error > 1e-8 && iter < 30) { - delta_x.set_scalar(0.0); + thrust::fill(op.data_.handle_ptr->get_thrust_policy(), + delta_x.data(), + delta_x.data() + delta_x.size(), + 0.0); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); op.solve(r, delta_x); - x.axpy(1.0, delta_x, 1.0); - - r = b; + thrust::transform(op.data_.handle_ptr->get_thrust_policy(), + x.data(), + x.data() + x.size(), + delta_x.data(), + x.data(), + thrust::plus()); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); + // r = b - Ax + raft::copy(r.data(), b.data(), b.size(), x.stream()); op.a_multiply(-1.0, x, 1.0, r); - f_t new_error = vector_norm_inf(r); + f_t new_error = vector_norm_inf(r); if (new_error > error) { - x = x_sav; + raft::copy(x.data(), x_sav.data(), x.size(), x.stream()); if (show_iterative_refinement_info) { CUOPT_LOG_INFO( "Iterative refinement. Iter %d error increased %e %e. Stopping", iter, error, new_error); @@ -54,26 +129,27 @@ void iterative_refinement_simple(T& op, break; } error = new_error; - x_sav = x; + raft::copy(x_sav.data(), x.data(), x.size(), x.stream()); iter++; if (show_iterative_refinement_info) { CUOPT_LOG_INFO( "Iterative refinement. Iter %d error %e. || x || %.16e || dx || %.16e Continuing", iter, error, - vector_norm2(x), - vector_norm2(delta_x)); + vector_norm2(x), + vector_norm2(delta_x)); } } + return error; } /** @brief Iterative refinement with GMRES as solver */ template -void iterative_refinement_gmres(T& op, - const dense_vector_t& b, - dense_vector_t& x) +f_t iterative_refinement_gmres(T& op, + const rmm::device_uvector& b, + rmm::device_uvector& x) { // Parameters // Ideally, we do not need to restart here. 
But having restarts helps as a checkpoint to get @@ -83,9 +159,9 @@ void iterative_refinement_gmres(T& op, const int m = 10; // Krylov space dimension const f_t tol = 1e-8; - dense_vector_t r(x.size()); - dense_vector_t x_sav = x; - dense_vector_t delta_x(x.size()); + rmm::device_uvector r(x.size(), x.stream()); + rmm::device_uvector x_sav(x, x.stream()); + rmm::device_uvector delta_x(x.size(), x.stream()); // Host workspace for the Hessenberg matrix and other small arrays std::vector> H(m + 1, std::vector(m, 0.0)); @@ -96,17 +172,17 @@ void iterative_refinement_gmres(T& op, bool show_info = false; - f_t bnorm = max(1.0, vector_norm_inf(b)); + f_t bnorm = std::max(1.0, vector_norm_inf(b)); f_t rel_res = 1.0; int outer_iter = 0; // r = b - A*x - r = b; + raft::copy(r.data(), b.data(), b.size(), x.stream()); op.a_multiply(-1.0, x, 1.0, r); - f_t norm_r = vector_norm_inf(r); + f_t norm_r = vector_norm_inf(r); if (show_info) { CUOPT_LOG_INFO("GMRES IR: initial residual = %e, |b| = %e", norm_r, bnorm); } - if (norm_r <= 1e-8) { return; } + if (norm_r <= 1e-8) { return norm_r; } f_t residual = norm_r; f_t best_residual = norm_r; @@ -115,17 +191,23 @@ void iterative_refinement_gmres(T& op, while (residual > tol && outer_iter < max_restarts) { // For right preconditioning: Apply preconditioner on Krylov directions, not on the residual. // So, start GMRES on r = b - A*x. v0 = r / ||r|| - std::vector> V; - std::vector> Z; // Store preconditioned vectors Z[k] = M^{-1} V[k] + std::vector> V; + std::vector> Z; // Store preconditioned vectors Z[k] = M^{-1} V[k] for (int k = 0; k < m + 1; ++k) { - V.emplace_back(x.size()); - Z.emplace_back(x.size()); + V.emplace_back(x.size(), x.stream()); + Z.emplace_back(x.size(), x.stream()); } // v0 = r / ||r|| - f_t rnorm = vector_norm2(r); + f_t rnorm = vector_norm2(r); f_t inv_rnorm = (rnorm > 0) ? 
(f_t(1) / rnorm) : f_t(1); - V[0] = r; - V[0].multiply_scalar(inv_rnorm); + + raft::copy(V[0].data(), r.data(), r.size(), x.stream()); + thrust::transform(op.data_.handle_ptr->get_thrust_policy(), + V[0].data(), + V[0].data() + V[0].size(), + V[0].data(), + scale_op{inv_rnorm}); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); e1.assign(m + 1, 0.0); e1[0] = rnorm; @@ -141,19 +223,35 @@ void iterative_refinement_gmres(T& op, // Modified Gram-Schmidt orthogonalization for (int j = 0; j <= k; ++j) { // H[j][k] = dot(w, V[j]) - f_t hij = V[k + 1].inner_product(V[j]); + f_t hij = thrust::inner_product(op.data_.handle_ptr->get_thrust_policy(), + V[k + 1].data(), + V[k + 1].data() + x.size(), + V[j].data(), + f_t(0)); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); H[j][k] = hij; // w -= H[j][k] * V[j] - V[k + 1].axpy(-hij, V[j], 1.0); + thrust::transform(op.data_.handle_ptr->get_thrust_policy(), + V[k + 1].data(), + V[k + 1].data() + x.size(), + V[j].data(), + V[k + 1].data(), + subtract_scaled_op{hij}); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); } // H[k+1][k] = ||w|| - f_t h_k1k = vector_norm2(V[k + 1]); + f_t h_k1k = vector_norm2(V[k + 1]); H[k + 1][k] = h_k1k; if (h_k1k != 0.0) { // V[k+1] = V[k+1] / H[k+1][k] f_t inv_h = f_t(1) / h_k1k; - V[k + 1].multiply_scalar(inv_h); + thrust::transform(op.data_.handle_ptr->get_thrust_policy(), + V[k + 1].data(), + V[k + 1].data() + x.size(), + V[k + 1].data(), + scale_op{inv_h}); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); } // Apply Given's rotations to new column @@ -191,26 +289,47 @@ void iterative_refinement_gmres(T& op, for (int j = i + 1; j < k; ++j) { s -= H[i][j] * y[j]; } - y[i] = s / H[i][i]; + // avoid inf/nan breakdown + if (H[i][i] == 0.0) { + y[i] = 0.0; + break; + } else { + y[i] = s / H[i][i]; + } } // Compute GMRES update: delta_x = sum_j y_j * Z[j], where Z[j] = M^{-1} V[j] - std::fill(delta_x.begin(), delta_x.end(), 0.0); + thrust::fill(op.data_.handle_ptr->get_thrust_policy(), + delta_x.data(), + delta_x.data() + delta_x.size(), + 0.0); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); for (int j = 0; j < k; ++j) { - delta_x.axpy(y[j], Z[j], 1.0); + thrust::transform(op.data_.handle_ptr->get_thrust_policy(), + delta_x.data(), + delta_x.data() + delta_x.size(), + Z[j].data(), + delta_x.data(), + axpy_op{y[j]}); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); } // Update x = x + delta_x - x.axpy(1.0, delta_x, 1.0); - + thrust::transform(op.data_.handle_ptr->get_thrust_policy(), + x.data(), + x.data() + x.size(), + delta_x.data(), + x.data(), + thrust::plus()); + RAFT_CHECK_CUDA(op.data_.handle_ptr->get_stream()); // r = b - A*x - r = b; + raft::copy(r.data(), b.data(), b.size(), x.stream()); op.a_multiply(-1.0, x, 1.0, r); - residual = vector_norm_inf(r); + residual = vector_norm_inf(r); if (show_info) { - auto l2_residual = vector_norm2(r); + auto l2_residual = vector_norm2(r); CUOPT_LOG_INFO("GMRES IR: after outer_iter %d residual = %e, l2_residual = %e", outer_iter, residual, @@ -220,31 +339,41 @@ void iterative_refinement_gmres(T& op, // Track best solution if (residual < best_residual) { best_residual = residual; - x_sav = x; + raft::copy(x_sav.data(), x.data(), x.size(), x.stream()); } else { // Residual increased or stagnated, restore best and stop if (show_info) { CUOPT_LOG_INFO( "GMRES IR: residual increased from %e to %e, stopping", best_residual, residual); } - x = x_sav; + raft::copy(x.data(), x_sav.data(), x.size(), x.stream()); break; } ++outer_iter; } + return best_residual; } 
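The simple and GMRES refinement routines above share one outer pattern: compute the residual, solve for a correction with a cheap approximate operator, apply it, and fall back to the best iterate if the residual stops improving. Below is a minimal host-side sketch of that pattern on a toy 2x2 system (plain C++; the Jacobi-style `M` stands in for the `op.solve()` preconditioner call, and every value here is hypothetical, not the cuOpt types or API).

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>

int main()
{
  // Toy system A x = b; M is a cheap approximate inverse (Jacobi diagonal).
  const double A[2][2] = {{4.0, 1.0}, {1.0, 3.0}};
  const double b[2]    = {1.0, 2.0};
  const double M[2]    = {1.0 / 4.0, 1.0 / 3.0};

  std::array<double, 2> x{0.0, 0.0};
  double prev_err = 1e300;

  for (int iter = 0; iter < 30; ++iter) {
    // r = b - A x
    double r[2];
    for (int i = 0; i < 2; ++i)
      r[i] = b[i] - (A[i][0] * x[0] + A[i][1] * x[1]);

    double err = std::max(std::fabs(r[0]), std::fabs(r[1]));
    std::printf("iter %d  ||r||_inf = %.3e\n", iter, err);
    if (err < 1e-8 || err >= prev_err) break;  // converged or stagnating
    prev_err = err;

    // x <- x + M r  (solve for a correction with the approximate operator, then update)
    for (int i = 0; i < 2; ++i) x[i] += M[i] * r[i];
  }
  return 0;
}

Because A is diagonally dominant the correction step contracts the residual, so the loop terminates on the 1e-8 tolerance rather than the stagnation guard.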
template -void iterative_refinement(T& op, const dense_vector_t& b, dense_vector_t& x) +f_t iterative_refinement(T& op, const dense_vector_t& b, dense_vector_t& x) { - const bool is_qp = op.data_.Q.n > 0; - if (is_qp) { - iterative_refinement_gmres(op, b, x); - } else { - iterative_refinement_simple(op, b, x); - } - return; + rmm::device_uvector d_b(b.size(), op.data_.handle_ptr->get_stream()); + raft::copy(d_b.data(), b.data(), b.size(), op.data_.handle_ptr->get_stream()); + rmm::device_uvector d_x(x.size(), op.data_.handle_ptr->get_stream()); + raft::copy(d_x.data(), x.data(), x.size(), op.data_.handle_ptr->get_stream()); + auto err = iterative_refinement_gmres(op, d_b, d_x); + + raft::copy(x.data(), d_x.data(), x.size(), op.data_.handle_ptr->get_stream()); + + RAFT_CUDA_TRY(cudaStreamSynchronize(op.data_.handle_ptr->get_stream())); + return err; +} + +template +f_t iterative_refinement(T& op, const rmm::device_uvector& b, rmm::device_uvector& x) +{ + return iterative_refinement_gmres(op, b, x); } } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/logger.hpp b/cpp/src/dual_simplex/logger.hpp index ac5e394f9..f81308670 100644 --- a/cpp/src/dual_simplex/logger.hpp +++ b/cpp/src/dual_simplex/logger.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -30,22 +30,24 @@ class logger_t { { } - void enable_log_to_file(std::string mode = "w") + void enable_log_to_file(const char* mode = "w") { if (log_file != nullptr) { std::fclose(log_file); } - log_file = std::fopen(log_filename.c_str(), mode.c_str()); + log_file = std::fopen(log_filename.c_str(), mode); log_to_file = true; } - void set_log_file(const std::string& filename) + void set_log_file(const std::string& filename, const char* mode = "w") { log_filename = filename; - enable_log_to_file(); + enable_log_to_file(mode); } void close_log_file() { if (log_file != nullptr) { std::fclose(log_file); } + log_file = nullptr; + log_to_file = false; } void printf(const char* fmt, ...) diff --git a/cpp/src/dual_simplex/mip_node.hpp b/cpp/src/dual_simplex/mip_node.hpp index 18ca43912..5ee4f49d1 100644 --- a/cpp/src/dual_simplex/mip_node.hpp +++ b/cpp/src/dual_simplex/mip_node.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,6 +45,7 @@ class mip_node_t { branch_var_lower(-std::numeric_limits::infinity()), branch_var_upper(std::numeric_limits::infinity()), fractional_val(std::numeric_limits::infinity()), + objective_estimate(std::numeric_limits::infinity()), vstatus(0) { children[0] = nullptr; @@ -60,6 +61,7 @@ class mip_node_t { branch_var(-1), branch_dir(rounding_direction_t::NONE), integer_infeasible(-1), + objective_estimate(std::numeric_limits::infinity()), vstatus(basis) { children[0] = nullptr; @@ -83,6 +85,7 @@ class mip_node_t { branch_dir(branch_direction), fractional_val(branch_var_value), integer_infeasible(integer_inf), + objective_estimate(parent_node->objective_estimate), vstatus(basis) { branch_var_lower = branch_direction == rounding_direction_t::DOWN ? 
problem.lower[branch_var] @@ -229,17 +232,19 @@ class mip_node_t { mip_node_t detach_copy() const { mip_node_t copy(lower_bound, vstatus); - copy.branch_var = branch_var; - copy.branch_dir = branch_dir; - copy.branch_var_lower = branch_var_lower; - copy.branch_var_upper = branch_var_upper; - copy.fractional_val = fractional_val; - copy.node_id = node_id; + copy.branch_var = branch_var; + copy.branch_dir = branch_dir; + copy.branch_var_lower = branch_var_lower; + copy.branch_var_upper = branch_var_upper; + copy.fractional_val = fractional_val; + copy.objective_estimate = objective_estimate; + copy.node_id = node_id; return copy; } node_status_t status; f_t lower_bound; + f_t objective_estimate; i_t depth; i_t node_id; i_t branch_var; @@ -265,22 +270,6 @@ void remove_fathomed_nodes(std::vector*>& stack) } } -template -class node_compare_t { - public: - bool operator()(const mip_node_t& a, const mip_node_t& b) const - { - return a.lower_bound > - b.lower_bound; // True if a comes before b, elements that come before are output last - } - - bool operator()(const mip_node_t* a, const mip_node_t* b) const - { - return a->lower_bound > - b->lower_bound; // True if a comes before b, elements that come before are output last - } -}; - template class search_tree_t { public: diff --git a/cpp/src/dual_simplex/node_queue.hpp b/cpp/src/dual_simplex/node_queue.hpp new file mode 100644 index 000000000..28072795a --- /dev/null +++ b/cpp/src/dual_simplex/node_queue.hpp @@ -0,0 +1,162 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +// This is a generic heap implementation based +// on the STL functions. The main benefit here is +// that we access the underlying container. +template +class heap_t { + public: + heap_t() = default; + virtual ~heap_t() = default; + + void push(const T& node) + { + buffer.push_back(node); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + void push(T&& node) + { + buffer.push_back(std::move(node)); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + template + void emplace(Args&&... args) + { + buffer.emplace_back(std::forward(args)...); + std::push_heap(buffer.begin(), buffer.end(), comp); + } + + std::optional pop() + { + if (buffer.empty()) return std::nullopt; + + std::pop_heap(buffer.begin(), buffer.end(), comp); + T node = std::move(buffer.back()); + buffer.pop_back(); + return node; + } + + size_t size() const { return buffer.size(); } + T& top() { return buffer.front(); } + void clear() { buffer.clear(); } + bool empty() const { return buffer.empty(); } + + private: + std::vector buffer; + Comp comp; +}; + +// A queue storing the nodes waiting to be explored/dived from. +template +class node_queue_t { + private: + struct heap_entry_t { + mip_node_t* node = nullptr; + f_t lower_bound = -inf; + f_t score = inf; + + heap_entry_t(mip_node_t* new_node) + : node(new_node), lower_bound(new_node->lower_bound), score(new_node->objective_estimate) + { + } + }; + + // Comparision function for ordering the nodes based on their lower bound with + // lowest one being explored first. 
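+  // (std::push_heap/std::pop_heap build a max-heap with respect to the comparator, so
+  // returning `a->lower_bound > b->lower_bound` keeps the entry with the smallest lower
+  // bound at the front of the buffer, i.e. the heap behaves as a min-heap on the bound.)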
+ struct lower_bound_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->lower_bound > b->lower_bound; + } + }; + + // Comparision function for ordering the nodes based on some score (currently the pseudocost + // estimate) with the lowest being explored first. + struct score_comp { + bool operator()(const std::shared_ptr& a, const std::shared_ptr& b) + { + // `a` will be placed after `b` + return a->score > b->score; + } + }; + + heap_t, lower_bound_comp> best_first_heap; + heap_t, score_comp> diving_heap; + omp_mutex_t mutex; + + public: + void push(mip_node_t* new_node) + { + std::lock_guard lock(mutex); + auto entry = std::make_shared(new_node); + best_first_heap.push(entry); + diving_heap.push(entry); + } + + std::optional*> pop_best_first() + { + auto entry = best_first_heap.pop(); + if (entry.has_value()) { return std::exchange(entry.value()->node, nullptr); } + return std::nullopt; + } + + std::optional*> pop_diving() + { + while (!diving_heap.empty()) { + auto entry = diving_heap.pop(); + if (entry.has_value()) { + if (auto node_ptr = entry.value()->node; node_ptr != nullptr) { return node_ptr; } + } + } + return std::nullopt; + } + + void lock() { mutex.lock(); } + + void unlock() { mutex.unlock(); } + + i_t diving_queue_size() + { + std::lock_guard lock(mutex); + return diving_heap.size(); + } + + i_t best_first_queue_size() + { + std::lock_guard lock(mutex); + return best_first_heap.size(); + } + + f_t get_lower_bound() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? inf : best_first_heap.top()->lower_bound; + } + + mip_node_t* bfs_top() + { + std::lock_guard lock(mutex); + return best_first_heap.empty() ? nullptr : best_first_heap.top()->node; + } +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 94472edaa..d1c63b49d 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index d247fbf67..bbfe18d61 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -1311,7 +1311,10 @@ f_t crush_dual_solution(const user_problem_t& user_problem, } } const f_t dual_res_inf = vector_norm_inf(dual_residual); - assert(dual_res_inf < 1e-6); + // TODO: fix me! In test ./cpp/build/tests/linear_programming/C_API_TEST + // c_api/TimeLimitTestFixture.time_limit/2 this is crashing. It is crashing only if it is run as + // whole in sequence and not filtering the respective test. Crash could be observed in previous + // versions by setting probing cache time to zero. 
assert(dual_res_inf < 1e-6); return dual_res_inf; } diff --git a/cpp/src/dual_simplex/primal.cpp b/cpp/src/dual_simplex/primal.cpp index 3d9849fbe..69f15ba18 100644 --- a/cpp/src/dual_simplex/primal.cpp +++ b/cpp/src/dual_simplex/primal.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/dual_simplex/pseudo_costs.cpp b/cpp/src/dual_simplex/pseudo_costs.cpp index f391598b1..d944078b4 100644 --- a/cpp/src/dual_simplex/pseudo_costs.cpp +++ b/cpp/src/dual_simplex/pseudo_costs.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -181,8 +181,8 @@ void strong_branching(const lp_problem_t& original_lp, pseudo_costs_t& pc) { pc.resize(original_lp.num_cols); - pc.strong_branch_down.resize(fractional.size()); - pc.strong_branch_up.resize(fractional.size()); + pc.strong_branch_down.assign(fractional.size(), 0); + pc.strong_branch_up.assign(fractional.size(), 0); pc.num_strong_branches_completed = 0; settings.log.printf("Strong branching using %d threads and %ld fractional variables\n", @@ -232,7 +232,7 @@ template void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_ptr, f_t leaf_objective) { - mutex.lock(); + std::lock_guard lock(mutex); const f_t change_in_obj = leaf_objective - node_ptr->lower_bound; const f_t frac = node_ptr->branch_dir == rounding_direction_t::DOWN ? node_ptr->fractional_val - std::floor(node_ptr->fractional_val) @@ -244,7 +244,6 @@ void pseudo_costs_t::update_pseudo_costs(mip_node_t* node_pt pseudo_cost_sum_up[node_ptr->branch_var] += change_in_obj / frac; pseudo_cost_num_up[node_ptr->branch_var]++; } - mutex.unlock(); } template @@ -291,7 +290,7 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio const std::vector& solution, logger_t& log) { - mutex.lock(); + std::lock_guard lock(mutex); const i_t num_fractional = fractional.size(); std::vector pseudo_cost_up(num_fractional); @@ -345,11 +344,53 @@ i_t pseudo_costs_t::variable_selection(const std::vector& fractio log.printf( "pc branching on %d. Value %e. 
Score %e\n", branch_var, solution[branch_var], score[select]); - mutex.unlock(); - return branch_var; } +template +f_t pseudo_costs_t::obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log) +{ + std::lock_guard lock(mutex); + + const i_t num_fractional = fractional.size(); + f_t estimate = lower_bound; + + i_t num_initialized_down; + i_t num_initialized_up; + f_t pseudo_cost_down_avg; + f_t pseudo_cost_up_avg; + + initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); + + for (i_t k = 0; k < num_fractional; k++) { + const i_t j = fractional[k]; + f_t pseudo_cost_down = 0; + f_t pseudo_cost_up = 0; + + if (pseudo_cost_num_down[j] != 0) { + pseudo_cost_down = pseudo_cost_sum_down[j] / pseudo_cost_num_down[j]; + } else { + pseudo_cost_down = pseudo_cost_down_avg; + } + + if (pseudo_cost_num_up[j] != 0) { + pseudo_cost_up = pseudo_cost_sum_up[j] / pseudo_cost_num_up[j]; + } else { + pseudo_cost_up = pseudo_cost_up_avg; + } + constexpr f_t eps = 1e-6; + const f_t f_down = solution[j] - std::floor(solution[j]); + const f_t f_up = std::ceil(solution[j]) - solution[j]; + estimate += std::min(pseudo_cost_down * f_down, pseudo_cost_up * f_up); + } + + log.printf("pseudocost estimate = %e\n", estimate); + return estimate; +} + template i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -463,6 +504,7 @@ i_t pseudo_costs_t::reliable_variable_selection(const lp_problem_t void pseudo_costs_t::update_pseudo_costs_from_strong_branching( const std::vector& fractional, const std::vector& root_soln) diff --git a/cpp/src/dual_simplex/pseudo_costs.hpp b/cpp/src/dual_simplex/pseudo_costs.hpp index 20b2198e4..750230fa4 100644 --- a/cpp/src/dual_simplex/pseudo_costs.hpp +++ b/cpp/src/dual_simplex/pseudo_costs.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -32,10 +32,10 @@ class pseudo_costs_t { void resize(i_t num_variables) { - pseudo_cost_sum_down.resize(num_variables); - pseudo_cost_sum_up.resize(num_variables); - pseudo_cost_num_down.resize(num_variables); - pseudo_cost_num_up.resize(num_variables); + pseudo_cost_sum_down.assign(num_variables, 0); + pseudo_cost_sum_up.assign(num_variables, 0); + pseudo_cost_num_down.assign(num_variables, 0); + pseudo_cost_num_up.assign(num_variables, 0); } void initialized(i_t& num_initialized_down, @@ -43,6 +43,11 @@ class pseudo_costs_t { f_t& pseudo_cost_down_avg, f_t& pseudo_cost_up_avg) const; + f_t obj_estimate(const std::vector& fractional, + const std::vector& solution, + f_t lower_bound, + logger_t& log); + i_t variable_selection(const std::vector& fractional, const std::vector& solution, logger_t& log); diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index c31a6bbac..4248197c7 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,12 +13,29 @@ #include #include #include +#include #include #include #include namespace cuopt::linear_programming::dual_simplex { +template +struct diving_heuristics_settings_t { + i_t num_diving_workers = -1; + + // -1 automatic, 0 disabled, 1 enabled + i_t line_search_diving = -1; + i_t pseudocost_diving = -1; + i_t guided_diving = -1; + i_t coefficient_diving = -1; + + i_t min_node_depth = 10; + i_t node_limit = 500; + f_t iteration_limit_factor = 0.05; + i_t backtrack_limit = 5; +}; + template struct simplex_solver_settings_t { public: @@ -70,9 +87,8 @@ struct simplex_solver_settings_t { iteration_log_frequency(1000), first_iteration_log(2), num_threads(omp_get_max_threads() - 1), - num_bfs_threads(std::min(num_threads / 4, 1)), - num_diving_threads(std::min(num_threads - num_bfs_threads, 1)), max_cut_passes(0), + num_bfs_workers(std::max(num_threads / 4, 1)), random_seed(0), inside_mip(0), reliability_branching(-1), @@ -80,6 +96,7 @@ struct simplex_solver_settings_t { heuristic_preemption_callback(nullptr), concurrent_halt(nullptr) { + diving_settings.num_diving_workers = std::max(num_threads - num_bfs_workers, 1); } void set_log(bool logging) const { log.log = logging; } @@ -139,9 +156,11 @@ struct simplex_solver_settings_t { i_t first_iteration_log; // number of iterations to log at beginning of solve i_t num_threads; // number of threads to use i_t random_seed; // random seed - i_t num_bfs_threads; // number of threads dedicated to the best-first search - i_t num_diving_threads; // number of threads dedicated to diving i_t max_cut_passes; // number of cut passes to make + i_t num_bfs_workers; // number of threads dedicated to the best-first search + + diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics + i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node i_t reliability_branching; // -1 automatic, 0 to disable, >0 to enable reliability branching std::function&, f_t)> solution_callback; diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index ea5d197a8..2e01f1102 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -598,7 +598,7 @@ i_t solve(const user_problem_t& problem, { i_t status; if (is_mip(problem) && !settings.relaxation) { - branch_and_bound_t branch_and_bound(problem, settings); + branch_and_bound_t branch_and_bound(problem, settings, tic()); mip_solution_t mip_solution(problem.num_cols); mip_status_t mip_status = branch_and_bound.solve(mip_solution); if (mip_status == mip_status_t::OPTIMAL) { @@ -637,7 +637,7 @@ i_t solve_mip_with_guess(const user_problem_t& problem, { i_t status; if (is_mip(problem)) { - branch_and_bound_t branch_and_bound(problem, settings); + branch_and_bound_t branch_and_bound(problem, settings, tic()); branch_and_bound.set_initial_guess(guess); mip_status_t mip_status = branch_and_bound.solve(solution); if (mip_status == mip_status_t::OPTIMAL) { diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh index 01e187fa8..eea7ee8f3 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.cuh +++ b/cpp/src/dual_simplex/sparse_cholesky.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -396,8 +396,10 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { nnz = Arow.row_start.element(Arow.m, Arow.row_start.stream()); const f_t density = static_cast(nnz) / (static_cast(n) * static_cast(n)); + // skip reordering if matrix diagonal if (first_factor && - ((settings_.ordering == -1 && density >= 0.05) || settings_.ordering == 1) && n > 1) { + ((settings_.ordering == -1 && density >= 0.05 && nnz > n) || settings_.ordering == 1) && + n > 1) { settings_.log.printf("Reordering algorithm : AMD\n"); // Tell cuDSS to use AMD cudssAlgType_t reorder_alg = CUDSS_ALG_3; diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu index 017616d6d..32dc36870 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -435,7 +435,7 @@ void pdlp_initial_scaling_strategy_t::scale_problem() op_problem_scaled_.variable_bounds.data(), primal_size_h_, divide_check_zero(), - stream_view_); + stream_view_.value()); raft::linalg::eltwiseMultiply( const_cast&>(op_problem_scaled_.constraint_lower_bounds).data(), @@ -471,7 +471,7 @@ void pdlp_initial_scaling_strategy_t::scale_problem() return {constraint_lower_bound * *bound_rescaling, constraint_upper_bound * *bound_rescaling}; }, - stream_view_); + stream_view_.value()); cub::DeviceTransform::Transform( cuda::std::make_tuple(op_problem_scaled_.variable_bounds.data(), @@ -486,7 +486,7 @@ void pdlp_initial_scaling_strategy_t::scale_problem() return {{variable_bounds.x * *bound_rescaling, variable_bounds.y * *bound_rescaling}, objective_coefficient * *objective_rescaling}; }, - stream_view_); + stream_view_.value()); } #ifdef CUPDLP_DEBUG_MODE diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index ebf617774..bbc1c5f9d 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -158,11 +158,7 @@ void optimization_problem_t::set_quadratic_objective_matrix( // Replace Q with Q + Q^T i_t qn = size_offsets - 1; // Number of variables i_t q_nnz = size_indices; - - // Construct H = Q + Q^T in triplet form first - // Then covert the triplet to CSR - std::vector H_i; std::vector H_j; std::vector H_x; @@ -180,14 +176,16 @@ void optimization_problem_t::set_quadratic_objective_matrix( // Add H(i,j) H_i.push_back(i); H_j.push_back(j); - H_x.push_back(x); - // Add H(j,i) - H_i.push_back(j); - H_j.push_back(i); - H_x.push_back(x); + if (i == j) { H_x.push_back(2 * x); } + if (i != j) { + H_x.push_back(x); + // Add H(j,i) + H_i.push_back(j); + H_j.push_back(i); + H_x.push_back(x); + } } } - // Convert H to CSR format // Get row counts i_t H_nz = H_x.size(); @@ -200,43 +198,13 @@ void optimization_problem_t::set_quadratic_objective_matrix( H_cumulative_counts[k + 1] = H_cumulative_counts[k] + H_row_counts[k]; } std::vector H_row_starts = H_cumulative_counts; - std::vector H_map(H_nz); std::vector H_indices(H_nz); std::vector H_values(H_nz); for (i_t k = 0; k < H_nz; ++k) { - const i_t p = H_cumulative_counts[H_i[k]]++; - H_map[k] = p; + i_t p = H_cumulative_counts[H_i[k]]++; + H_indices[p] = H_j[k]; + H_values[p] = H_x[k]; } - rmm::device_uvector d_H_map(H_nz, stream_view_); - rmm::device_uvector d_H_j(H_nz, stream_view_); - rmm::device_uvector d_H_x(H_nz, stream_view_); - rmm::device_uvector d_H_indices(H_nz, stream_view_); - rmm::device_uvector d_H_values(H_nz, stream_view_); - - raft::copy(d_H_map.data(), H_map.data(), H_nz, stream_view_); - raft::copy(d_H_j.data(), H_j.data(), H_nz, stream_view_); - raft::copy(d_H_x.data(), H_x.data(), H_nz, stream_view_); - stream_view_.synchronize(); - thrust::for_each_n(rmm::exec_policy(stream_view_), - thrust::make_counting_iterator(0), - H_nz, - [span_H_map = cuopt::make_span(d_H_map), - span_H_j = cuopt::make_span(d_H_j), - span_H_indices = cuopt::make_span(d_H_indices)] __device__(i_t k) { - span_H_indices[span_H_map[k]] = span_H_j[k]; - }); - thrust::for_each_n(rmm::exec_policy(stream_view_), - thrust::make_counting_iterator(0), - H_nz, - [span_H_map = cuopt::make_span(d_H_map), - span_H_x = cuopt::make_span(d_H_x), - span_H_values = cuopt::make_span(d_H_values)] __device__(i_t k) { - span_H_values[span_H_map[k]] = span_H_x[k]; - }); - - raft::copy(H_indices.data(), d_H_indices.data(), H_nz, stream_view_); - raft::copy(H_values.data(), d_H_values.data(), H_nz, stream_view_); - stream_view_.synchronize(); // H_row_starts, H_indices, H_values are the CSR representation of H // But this contains duplicate entries @@ -247,28 +215,27 @@ void optimization_problem_t::set_quadratic_objective_matrix( Q_indices_.resize(H_nz); Q_values_.resize(H_nz); i_t nz = 0; - for (i_t i = 0; i < qn; ++i) - { - i_t q = nz; // row i will start at q + for (i_t i = 0; i < qn; ++i) { + i_t q = nz; // row i will start at q const i_t row_start = H_row_starts[i]; - const i_t row_end = H_row_starts[i + 1]; + const i_t row_end = H_row_starts[i + 1]; for (i_t p = row_start; p < row_end; ++p) { i_t j = H_indices[p]; if (workspace[j] >= q) { Q_values_[workspace[j]] += H_values[p]; // H(i,j) is duplicate } else { - workspace[j] = nz; // record where column j occurs - Q_indices_[nz] = j; // keep H(i,j) - Q_values_[nz] = H_values[p]; + workspace[j] = nz; // record where column j occurs + Q_indices_[nz] = j; // keep H(i,j) + Q_values_[nz] = H_values[p]; nz++; } } - Q_offsets_[i] = q; // record start of row i 
+ Q_offsets_[i] = q; // record start of row i } - Q_offsets_[qn] = nz; // finalize Q + + Q_offsets_[qn] = nz; // finalize Q Q_indices_.resize(nz); Q_values_.resize(nz); - // FIX ME:: check for positive semi definite matrix } @@ -408,7 +375,7 @@ i_t optimization_problem_t::get_n_integers() const { i_t n_integers = 0; if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types()); + auto enum_variable_types = cuopt::host_copy(get_variable_types(), handle_ptr_->get_stream()); for (size_t i = 0; i < enum_variable_types.size(); ++i) { if (enum_variable_types[i] == var_t::INTEGER) { n_integers++; } @@ -662,16 +629,17 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ data_model_view.set_maximize(get_sense()); // Copy to host - auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); - auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices()); - auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets()); - auto constraint_bounds = cuopt::host_copy(get_constraint_bounds()); - auto objective_coefficients = cuopt::host_copy(get_objective_coefficients()); - auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); - auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds()); - auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); - auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); - auto row_types = cuopt::host_copy(get_row_types()); + auto stream = handle_ptr_->get_stream(); + auto constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values(), stream); + auto constraint_matrix_indices = cuopt::host_copy(get_constraint_matrix_indices(), stream); + auto constraint_matrix_offsets = cuopt::host_copy(get_constraint_matrix_offsets(), stream); + auto constraint_bounds = cuopt::host_copy(get_constraint_bounds(), stream); + auto objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream); + auto variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream); + auto variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream); + auto constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds(), stream); + auto constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds(), stream); + auto row_types = cuopt::host_copy(get_row_types(), stream); // Set constraint matrix in CSR format if (get_nnz() != 0) { @@ -723,7 +691,7 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ std::vector variable_types(get_n_variables()); // Set variable types (convert from enum to char) if (get_n_variables() != 0) { - auto enum_variable_types = cuopt::host_copy(get_variable_types()); + auto enum_variable_types = cuopt::host_copy(get_variable_types(), stream); // Convert enum types to char types for (size_t i = 0; i < variable_types.size(); ++i) { @@ -748,13 +716,17 @@ void optimization_problem_t::write_to_mps(const std::string& mps_file_ template void optimization_problem_t::print_scaling_information() const { - std::vector constraint_matrix_values = cuopt::host_copy(get_constraint_matrix_values()); - std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds()); - std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients()); - std::vector variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds()); - std::vector variable_upper_bounds = 
cuopt::host_copy(get_variable_upper_bounds()); - std::vector constraint_lower_bounds = cuopt::host_copy(get_constraint_lower_bounds()); - std::vector constraint_upper_bounds = cuopt::host_copy(get_constraint_upper_bounds()); + auto stream = handle_ptr_->get_stream(); + std::vector constraint_matrix_values = + cuopt::host_copy(get_constraint_matrix_values(), stream); + std::vector constraint_rhs = cuopt::host_copy(get_constraint_bounds(), stream); + std::vector objective_coefficients = cuopt::host_copy(get_objective_coefficients(), stream); + std::vector variable_lower_bounds = cuopt::host_copy(get_variable_lower_bounds(), stream); + std::vector variable_upper_bounds = cuopt::host_copy(get_variable_upper_bounds(), stream); + std::vector constraint_lower_bounds = + cuopt::host_copy(get_constraint_lower_bounds(), stream); + std::vector constraint_upper_bounds = + cuopt::host_copy(get_constraint_upper_bounds(), stream); auto findMaxAbs = [](const std::vector& vec) -> f_t { if (vec.empty()) { return 0.0; } diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index 678a0554f..61ee2021f 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -135,7 +135,7 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_scalar current_saddle_point_state_.get_delta_dual().data()), dual_size_h_, dual_projection(dual_step_size.data()), - stream_view_); + stream_view_.value()); } template @@ -194,7 +194,7 @@ void pdhg_solver_t::compute_primal_projection_with_gradient( tmp_primal_.data()), primal_size_h_, primal_projection(primal_step_size.data()), - stream_view_); + stream_view_.value()); } template @@ -343,7 +343,7 @@ void pdhg_solver_t::compute_next_primal_dual_solution_reflected( potential_next_primal_solution_.data(), dual_slack_.data(), reflected_primal_.data()), primal_size_h_, primal_reflected_major_projection(primal_step_size.data()), - stream_view_); + stream_view_.value()); #ifdef CUPDLP_DEBUG_MODE print("potential_next_primal_solution_", potential_next_primal_solution_); print("reflected_primal_", reflected_primal_); @@ -361,7 +361,7 @@ void pdhg_solver_t::compute_next_primal_dual_solution_reflected( thrust::make_zip_iterator(potential_next_dual_solution_.data(), reflected_dual_.data()), dual_size_h_, dual_reflected_major_projection(dual_step_size.data()), - stream_view_); + stream_view_.value()); #ifdef CUPDLP_DEBUG_MODE print("potential_next_dual_solution_", potential_next_dual_solution_); @@ -386,7 +386,7 @@ void pdhg_solver_t::compute_next_primal_dual_solution_reflected( reflected_primal_.data(), primal_size_h_, primal_reflected_projection(primal_step_size.data()), - stream_view_); + stream_view_.value()); #ifdef CUPDLP_DEBUG_MODE print("reflected_primal_", reflected_primal_); #endif @@ -402,7 +402,7 @@ void pdhg_solver_t::compute_next_primal_dual_solution_reflected( reflected_dual_.data(), dual_size_h_, dual_reflected_projection(dual_step_size.data()), - stream_view_); + stream_view_.value()); #ifdef CUPDLP_DEBUG_MODE print("reflected_dual_", reflected_dual_); #endif diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 076af6ee3..25382ee7f 100644 --- 
a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -1034,13 +1034,13 @@ void pdlp_solver_t::compute_fixed_error(bool& has_restarted) pdhg_solver_.get_saddle_point_state().get_delta_primal().data(), primal_size_h_, cuda::std::minus{}, - stream_view_); + stream_view_.value()); cub::DeviceTransform::Transform(cuda::std::make_tuple(pdhg_solver_.get_reflected_dual().data(), pdhg_solver_.get_dual_solution().data()), pdhg_solver_.get_saddle_point_state().get_delta_dual().data(), dual_size_h_, cuda::std::minus{}, - stream_view_); + stream_view_.value()); auto& cusparse_view = pdhg_solver_.get_cusparse_view(); // Make potential_next_dual_solution point towards reflected dual solution to reuse the code @@ -1140,14 +1140,14 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co pdhg_solver_.get_primal_solution().data(), primal_size_h_, clamp(), - stream_view_); + stream_view_.value()); cub::DeviceTransform::Transform( cuda::std::make_tuple(unscaled_primal_avg_solution_.data(), op_problem_scaled_.variable_bounds.data()), unscaled_primal_avg_solution_.data(), primal_size_h_, clamp(), - stream_view_); + stream_view_.value()); } if (verbose) { @@ -1402,7 +1402,7 @@ void pdlp_solver_t::halpern_update() (f_t(1.0) - reflection_coefficient) * current_primal; return weight * reflected + (f_t(1.0) - weight) * initial_primal; }, - stream_view_); + stream_view_.value()); // Update dual cub::DeviceTransform::Transform( @@ -1417,7 +1417,7 @@ void pdlp_solver_t::halpern_update() (f_t(1.0) - reflection_coefficient) * current_dual; return weight * reflected + (f_t(1.0) - weight) * initial_dual; }, - stream_view_); + stream_view_.value()); #ifdef CUPDLP_DEBUG_MODE print("halpen_update current primal", @@ -1511,7 +1511,7 @@ void pdlp_solver_t::compute_initial_step_size() const auto& cusparse_view_ = pdhg_solver_.get_cusparse_view(); - int sing_iters = 0; + [[maybe_unused]] int sing_iters = 0; for (int i = 0; i < max_iterations; ++i) { ++sing_iters; // d_q = d_z @@ -1527,7 +1527,7 @@ void pdlp_solver_t::compute_initial_step_size() d_q.data(), d_q.size(), [norm_q = norm_q.data()] __device__(f_t d_q) { return d_q / *norm_q; }, - stream_view_); + stream_view_.value()); // A_t_q = A_t @ d_q RAFT_CUSPARSE_TRY( @@ -1571,7 +1571,7 @@ void pdlp_solver_t::compute_initial_step_size() [sigma_max_sq = sigma_max_sq.data()] __device__(f_t d_q, f_t d_z) { return d_q * -(*sigma_max_sq) + d_z; }, - stream_view_); + stream_view_.value()); my_l2_norm(d_q, residual_norm, handle_ptr_); diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 565377682..09c225ae6 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -1682,7 +1682,7 @@ void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( thrust::make_zip_iterator(thrust::make_tuple(lower_bound_.data(), upper_bound_.data())), primal_size_h_, extract_bounds_t(), - stream_view_); + stream_view_.value()); raft::copy(lower_bound_.data() + primal_size_h_, transformed_constraint_lower_bounds_.data(), dual_size_h_, @@ -1866,7 +1866,7 @@ void pdlp_restart_strategy_t::solve_bound_constrained_trust_region( duality_gap.primal_solution_tr_.data(), primal_size_h_, clamp(), - stream_view_); + stream_view_.value()); // project by max(min(y[i], upperbound[i]),lowerbound[i]) raft::linalg::ternaryOp(duality_gap.dual_solution_tr_.data(), diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu index 73b45d30f..65a4b92b8 100644 --- a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -79,14 +79,14 @@ void weighted_average_solution_t::add_current_solution_to_weighted_ave sum_primal_solutions_.data(), primal_size_h_, a_add_scalar_times_b(weight.data()), - stream_view_); + stream_view_.value()); cub::DeviceTransform::Transform( cuda::std::make_tuple(sum_dual_solutions_.data(), dual_solution), sum_dual_solutions_.data(), dual_size_h_, a_add_scalar_times_b(weight.data()), - stream_view_); + stream_view_.value()); // update weight sums and count (add weight and +1 respectively) add_weight_sums<<<1, 1, 0, stream_view_>>>(weight.data(), diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu index 3b2149864..59eee9cac 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -304,7 +304,7 @@ void adaptive_step_size_strategy_t::compute_interaction_and_movement( tmp_primal.data(), current_saddle_point_state.get_primal_size(), raft::sub_op(), - stream_view_); + stream_view_.value()); // compute interaction (x'-x) . (A(y'-y)) RAFT_CUBLAS_TRY( diff --git a/cpp/src/linear_programming/termination_strategy/convergence_information.cu b/cpp/src/linear_programming/termination_strategy/convergence_information.cu index d247a0240..ebba29758 100644 --- a/cpp/src/linear_programming/termination_strategy/convergence_information.cu +++ b/cpp/src/linear_programming/termination_strategy/convergence_information.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -278,7 +278,7 @@ void convergence_information_t::compute_primal_residual( raft::max(dual, f_t(0.0)) * finite_or_zero(lower) + raft::min(dual, f_t(0.0)) * finite_or_zero(upper)}; }, - stream_view_); + stream_view_.value()); } } @@ -361,7 +361,7 @@ void convergence_information_t::compute_dual_residual( dual_residual_.data(), dual_residual_.size(), cuda::std::minus<>{}, - stream_view_); + stream_view_.value()); } else { compute_reduced_cost_from_primal_gradient(tmp_primal, primal_solution); @@ -459,7 +459,7 @@ void convergence_information_t::compute_reduced_cost_from_primal_gradi bound_value_.data(), primal_size_h_, bound_value_gradient(), - stream_view_); + stream_view_.value()); if (pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals) { raft::linalg::ternaryOp(reduced_cost_.data(), @@ -492,7 +492,7 @@ void convergence_information_t::compute_reduced_costs_dual_objective_c bound_value_.data(), primal_size_h_, bound_value_reduced_cost_product(), - stream_view_); + stream_view_.value()); // sum over bound_value*reduced_cost, but should be -inf if any element is -inf cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), diff --git a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu index 6e8bb8a8a..e05742c58 100644 --- a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu +++ b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -358,7 +358,7 @@ void infeasibility_information_t::compute_reduced_cost_from_primal_gra bound_value_.data(), primal_size_h_, bound_value_gradient(), - stream_view_); + stream_view_.value()); if (pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals) { raft::linalg::ternaryOp(reduced_cost_.data(), diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp index 8453ac3e7..19f6c024c 100644 --- a/cpp/src/linear_programming/translate.hpp +++ b/cpp/src/linear_programming/translate.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -116,22 +116,23 @@ void translate_to_crossover_problem(const detail::problem_t& problem, { CUOPT_LOG_DEBUG("Starting translation"); - std::vector pdlp_objective = cuopt::host_copy(problem.objective_coefficients); + auto stream = problem.handle_ptr->get_stream(); + std::vector pdlp_objective = cuopt::host_copy(problem.objective_coefficients, stream); dual_simplex::csr_matrix_t csr_A( problem.n_constraints, problem.n_variables, problem.nnz); - csr_A.x = cuopt::host_copy(problem.coefficients); - csr_A.j = cuopt::host_copy(problem.variables); - csr_A.row_start = cuopt::host_copy(problem.offsets); + csr_A.x = cuopt::host_copy(problem.coefficients, stream); + csr_A.j = cuopt::host_copy(problem.variables, stream); + csr_A.row_start = cuopt::host_copy(problem.offsets, stream); - problem.handle_ptr->get_stream().synchronize(); + stream.synchronize(); CUOPT_LOG_DEBUG("Converting to compressed column"); csr_A.to_compressed_col(lp.A); CUOPT_LOG_DEBUG("Converted to compressed column"); std::vector slack(problem.n_constraints); - std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution()); - problem.handle_ptr->get_stream().synchronize(); + std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream); + stream.synchronize(); dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack); CUOPT_LOG_DEBUG("Multiplied A and x"); @@ -161,8 +162,8 @@ void translate_to_crossover_problem(const detail::problem_t& problem, auto [lower, upper] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); - std::vector constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds); - std::vector constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds); + std::vector constraint_lower = cuopt::host_copy(problem.constraint_lower_bounds, stream); + std::vector constraint_upper = cuopt::host_copy(problem.constraint_upper_bounds, stream); lp.objective.resize(n, 0.0); std::copy( @@ -187,10 +188,10 @@ void translate_to_crossover_problem(const detail::problem_t& problem, if (initial_solution.x[j] > lp.upper[j]) { initial_solution.x[j] = lp.upper[j]; } } CUOPT_LOG_DEBUG("Finished with x"); - initial_solution.y = cuopt::host_copy(sol.get_dual_solution()); + initial_solution.y = cuopt::host_copy(sol.get_dual_solution(), stream); - std::vector tmp_z = cuopt::host_copy(sol.get_reduced_cost()); - problem.handle_ptr->get_stream().synchronize(); + std::vector tmp_z = cuopt::host_copy(sol.get_reduced_cost(), stream); + stream.synchronize(); std::copy(tmp_z.begin(), tmp_z.begin() + problem.n_variables, initial_solution.z.begin()); for (i_t j = problem.n_variables; j < n; ++j) { initial_solution.z[j] = initial_solution.y[j - problem.n_variables]; diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index 2f7a37df8..f49e2057b 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
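The translate_to_crossover_problem changes above route every cuopt::host_copy through the problem's stream and synchronize once at the end, instead of synchronizing the handle after each copy. A condensed sketch of that pattern, assuming the cuopt::host_copy(container, stream) overload used throughout this patch and a problem object with the same members:

template <typename problem_t>
void snapshot_matrix_on_host(const problem_t& pb)
{
  auto stream = pb.handle_ptr->get_stream();
  // Enqueue all device-to-host copies on the same stream ...
  auto coefficients = cuopt::host_copy(pb.coefficients, stream);
  auto variables    = cuopt::host_copy(pb.variables, stream);
  auto offsets      = cuopt::host_copy(pb.offsets, stream);
  // ... then pay for a single synchronization before the host-side code reads them.
  stream.synchronize();
  // build the host-side CSR matrix from coefficients/variables/offsets here
}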
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -229,23 +229,55 @@ std::unique_ptr call_solve( bool is_batch_mode) { raft::common::nvtx::range fun_scope("Call Solve"); + rmm::cuda_stream stream(static_cast(flags)); + const raft::handle_t handle_{stream}; - // FIX: Use default handle constructor like CLI does, instead of explicit stream creation - // Original code created a non-blocking stream which causes synchronization issues with PDLP - // This is a workaround to fix the synchronization issues, please fix this in the future and - // remove this workaround. cudaStream_t stream; RAFT_CUDA_TRY(cudaStreamCreateWithFlags(&stream, - // flags)); // flags=cudaStreamNonBlocking const raft::handle_t handle_{stream}; - const raft::handle_t handle_{}; + solver_ret_t response; auto op_problem = data_model_to_optimization_problem(data_model, solver_settings, &handle_); - solver_ret_t response; if (op_problem.get_problem_category() == linear_programming::problem_category_t::LP) { response.lp_ret = call_solve_lp(op_problem, solver_settings->get_pdlp_settings(), is_batch_mode); response.problem_type = linear_programming::problem_category_t::LP; + // Reset stream to per-thread default as non-blocking stream is out of scope after the + // function returns. + response.lp_ret.primal_solution_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.dual_solution_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.reduced_cost_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.current_primal_solution_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.current_dual_solution_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.initial_primal_average_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.initial_dual_average_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.current_ATY_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.sum_primal_solutions_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.sum_dual_solutions_->set_stream(rmm::cuda_stream_per_thread); + response.lp_ret.last_restart_duality_gap_primal_solution_->set_stream( + rmm::cuda_stream_per_thread); + response.lp_ret.last_restart_duality_gap_dual_solution_->set_stream( + rmm::cuda_stream_per_thread); } else { response.mip_ret = call_solve_mip(op_problem, solver_settings->get_mip_settings()); response.problem_type = linear_programming::problem_category_t::MIP; + // Reset stream to per-thread default as non-blocking stream is out of scope after the + // function returns. + response.mip_ret.solution_->set_stream(rmm::cuda_stream_per_thread); + } + + // Reset warmstart data streams in solver_settings to per-thread default before destroying our + // local stream. The warmstart data was created using our stream and its uvectors are associated + // with it. 
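The comment above states the invariant behind all of the set_stream calls that follow: any rmm buffer created on the locally owned stream must be re-associated with a stream that outlives this function before that local stream is destroyed. A small self-contained sketch of the hazard being avoided (names are illustrative):

#include <cstddef>
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

rmm::device_uvector<double> make_result(std::size_t n)
{
  rmm::cuda_stream local_stream;                               // owned; destroyed at scope exit
  rmm::device_uvector<double> result(n, local_stream.view());  // work is ordered on local_stream
  // ... fill `result` on local_stream ...
  local_stream.view().synchronize();
  // Re-home the buffer so later operations (and its destructor) never touch the dead stream.
  result.set_stream(rmm::cuda_stream_per_thread);
  return result;
}

call_solve above applies the same re-homing to the solution buffers and the warm-start data before its locally created stream goes out of scope.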
+ auto& warmstart_data = solver_settings->get_pdlp_settings().get_pdlp_warm_start_data(); + if (warmstart_data.current_primal_solution_.size() > 0) { + warmstart_data.current_primal_solution_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.current_dual_solution_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.initial_primal_average_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.initial_dual_average_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.current_ATY_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.sum_primal_solutions_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.sum_dual_solutions_.set_stream(rmm::cuda_stream_per_thread); + warmstart_data.last_restart_duality_gap_primal_solution_.set_stream( + rmm::cuda_stream_per_thread); + warmstart_data.last_restart_duality_gap_dual_solution_.set_stream(rmm::cuda_stream_per_thread); } return std::make_unique(std::move(response)); @@ -265,8 +297,8 @@ static int compute_max_thread( for (const auto data_model : data_models) { const int nb_variables = data_model->get_objective_coefficients().size(); const int nb_constraints = data_model->get_constraint_bounds().size(); - // Currently we roughly need 8 times more memory than the size of each structure in the problem - // representation + // Currently we roughly need 8 times more memory than the size of each structure in the + // problem representation needed_memory += ((nb_variables * 3 * sizeof(double)) + (nb_constraints * 3 * sizeof(double)) + data_model->get_constraint_matrix_values().size() * sizeof(double) + data_model->get_constraint_matrix_indices().size() * sizeof(int) + @@ -277,8 +309,8 @@ static int compute_max_thread( const int res = std::min(max_total, std::min(total_mem / needed_memory, data_models.size())); cuopt_expects( res > 0, error_type_t::RuntimeError, "Problems too big to be solved in batch mode."); - // A front end mecanism should prevent users to pick one or more problems so large that this would - // return 0 + // A front end mecanism should prevent users to pick one or more problems so large that this + // would return 0 return res; } @@ -309,8 +341,7 @@ std::pair>, double> call_batch_solve( #pragma omp parallel for num_threads(max_thread) for (std::size_t i = 0; i < size; ++i) - list[i] = - std::move(call_solve(data_models[i], solver_settings, cudaStreamNonBlocking, is_batch_mode)); + list[i] = call_solve(data_models[i], solver_settings, cudaStreamNonBlocking, is_batch_mode); auto end = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(end - start_solver); diff --git a/cpp/src/linear_programming/utils.cuh b/cpp/src/linear_programming/utils.cuh index 0da5d25ce..18c023e5d 100644 --- a/cpp/src/linear_programming/utils.cuh +++ b/cpp/src/linear_programming/utils.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
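compute_max_thread above limits batch-mode concurrency with a coarse memory budget: it sums an approximate footprint over all data models and divides available device memory by that total. A simplified standalone version of the same accounting (the footprint formula is abridged, and model_sizes_t is an illustrative stand-in for the data-model getters):

#include <algorithm>
#include <cstddef>
#include <vector>

struct model_sizes_t {
  std::size_t n_variables;
  std::size_t n_constraints;
  std::size_t nnz;
};

std::size_t max_batch_threads(const std::vector<model_sizes_t>& models,
                              std::size_t total_device_memory,
                              std::size_t max_total)
{
  std::size_t needed_memory = 0;
  for (const auto& m : models) {
    needed_memory += m.n_variables * 3 * sizeof(double) + m.n_constraints * 3 * sizeof(double) +
                     m.nnz * (sizeof(double) + sizeof(int)) + (m.n_constraints + 1) * sizeof(int);
  }
  if (needed_memory == 0) { return std::min(max_total, models.size()); }
  return std::min({max_total, total_device_memory / needed_memory, models.size()});
}

The real helper additionally requires the result to be positive, so a batch whose problems are too large fails with an explicit error rather than silently serializing.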
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -62,9 +62,9 @@ struct max_abs_value { template i_t conditional_major(uint64_t total_pdlp_iterations) { - uint64_t step = 10; - uint64_t threshold = 1000; - uint64_t iteration = 0; + uint64_t step = 10; + uint64_t threshold = 1000; + [[maybe_unused]] uint64_t iteration = 0; [[maybe_unused]] constexpr uint64_t max_u64 = std::numeric_limits::max(); diff --git a/cpp/src/mip/diversity/diversity_config.hpp b/cpp/src/mip/diversity/diversity_config.hpp index 5d95a51df..de1426079 100644 --- a/cpp/src/mip/diversity/diversity_config.hpp +++ b/cpp/src/mip/diversity/diversity_config.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -14,8 +14,8 @@ namespace cuopt::linear_programming::detail { struct diversity_config_t { double time_ratio_on_init_lp = 0.1; double max_time_on_lp = 15.0; - double time_ratio_of_probing_cache = 0.04; - double max_time_on_probing = 15.0; + double time_ratio_of_probing_cache = 0.1; + double max_time_on_probing = 60.0; int max_var_diff = 256; size_t max_solutions = 32; double initial_infeasibility_weight = 1000.; diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 483ffeb68..74a2935e2 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -184,9 +184,22 @@ bool diversity_manager_t::run_presolve(f_t time_limit) } if (termination_criterion_t::NO_UPDATE != term_crit) { ls.constraint_prop.bounds_update.set_updated_bounds(*problem_ptr); - trivial_presolve(*problem_ptr); - if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } } + if (!fj_only_run) { + // Run probing cache before trivial presolve to discover variable implications + const f_t time_ratio_of_probing_cache = diversity_config.time_ratio_of_probing_cache; + const f_t max_time_on_probing = diversity_config.max_time_on_probing; + f_t time_for_probing_cache = + std::min(max_time_on_probing, time_limit * time_ratio_of_probing_cache); + timer_t probing_timer{time_for_probing_cache}; + // this function computes probing cache, finds singletons, substitutions and changes the problem + bool problem_is_infeasible = + compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); + if (problem_is_infeasible) { return false; } + } + const bool remap_cache_ids = true; + trivial_presolve(*problem_ptr, remap_cache_ids); + if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } // May overconstrain if Papilo presolve has been run before if (!context.settings.presolve) { if (!problem_ptr->empty) { @@ -203,10 +216,11 @@ bool diversity_manager_t::run_presolve(f_t time_limit) lp_dual_optimal_solution.resize(problem_ptr->n_constraints, problem_ptr->handle_ptr->get_stream()); problem_ptr->handle_ptr->sync_stream(); - CUOPT_LOG_INFO("After trivial presolve: %d constraints, %d variables, objective offset %f.", + CUOPT_LOG_INFO("After cuOpt presolve: %d constraints, %d variables, objective offset %f.", problem_ptr->n_constraints, problem_ptr->n_variables, problem_ptr->presolve_data.objective_offset); + CUOPT_LOG_INFO("cuOpt presolve time: %.2f", stats.presolve_time); return true; } @@ -320,17 +334,6 @@ solution_t diversity_manager_t::run_solver() ls_cpufj_raii_guard_t ls_cpufj_raii_guard(ls); // RAII to stop cpufj threads on solve stop ls.start_cpufj_scratch_threads(population); - // before probing cache or LP, run FJ to generate initial primal feasible solution - const f_t time_ratio_of_probing_cache = diversity_config.time_ratio_of_probing_cache; - const f_t max_time_on_probing = diversity_config.max_time_on_probing; - f_t time_for_probing_cache = - std::min(max_time_on_probing, time_limit * time_ratio_of_probing_cache); - timer_t probing_timer{time_for_probing_cache}; - if (check_b_b_preemption()) { return population.best_feasible(); } - if (!fj_only_run) { - compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); - } - if (check_b_b_preemption()) { return population.best_feasible(); } lp_state_t& lp_state = problem_ptr->lp_state; // resize because some constructor might be called before the presolve diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index 9a125160c..dea6d57fa 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights * reserved. 
SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,6 +22,8 @@ #include #include +#include + namespace cuopt::linear_programming::detail { template rins_t::rins_t(mip_solver_context_t& context_, @@ -221,7 +223,7 @@ void rins_t::run_rins() &rins_handle, &fixed_problem, context.settings, context.scaling); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); - fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment)); + fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); std::vector default_weights(fixed_problem.n_constraints, 1.); cpu_fj_thread_t cpu_fj_thread; cpu_fj_thread.fj_cpu = fj.create_cpu_climber(fj_solution, @@ -256,19 +258,26 @@ void rins_t::run_rins() branch_and_bound_settings.absolute_mip_gap_tol = context.settings.tolerances.absolute_mip_gap; branch_and_bound_settings.relative_mip_gap_tol = std::min(current_mip_gap, (f_t)settings.target_mip_gap); - branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; - branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_threads = 1; - branch_and_bound_settings.num_diving_threads = 1; - branch_and_bound_settings.log.log = false; - branch_and_bound_settings.log.log_prefix = "[RINS] "; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.solution_callback = [this, &rins_solution_queue]( - std::vector& solution, f_t objective) { + branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; + branch_and_bound_settings.num_threads = 2; + branch_and_bound_settings.num_bfs_workers = 1; + branch_and_bound_settings.max_cut_passes = 0; + + // In the future, let RINS use all the diving heuristics. For now, + // restricting to guided diving. + branch_and_bound_settings.diving_settings.num_diving_workers = 1; + branch_and_bound_settings.diving_settings.line_search_diving = 0; + branch_and_bound_settings.diving_settings.coefficient_diving = 0; + branch_and_bound_settings.diving_settings.pseudocost_diving = 0; + branch_and_bound_settings.log.log = false; + branch_and_bound_settings.log.log_prefix = "[RINS] "; + branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, + f_t objective) { rins_solution_queue.push_back(solution); }; dual_simplex::branch_and_bound_t branch_and_bound(branch_and_bound_problem, - branch_and_bound_settings); + branch_and_bound_settings, + dual_simplex::tic()); branch_and_bound.set_initial_guess(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); diff --git a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh index 051d51483..94cc66399 100644 --- a/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh +++ b/cpp/src/mip/diversity/recombiners/bound_prop_recombiner.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
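The RINS hunk above shows the new shape of an embedded branch-and-bound configuration: BFS and diving worker counts live in dedicated fields, the non-guided diving heuristics are switched off, and the solver constructor now takes an explicit start time from dual_simplex::tic(). A condensed sketch using the field names from this patch (the settings type itself is left as a template parameter because its declaration is not part of this hunk, and double stands in for the solver's f_t):

#include <vector>

// settings_t stands for the dual_simplex branch-and-bound settings struct configured above;
// queue_t is any container with push_back, standing in for the RINS solution queue.
template <typename settings_t, typename queue_t>
void configure_embedded_bb(settings_t& s, queue_t& solution_queue)
{
  s.num_threads     = 2;
  s.num_bfs_workers = 1;
  s.max_cut_passes  = 0;
  // Guided diving only: one diving worker, all other diving heuristics disabled.
  s.diving_settings.num_diving_workers = 1;
  s.diving_settings.line_search_diving = 0;
  s.diving_settings.coefficient_diving = 0;
  s.diving_settings.pseudocost_diving  = 0;
  s.log.log        = false;
  s.log.log_prefix = "[RINS] ";
  s.solution_callback = [&solution_queue](std::vector<double>& solution, double /*objective*/) {
    solution_queue.push_back(solution);
  };
}

The sub_mip recombiner later in this patch applies the same restriction to its embedded sub-MIP solves.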
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -190,7 +190,7 @@ class bound_prop_recombiner_t : public recombiner_t { probing_values, n_vars_from_other, variable_map); - probing_config.probing_values = host_copy(probing_values); + probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream()); probing_config.n_of_fixed_from_first = fixed_from_guiding; probing_config.n_of_fixed_from_second = fixed_from_other; probing_config.use_balanced_probing = true; @@ -214,7 +214,7 @@ class bound_prop_recombiner_t : public recombiner_t { timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit); get_probing_values_for_infeasible( guiding_solution, other_solution, offspring, probing_values, n_vars_from_other); - probing_config.probing_values = host_copy(probing_values); + probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream()); constraint_prop.apply_round(offspring, lp_run_time_after_feasible, timer, probing_config); } constraint_prop.max_n_failed_repair_iterations = 1; diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 771c4781a..65c3f0143 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -13,6 +13,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { @@ -103,10 +104,16 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; branch_and_bound_settings.num_threads = 2; - branch_and_bound_settings.num_bfs_threads = 1; - branch_and_bound_settings.num_diving_threads = 1; - branch_and_bound_settings.max_cut_passes = 0; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, + branch_and_bound_settings.num_bfs_workers = 1; + branch_and_bound_settings.max_cut_passes = 0; + + // In the future, let SubMIP use all the diving heuristics. For now, + // restricting to guided diving. 
+ branch_and_bound_settings.diving_settings.num_diving_workers = 1; + branch_and_bound_settings.diving_settings.line_search_diving = 0; + branch_and_bound_settings.diving_settings.coefficient_diving = 0; + branch_and_bound_settings.diving_settings.pseudocost_diving = 0; + branch_and_bound_settings.solution_callback = [this](std::vector& solution, f_t objective) { this->solution_callback(solution, objective); }; @@ -114,7 +121,8 @@ class sub_mip_recombiner_t : public recombiner_t { // disable B&B logs, so that it is not interfering with the main B&B thread branch_and_bound_settings.log.log = false; dual_simplex::branch_and_bound_t branch_and_bound(branch_and_bound_problem, - branch_and_bound_settings); + branch_and_bound_settings, + dual_simplex::tic()); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); if (solution_vector.size() > 0) { cuopt_assert(fixed_assignment.size() == branch_and_bound_solution.x.size(), diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index a3353e72f..ecd277065 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -81,8 +81,7 @@ void local_search_t::start_cpufj_scratch_threads(population_t 0); cpu_fj.fj_cpu->log_prefix = "******* scratch " + std::to_string(counter) + ": "; - cpu_fj.fj_cpu->improvement_callback = [this, &population, &cpu_fj]( - f_t obj, const std::vector& h_vec) { + cpu_fj.fj_cpu->improvement_callback = [&population](f_t obj, const std::vector& h_vec) { population.add_external_solution(h_vec, obj, solution_origin_t::CPUFJ); if (obj < local_search_best_obj) { CUOPT_LOG_TRACE("******* New local search best obj %g, best overall %g", @@ -110,7 +109,8 @@ void local_search_t::start_cpufj_lptopt_scratch_threads( std::vector default_weights(context.problem_ptr->n_constraints, 1.); solution_t solution_lp(*context.problem_ptr); - solution_lp.copy_new_assignment(host_copy(lp_optimal_solution)); + solution_lp.copy_new_assignment( + host_copy(lp_optimal_solution, context.problem_ptr->handle_ptr->get_stream())); solution_lp.round_random_nearest(500); scratch_cpu_fj_on_lp_opt.fj_cpu = fj.create_cpu_climber( solution_lp, default_weights, default_weights, 0., context.preempt_heuristic_solver_); diff --git a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh index 2906e648f..5cd219ec3 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh +++ b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -32,6 +32,7 @@ __global__ void simple_rounding_kernel(typename solution_t::view_t sol auto cstr_idx = solution.problem.reverse_constraints[i]; auto cstr_coeff = solution.problem.reverse_coefficients[i]; + // Here, we are storing the constraints in the following format: u <= Ax <= l // boxed constraint. 
can't be rounded safely if (std::isfinite(solution.problem.constraint_lower_bounds[cstr_idx]) && std::isfinite(solution.problem.constraint_upper_bounds[cstr_idx])) { diff --git a/cpp/src/mip/presolve/conditional_bound_strengthening.cu b/cpp/src/mip/presolve/conditional_bound_strengthening.cu index 0d8fb6a08..6d0fb767d 100644 --- a/cpp/src/mip/presolve/conditional_bound_strengthening.cu +++ b/cpp/src/mip/presolve/conditional_bound_strengthening.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -230,11 +230,12 @@ void conditional_bound_strengthening_t::select_constraint_pairs_host( #ifdef DEBUG_COND_BOUNDS_PROP auto start_time = std::chrono::high_resolution_clock::now(); #endif - auto variables = cuopt::host_copy(problem.variables); - auto offsets = cuopt::host_copy(problem.offsets); + auto stream = problem.handle_ptr->get_stream(); + auto variables = cuopt::host_copy(problem.variables, stream); + auto offsets = cuopt::host_copy(problem.offsets, stream); - auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints); - auto reverse_offsets = cuopt::host_copy(problem.reverse_offsets); + auto reverse_constraints = cuopt::host_copy(problem.reverse_constraints, stream); + auto reverse_offsets = cuopt::host_copy(problem.reverse_offsets, stream); std::vector constraint_pairs_h(max_pair_per_row * problem.n_constraints, {-1, -1}); std::unordered_set cnstr_pair; @@ -295,8 +296,8 @@ void conditional_bound_strengthening_t::select_constraint_pairs_device colsC, valsC); std::vector constraint_pairs_h; - offsets_h = cuopt::host_copy(offsetsC); - cols_h = cuopt::host_copy(colsC); + offsets_h = cuopt::host_copy(offsetsC, stream); + cols_h = cuopt::host_copy(colsC, stream); constraint_pairs_h.reserve(max_pair_per_row * problem.n_constraints); for (int i = 0; i < problem.n_constraints; ++i) { @@ -654,8 +655,9 @@ void conditional_bound_strengthening_t::solve(problem_t& pro raft::alignTo(5 * sizeof(f_t) + sizeof(i_t) + sizeof(var_t), sizeof(i_t)) * max_row_size; #ifdef DEBUG_COND_BOUNDS_PROP - auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds); - auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds); + auto debug_stream = problem.handle_ptr->get_stream(); + auto old_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream); + auto old_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream); auto start_time = std::chrono::high_resolution_clock::now(); #endif @@ -674,8 +676,8 @@ void conditional_bound_strengthening_t::solve(problem_t& pro double time_for_presolve = std::chrono::duration_cast(end_time - start_time).count(); - auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds); - auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds); + auto new_lb_h = cuopt::host_copy(problem.constraint_lower_bounds, debug_stream); + auto new_ub_h = cuopt::host_copy(problem.constraint_upper_bounds, debug_stream); int num_improvements = 0; int num_new_equality = 0; diff --git a/cpp/src/mip/presolve/gf2_presolve.hpp b/cpp/src/mip/presolve/gf2_presolve.hpp index 19d4e7d81..623de4be4 100644 --- a/cpp/src/mip/presolve/gf2_presolve.hpp +++ b/cpp/src/mip/presolve/gf2_presolve.hpp @@ -1,19 +1,23 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#if !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" // ignore boost error for pip wheel build +#endif #include #include #include #include +#if !defined(__clang__) #pragma GCC diagnostic pop +#endif namespace cuopt::linear_programming::detail { diff --git a/cpp/src/mip/presolve/lb_probing_cache.cu b/cpp/src/mip/presolve/lb_probing_cache.cu index 4a03a86fd..790ed32e4 100644 --- a/cpp/src/mip/presolve/lb_probing_cache.cu +++ b/cpp/src/mip/presolve/lb_probing_cache.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -302,7 +302,7 @@ inline std::vector compute_prioritized_integer_indices( } return false; }); - auto h_priority_indices = host_copy(priority_indices); + auto h_priority_indices = host_copy(priority_indices, problem.pb->handle_ptr->get_stream()); return h_priority_indices; } @@ -315,9 +315,10 @@ void compute_probing_cache(load_balanced_bounds_presolve_t& bound_pres auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); // std::cout<<"priority_indices\n"; CUOPT_LOG_DEBUG("Computing probing cache"); - auto h_integer_indices = host_copy(problem.pb->integer_indices); - auto h_var_upper_bounds = host_copy(problem.pb->variable_upper_bounds); - auto h_var_lower_bounds = host_copy(problem.pb->variable_lower_bounds); + auto stream = problem.pb->handle_ptr->get_stream(); + auto h_integer_indices = host_copy(problem.pb->integer_indices, stream); + auto h_var_upper_bounds = host_copy(problem.pb->variable_upper_bounds, stream); + auto h_var_lower_bounds = host_copy(problem.pb->variable_lower_bounds, stream); size_t n_of_cached_probings = 0; // TODO adjust the iteration limit depending on the total time limit and time it takes for single // var diff --git a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh index 03ff6b2c0..0ace09afb 100644 --- a/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh +++ b/cpp/src/mip/presolve/load_balanced_partition_helpers.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
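The gf2_presolve.hpp change above (and the matching one in third_party_presolve.cpp further down) wraps the GCC-specific diagnostic pragmas so clang, which does not know the -Wstringop-overflow group, does not emit unknown-warning-option noise. The pattern in isolation, with a standard header standing in for the guarded third-party includes:

#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"  // GCC-only warning group
#endif
#include <vector>  // placeholder for the third-party headers guarded in this patch
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif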
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -181,7 +181,7 @@ class log_dist_t { log_dist_t() = default; log_dist_t(rmm::device_uvector& vertex_id, rmm::device_uvector& bin_offsets) - : vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets)) + : vertex_id_begin_(vertex_id.data()), bin_offsets_(host_copy(bin_offsets, bin_offsets.stream())) { // If bin_offsets_ is smaller than NumberBins then resize it // so that the last element is repeated diff --git a/cpp/src/mip/presolve/multi_probe.cu b/cpp/src/mip/presolve/multi_probe.cu index 5a11164e4..f1f16b6b5 100644 --- a/cpp/src/mip/presolve/multi_probe.cu +++ b/cpp/src/mip/presolve/multi_probe.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -302,7 +302,12 @@ termination_criterion_t multi_probe_t::bound_update_loop(problem_tsync_stream(); - if (compute_stats) { constraint_stats(pb, handle_ptr); } + if (compute_stats) { + upd_0.init_changed_constraints(handle_ptr); + upd_1.init_changed_constraints(handle_ptr); + calculate_activity(pb, handle_ptr); + constraint_stats(pb, handle_ptr); + } return criteria; } diff --git a/cpp/src/mip/presolve/probing_cache.cu b/cpp/src/mip/presolve/probing_cache.cu index 18620dc51..fc2d974e3 100644 --- a/cpp/src/mip/presolve/probing_cache.cu +++ b/cpp/src/mip/presolve/probing_cache.cu @@ -1,17 +1,20 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #include "probing_cache.cuh" +#include "trivial_presolve.cuh" #include #include +#include #include #include +#include #include #include #include @@ -117,7 +120,7 @@ f_t probing_cache_t::get_least_conflicting_rounding(problem_t::get_least_conflicting_rounding(problem_t compute_prioritized_integer_indices( } return false; }); - auto h_priority_indices = host_copy(priority_indices); + auto h_priority_indices = host_copy(priority_indices, problem.handle_ptr->get_stream()); problem.handle_ptr->sync_stream(); return h_priority_indices; } @@ -360,17 +363,25 @@ void compute_cache_for_var(i_t var_idx, const std::vector& h_integer_indices, std::atomic& n_of_implied_singletons, std::atomic& n_of_cached_probings, + std::atomic& problem_is_infeasible, + std::vector>& modification_vector, + std::vector>& substitution_vector, + timer_t timer, i_t device_id) { RAFT_CUDA_TRY(cudaSetDevice(device_id)); // test if we need per thread handle raft::handle_t handle{}; - std::vector h_improved_lower_bounds(h_var_bounds.size()); - std::vector h_improved_upper_bounds(h_var_bounds.size()); + std::vector h_improved_lower_bounds_0(h_var_bounds.size()); + std::vector h_improved_upper_bounds_0(h_var_bounds.size()); + std::vector h_improved_lower_bounds_1(h_var_bounds.size()); + std::vector h_improved_upper_bounds_1(h_var_bounds.size()); std::pair, val_interval_t> probe_vals; auto bounds = h_var_bounds[var_idx]; f_t lb = get_lower(bounds); f_t ub = get_upper(bounds); + // note that is_binary does not always mean the bound difference is one + bool is_binary = ub == 1 && lb == 0; for (i_t i = 0; i < 2; ++i) { auto& probe_val = i == 0 ? 
probe_vals.first : probe_vals.second; // if binary, probe both values @@ -423,12 +434,46 @@ void compute_cache_for_var(i_t var_idx, if (bounds_presolve_result != termination_criterion_t::NO_UPDATE) { CUOPT_LOG_TRACE("Adding cached bounds for var %d", var_idx); } + i_t n_of_infeasible_probings = 0; + i_t valid_host_bounds = 0; for (i_t i = 0; i < 2; ++i) { - // this only tracs the number of variables that have cached bounds + if (multi_probe_presolve.infeas_constraints_count_0 > 0 && + multi_probe_presolve.infeas_constraints_count_1 > 0) { + problem_is_infeasible.store(true); + return; + } + i_t infeas_constraints_count = i == 0 ? multi_probe_presolve.infeas_constraints_count_0 + : multi_probe_presolve.infeas_constraints_count_1; + const auto& probe_val = i == 0 ? probe_vals.first : probe_vals.second; + auto& h_improved_lower_bounds = i == 0 ? h_improved_lower_bounds_0 : h_improved_lower_bounds_1; + auto& h_improved_upper_bounds = i == 0 ? h_improved_upper_bounds_0 : h_improved_upper_bounds_1; + if (infeas_constraints_count > 0) { + CUOPT_LOG_TRACE("Var %d is infeasible for probe %d on value %f. Fixing other interval", + var_idx, + i, + probe_val.val); + const auto other_probe_val = i == 0 ? probe_vals.second : probe_vals.first; + const auto other_probe_interval_type = + i == 0 ? probe_vals.second.interval_type : probe_vals.first.interval_type; + // current probe is infeasible, remove the current var bound from the bounds + if (other_probe_interval_type == interval_type_t::EQUALS) { + modification_vector.emplace_back( + timer.elapsed_time(), var_idx, other_probe_val.val, other_probe_val.val); + } else if (other_probe_interval_type == interval_type_t::GEQ) { + modification_vector.emplace_back( + timer.elapsed_time(), var_idx, other_probe_val.val, bounds.y); + } else { + modification_vector.emplace_back( + timer.elapsed_time(), var_idx, bounds.x, other_probe_val.val); + } + n_of_infeasible_probings++; + continue; + } + // this only tracks the number of variable intervals that have cached bounds n_of_cached_probings++; // save the impacted bounds if (bounds_presolve_result != termination_criterion_t::NO_UPDATE) { - const auto& probe_val = i == 0 ? probe_vals.first : probe_vals.second; + valid_host_bounds++; auto& d_lb = i == 0 ? multi_probe_presolve.upd_0.lb : multi_probe_presolve.upd_1.lb; auto& d_ub = i == 0 ? 
multi_probe_presolve.upd_0.ub : multi_probe_presolve.upd_1.ub; raft::copy(h_improved_lower_bounds.data(), @@ -449,20 +494,363 @@ void compute_cache_for_var(i_t var_idx, n_of_implied_singletons); } } + // when both probes are feasible, we can infer some global bounds + if (n_of_infeasible_probings == 0 && valid_host_bounds == 2) { + // TODO do the check in parallel + for (size_t i = 0; i < h_improved_lower_bounds_0.size(); i++) { + if (i == (size_t)var_idx) { continue; } + f_t lower_bound = min(h_improved_lower_bounds_0[i], h_improved_lower_bounds_1[i]); + f_t upper_bound = max(h_improved_upper_bounds_0[i], h_improved_upper_bounds_1[i]); + cuopt_assert(h_var_bounds[i].x <= lower_bound, "lower bound violation"); + cuopt_assert(h_var_bounds[i].y >= upper_bound, "upper bound violation"); + // check why we might have invalid lower and upper bound here + if (h_var_bounds[i].x < lower_bound || h_var_bounds[i].y > upper_bound) { + modification_vector.emplace_back(timer.elapsed_time(), i, lower_bound, upper_bound); + CUOPT_LOG_TRACE( + "Var %d global bounds inferred from probing new bounds: [%f, %f] old bounds: [%f, %f]", + i, + lower_bound, + upper_bound, + h_var_bounds[i].x, + h_var_bounds[i].y); + } + f_t int_tol = bound_presolve.context.settings.tolerances.integrality_tolerance; + if (integer_equal(h_improved_lower_bounds_0[i], h_improved_upper_bounds_0[i], int_tol) && + integer_equal(h_improved_lower_bounds_1[i], h_improved_upper_bounds_1[i], int_tol) && + is_binary) { + // == case has been handled as fixing by the global bounds update + if (!integer_equal( + h_improved_lower_bounds_0[i], h_improved_lower_bounds_1[i], int_tol)) { + // trivial presolve handles eliminations + // x_i = l_0 + (l_1 - l_0) * x_var_idx + // this means + CUOPT_LOG_TRACE("Variable substitution found for var %d", i); + substitution_t substitution; + substitution.timestamp = timer.elapsed_time(); + substitution.substituted_var = i; + substitution.substituting_var = var_idx; + substitution.offset = h_improved_lower_bounds_0[i]; + substitution.coefficient = h_improved_lower_bounds_1[i] - h_improved_lower_bounds_0[i]; + substitution_vector.emplace_back(substitution); + } + } + } + } handle.sync_stream(); } template -void compute_probing_cache(bound_presolve_t& bound_presolve, +void apply_modification_queue_to_problem( + std::vector>>& modification_vector_pool, + problem_t& problem) +{ + // since each thread has its own deterministic chunk and the order of insertion here is + // deterministic this should be deterministic + std::unordered_map> var_bounds_modifications; + for (const auto& modification_vector : modification_vector_pool) { + for (const auto& modification : modification_vector) { + auto [time, var_idx, lb, ub] = modification; + if (var_bounds_modifications.count(var_idx) == 0) { + var_bounds_modifications[var_idx] = std::make_pair(lb, ub); + } else { + var_bounds_modifications[var_idx].first = max(var_bounds_modifications[var_idx].first, lb); + var_bounds_modifications[var_idx].second = + min(var_bounds_modifications[var_idx].second, ub); + } + } + } + std::vector var_indices; + std::vector lb_values; + std::vector ub_values; + for (const auto& [var_idx, modifications] : var_bounds_modifications) { + var_indices.push_back(var_idx); + lb_values.push_back(modifications.first); + ub_values.push_back(modifications.second); + } + if (var_indices.size() > 0) { + problem.update_variable_bounds(var_indices, lb_values, ub_values); + CUOPT_LOG_DEBUG("Updated %d variable bounds", var_indices.size()); + } +} + +// Ensures 
that if A subs B and B subs A, we only keep one deterministic direction. +template +void sanitize_graph( + std::unordered_map>>>& all_substitutions) +{ + for (auto& substitution : all_substitutions) { + auto& substituting_var = substitution.first; + auto& list = substitution.second; + // Use remove_if with a lambda to clean up the vector in-place + auto it = std::remove_if( + list.begin(), list.end(), [&](const std::pair>& item) { + i_t substituted_var = item.first; + // Check if the reverse edge exists, it should exists because of the nature of probing + if (all_substitutions.count(substituted_var)) { + const auto& reverse_list = all_substitutions[substituted_var]; + for (const auto& reverse_item : reverse_list) { + if (reverse_item.first == substituting_var) { + // Bidirectional edge detected! + // Keep edge only if substituting_var < substituted_var. + if (substituting_var > substituted_var) { + CUOPT_LOG_TRACE("Removing cycle edge: %d -> %d (keeping %d -> %d)", + substituting_var, + substituted_var, + substituted_var, + substituting_var); + return true; // delete the edge + } + } + } + } + return false; // keep the edge + }); + + list.erase(it, list.end()); + } +} + +template +void dfs( + std::unordered_map>>>& all_substitutions, + std::unordered_set& visited, + const substitution_t& parent_substitution, + i_t curr_var) +{ + // If we have already processed this node in the current traversal. + if (visited.count(curr_var)) return; + visited.insert(curr_var); + + // If 'curr_var' itself substitutes others, we must propagate the parent's substitution down. + if (all_substitutions.count(curr_var)) { + for (auto& [substituted_var_of_child, child_substitution] : all_substitutions[curr_var]) { + // Parent: curr_var = P_offset + P_coeff * Root_Var + // Child: child_var = C_offset + C_coeff * curr_var + // Result: child_var = C_offset + C_coeff * (P_offset + P_coeff * Root_Var) + // = (C_offset + C_coeff * P_offset) + (C_coeff * P_coeff) * Root_Var + child_substitution.offset = + child_substitution.offset + child_substitution.coefficient * parent_substitution.offset; + child_substitution.coefficient = + child_substitution.coefficient * parent_substitution.coefficient; + child_substitution.substituting_var = parent_substitution.substituting_var; + CUOPT_LOG_TRACE("Merged: Var %d is now substituted by %d via %d", + substituted_var_of_child, + child_substitution.substituting_var, + curr_var); + dfs(all_substitutions, visited, child_substitution, substituted_var_of_child); + } + } +} + +template +void merge_substitutions( + std::unordered_map>>>& all_substitutions) +{ + // Remove cycles (A->B and B->A) as probing always generates a pair of equivalent substitutions + sanitize_graph(all_substitutions); + + // Identify Roots + // A Root is a 'substituting' var that is never 'substituted' by anyone else. + std::unordered_set all_substituted_vars; + for (const auto& [key, list] : all_substitutions) { + for (const auto& item : list) { + all_substituted_vars.insert(item.first); + } + } + + std::vector roots; + for (const auto& [key, list] : all_substitutions) { + if (all_substituted_vars.find(key) == all_substituted_vars.end()) { roots.push_back(key); } + } + + // Run DFS from every Root + + for (i_t root : roots) { + // For the root, there is no "parent substitution". 
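The dfs above flattens chains of probing-derived substitutions so every substituted variable is expressed directly in terms of a root variable: if the parent edge is x = p_off + p_coef * r and the child edge is y = c_off + c_coef * x, the merged edge is y = (c_off + c_coef * p_off) + (c_coef * p_coef) * r. A tiny self-contained check of that composition (the struct mirrors the offset/coefficient fields of substitution_t but is a local stand-in):

#include <cassert>

struct affine_sub_t {  // var = offset + coefficient * substituting_var
  double offset;
  double coefficient;
};

// Substitute the parent expression into the child: child(parent(r)).
affine_sub_t compose(const affine_sub_t& parent, const affine_sub_t& child)
{
  return {child.offset + child.coefficient * parent.offset,
          child.coefficient * parent.coefficient};
}

int main()
{
  // Probing found x = 1 + 2*r and y = 3 - 1*x, so y = (3 - 1*1) + (-1*2)*r = 2 - 2*r.
  affine_sub_t parent{1.0, 2.0};
  affine_sub_t child{3.0, -1.0};
  affine_sub_t merged = compose(parent, child);
  assert(merged.offset == 2.0 && merged.coefficient == -2.0);
  return 0;
}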
+ std::unordered_set visited_in_this_path; + visited_in_this_path.insert(root); + for (auto& [substituted_var, substitution] : all_substitutions[root]) { + // Pass the substitution connecting Root->Child as the "parent" for the next level + dfs(all_substitutions, visited_in_this_path, substitution, substituted_var); + } + } +} + +template +void apply_substitution_queue_to_problem( + std::vector>>& substitution_vector_pool, + problem_t& problem) +{ + std::unordered_map>>> all_substitutions; + + for (const auto& substitution_vector : substitution_vector_pool) { + for (const auto& substitution : substitution_vector) { + all_substitutions[substitution.substituting_var].push_back( + {substitution.substituted_var, substitution}); + } + } + + // Flatten Graph + merge_substitutions(all_substitutions); + + std::vector var_indices; + std::vector substituting_var_indices; + std::vector offset_values; + std::vector coefficient_values; + + // Get variable_mapping to convert current indices to original indices + auto h_variable_mapping = + host_copy(problem.presolve_data.variable_mapping, problem.handle_ptr->get_stream()); + problem.handle_ptr->sync_stream(); + + for (const auto& [substituting_var, substitutions] : all_substitutions) { + for (const auto& [substituted_var, substitution] : substitutions) { + CUOPT_LOG_TRACE("Applying substitution: %d -> %d", + substitution.substituting_var, + substitution.substituted_var); + var_indices.push_back(substitution.substituted_var); + substituting_var_indices.push_back(substitution.substituting_var); + offset_values.push_back(substitution.offset); + coefficient_values.push_back(substitution.coefficient); + + // Store substitution for post-processing (convert to original variable IDs) + substitution_t sub; + sub.timestamp = substitution.timestamp; + sub.substituted_var = h_variable_mapping[substitution.substituted_var]; + sub.substituting_var = h_variable_mapping[substitution.substituting_var]; + sub.offset = substitution.offset; + sub.coefficient = substitution.coefficient; + problem.presolve_data.variable_substitutions.push_back(sub); + CUOPT_LOG_TRACE("Stored substitution for post-processing: x[%d] = %f + %f * x[%d]", + sub.substituted_var, + sub.offset, + sub.coefficient, + sub.substituting_var); + } + } + + if (!var_indices.empty()) { + problem.substitute_variables( + var_indices, substituting_var_indices, offset_values, coefficient_values); + } +} + +template +std::vector compute_priority_indices_by_implied_integers(problem_t& problem) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + auto input_transform_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [view = problem.view()] __device__(i_t idx) -> i_t { + return view.is_integer_var(view.variables[idx]); + }); + // keeps the number of constraints that contain integer variables + rmm::device_uvector num_int_vars_per_constraint(problem.n_constraints, + problem.handle_ptr->get_stream()); + cub::DeviceSegmentedReduce::Reduce(d_temp_storage, + temp_storage_bytes, + input_transform_it, + num_int_vars_per_constraint.data(), + problem.n_constraints, + problem.offsets.data(), + problem.offsets.data() + 1, + cuda::std::plus<>{}, + 0, + problem.handle_ptr->get_stream()); + + rmm::device_uvector temp_storage(temp_storage_bytes, + problem.handle_ptr->get_stream()); + d_temp_storage = thrust::raw_pointer_cast(temp_storage.data()); + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage, + temp_storage_bytes, + input_transform_it, + 
num_int_vars_per_constraint.data(), + problem.n_constraints, + problem.offsets.data(), + problem.offsets.data() + 1, + cuda::std::plus<>{}, + 0, + problem.handle_ptr->get_stream()); + // keeps the count of number of other integers that this variables shares a constraint with + rmm::device_uvector count_per_variable(problem.n_variables, + problem.handle_ptr->get_stream()); + auto input_transform_it_2 = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [num_int_vars_per_constraint = make_span(num_int_vars_per_constraint), + view = problem.view()] __device__(i_t idx) -> i_t { + return num_int_vars_per_constraint[view.reverse_constraints[idx]]; + }); + // run second reduction operation, reset sizes so query works correctly + d_temp_storage = nullptr; + temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Reduce(d_temp_storage, + temp_storage_bytes, + input_transform_it_2, + count_per_variable.data(), + problem.n_variables, + problem.reverse_offsets.data(), + problem.reverse_offsets.data() + 1, + cuda::std::plus<>{}, + 0, + problem.handle_ptr->get_stream()); + + temp_storage.resize(temp_storage_bytes, problem.handle_ptr->get_stream()); + d_temp_storage = thrust::raw_pointer_cast(temp_storage.data()); + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage, + temp_storage_bytes, + input_transform_it_2, + count_per_variable.data(), + problem.n_variables, + problem.reverse_offsets.data(), + problem.reverse_offsets.data() + 1, + cuda::std::plus<>{}, + 0, + problem.handle_ptr->get_stream()); + thrust::for_each(problem.handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(problem.n_variables), + [count_per_variable = make_span(count_per_variable), + view = problem.view()] __device__(i_t idx) { + if (!view.is_integer_var(idx)) { count_per_variable[idx] = 0; } + }); + rmm::device_uvector priority_indices(problem.n_variables, problem.handle_ptr->get_stream()); + thrust::sequence( + problem.handle_ptr->get_thrust_policy(), priority_indices.begin(), priority_indices.end()); + thrust::sort_by_key(problem.handle_ptr->get_thrust_policy(), + count_per_variable.data(), + count_per_variable.data() + problem.n_variables, + priority_indices.data(), + thrust::greater()); + auto h_priority_indices = host_copy(priority_indices, problem.handle_ptr->get_stream()); + // Find the index of the first 0 element in count_per_variable + auto first_zero_it = thrust::lower_bound(problem.handle_ptr->get_thrust_policy(), + count_per_variable.begin(), + count_per_variable.end(), + 0, + thrust::greater()); + size_t first_zero_index = (first_zero_it != count_per_variable.end()) + ? 
std::distance(count_per_variable.begin(), first_zero_it) + : count_per_variable.size(); + h_priority_indices.erase(h_priority_indices.begin() + first_zero_index, h_priority_indices.end()); + return h_priority_indices; +} + +template +bool compute_probing_cache(bound_presolve_t& bound_presolve, problem_t& problem, timer_t timer) { raft::common::nvtx::range fun_scope("compute_probing_cache"); // we dont want to compute the probing cache for all variables for time and computation resources - auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); + auto priority_indices = compute_priority_indices_by_implied_integers(problem); CUOPT_LOG_DEBUG("Computing probing cache"); - auto h_integer_indices = host_copy(problem.integer_indices); - const auto h_var_bounds = host_copy(problem.variable_bounds); + auto stream = problem.handle_ptr->get_stream(); + auto h_integer_indices = host_copy(problem.integer_indices, stream); + auto h_var_bounds = host_copy(problem.variable_bounds, stream); // TODO adjust the iteration limit depending on the total time limit and time it takes for single // var bound_presolve.settings.iteration_limit = 50; @@ -474,51 +862,83 @@ void compute_probing_cache(bound_presolve_t& bound_presolve, // Create a vector of multi_probe_t objects std::vector> multi_probe_presolve_pool; + std::vector>> modification_vector_pool(max_threads); + std::vector>> substitution_vector_pool(max_threads); // Initialize multi_probe_presolve_pool for (size_t i = 0; i < max_threads; i++) { multi_probe_presolve_pool.emplace_back(bound_presolve.context); multi_probe_presolve_pool[i].resize(problem); - multi_probe_presolve_pool[i].compute_stats = false; + multi_probe_presolve_pool[i].compute_stats = true; } // Atomic variables for tracking progress std::atomic n_of_implied_singletons(0); std::atomic n_of_cached_probings(0); - + std::atomic problem_is_infeasible(false); + size_t last_it_implied_singletons = 0; + bool early_exit = false; + const size_t step_size = min((size_t)2048, priority_indices.size()); + for (size_t step_start = 0; step_start < priority_indices.size(); step_start += step_size) { + if (timer.check_time_limit() || early_exit || problem_is_infeasible.load()) { break; } + size_t step_end = std::min(step_start + step_size, priority_indices.size()); // Main parallel loop #pragma omp parallel - { + { #pragma omp for schedule(static, 4) - for (auto var_idx : priority_indices) { - if (timer.check_time_limit()) { continue; } + for (size_t i = step_start; i < step_end; ++i) { + auto var_idx = priority_indices[i]; + if (timer.check_time_limit()) { continue; } - int thread_idx = omp_get_thread_num(); - CUOPT_LOG_TRACE("Computing probing cache for var %d on thread %d", var_idx, thread_idx); + int thread_idx = omp_get_thread_num(); + CUOPT_LOG_TRACE("Computing probing cache for var %d on thread %d", var_idx, thread_idx); - auto& multi_probe_presolve = multi_probe_presolve_pool[thread_idx]; + auto& multi_probe_presolve = multi_probe_presolve_pool[thread_idx]; - compute_cache_for_var(var_idx, - bound_presolve, - problem, - multi_probe_presolve, - h_var_bounds, - h_integer_indices, - n_of_implied_singletons, - n_of_cached_probings, - problem.handle_ptr->get_device()); + compute_cache_for_var(var_idx, + bound_presolve, + problem, + multi_probe_presolve, + h_var_bounds, + h_integer_indices, + n_of_implied_singletons, + n_of_cached_probings, + problem_is_infeasible, + modification_vector_pool[thread_idx], + substitution_vector_pool[thread_idx], + timer, + 
problem.handle_ptr->get_device()); + } } - } - +#pragma omp single + { + // TODO when we have determinism, check current threads work/time counter and filter queue + // items that are smaller or equal to that + apply_modification_queue_to_problem(modification_vector_pool, problem); + // copy host bounds again, because we changed some problem bounds + raft::copy(h_var_bounds.data(), + problem.variable_bounds.data(), + h_var_bounds.size(), + problem.handle_ptr->get_stream()); + problem.handle_ptr->sync_stream(); + if (n_of_implied_singletons - last_it_implied_singletons < + (size_t)std::max(2, (min(100, problem.n_variables / 50)))) { + early_exit = true; + } + last_it_implied_singletons = n_of_implied_singletons; + } + } // end of step + apply_substitution_queue_to_problem(substitution_vector_pool, problem); CUOPT_LOG_DEBUG("Total number of cached probings %lu number of implied singletons %lu", n_of_cached_probings.load(), n_of_implied_singletons.load()); // restore the settings bound_presolve.settings = {}; + return problem_is_infeasible.load(); } #define INSTANTIATE(F_TYPE) \ - template void compute_probing_cache(bound_presolve_t & bound_presolve, \ + template bool compute_probing_cache(bound_presolve_t & bound_presolve, \ problem_t & problem, \ timer_t timer); \ template class probing_cache_t; diff --git a/cpp/src/mip/presolve/probing_cache.cuh b/cpp/src/mip/presolve/probing_cache.cuh index 908ec1de9..b5bf77685 100644 --- a/cpp/src/mip/presolve/probing_cache.cuh +++ b/cpp/src/mip/presolve/probing_cache.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -115,7 +115,7 @@ class lb_probing_cache_t { }; template -void compute_probing_cache(bound_presolve_t& bound_presolve, +bool compute_probing_cache(bound_presolve_t& bound_presolve, problem_t& problem, timer_t timer); diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index f7eb6d2c2..cc5a9dd5b 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
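compute_priority_indices_by_implied_integers above uses CUB's standard two-phase pattern for segmented reductions: call once with a null temporary-storage pointer to query the scratch size, allocate, then call again to run. A minimal standalone sketch of that pattern with plain device pointers in place of the problem's transform iterators:

#include <cstddef>
#include <cub/device/device_segmented_reduce.cuh>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

// Sums each CSR-style segment of `values` into `sums` (one entry per segment).
void segmented_sums(const int* values,
                    const int* segment_offsets,  // length num_segments + 1, device memory
                    int* sums,
                    int num_segments,
                    rmm::cuda_stream_view stream)
{
  void* d_temp        = nullptr;
  std::size_t n_bytes = 0;
  // Phase 1: with d_temp == nullptr, only the required scratch size is computed.
  cub::DeviceSegmentedReduce::Sum(d_temp, n_bytes, values, sums, num_segments,
                                  segment_offsets, segment_offsets + 1, stream.value());
  rmm::device_uvector<std::byte> temp(n_bytes, stream);
  d_temp = temp.data();
  // Phase 2: run the reduction with the allocated scratch space.
  cub::DeviceSegmentedReduce::Sum(d_temp, n_bytes, values, sums, num_segments,
                                  segment_offsets, segment_offsets + 1, stream.value());
}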
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -14,11 +14,15 @@ #include +#if !defined(__clang__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" // ignore boost error for pip wheel build +#endif #include #include +#if !defined(__clang__) #pragma GCC diagnostic pop +#endif namespace cuopt::linear_programming::detail { @@ -405,7 +409,7 @@ std::optional> third_party_presolve_t presolver; set_presolve_methods(presolver, category, dual_postsolve); @@ -438,6 +442,11 @@ std::optional> third_party_presolve_t(papilo_problem, op_problem.get_handle_ptr(), category); auto col_flags = papilo_problem.getColFlags(); diff --git a/cpp/src/mip/presolve/trivial_presolve.cu b/cpp/src/mip/presolve/trivial_presolve.cu index dbc61f202..199a95db8 100644 --- a/cpp/src/mip/presolve/trivial_presolve.cu +++ b/cpp/src/mip/presolve/trivial_presolve.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -11,11 +11,11 @@ namespace cuopt::linear_programming::detail { #if MIP_INSTANTIATE_FLOAT -template void trivial_presolve(problem_t& problem); +template void trivial_presolve(problem_t& problem, bool remap_cache_ids); #endif #if MIP_INSTANTIATE_DOUBLE -template void trivial_presolve(problem_t& problem); +template void trivial_presolve(problem_t& problem, bool remap_cache_ids); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/presolve/trivial_presolve.cuh b/cpp/src/mip/presolve/trivial_presolve.cuh index cf9659662..44cc17af5 100644 --- a/cpp/src/mip/presolve/trivial_presolve.cuh +++ b/cpp/src/mip/presolve/trivial_presolve.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -104,7 +104,7 @@ void cleanup_vectors(problem_t& pb, } template -void update_from_csr(problem_t& pb) +void update_from_csr(problem_t& pb, bool remap_cache_ids) { using f_t2 = typename type_2::type; auto handle_ptr = pb.handle_ptr; @@ -181,7 +181,20 @@ void update_from_csr(problem_t& pb) cuda::std::identity{}); pb.presolve_data.variable_mapping.resize(used_iter - pb.presolve_data.variable_mapping.begin(), handle_ptr->get_stream()); - + if (remap_cache_ids) { + pb.original_ids.resize(pb.presolve_data.variable_mapping.size()); + raft::copy(pb.original_ids.data(), + pb.presolve_data.variable_mapping.data(), + pb.presolve_data.variable_mapping.size(), + handle_ptr->get_stream()); + std::fill(pb.reverse_original_ids.begin(), pb.reverse_original_ids.end(), -1); + handle_ptr->sync_stream(); + for (size_t i = 0; i < pb.original_ids.size(); ++i) { + cuopt_assert(pb.original_ids[i] < pb.reverse_original_ids.size(), + "Variable index out of bounds"); + pb.reverse_original_ids[pb.original_ids[i]] = i; + } + } RAFT_CHECK_CUDA(handle_ptr->get_stream()); } @@ -313,12 +326,13 @@ void update_from_csr(problem_t& pb) template void test_reverse_matches(const problem_t& pb) { - auto h_offsets = cuopt::host_copy(pb.offsets); - auto h_coefficients = cuopt::host_copy(pb.coefficients); - auto h_variables = cuopt::host_copy(pb.variables); - auto h_reverse_offsets = cuopt::host_copy(pb.reverse_offsets); - auto h_reverse_constraints = cuopt::host_copy(pb.reverse_constraints); - auto h_reverse_coefficients = cuopt::host_copy(pb.reverse_coefficients); + auto stream = pb.handle_ptr->get_stream(); + auto h_offsets = cuopt::host_copy(pb.offsets, stream); + auto h_coefficients = cuopt::host_copy(pb.coefficients, stream); + auto h_variables = cuopt::host_copy(pb.variables, stream); + auto h_reverse_offsets = cuopt::host_copy(pb.reverse_offsets, stream); + auto h_reverse_constraints = cuopt::host_copy(pb.reverse_constraints, stream); + auto h_reverse_coefficients = cuopt::host_copy(pb.reverse_coefficients, stream); std::vector> vars_per_constr(pb.n_constraints); std::vector> coeff_per_constr(pb.n_constraints); @@ -340,12 +354,12 @@ void test_reverse_matches(const problem_t& pb) } template -void trivial_presolve(problem_t& problem) +void trivial_presolve(problem_t& problem, bool remap_cache_ids = false) { cuopt_expects(problem.preprocess_called, error_type_t::RuntimeError, "preprocess_problem should be called before running the solver"); - update_from_csr(problem); + update_from_csr(problem, remap_cache_ids); problem.recompute_auxilliary_data( false); // check problem representation later once cstr bounds are computed cuopt_func_call(test_reverse_matches(problem)); diff --git a/cpp/src/mip/presolve/trivial_presolve_helpers.cuh b/cpp/src/mip/presolve/trivial_presolve_helpers.cuh index 89e4c6625..73f466635 100644 --- a/cpp/src/mip/presolve/trivial_presolve_helpers.cuh +++ b/cpp/src/mip/presolve/trivial_presolve_helpers.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -28,6 +28,9 @@ struct is_variable_free_t { template __device__ bool operator()(tuple_t edge) { + // eliminate zero coefficient entries + auto coeff = thrust::get<1>(edge); + if (coeff == 0.) 
{ return false; } auto var = thrust::get<2>(edge); auto bounds = bnd[var]; return abs(get_upper(bounds) - get_lower(bounds)) > tol; diff --git a/cpp/src/mip/problem/presolve_data.cuh b/cpp/src/mip/problem/presolve_data.cuh index 2dba376e0..be1f9f8cb 100644 --- a/cpp/src/mip/problem/presolve_data.cuh +++ b/cpp/src/mip/problem/presolve_data.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -19,6 +19,15 @@ namespace linear_programming::detail { template class problem_t; +template +struct substitution_t { + f_t timestamp; + i_t substituting_var; + i_t substituted_var; + f_t offset; + f_t coefficient; +}; + template class presolve_data_t { public: @@ -42,7 +51,8 @@ class presolve_data_t { objective_scaling_factor(other.objective_scaling_factor), variable_mapping(other.variable_mapping, stream), fixed_var_assignment(other.fixed_var_assignment, stream), - var_flags(other.var_flags, stream) + var_flags(other.var_flags, stream), + variable_substitutions(other.variable_substitutions) { } @@ -56,6 +66,7 @@ class presolve_data_t { fixed_var_assignment.begin(), fixed_var_assignment.end(), 0.); + variable_substitutions.clear(); } void reset_additional_vars(const problem_t& problem, const raft::handle_t* handle_ptr) @@ -79,6 +90,10 @@ class presolve_data_t { rmm::device_uvector variable_mapping; rmm::device_uvector fixed_var_assignment; rmm::device_uvector var_flags; + + // Variable substitutions from probing: x_substituted = offset + coefficient * x_substituting + // Applied in post_process_assignment to recover substituted variable values + std::vector> variable_substitutions; }; } // namespace linear_programming::detail diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 5be50fb05..9e9b74a2e 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -823,6 +823,22 @@ void problem_t::post_process_assignment(rmm::device_uvector& curr h_assignment[i] -= h_assignment[presolve_data.additional_var_id_per_var[i]]; } } + + // Apply variable substitutions from probing: x_substituted = offset + coefficient * + // x_substituting + for (const auto& sub : presolve_data.variable_substitutions) { + cuopt_assert(sub.substituted_var < (i_t)h_assignment.size(), "substituted_var out of bounds"); + cuopt_assert(sub.substituting_var < (i_t)h_assignment.size(), "substituting_var out of bounds"); + h_assignment[sub.substituted_var] = + sub.offset + sub.coefficient * h_assignment[sub.substituting_var]; + CUOPT_LOG_DEBUG("Post-process substitution: x[%d] = %f + %f * x[%d] = %f", + sub.substituted_var, + sub.offset, + sub.coefficient, + sub.substituting_var, + h_assignment[sub.substituted_var]); + } + raft::copy( current_assignment.data(), h_assignment.data(), h_assignment.size(), handle_ptr->get_stream()); // this separate resizing is needed because of the callback @@ -1220,6 +1236,197 @@ void problem_t::set_implied_integers(const std::vector& implied_i }); } +template +bool are_exclusive(const std::vector& var_indices, + const std::vector& var_to_substitute_indices) +{ + std::vector A_sorted = var_indices; + std::vector B_sorted = var_to_substitute_indices; + std::sort(A_sorted.begin(), A_sorted.end()); + std::sort(B_sorted.begin(), B_sorted.end()); + std::vector intersection(std::min(A_sorted.size(), B_sorted.size())); + auto end_iter = std::set_intersection( + A_sorted.begin(), A_sorted.end(), B_sorted.begin(), B_sorted.end(), intersection.begin()); + return (end_iter == intersection.begin()); // true if no overlap +} + +// note that this only substitutes the variables, for problem modification trivial_presolve needs to +// be called. 
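+// each substitution has the form x_substituted = offset + coefficient * x_substituting:
+// the substituted column is folded into the substituting one, the constant part is moved
+// into the constraint bounds, and the objective picks up a constant offset.
+// for example, substituting x_j = 2 + 3 * x_k in a row a*x_j + b*x_k <= u yields
+// (3*a + b) * x_k <= u - 2*a, while c_k becomes c_k + 3*c_j and the objective offset
+// grows by 2*c_j.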
+// note that, this function assumes var_indices and var_to_substitute_indices don't contain any +// common indices +template +void problem_t::substitute_variables(const std::vector& var_indices, + const std::vector& var_to_substitute_indices, + const std::vector& offset_values, + const std::vector& coefficient_values) +{ + raft::common::nvtx::range fun_scope("substitute_variables"); + cuopt_assert((are_exclusive(var_indices, var_to_substitute_indices)), + "variables and var_to_substitute_indices are not exclusive"); + const i_t dummy_substituted_variable = var_indices[0]; + cuopt_assert(var_indices.size() == var_to_substitute_indices.size(), "size mismatch"); + cuopt_assert(var_indices.size() == offset_values.size(), "size mismatch"); + cuopt_assert(var_indices.size() == coefficient_values.size(), "size mismatch"); + auto d_var_indices = device_copy(var_indices, handle_ptr->get_stream()); + auto d_var_to_substitute_indices = + device_copy(var_to_substitute_indices, handle_ptr->get_stream()); + auto d_offset_values = device_copy(offset_values, handle_ptr->get_stream()); + auto d_coefficient_values = device_copy(coefficient_values, handle_ptr->get_stream()); + fixing_helpers.reduction_in_rhs.resize(n_constraints, handle_ptr->get_stream()); + fixing_helpers.variable_fix_mask.resize(n_variables, handle_ptr->get_stream()); + thrust::fill(handle_ptr->get_thrust_policy(), + fixing_helpers.reduction_in_rhs.begin(), + fixing_helpers.reduction_in_rhs.end(), + 0); + thrust::fill(handle_ptr->get_thrust_policy(), + fixing_helpers.variable_fix_mask.begin(), + fixing_helpers.variable_fix_mask.end(), + -1); + + rmm::device_scalar objective_offset(0., handle_ptr->get_stream()); + constexpr f_t zero_value = f_t(0.); + rmm::device_uvector objective_offset_delta_per_variable(d_var_indices.size(), + handle_ptr->get_stream()); + thrust::fill(handle_ptr->get_thrust_policy(), + objective_offset_delta_per_variable.begin(), + objective_offset_delta_per_variable.end(), + zero_value); + thrust::for_each( + handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + d_var_indices.size(), + [variable_fix_mask = make_span(fixing_helpers.variable_fix_mask), + var_indices = make_span(d_var_indices), + n_variables = n_variables, + substitute_coefficient = make_span(d_coefficient_values), + substitute_offset = make_span(d_offset_values), + var_to_substitute_indices = make_span(d_var_to_substitute_indices), + objective_coefficients = make_span(objective_coefficients), + objective_offset_delta_per_variable = make_span(objective_offset_delta_per_variable), + objective_offset = objective_offset.data()] __device__(i_t idx) { + i_t var_idx = var_indices[idx]; + i_t substituting_var_idx = var_to_substitute_indices[idx]; + variable_fix_mask[var_idx] = idx; + f_t objective_offset_difference = objective_coefficients[var_idx] * substitute_offset[idx]; + objective_offset_delta_per_variable[idx] += objective_offset_difference; + // atomicAdd(objective_offset, objective_offset_difference); + atomicAdd(&objective_coefficients[substituting_var_idx], + objective_coefficients[var_idx] * substitute_coefficient[idx]); + }); + presolve_data.objective_offset += thrust::reduce(handle_ptr->get_thrust_policy(), + objective_offset_delta_per_variable.begin(), + objective_offset_delta_per_variable.end(), + f_t(0.), + thrust::plus()); + const i_t num_segments = n_constraints; + f_t initial_value{0.}; + + auto input_transform_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + 
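+    // per-nonzero transform: if this entry's variable is being substituted, rewrite the
+    // coefficient in place to act on the substituting variable and return the constant
+    // term (old coefficient * offset) that moves to the constraint right-hand side;
+    // the per-constraint sums are formed below with cub::DeviceSegmentedReduce and
+    // subtracted from both constraint bounds.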
[coefficients = make_span(coefficients), + variables = make_span(variables), + variable_fix_mask = make_span(fixing_helpers.variable_fix_mask), + substitute_coefficient = make_span(d_coefficient_values), + substitute_offset = make_span(d_offset_values), + substitute_var_indices = make_span(d_var_to_substitute_indices), + int_tol = tolerances.integrality_tolerance] __device__(i_t idx) -> f_t { + i_t var_idx = variables[idx]; + if (variable_fix_mask[var_idx] != -1) { + i_t reference_idx = variable_fix_mask[var_idx]; + f_t substituted_coefficient = substitute_coefficient[reference_idx]; + f_t substituted_offset = substitute_offset[reference_idx]; + f_t reduction = coefficients[idx] * substituted_offset; + coefficients[idx] = coefficients[idx] * substituted_coefficient; + // note that this might cause duplicates if these two variables are in the same row + // we will handle duplicates in later + variables[idx] = substitute_var_indices[reference_idx]; + return reduction; + } else { + return 0.; + } + }); + // Determine temporary device storage requirements + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + cub::DeviceSegmentedReduce::Reduce(d_temp_storage, + temp_storage_bytes, + input_transform_it, + fixing_helpers.reduction_in_rhs.data(), + num_segments, + offsets.data(), + offsets.data() + 1, + cuda::std::plus<>{}, + initial_value, + handle_ptr->get_stream()); + + rmm::device_uvector temp_storage(temp_storage_bytes, handle_ptr->get_stream()); + d_temp_storage = thrust::raw_pointer_cast(temp_storage.data()); + + // Run reduction + cub::DeviceSegmentedReduce::Reduce(d_temp_storage, + temp_storage_bytes, + input_transform_it, + fixing_helpers.reduction_in_rhs.data(), + num_segments, + offsets.data(), + offsets.data() + 1, + cuda::std::plus<>{}, + initial_value, + handle_ptr->get_stream()); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); + thrust::for_each( + handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + n_constraints, + [lower_bounds = make_span(constraint_lower_bounds), + upper_bounds = make_span(constraint_upper_bounds), + reduction_in_rhs = make_span(fixing_helpers.reduction_in_rhs)] __device__(i_t cstr_idx) { + lower_bounds[cstr_idx] = lower_bounds[cstr_idx] - reduction_in_rhs[cstr_idx]; + upper_bounds[cstr_idx] = upper_bounds[cstr_idx] - reduction_in_rhs[cstr_idx]; + }); + // sort indices so we can detect duplicates + sort_rows_by_variables(handle_ptr); + // now remove the duplicate substituted variables by summing their coefficients on one and + // assigning a dummy variable on another + thrust::for_each(handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n_constraints), + [variables = make_span(variables), + coefficients = make_span(coefficients), + offsets = make_span(offsets), + objective_coefficients = make_span(objective_coefficients), + dummy_substituted_variable] __device__(i_t cstr_idx) { + i_t offset_begin = offsets[cstr_idx]; + i_t offset_end = offsets[cstr_idx + 1]; + i_t duplicate_start_idx = -1; + while (offset_begin < offset_end - 1) { + i_t var_idx = variables[offset_begin]; + i_t next_var_idx = variables[offset_begin + 1]; + if (var_idx == next_var_idx) { + if (duplicate_start_idx == -1) { duplicate_start_idx = offset_begin; } + coefficients[duplicate_start_idx] += coefficients[offset_begin + 1]; + variables[duplicate_start_idx] = variables[offset_begin + 1]; + // mark those for elimination + variables[offset_begin + 1] = 
dummy_substituted_variable; + coefficients[offset_begin + 1] = 0.; + } else { + duplicate_start_idx = -1; + } + offset_begin++; + } + }); + // in case we use this function in context other than propagation, it is possible that substituted + // var doesn't exist in the constraint(they are not detected by duplicate detection). so we need + // to take care of that. + thrust::for_each(handle_ptr->get_thrust_policy(), + d_var_indices.begin(), + d_var_indices.end(), + [objective_coefficients = make_span(objective_coefficients)] __device__( + i_t var_idx) { objective_coefficients[var_idx] = 0.; }); + handle_ptr->sync_stream(); + CUOPT_LOG_DEBUG("Substituted %d variables", var_indices.size()); +} + template void problem_t::fix_given_variables(problem_t& original_problem, rmm::device_uvector& assignment, @@ -1305,6 +1512,12 @@ void problem_t::fix_given_variables(problem_t& original_prob handle_ptr->sync_stream(); } +template +void problem_t::sort_rows_by_variables(const raft::handle_t* handle_ptr) +{ + csrsort_cusparse(coefficients, variables, offsets, n_constraints, n_variables, handle_ptr); +} + template problem_t problem_t::get_problem_after_fixing_vars( rmm::device_uvector& assignment, @@ -1344,7 +1557,7 @@ problem_t problem_t::get_problem_after_fixing_vars( // do an assignment from the original_ids of the current problem problem.original_ids.resize(variable_map.size()); std::fill(problem.reverse_original_ids.begin(), problem.reverse_original_ids.end(), -1); - auto h_variable_map = cuopt::host_copy(variable_map); + auto h_variable_map = cuopt::host_copy(variable_map, handle_ptr->get_stream()); for (size_t i = 0; i < variable_map.size(); ++i) { cuopt_assert(h_variable_map[i] < original_ids.size(), "Variable index out of bounds"); problem.original_ids[i] = original_ids[h_variable_map[i]]; @@ -1522,9 +1735,10 @@ std::vector>> compute_var_to_constraint_map( { raft::common::nvtx::range fun_scope("compute_var_to_constraint_map"); std::vector>> variable_constraint_map(pb.n_variables); - auto h_variables = cuopt::host_copy(pb.variables); - auto h_coefficients = cuopt::host_copy(pb.coefficients); - auto h_offsets = cuopt::host_copy(pb.offsets); + auto stream = pb.handle_ptr->get_stream(); + auto h_variables = cuopt::host_copy(pb.variables, stream); + auto h_coefficients = cuopt::host_copy(pb.coefficients, stream); + auto h_offsets = cuopt::host_copy(pb.offsets, stream); for (i_t cnst = 0; cnst < pb.n_constraints; ++cnst) { for (i_t i = h_offsets[cnst]; i < h_offsets[cnst + 1]; ++i) { i_t var = h_variables[i]; @@ -1542,10 +1756,11 @@ void standardize_bounds(std::vector>>& variable_ { raft::common::nvtx::range fun_scope("standardize_bounds"); auto handle_ptr = pb.handle_ptr; - auto h_var_bounds = cuopt::host_copy(pb.variable_bounds); - auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients); - auto h_variable_types = cuopt::host_copy(pb.variable_types); - auto h_var_flags = cuopt::host_copy(pb.presolve_data.var_flags); + auto stream = handle_ptr->get_stream(); + auto h_var_bounds = cuopt::host_copy(pb.variable_bounds, stream); + auto h_objective_coefficients = cuopt::host_copy(pb.objective_coefficients, stream); + auto h_variable_types = cuopt::host_copy(pb.variable_types, stream); + auto h_var_flags = cuopt::host_copy(pb.presolve_data.var_flags, stream); handle_ptr->sync_stream(); const i_t n_vars_originally = (i_t)h_var_bounds.size(); @@ -1682,17 +1897,19 @@ void problem_t::get_host_user_problem( cuopt::linear_programming::dual_simplex::user_problem_t& user_problem) const { 
raft::common::nvtx::range fun_scope("get_host_user_problem"); + // std::lock_guard lock(problem_mutex); i_t m = n_constraints; i_t n = n_variables; i_t nz = nnz; user_problem.num_rows = m; user_problem.num_cols = n; - user_problem.objective = cuopt::host_copy(objective_coefficients); + auto stream = handle_ptr->get_stream(); + user_problem.objective = cuopt::host_copy(objective_coefficients, stream); dual_simplex::csr_matrix_t csr_A(m, n, nz); - csr_A.x = cuopt::host_copy(coefficients); - csr_A.j = cuopt::host_copy(variables); - csr_A.row_start = cuopt::host_copy(offsets); + csr_A.x = cuopt::host_copy(coefficients, stream); + csr_A.j = cuopt::host_copy(variables, stream); + csr_A.row_start = cuopt::host_copy(offsets, stream); csr_A.to_compressed_col(user_problem.A); @@ -1701,8 +1918,8 @@ void problem_t::get_host_user_problem( user_problem.range_rows.clear(); user_problem.range_value.clear(); - auto model_constraint_lower_bounds = cuopt::host_copy(constraint_lower_bounds); - auto model_constraint_upper_bounds = cuopt::host_copy(constraint_upper_bounds); + auto model_constraint_lower_bounds = cuopt::host_copy(constraint_lower_bounds, stream); + auto model_constraint_upper_bounds = cuopt::host_copy(constraint_upper_bounds, stream); // All constraints have lower and upper bounds // lr <= a_i^T x <= ur @@ -1763,7 +1980,7 @@ void problem_t::get_host_user_problem( user_problem.obj_scale = presolve_data.objective_scaling_factor; user_problem.var_types.resize(n); - auto model_variable_types = cuopt::host_copy(variable_types); + auto model_variable_types = cuopt::host_copy(variable_types, stream); for (int j = 0; j < n; ++j) { user_problem.var_types[j] = model_variable_types[j] == var_t::CONTINUOUS @@ -1781,7 +1998,8 @@ template void problem_t::compute_vars_with_objective_coeffs() { raft::common::nvtx::range fun_scope("compute_vars_with_objective_coeffs"); - auto h_objective_coefficients = cuopt::host_copy(objective_coefficients); + auto h_objective_coefficients = + cuopt::host_copy(objective_coefficients, handle_ptr->get_stream()); std::vector vars_with_objective_coeffs_; std::vector objective_coeffs_; for (i_t i = 0; i < n_variables; ++i) { @@ -1815,6 +2033,36 @@ void problem_t::add_cutting_plane_at_objective(f_t objective) cuopt_func_call(check_problem_representation(true)); } +template +void problem_t::update_variable_bounds(const std::vector& var_indices, + const std::vector& lb_values, + const std::vector& ub_values) +{ + if (var_indices.size() == 0) { return; } + // std::lock_guard lock(problem_mutex); + cuopt_assert(var_indices.size() == lb_values.size(), "size of variable lower bound mismatch"); + cuopt_assert(var_indices.size() == ub_values.size(), "size of variable upper bound mismatch"); + auto d_var_indices = device_copy(var_indices, handle_ptr->get_stream()); + auto d_lb_values = device_copy(lb_values, handle_ptr->get_stream()); + auto d_ub_values = device_copy(ub_values, handle_ptr->get_stream()); + thrust::for_each( + handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + d_var_indices.size(), + [lb_values = make_span(d_lb_values), + ub_values = make_span(d_ub_values), + variable_bounds = make_span(variable_bounds), + var_indices = make_span(d_var_indices)] __device__(auto i) { + i_t var_idx = var_indices[i]; + cuopt_assert(variable_bounds[var_idx].x <= lb_values[i], "variable lower bound violation"); + cuopt_assert(variable_bounds[var_idx].y >= ub_values[i], "variable upper bound violation"); + variable_bounds[var_idx].x = 
lb_values[i]; + variable_bounds[var_idx].y = ub_values[i]; + }); + handle_ptr->sync_stream(); + RAFT_CHECK_CUDA(handle_ptr->get_stream()); +} + #if MIP_INSTANTIATE_FLOAT template class problem_t; #endif diff --git a/cpp/src/mip/problem/problem.cuh b/cpp/src/mip/problem/problem.cuh index ed0adb971..9719f0b54 100644 --- a/cpp/src/mip/problem/problem.cuh +++ b/cpp/src/mip/problem/problem.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -107,7 +107,14 @@ class problem_t { void add_cutting_plane_at_objective(f_t objective); void compute_vars_with_objective_coeffs(); void test_problem_fixing_time(); - + void update_variable_bounds(const std::vector& var_indices, + const std::vector& lb_values, + const std::vector& ub_values); + void substitute_variables(const std::vector& var_indices, + const std::vector& var_to_substitude_indices, + const std::vector& offset_values, + const std::vector& coefficient_values); + void sort_rows_by_variables(const raft::handle_t* handle_ptr); enum var_flags_t : i_t { VAR_IMPLIED_INTEGER = 1 << 0, }; diff --git a/cpp/src/mip/problem/problem_helpers.cuh b/cpp/src/mip/problem/problem_helpers.cuh index 655f764d5..eadc7e309 100644 --- a/cpp/src/mip/problem/problem_helpers.cuh +++ b/cpp/src/mip/problem/problem_helpers.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -313,6 +314,13 @@ static bool check_bounds_sanity(const detail::problem_t& problem) check_constraint_bounds_sanity(problem); } +static void check_cusparse_status(cusparseStatus_t status) +{ + if (status != CUSPARSE_STATUS_SUCCESS) { + throw std::runtime_error("CUSPARSE error: " + std::string(cusparseGetErrorString(status))); + } +} + template __global__ void kernel_convert_greater_to_less(raft::device_span coefficients, raft::device_span offsets, @@ -340,6 +348,53 @@ __global__ void kernel_convert_greater_to_less(raft::device_span coefficien } } +template +static void csrsort_cusparse(rmm::device_uvector& values, + rmm::device_uvector& indices, + rmm::device_uvector& offsets, + i_t rows, + i_t cols, + const raft::handle_t* handle_ptr) +{ + auto stream = offsets.stream(); + cusparseHandle_t handle; + cusparseCreate(&handle); + cusparseSetStream(handle, stream); + + i_t nnz = values.size(); + i_t m = rows; + i_t n = cols; + + cusparseMatDescr_t matA; + cusparseCreateMatDescr(&matA); + cusparseSetMatIndexBase(matA, CUSPARSE_INDEX_BASE_ZERO); + cusparseSetMatType(matA, CUSPARSE_MATRIX_TYPE_GENERAL); + + size_t pBufferSizeInBytes = 0; + check_cusparse_status(cusparseXcsrsort_bufferSizeExt( + handle, m, n, nnz, offsets.data(), indices.data(), &pBufferSizeInBytes)); + rmm::device_uvector pBuffer(pBufferSizeInBytes, stream); + cuopt_assert(((intptr_t)pBuffer.data() % 128) == 0, + "CUSPARSE buffer size is not aligned to 128 bytes"); + rmm::device_uvector P(nnz, stream); + thrust::sequence(handle_ptr->get_thrust_policy(), P.begin(), P.end()); + + check_cusparse_status(cusparseXcsrsort( + handle, m, n, nnz, matA, offsets.data(), indices.data(), P.data(), pBuffer.data())); + + // apply the permutation to the values + rmm::device_uvector values_sorted(nnz, stream); + thrust::gather( + handle_ptr->get_thrust_policy(), P.begin(), P.end(), values.begin(), values_sorted.begin()); + thrust::copy( + handle_ptr->get_thrust_policy(), values_sorted.begin(), values_sorted.end(), values.begin()); + + cusparseDestroyMatDescr(matA); + cusparseDestroy(handle); + + check_csr_representation(values, offsets, indices, handle_ptr, cols, rows); +} + template static void convert_greater_to_less(detail::problem_t& problem) { diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index 36bef4142..9e9a2d75f 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -229,11 +229,12 @@ void solution_t::assign_random_within_bounds(f_t ratio_of_vars_to_rand bool only_integers) { std::mt19937 rng(cuopt::seed_generator::get_seed()); - std::vector h_assignment = host_copy(assignment); + auto stream = handle_ptr->get_stream(); + std::vector h_assignment = host_copy(assignment, stream); std::uniform_real_distribution unif_prob(0, 1); - auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds); - auto variable_types = cuopt::host_copy(problem_ptr->variable_types); + auto variable_bounds = cuopt::host_copy(problem_ptr->variable_bounds, stream); + auto variable_types = cuopt::host_copy(problem_ptr->variable_types, stream); problem_ptr->handle_ptr->sync_stream(); for (size_t i = 0; i < problem_ptr->variable_bounds.size(); ++i) { if (only_integers && variable_types[i] != var_t::INTEGER) { continue; } diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index e5161882e..1790be33b 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -222,7 +222,7 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, CUOPT_LOG_INFO("%d implied integers", result->implied_integer_indices.size()); } if (problem.is_objective_integral()) { CUOPT_LOG_INFO("Objective function is integral"); } - CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time); + CUOPT_LOG_INFO("Papilo presolve time: %.2f", presolve_time); } if (settings.user_problem_file != "") { CUOPT_LOG_INFO("Writing user problem to file: %s", settings.user_problem_file.c_str()); @@ -259,7 +259,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, std::numeric_limits::signaling_NaN()); detail::problem_t full_problem(op_problem); detail::solution_t full_sol(full_problem); - full_sol.copy_new_assignment(cuopt::host_copy(primal_solution)); + full_sol.copy_new_assignment( + cuopt::host_copy(primal_solution, op_problem.get_handle_ptr()->get_stream())); full_sol.compute_feasibility(); if (!full_sol.get_feasible()) { CUOPT_LOG_WARN("The solution is not feasible after post solve"); diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 92d548c2f..549030873 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -137,7 +137,8 @@ solution_t mip_solver_t::run_solver() auto opt_sol = solve_lp_with_method(*context.problem_ptr, settings, lp_timer); solution_t sol(*context.problem_ptr); - sol.copy_new_assignment(host_copy(opt_sol.get_primal_solution())); + sol.copy_new_assignment( + host_copy(opt_sol.get_primal_solution(), context.problem_ptr->handle_ptr->get_stream())); if (opt_sol.get_termination_status() == pdlp_termination_status_t::Optimal || opt_sol.get_termination_status() == pdlp_termination_status_t::PrimalInfeasible || opt_sol.get_termination_status() == pdlp_termination_status_t::DualInfeasible) { @@ -162,7 +163,7 @@ solution_t mip_solver_t::run_solver() branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); // Fill in the settings for branch and bound - branch_and_bound_settings.time_limit = timer_.remaining_time(); + branch_and_bound_settings.time_limit = timer_.get_time_limit(); branch_and_bound_settings.node_limit = context.settings.node_limit; branch_and_bound_settings.reliability_branching = context.settings.reliability_branching; branch_and_bound_settings.print_presolve_stats = false; @@ -176,13 +177,12 @@ solution_t mip_solver_t::run_solver() } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } - CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); - i_t num_threads = branch_and_bound_settings.num_threads; - i_t num_bfs_threads = std::max(1, num_threads / 4); - i_t num_diving_threads = std::max(1, num_threads - num_bfs_threads); - branch_and_bound_settings.num_bfs_threads = num_bfs_threads; - branch_and_bound_settings.num_diving_threads = num_diving_threads; + i_t num_threads = branch_and_bound_settings.num_threads; + i_t num_bfs_workers = std::max(1, num_threads / 4); + i_t num_diving_workers = std::max(1, num_threads - num_bfs_workers); + branch_and_bound_settings.num_bfs_workers = num_bfs_workers; + branch_and_bound_settings.diving_settings.num_diving_workers = num_diving_workers; // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = @@ -208,7 +208,7 @@ solution_t mip_solver_t::run_solver() // Create the branch and bound object branch_and_bound = std::make_unique>( - branch_and_bound_problem, branch_and_bound_settings); + branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start()); context.branch_and_bound_ptr = branch_and_bound.get(); branch_and_bound->set_concurrent_lp_root_solve(true); diff --git a/cpp/src/mip/utilities/cpu_worker_thread.cuh b/cpp/src/mip/utilities/cpu_worker_thread.cuh index 0f1671c94..60bd5685b 100644 --- a/cpp/src/mip/utilities/cpu_worker_thread.cuh +++ b/cpp/src/mip/utilities/cpu_worker_thread.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights * reserved. 
SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -83,6 +83,7 @@ void cpu_worker_thread_base_t::cpu_worker_thread() std::lock_guard lock(cpu_mutex); cpu_thread_done = true; } + cpu_cv.notify_all(); } } @@ -131,9 +132,8 @@ void cpu_worker_thread_base_t::start_cpu_solver() template bool cpu_worker_thread_base_t::wait_for_cpu_solver() { - while (!cpu_thread_done && !cpu_thread_terminate) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - } + std::unique_lock lock(cpu_mutex); + cpu_cv.wait(lock, [this] { return cpu_thread_done || cpu_thread_terminate; }); return static_cast(this)->get_result(); } diff --git a/cpp/src/mip/utilities/work_unit_ordered_queue.cuh b/cpp/src/mip/utilities/work_unit_ordered_queue.cuh new file mode 100644 index 000000000..aa9ad4698 --- /dev/null +++ b/cpp/src/mip/utilities/work_unit_ordered_queue.cuh @@ -0,0 +1,136 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace cuopt { + +/** + * @brief A queue that orders work units by timestamp (float). The earliest timestamp is at the + * front. + * + * @tparam T The type of the work unit. + */ +template +class work_unit_ordered_queue_t { + public: + // Work entry: (timestamp, work) + using entry_t = std::pair; + + work_unit_ordered_queue_t() = default; + + /** + * @brief Push a work unit with a timestamp into the queue. + * + * @param timestamp The associated float timestamp. + * @param work The work unit to enqueue. + */ + void push(float timestamp, const T& work) + { + std::lock_guard lock(mutex_); + queue_.emplace(timestamp, work); + } + + /** + * @brief Push a work unit with a timestamp into the queue (move version). + * + * @param timestamp The associated float timestamp. + * @param work The work unit to enqueue (moved). + */ + void push(float timestamp, T&& work) + { + std::lock_guard lock(mutex_); + queue_.emplace(timestamp, std::move(work)); + } + + /** + * @brief Pop the entry at the front of the queue. + */ + void pop() + { + std::lock_guard lock(mutex_); + queue_.pop(); + } + + /** + * @brief Get the entry at the front of the queue. + * + * @return const entry_t& Earliest (timestamp, work). + */ + entry_t top_with_timestamp() const + { + std::lock_guard lock(mutex_); + return queue_.top(); + } + + /** + * @brief Get only the content, not the timestamp. + * + */ + T top() const + { + std::lock_guard lock(mutex_); + return queue_.top().second; + } + + /** + * @brief Check if the queue is empty. + * + * @return true if empty, false otherwise. + */ + bool empty() const + { + std::lock_guard lock(mutex_); + return queue_.empty(); + } + + /** + * @brief Number of items in the queue. 
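+   *
+   * @return std::size_t The number of entries currently in the queue.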
+ */ + std::size_t size() const + { + std::lock_guard lock(mutex_); + return queue_.size(); + } + + void clear() + { + std::lock_guard lock(mutex_); + queue_ = std::priority_queue, min_heap_cmp>(); + } + + private: + // Custom comparator for min-heap based on timestamp + struct min_heap_cmp { + bool operator()(const entry_t& a, const entry_t& b) const + { + return a.first > b.first; // earlier time has higher priority + } + }; + + std::priority_queue, min_heap_cmp> queue_; + mutable std::mutex mutex_; +}; + +} // namespace cuopt diff --git a/cpp/src/routing/adapters/adapted_sol.cuh b/cpp/src/routing/adapters/adapted_sol.cuh index 48ebbdf2f..463f876ff 100644 --- a/cpp/src/routing/adapters/adapted_sol.cuh +++ b/cpp/src/routing/adapters/adapted_sol.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -449,8 +449,9 @@ struct adapted_sol_t { void populate_unserviced_nodes() { raft::common::nvtx::range fun_scope("populate_unserviced_nodes"); - has_unserviced_nodes = false; - auto h_route_id_per_node = host_copy(sol.route_node_map.route_id_per_node); + has_unserviced_nodes = false; + auto h_route_id_per_node = + host_copy(sol.route_node_map.route_id_per_node, sol.sol_handle->get_stream()); for (size_t i = 0; i < h_route_id_per_node.size(); ++i) { if (h_route_id_per_node[i] == -1) { pred[i] = NodeInfo<>(); @@ -487,12 +488,13 @@ struct adapted_sol_t { skip_route_copy = false; } std::vector h_routes_to_copy; - if (!copy_all) h_routes_to_copy = host_copy(sol.routes_to_copy); + if (!copy_all) h_routes_to_copy = host_copy(sol.routes_to_copy, sol.sol_handle->get_stream()); for (i_t i = 0; i < sol.n_routes && !skip_route_copy; ++i) { if (!copy_all && h_routes_to_copy[i] == 0) continue; - auto& curr_route = sol.get_route(i); - auto node_infos_temp = host_copy(curr_route.dimensions.requests.node_info); - i_t n_nodes = curr_route.n_nodes.value(sol.sol_handle->get_stream()); + auto& curr_route = sol.get_route(i); + auto node_infos_temp = + host_copy(curr_route.dimensions.requests.node_info, sol.sol_handle->get_stream()); + i_t n_nodes = curr_route.n_nodes.value(sol.sol_handle->get_stream()); // Remove break nodes for diversity std::vector> node_infos; diff --git a/cpp/src/routing/adapters/assignment_adapter.cuh b/cpp/src/routing/adapters/assignment_adapter.cuh index e7e945eb9..c41c3e161 100644 --- a/cpp/src/routing/adapters/assignment_adapter.cuh +++ b/cpp/src/routing/adapters/assignment_adapter.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -84,18 +84,18 @@ assignment_t ges_solver_t::get_ges_assignment( auto route_id = route.route_id.value(stream); auto vehicle_id = route.vehicle_id.value(stream); - auto node_infos_h = cuopt::host_copy(route.dimensions.requests.node_info); + auto node_infos_h = cuopt::host_copy(route.dimensions.requests.node_info, stream); std::vector departure_forward_h(node_infos_h.size(), 0.); std::vector actual_arrival_h(node_infos_h.size(), 0.); std::vector earliest_arrival_backward_h(node_infos_h.size(), 0.); std::vector latest_arrival_forward_h(node_infos_h.size(), 0.); if (problem.dimensions_info.has_dimension(detail::dim_t::TIME)) { - departure_forward_h = cuopt::host_copy(route.dimensions.time_dim.departure_forward); - actual_arrival_h = cuopt::host_copy(route.dimensions.time_dim.actual_arrival); + departure_forward_h = cuopt::host_copy(route.dimensions.time_dim.departure_forward, stream); + actual_arrival_h = cuopt::host_copy(route.dimensions.time_dim.actual_arrival, stream); earliest_arrival_backward_h = - cuopt::host_copy(route.dimensions.time_dim.earliest_arrival_backward); + cuopt::host_copy(route.dimensions.time_dim.earliest_arrival_backward, stream); latest_arrival_forward_h = - cuopt::host_copy(route.dimensions.time_dim.latest_arrival_forward); + cuopt::host_copy(route.dimensions.time_dim.latest_arrival_forward, stream); } i_t drop_return_trip = sol.problem_ptr->drop_return_trip_h[vehicle_id]; diff --git a/cpp/src/routing/adapters/solution_adapter.cuh b/cpp/src/routing/adapters/solution_adapter.cuh index ba292a688..5571f4b3b 100644 --- a/cpp/src/routing/adapters/solution_adapter.cuh +++ b/cpp/src/routing/adapters/solution_adapter.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -25,10 +25,11 @@ void fill_routes_data(solution_t& sol, const problem_t& problem) { const auto n_routes = assignment.get_vehicle_count(); - auto h_route = cuopt::host_copy(assignment.get_route()); - auto h_truck_ids = cuopt::host_copy(assignment.get_truck_id()); - auto h_route_locations = cuopt::host_copy(assignment.get_order_locations()); - auto h_node_types = cuopt::host_copy(assignment.get_node_types()); + auto stream = sol.sol_handle->get_stream(); + auto h_route = cuopt::host_copy(assignment.get_route(), stream); + auto h_truck_ids = cuopt::host_copy(assignment.get_truck_id(), stream); + auto h_route_locations = cuopt::host_copy(assignment.get_order_locations(), stream); + auto h_node_types = cuopt::host_copy(assignment.get_node_types(), stream); sol.sol_handle->sync_stream(); assignment.get_truck_id().stream().synchronize(); diff --git a/cpp/src/routing/assignment.cu b/cpp/src/routing/assignment.cu index 6bf468502..4636fa735 100644 --- a/cpp/src/routing/assignment.cu +++ b/cpp/src/routing/assignment.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -248,13 +248,14 @@ void assignment_t::print(std::ostream& os) const noexcept template host_assignment_t::host_assignment_t(const assignment_t& routing_solution) { - route = cuopt::host_copy(routing_solution.get_route()); - truck_id = cuopt::host_copy(routing_solution.get_truck_id()); - stamp = cuopt::host_copy(routing_solution.get_arrival_stamp()); - locations = cuopt::host_copy(routing_solution.get_order_locations()); - node_types = cuopt::host_copy(routing_solution.get_node_types()); - unserviced_nodes = cuopt::host_copy(routing_solution.get_unserviced_nodes()); - accepted = cuopt::host_copy(routing_solution.get_accepted()); + auto stream = routing_solution.get_route().stream(); + route = cuopt::host_copy(routing_solution.get_route(), stream); + truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream); + stamp = cuopt::host_copy(routing_solution.get_arrival_stamp(), stream); + locations = cuopt::host_copy(routing_solution.get_order_locations(), stream); + node_types = cuopt::host_copy(routing_solution.get_node_types(), stream); + unserviced_nodes = cuopt::host_copy(routing_solution.get_unserviced_nodes(), stream); + accepted = cuopt::host_copy(routing_solution.get_accepted(), stream); } template diff --git a/cpp/src/routing/crossovers/ox_graph.hpp b/cpp/src/routing/crossovers/ox_graph.hpp index 6f65b0843..55d2e39a0 100644 --- a/cpp/src/routing/crossovers/ox_graph.hpp +++ b/cpp/src/routing/crossovers/ox_graph.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -39,14 +39,14 @@ struct ox_graph_t { std::vector buckets; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.row_sizes = host_copy(row_sizes); - h.route_ids = host_copy(route_ids); - h.indices = host_copy(indices); - h.weights = host_copy(weights); - h.buckets = host_copy(buckets); + h.row_sizes = host_copy(row_sizes, stream); + h.route_ids = host_copy(route_ids, stream); + h.indices = host_copy(indices, stream); + h.weights = host_copy(weights, stream); + h.buckets = host_copy(buckets, stream); return h; } diff --git a/cpp/src/routing/crossovers/ox_recombiner.cuh b/cpp/src/routing/crossovers/ox_recombiner.cuh index 17823c28b..66d90d06c 100644 --- a/cpp/src/routing/crossovers/ox_recombiner.cuh +++ b/cpp/src/routing/crossovers/ox_recombiner.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -336,7 +336,7 @@ struct OX { int i = routes_number; if (optimal_routes_search) { i = optimal_routes_number; } int end_index = offspring.size() - 1; - double cost_n, cost_p, total_delta = 0.; + [[maybe_unused]] double cost_n, cost_p, total_delta = 0.; std::vector>>> routes_to_add; std::vector tmp_route; @@ -433,7 +433,7 @@ struct OX { h_graph[i].reserve(problem_size); } - adj_to_host(h_graph); + adj_to_host(h_graph, A.sol.sol_handle->get_stream()); std::vector> h_path_cost(problem_size + 1); // Vector of parents to recreate optimal path @@ -500,7 +500,7 @@ struct OX { } } - void test_transpose_graph() + void test_transpose_graph(rmm::cuda_stream_view stream) { std::vector>> h_transpose_graph(offspring.size()); for (size_t i = 0; i < h_transpose_graph.size(); ++i) { @@ -512,7 +512,7 @@ struct OX { tmp_graph[i].reserve(problem_size); } - adj_to_host(tmp_graph); + adj_to_host(tmp_graph, stream); for (size_t i = 0; i < tmp_graph.size(); ++i) { for (size_t j = 0; j < tmp_graph[i].size(); ++j) { @@ -521,7 +521,7 @@ struct OX { } } - auto tmp_transpose = transpose_graph.to_host(); + auto tmp_transpose = transpose_graph.to_host(stream); for (size_t i = 0; i < h_transpose_graph.size(); ++i) { auto transpose_offset = @@ -530,7 +530,7 @@ struct OX { "Mismatch number of edges"); for (size_t j = 0; j < h_transpose_graph[i].size(); ++j) { auto [ref_edge, ref_weight, ref_veh] = h_transpose_graph[i][j]; - bool found = false; + [[maybe_unused]] bool found = false; for (int x = 0; x < tmp_transpose.row_sizes[i]; ++x) { auto edge = tmp_transpose.indices[transpose_offset + x]; auto veh = tmp_transpose.buckets[transpose_offset + x]; @@ -618,7 +618,7 @@ struct OX { raft::common::nvtx::range fun_scope("bellman_ford"); compute_transpose_graph(A); - cuopt_func_call(test_transpose_graph()); + cuopt_func_call(test_transpose_graph(A.sol.sol_handle->get_stream())); auto row_size = offspring.size(); d_path_cost.resize((problem_size + 1) * row_size, A.sol.sol_handle->get_stream()); @@ -789,9 +789,10 @@ struct OX { offspring[0] = 0; } - void adj_to_host(std::vector>>& h_graph) + void adj_to_host(std::vector>>& h_graph, + rmm::cuda_stream_view stream) { - auto tmp_graph = d_graph.to_host(); + auto tmp_graph = d_graph.to_host(stream); for (int veh = 0; veh < n_buckets; ++veh) { for (size_t i = 0; i < d_graph.get_num_vertices(); ++i) { auto row_size = tmp_graph.row_sizes[veh * d_graph.get_num_vertices() + i]; @@ -818,7 +819,7 @@ struct OX { for (size_t i = 0; i < h_graph.size(); ++i) { h_graph[i].reserve(max_route_len); } - adj_to_host(h_graph); + adj_to_host(h_graph, A.sol.sol_handle->get_stream()); const auto& dimensions_info = A.problem->dimensions_info; diff --git a/cpp/src/routing/cuda_graph.cuh b/cpp/src/routing/cuda_graph.cuh index aa7d890d1..1fb2425d2 100644 --- a/cpp/src/routing/cuda_graph.cuh +++ b/cpp/src/routing/cuda_graph.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -20,7 +20,9 @@ namespace detail { struct cuda_graph_t { void start_capture(rmm::cuda_stream_view stream) { - cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); + // Use ThreadLocal mode to allow multi-threaded batch execution + // Global mode blocks other streams from performing operations during capture + cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal); capture_started = true; } diff --git a/cpp/src/routing/fleet_info.hpp b/cpp/src/routing/fleet_info.hpp index 929a26959..1a37c6655 100644 --- a/cpp/src/routing/fleet_info.hpp +++ b/cpp/src/routing/fleet_info.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -70,32 +70,29 @@ class fleet_info_t { v_buckets_.resize(size, stream); } - auto to_host() + auto to_host(rmm::cuda_stream_view stream) { host_t h; - h.break_offset = host_copy(v_break_offset_); - h.break_durations = host_copy(v_break_duration_); - h.break_earliest = host_copy(v_break_earliest_); - h.break_latest = host_copy(v_break_latest_); - h.earliest_time = host_copy(v_earliest_time_); - h.latest_time = host_copy(v_latest_time_); - h.start_locations = host_copy(v_start_locations_); - h.return_locations = host_copy(v_return_locations_); - h.drop_return_trip = host_copy(v_drop_return_trip_); - h.skip_first_trip = host_copy(v_skip_first_trip_); - h.capacities = host_copy(v_capacities_); - h.max_costs = host_copy(v_max_costs_); - h.max_times = host_copy(v_max_times_); - h.fixed_costs = host_copy(v_fixed_costs_); - h.fleet_order_constraints = fleet_order_constraints_.to_host(); - h.types = host_copy(v_types_); - h.buckets = host_copy(v_buckets_); + h.break_offset = host_copy(v_break_offset_, stream); + h.break_durations = host_copy(v_break_duration_, stream); + h.break_earliest = host_copy(v_break_earliest_, stream); + h.break_latest = host_copy(v_break_latest_, stream); + h.earliest_time = host_copy(v_earliest_time_, stream); + h.latest_time = host_copy(v_latest_time_, stream); + h.start_locations = host_copy(v_start_locations_, stream); + h.return_locations = host_copy(v_return_locations_, stream); + h.drop_return_trip = host_copy(v_drop_return_trip_, stream); + h.skip_first_trip = host_copy(v_skip_first_trip_, stream); + h.capacities = host_copy(v_capacities_, stream); + h.max_costs = host_copy(v_max_costs_, stream); + h.max_times = host_copy(v_max_times_, stream); + h.fixed_costs = host_copy(v_fixed_costs_, stream); + h.fleet_order_constraints = fleet_order_constraints_.to_host(stream); + h.types = host_copy(v_types_, stream); + h.buckets = host_copy(v_buckets_, stream); h.matrices = detail::create_host_mdarray( matrices_.extent[2], matrices_.extent[0], matrices_.extent[1]); - raft::copy(h.matrices.buffer.data(), - matrices_.buffer.data(), - matrices_.buffer.size(), - matrices_.buffer.stream()); + raft::copy(h.matrices.buffer.data(), matrices_.buffer.data(), matrices_.buffer.size(), stream); return h; } diff --git a/cpp/src/routing/fleet_order_constraints.hpp b/cpp/src/routing/fleet_order_constraints.hpp index fdd8f8fd8..c6be63a87 100644 --- a/cpp/src/routing/fleet_order_constraints.hpp +++ b/cpp/src/routing/fleet_order_constraints.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 
2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -67,11 +67,11 @@ struct fleet_order_constraints_t { i_t n_vehicles; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.order_service_times = host_copy(order_service_times); - auto tmp_order_match = host_copy(order_match); + h.order_service_times = host_copy(order_service_times, stream); + auto tmp_order_match = host_copy(order_match, stream); h.order_match = thrust::host_vector(tmp_order_match); h.n_orders = n_orders; h.n_vehicles = n_vehicles; diff --git a/cpp/src/routing/ges_solver.cu b/cpp/src/routing/ges_solver.cu index 37e5905dd..194f73b99 100644 --- a/cpp/src/routing/ges_solver.cu +++ b/cpp/src/routing/ges_solver.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -30,7 +30,10 @@ ges_solver_t::ges_solver_t(const data_model_view_t& : timer(time_limit_), problem(data_model, solver_settings), // override for now - pool_allocator(problem, max_sol_per_population, expected_route_count_), + pool_allocator(problem, + max_sol_per_population, + data_model.get_handle_ptr()->get_stream(), + expected_route_count_), expected_route_count(expected_route_count_), intermediate_file(intermediate_file_) { diff --git a/cpp/src/routing/local_search/cycle_finder/cycle.hpp b/cpp/src/routing/local_search/cycle_finder/cycle.hpp index b38cb72c6..7e3e275e1 100644 --- a/cpp/src/routing/local_search/cycle_finder/cycle.hpp +++ b/cpp/src/routing/local_search/cycle_finder/cycle.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,11 +45,11 @@ struct ret_cycles_t { i_t n_cycles; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.paths = host_copy(paths); - h.offsets = host_copy(offsets); + h.paths = host_copy(paths, stream); + h.offsets = host_copy(offsets, stream); h.n_cycles = size(); return h; } diff --git a/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu b/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu index a8a5d8aaf..65d654b06 100644 --- a/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu +++ b/cpp/src/routing/local_search/cycle_finder/cycle_finder.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -213,8 +213,9 @@ template bool ExactCycleFinder::check_cycle(graph_t& graph, ret_cycles_t& ret) { - auto h_graph = graph.to_host(); - auto h_cycles = ret.to_host(); + auto stream = handle_ptr->get_stream(); + auto h_graph = graph.to_host(stream); + auto h_cycles = ret.to_host(stream); bool cost_matches = true; std::unordered_set changed_route_ids; for (i_t cycle = 0; cycle < h_cycles.n_cycles; ++cycle) { diff --git a/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp b/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp index a08f5f1a3..3c28f78bc 100644 --- a/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp +++ b/cpp/src/routing/local_search/cycle_finder/cycle_graph.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -45,13 +45,13 @@ struct graph_t { std::vector weights; }; - host_t to_host() + host_t to_host(rmm::cuda_stream_view stream) { host_t h; - h.row_sizes = host_copy(row_sizes); - h.route_ids = host_copy(route_ids); - h.indices = host_copy(indices); - h.weights = host_copy(weights); + h.row_sizes = host_copy(row_sizes, stream); + h.route_ids = host_copy(route_ids, stream); + h.indices = host_copy(indices, stream); + h.weights = host_copy(weights, stream); return h; } diff --git a/cpp/src/routing/local_search/local_search.cu b/cpp/src/routing/local_search/local_search.cu index d26ec1ab9..c889d9963 100644 --- a/cpp/src/routing/local_search/local_search.cu +++ b/cpp/src/routing/local_search/local_search.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -126,8 +126,8 @@ bool local_search_t::run_cross_search(solution_t EPSILON, "Cost should improve!"); cuopt_assert(abs((cost_before - cost_after) - - move_candidates.debug_delta.value(sol.sol_handle->get_stream()) < - EPSILON * (1 + abs(cost_before))), + move_candidates.debug_delta.value(sol.sol_handle->get_stream())) < + EPSILON * (1 + abs(cost_before)), "Cost mismatch on cross costs!"); return true; } diff --git a/cpp/src/routing/local_search/sliding_tsp.cu b/cpp/src/routing/local_search/sliding_tsp.cu index 5fcf9ad7c..c7923c361 100644 --- a/cpp/src/routing/local_search/sliding_tsp.cu +++ b/cpp/src/routing/local_search/sliding_tsp.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -566,8 +566,8 @@ bool local_search_t::perform_sliding_tsp( sol.get_cost(false, move_candidates.weights)); cuopt_assert(abs((cost_before - cost_after) + - move_candidates.debug_delta.value(sol.sol_handle->get_stream()) < - EPSILON * (1 + abs(cost_before))), + move_candidates.debug_delta.value(sol.sol_handle->get_stream())) < + EPSILON * (1 + abs(cost_before)), "Cost mismatch on sliding_tsp costs!"); cuopt_assert(cost_before - cost_after >= EPSILON, "Cost should improve!"); diff --git a/cpp/src/routing/local_search/sliding_window.cu b/cpp/src/routing/local_search/sliding_window.cu index 5c19d22a2..2d676d9b3 100644 --- a/cpp/src/routing/local_search/sliding_window.cu +++ b/cpp/src/routing/local_search/sliding_window.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -1116,9 +1116,8 @@ bool local_search_t::perform_sliding_window( cuopt_assert(cost_before - cost_after >= EPSILON, "Cost should improve!"); cuopt_assert(abs((cost_before - cost_after) - - move_candidates.debug_delta.value(solution.sol_handle->get_stream()) < - EPSILON), - "Cost mismatch on cross costs!"); + move_candidates.debug_delta.value(solution.sol_handle->get_stream())) < EPSILON, + "Cost mismatch on sliding_window costs!"); return true; } diff --git a/cpp/src/routing/local_search/two_opt.cu b/cpp/src/routing/local_search/two_opt.cu index 966917b98..abe6e8a92 100644 --- a/cpp/src/routing/local_search/two_opt.cu +++ b/cpp/src/routing/local_search/two_opt.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -458,9 +458,8 @@ bool local_search_t::perform_two_opt( : sol.get_cost(move_candidates.include_objective, move_candidates.weights)); cuopt_assert(abs((cost_before - cost_after) + - - move_candidates.debug_delta.value(sol.sol_handle->get_stream()) < - EPSILON * (1 + abs(cost_before))), + move_candidates.debug_delta.value(sol.sol_handle->get_stream())) < + EPSILON * (1 + abs(cost_before)), "Cost mismatch on two_opt costs!"); cuopt_assert(cost_before - cost_after >= EPSILON, "Cost should improve!"); sol.global_runtime_checks(false, false, "two_opt_end"); diff --git a/cpp/src/routing/local_search/vrp/vrp_execute.cu b/cpp/src/routing/local_search/vrp/vrp_execute.cu index cafe0e5ef..5e417a934 100644 --- a/cpp/src/routing/local_search/vrp/vrp_execute.cu +++ b/cpp/src/routing/local_search/vrp/vrp_execute.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -465,8 +465,8 @@ bool execute_vrp_moves(solution_t& sol, sol.get_cost(move_candidates.include_objective, move_candidates.weights)); cuopt_assert(cost_before - cost_after > EPSILON, "Cost should improve!"); cuopt_assert(abs((cost_before - cost_after) + - move_candidates.debug_delta.value(sol.sol_handle->get_stream()) < - EPSILON * (1 + abs(cost_before))), + move_candidates.debug_delta.value(sol.sol_handle->get_stream())) < + EPSILON * (1 + abs(cost_before)), "Cost mismatch on vrp costs!"); return true; } diff --git a/cpp/src/routing/order_info.hpp b/cpp/src/routing/order_info.hpp index 6f0fb1ecc..d20c46a8e 100644 --- a/cpp/src/routing/order_info.hpp +++ b/cpp/src/routing/order_info.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -57,14 +57,14 @@ class order_info_t { bool is_pdp() const { return !v_pair_indices_.is_empty(); } - auto to_host() + auto to_host(rmm::cuda_stream_view stream) { host_t h; - h.earliest_time = cuopt::host_copy(v_earliest_time_); - h.latest_time = cuopt::host_copy(v_latest_time_); - h.demand = cuopt::host_copy(v_demand_); - h.prizes = cuopt::host_copy(v_prizes_); - h.order_locations = cuopt::host_copy(v_order_locations_); + h.earliest_time = cuopt::host_copy(v_earliest_time_, stream); + h.latest_time = cuopt::host_copy(v_latest_time_, stream); + h.demand = cuopt::host_copy(v_demand_, stream); + h.prizes = cuopt::host_copy(v_prizes_, stream); + h.order_locations = cuopt::host_copy(v_order_locations_, stream); h.depot_included = depot_included_; return h; } diff --git a/cpp/src/routing/problem/problem.cu b/cpp/src/routing/problem/problem.cu index 1df7077fb..4335b9373 100644 --- a/cpp/src/routing/problem/problem.cu +++ b/cpp/src/routing/problem/problem.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
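The assert fixes in cross, sliding_tsp, sliding_window, two_opt and vrp_execute above all correct the same misplaced parenthesis: abs() now wraps the difference between the measured cost change and the recorded debug delta, and only that difference is compared against a relative tolerance. A small sketch of the corrected predicate (cost_before, cost_after, debug_delta and EPSILON are the names used in the surrounding asserts; the sign applied to debug_delta follows each move's own convention):

#include <cmath>

// Corrected cost-mismatch check: |observed_delta - expected_delta| must be small
// relative to the magnitude of the cost itself.
bool cost_delta_matches(double cost_before, double cost_after, double debug_delta, double EPSILON)
{
  const double observed_delta = cost_before - cost_after;  // improvement actually measured
  return std::abs(observed_delta - debug_delta) < EPSILON * (1.0 + std::abs(cost_before));
}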
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -55,7 +55,7 @@ problem_t::problem_t(const data_model_view_t& data_model_vie pair_indices_h.size(), handle_ptr->get_stream()); - vehicle_types_h = cuopt::host_copy(fleet_info.v_types_); + vehicle_types_h = cuopt::host_copy(fleet_info.v_types_, handle_ptr->get_stream()); for (auto& vtype : vehicle_types_h) { if (!distance_matrices_h.count(vtype)) { auto cost_matrix = fleet_info.matrices_.get_cost_matrix(vtype); @@ -104,7 +104,7 @@ void problem_t::populate_vehicle_buckets() { auto fleet_size = data_view_ptr->get_fleet_size(); vehicle_buckets_h.resize(fleet_size); - fleet_info_h = fleet_info.to_host(); + fleet_info_h = fleet_info.to_host(handle_ptr->get_stream()); // infer vehicle types from data model for (int vehicle_id = 0; vehicle_id < fleet_size; ++vehicle_id) { @@ -375,7 +375,7 @@ void problem_t::populate_host_arrays() auto pickup_indices = data_view_ptr->get_pickup_delivery_pair().first; auto stream = data_view_ptr->get_handle_ptr()->get_stream(); - order_locations_h = cuopt::host_copy(order_info.v_order_locations_); + order_locations_h = cuopt::host_copy(order_info.v_order_locations_, stream); // Temporarily fill is_pickup_h for diversity, should use NodeInfo instead bool is_pdp = pickup_indices != nullptr; std::vector h_pickup_indices(get_num_requests()); @@ -387,18 +387,20 @@ void problem_t::populate_host_arrays() } } - drop_return_trip_h = cuopt::host_copy(fleet_info.v_drop_return_trip_); - skip_first_trip_h = cuopt::host_copy(fleet_info.v_skip_first_trip_); - order_info_h = order_info.to_host(); + drop_return_trip_h = cuopt::host_copy(fleet_info.v_drop_return_trip_, stream); + skip_first_trip_h = cuopt::host_copy(fleet_info.v_skip_first_trip_, stream); + order_info_h = order_info.to_host(stream); handle_ptr->sync_stream(); } template void problem_t::initialize_depot_info() { - int nvehicles = fleet_info.v_start_locations_.size(); - auto vehicle_start_locations = cuopt::host_copy(fleet_info.v_start_locations_); - auto vehicle_return_locations = cuopt::host_copy(fleet_info.v_return_locations_); + int nvehicles = fleet_info.v_start_locations_.size(); + auto vehicle_start_locations = + cuopt::host_copy(fleet_info.v_start_locations_, handle_ptr->get_stream()); + auto vehicle_return_locations = + cuopt::host_copy(fleet_info.v_return_locations_, handle_ptr->get_stream()); start_depot_node_infos_h.resize(nvehicles); return_depot_node_infos_h.resize(nvehicles); @@ -518,8 +520,8 @@ void problem_t::populate_special_nodes() int n_vehicles = get_fleet_size(); - auto vehicle_earliest_h = cuopt::host_copy(fleet_info.v_earliest_time_); - auto vehicle_latest_h = cuopt::host_copy(fleet_info.v_latest_time_); + auto vehicle_earliest_h = cuopt::host_copy(fleet_info.v_earliest_time_, handle_ptr->get_stream()); + auto vehicle_latest_h = cuopt::host_copy(fleet_info.v_latest_time_, handle_ptr->get_stream()); std::map> break_earliest_h, break_latest_h, break_duration_h; std::vector break_offset_h(n_vehicles + 1, 0), break_nodes_offset_h; diff --git a/cpp/src/routing/solution/pool_allocator.cuh b/cpp/src/routing/solution/pool_allocator.cuh index 89049a698..d78df6951 100644 --- a/cpp/src/routing/solution/pool_allocator.cuh +++ b/cpp/src/routing/solution/pool_allocator.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -42,8 +42,11 @@ class routing_resource_t { template class pool_allocator_t { public: - pool_allocator_t(const Problem& problem_, i_t n_solutions_, i_t desired_n_routes = -1) - : stream_pool(n_solutions_), problem(problem_) + pool_allocator_t(const Problem& problem_, + i_t n_solutions_, + rmm::cuda_stream_view stream_, + i_t desired_n_routes = -1) + : problem(problem_), stream(stream_) { raft::common::nvtx::range fun_scope("pool_allocator_t"); // FIXME:: This is temporary, we should let the diversity manager decide this @@ -54,8 +57,7 @@ class pool_allocator_t { } sol_handles.reserve(n_solutions_); for (i_t i = 0; i < n_solutions_; ++i) { - sol_handles.emplace_back( - std::make_unique>(stream_pool.get_stream(i))); + sol_handles.emplace_back(std::make_unique>(stream)); } Solution dummy_sol{problem_, 0, sol_handles[0].get()}; resource_pool = @@ -68,22 +70,10 @@ class pool_allocator_t { } } - void sync_all_streams() const - { - for (size_t i = 0; i < stream_pool.get_pool_size(); ++i) { - stream_pool.get_stream(i).synchronize(); - } - } - - // a stream pool that will be used to execute different solutions on - // we are currently not using raft handles stream pool as it is constructed in python layer - // TODO: later consider using raft stream pool and construct it on python layer - // however that pushes some internal logic to the higher levels which we want to avoid - // rmm::cuda_stream_pool is non-movable as it contains an atomic variables - // KEEP THIS MEMBER ABOVE OTHER MEMBERS, so that it is destructed the last - rmm::cuda_stream_pool stream_pool; + void sync_all_streams() const { stream.synchronize(); } // problem description + rmm::cuda_stream_view stream; const Problem& problem; std::vector>> sol_handles; // keep a thread safe pool of local search and ges objects that can be reused diff --git a/cpp/src/routing/solution/solution.cu b/cpp/src/routing/solution/solution.cu index eeed02568..edd3bef9a 100644 --- a/cpp/src/routing/solution/solution.cu +++ b/cpp/src/routing/solution/solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -735,7 +735,7 @@ std::vector solution_t::get_unserviced_nodes() const std::vector unserviced_nodes; unserviced_nodes.reserve(get_num_orders()); const bool depot_included = problem_ptr->order_info.depot_included_; - auto h_route_id_per_node = host_copy(route_node_map.route_id_per_node); + auto h_route_id_per_node = host_copy(route_node_map.route_id_per_node, sol_handle->get_stream()); for (size_t i = 0; i < h_route_id_per_node.size(); ++i) { if (h_route_id_per_node[i] == -1) { if (i > 0 || !depot_included) { unserviced_nodes.push_back(i); } diff --git a/cpp/src/routing/utilities/cython.cu b/cpp/src/routing/utilities/cython.cu index 74b3776f4..60bc3495f 100644 --- a/cpp/src/routing/utilities/cython.cu +++ b/cpp/src/routing/utilities/cython.cu @@ -1,16 +1,20 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #include #include +#include #include #include #include +#include +#include + namespace cuopt { namespace cython { @@ -86,6 +90,73 @@ std::unique_ptr call_solve( return std::make_unique(std::move(vr_ret)); } +/** + * @brief Wrapper for batch vehicle_routing to expose the API to cython + * + * @param data_models Vector of data model pointers + * @param settings Composable solver settings object + * @return std::vector> + */ +std::vector> call_batch_solve( + std::vector*> data_models, + routing::solver_settings_t* settings) +{ + const std::size_t size = data_models.size(); + std::vector> list(size); + + // Use OpenMP for parallel execution + const int max_thread = std::min(static_cast(size), omp_get_max_threads()); + rmm::cuda_stream_pool stream_pool(size, rmm::cuda_stream::flags::non_blocking); + + int device_id = raft::resource::get_device_id(*(data_models[0]->get_handle_ptr())); + +#pragma omp parallel for num_threads(max_thread) + for (std::size_t i = 0; i < size; ++i) { + // Required in multi-GPU environments to set the device for each thread + RAFT_CUDA_TRY(cudaSetDevice(device_id)); + + auto old_stream = data_models[i]->get_handle_ptr()->get_stream(); + // Make sure previous operations are finished + data_models[i]->get_handle_ptr()->sync_stream(); + + // Set new non blocking stream for current data model + raft::resource::set_cuda_stream(*(data_models[i]->get_handle_ptr()), stream_pool.get_stream(i)); + auto routing_solution = cuopt::routing::solve(*data_models[i], *settings); + + // Make sure current solve is finished + stream_pool.get_stream(i).synchronize(); + + // Create buffers and reassociate them with the original stream so they + // outlive the local stream which will be destroyed at end of loop iteration + auto make_buffer = [old_stream = old_stream](rmm::device_buffer&& buf) { + buf.set_stream(old_stream); + return std::make_unique(std::move(buf)); + }; + + vehicle_routing_ret_t vr_ret{routing_solution.get_vehicle_count(), + routing_solution.get_total_objective(), + routing_solution.get_objectives(), + make_buffer(routing_solution.get_route().release()), + make_buffer(routing_solution.get_order_locations().release()), + make_buffer(routing_solution.get_arrival_stamp().release()), + make_buffer(routing_solution.get_truck_id().release()), + make_buffer(routing_solution.get_node_types().release()), + make_buffer(routing_solution.get_unserviced_nodes().release()), + make_buffer(routing_solution.get_accepted().release()), + routing_solution.get_status(), + routing_solution.get_status_string(), + routing_solution.get_error_status().get_error_type(), + routing_solution.get_error_status().what()}; + list[i] = std::make_unique(std::move(vr_ret)); + + // Restore the old stream + raft::resource::set_cuda_stream(*(data_models[i]->get_handle_ptr()), old_stream); + old_stream.synchronize(); + } + + return list; +} + /** * @brief Wrapper for dataset_t to expose the API to cython. * @param solver Composable solver object diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 24158816a..2b1890728 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
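The per-iteration stream handling in call_batch_solve above is the load-bearing part of the batch path: each OpenMP thread pins the device, drains the data model's original stream, swaps a pool stream into the handle, solves, synchronizes, and finally restores the original stream. A condensed sketch of that sequence, assuming one raft::handle_t per problem; run_on_pool_stream is an illustrative helper name, not part of the patch:

#include <cstddef>
#include <cuda_runtime_api.h>
#include <raft/core/handle.hpp>
#include <raft/core/resource/cuda_stream.hpp>
#include <rmm/cuda_stream_pool.hpp>

template <typename SolveFn>
void run_on_pool_stream(raft::handle_t& handle,
                        rmm::cuda_stream_pool& stream_pool,
                        std::size_t i,
                        int device_id,
                        SolveFn&& solve)
{
  cudaSetDevice(device_id);             // each worker thread pins the device first
  auto old_stream = handle.get_stream();
  handle.sync_stream();                 // drain anything still queued on the original stream
  raft::resource::set_cuda_stream(handle, stream_pool.get_stream(i));
  solve();                              // the solver now runs entirely on the pool stream
  stream_pool.get_stream(i).synchronize();  // results are complete before the stream goes away
  raft::resource::set_cuda_stream(handle, old_stream);  // hand the handle back unchanged
}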
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -150,36 +150,6 @@ inline auto host_copy(bool const* device_ptr, size_t size, rmm::cuda_stream_view return h_bool_vec; } -/** - * @brief Simple utility function to copy device_uvector to host - * - * @tparam T - * @param device_vec - * @param stream_view - * @return auto - */ -template -auto host_copy(rmm::device_uvector const& device_vec) -{ - return host_copy(device_vec.data(), device_vec.size(), device_vec.stream()); -} - -/** - * @brief Simple utility function to copy device_uvector to host - * - * @tparam T - * @param device_vec - * @return auto - */ -template -auto host_copy(rmm::device_uvector const& device_vec) -{ - std::vector host_vec(device_vec.size()); - raft::copy(host_vec.data(), device_vec.data(), device_vec.size(), device_vec.stream()); - device_vec.stream().synchronize(); - return host_vec; -} - /** * @brief Simple utility function to copy device_uvector to host * @@ -369,8 +339,9 @@ template std::tuple, std::vector> extract_host_bounds( const rmm::device_uvector& variable_bounds, const raft::handle_t* handle_ptr) { - rmm::device_uvector var_lb(variable_bounds.size(), handle_ptr->get_stream()); - rmm::device_uvector var_ub(variable_bounds.size(), handle_ptr->get_stream()); + auto stream = handle_ptr->get_stream(); + rmm::device_uvector var_lb(variable_bounds.size(), stream); + rmm::device_uvector var_ub(variable_bounds.size(), stream); thrust::transform( handle_ptr->get_thrust_policy(), variable_bounds.begin(), @@ -378,8 +349,8 @@ std::tuple, std::vector> extract_host_bounds( thrust::make_zip_iterator(thrust::make_tuple(var_lb.begin(), var_ub.begin())), [] __device__(auto i) { return thrust::make_tuple(get_lower(i), get_upper(i)); }); handle_ptr->sync_stream(); - auto h_var_lb = cuopt::host_copy(var_lb); - auto h_var_ub = cuopt::host_copy(var_ub); + auto h_var_lb = cuopt::host_copy(var_lb, stream); + auto h_var_ub = cuopt::host_copy(var_ub, stream); return std::make_tuple(h_var_lb, h_var_ub); } diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index c968f70a8..ae50e9967 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -10,12 +10,14 @@ #include #include +#include #include #include #include #include #include #include +#include namespace cuopt { @@ -175,14 +177,25 @@ HDI To bit_cast(const From& src) template inline bool set_shmem_of_kernel(Function* function, size_t dynamic_request_size) { + static std::mutex mtx; + static std::unordered_map shmem_sizes; + if (dynamic_request_size != 0) { dynamic_request_size = raft::alignTo(dynamic_request_size, size_t(1024)); - cudaFuncSetAttribute( - function, cudaFuncAttributeMaxDynamicSharedMemorySize, dynamic_request_size); - return (cudaSuccess == cudaGetLastError()); - } else { - return true; + size_t current_size = shmem_sizes[function]; + if (dynamic_request_size > current_size) { + std::lock_guard lock(mtx); + current_size = shmem_sizes[function]; + + if (dynamic_request_size > current_size) { + cudaFuncSetAttribute( + function, cudaFuncAttributeMaxDynamicSharedMemorySize, dynamic_request_size); + shmem_sizes[function] = dynamic_request_size; + return (cudaSuccess == cudaGetLastError()); + } + } } + return true; } template diff --git a/cpp/src/utilities/driver_helpers.cuh b/cpp/src/utilities/driver_helpers.cuh index aec097a51..8fbc10e16 100644 --- a/cpp/src/utilities/driver_helpers.cuh +++ b/cpp/src/utilities/driver_helpers.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -15,11 +15,17 @@ namespace detail { inline auto get_driver_entry_point(const char* name) { - void* func; + void* func = nullptr; cudaDriverEntryPointQueryResult driver_status; - cudaGetDriverEntryPointByVersion(name, &func, CUDART_VERSION, cudaEnableDefault, &driver_status); + + // Request CUDA 13.0 (13000) version of symbols for Green Context API + // Green contexts are guarded by CUDART_VERSION >= 13000, so we know they're only + // used when compiled with CUDA 13.0+. Requesting v13000 ensures compatibility + // across CUDA 13.x versions (e.g., built with 13.1, run on 13.0). + cudaGetDriverEntryPointByVersion(name, &func, 13000, cudaEnableDefault, &driver_status); if (driver_status != cudaDriverEntryPointSuccess) { fprintf(stderr, "Failed to fetch symbol for %s\n", name); + return static_cast(nullptr); } return func; } diff --git a/cpp/src/utilities/omp_helpers.hpp b/cpp/src/utilities/omp_helpers.hpp index 33eda66cb..e1b68bf88 100644 --- a/cpp/src/utilities/omp_helpers.hpp +++ b/cpp/src/utilities/omp_helpers.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
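The set_shmem_of_kernel change above caches the largest dynamic shared-memory size already applied to each kernel, so cudaFuncSetAttribute is only issued when the requirement grows. A simplified, fully locked sketch of that idea (grow_dynamic_shmem and kernel_attr_cache are illustrative names; the patch itself uses a double-checked lookup):

#include <cstddef>
#include <cuda_runtime_api.h>
#include <mutex>
#include <unordered_map>

inline bool grow_dynamic_shmem(const void* kernel, std::size_t requested_bytes)
{
  static std::mutex mtx;
  static std::unordered_map<const void*, std::size_t> kernel_attr_cache;

  if (requested_bytes == 0) { return true; }
  std::lock_guard<std::mutex> lock(mtx);
  auto& current = kernel_attr_cache[kernel];
  if (requested_bytes <= current) { return true; }  // attribute is already large enough
  auto status = cudaFuncSetAttribute(
    kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, static_cast<int>(requested_bytes));
  if (status == cudaSuccess) { current = requested_bytes; }
  return status == cudaSuccess;
}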
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -53,7 +53,7 @@ class omp_atomic_t { T operator--() { return fetch_sub(T(1)) - 1; } T operator--(int) { return fetch_sub(T(1)); } - T load() + T load() const { T res; #pragma omp atomic read diff --git a/cpp/src/utilities/timer.hpp b/cpp/src/utilities/timer.hpp index 1d1a4881e..2838ab832 100644 --- a/cpp/src/utilities/timer.hpp +++ b/cpp/src/utilities/timer.hpp @@ -55,6 +55,35 @@ class timer_t { double get_time_limit() const noexcept { return time_limit; } + double get_tic_start() const noexcept + { + /** + * Converts a std::chrono::steady_clock::time_point to a struct timeval. + * This is an approximate conversion because steady_clock is relative to an + * unspecified epoch (e.g., system boot time), not the system clock epoch (UTC). + */ + // Get the current time from both clocks at approximately the same instant + std::chrono::system_clock::time_point sys_now = std::chrono::system_clock::now(); + std::chrono::steady_clock::time_point steady_now = std::chrono::steady_clock::now(); + + // Calculate the difference between the given steady_clock time point and the current steady + // time + auto diff_from_now = begin - steady_now; + + // Apply that same difference to the current system clock time point + std::chrono::system_clock::time_point sys_t = sys_now + diff_from_now; + + // Convert the resulting system_clock time point to microseconds since the system epoch + auto us_since_epoch = + std::chrono::duration_cast(sys_t.time_since_epoch()); + + // Populate the timeval struct + double tv_sec = us_since_epoch.count() / 1000000; + double tv_usec = us_since_epoch.count() % 1000000; + + return tv_sec + 1e-6 * tv_usec; + } + private: double time_limit; steady_clock::time_point begin; diff --git a/cpp/src/utilities/tsan_suppressions.txt b/cpp/src/utilities/tsan_suppressions.txt new file mode 100644 index 000000000..b6f413e37 --- /dev/null +++ b/cpp/src/utilities/tsan_suppressions.txt @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-License-Identifier: Apache-2.0 + +# Ignore races in external header-only libraries +race:tbb +race:Papilo diff --git a/cpp/tests/distance_engine/waypoint_matrix_test.cpp b/cpp/tests/distance_engine/waypoint_matrix_test.cpp index 80288bc6f..2db3953c2 100644 --- a/cpp/tests/distance_engine/waypoint_matrix_test.cpp +++ b/cpp/tests/distance_engine/waypoint_matrix_test.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
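timer_t::get_tic_start() above recovers an absolute, epoch-based timestamp for the timer's steady_clock start point by measuring the current offset between system_clock and steady_clock and shifting the stored start by that offset. A compact sketch of the same conversion (to_epoch_seconds is an illustrative name):

#include <chrono>

// Shift a steady_clock time_point into system_clock's timeline using the
// (approximate) offset between the two clocks, then report seconds since the epoch.
inline double to_epoch_seconds(std::chrono::steady_clock::time_point tp)
{
  using namespace std::chrono;
  auto sys_now    = system_clock::now();
  auto steady_now = steady_clock::now();
  auto sys_tp     = sys_now + (tp - steady_now);
  return duration_cast<duration<double>>(sys_tp.time_since_epoch()).count();
}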
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -44,7 +44,7 @@ class waypoint_matrix_waypoints_sequence_test_t this->expected_sequence_offsets = param.sequence_offsets; } - void TearDown() {} + void TearDown() override {} void test_compute_waypoint_sequence() { @@ -131,7 +131,7 @@ class waypoint_matrix_shortest_path_cost_t this->weights.data()); } - void TearDown() {} + void TearDown() override {} void test_compute_shortest_path_costs() { @@ -192,7 +192,7 @@ class waypoint_matrix_cost_matrix_test_t this->weights.data()); } - void TearDown() {} + void TearDown() override {} void test_compute_cost_matrix() { diff --git a/cpp/tests/examples/routing/CMakeLists.txt b/cpp/tests/examples/routing/CMakeLists.txt index 97d96b7aa..a91a04957 100644 --- a/cpp/tests/examples/routing/CMakeLists.txt +++ b/cpp/tests/examples/routing/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -17,7 +17,7 @@ set_target_properties( pdptw_mixed_fleet cvrp_daily_deliveries PROPERTIES - CUDA_STANDARD 17 + CUDA_STANDARD 20 CUDA_STANDARD_REQUIRED ON ) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index 17f644ab0..52be9e16f 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -53,6 +53,7 @@ const char* termination_status_to_string(cuopt_int_t termination_status) case CUOPT_TERIMINATION_STATUS_FEASIBLE_FOUND: return "Feasible found"; } + return "Unknown"; } diff --git a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu index 18d49e3b8..9f73c5035 100644 --- a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu +++ b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -127,16 +127,17 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) -1); solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping); + auto stream = handle_.get_stream(); std::vector h_current_primal_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_, stream); std::vector h_initial_primal_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_, stream); std::vector h_current_ATY = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_, stream); std::vector h_sum_primal_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_, stream); std::vector h_last_restart_duality_gap_primal_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_, stream); EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size()); EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size()); @@ -151,13 +152,13 @@ TEST(SolverSettingsTest, warm_start_smaller_vector) EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected); std::vector h_current_dual_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_, stream); std::vector h_initial_dual_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_, stream); std::vector h_sum_dual_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_, stream); std::vector h_last_restart_duality_gap_dual_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_, stream); EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size()); EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size()); @@ -227,16 +228,17 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) -1); solver_settings.set_pdlp_warm_start_data(warm_start_data, d_primal_mapping, d_dual_mapping); + auto stream = handle_.get_stream(); std::vector h_current_primal_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_primal_solution_, stream); std::vector h_initial_primal_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_primal_average_, stream); std::vector h_current_ATY = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_ATY_, stream); 
std::vector h_sum_primal_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_primal_solutions_, stream); std::vector h_last_restart_duality_gap_primal_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_, stream); EXPECT_EQ(h_current_primal_solution.size(), primal_expected.size()); EXPECT_EQ(h_initial_primal_average.size(), primal_expected.size()); @@ -251,13 +253,13 @@ TEST(SolverSettingsTest, warm_start_bigger_vector) EXPECT_EQ(h_last_restart_duality_gap_primal_solution, primal_expected); std::vector h_current_dual_solution = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().current_dual_solution_, stream); std::vector h_initial_dual_average = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().initial_dual_average_, stream); std::vector h_sum_dual_solutions = - cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_); + cuopt::host_copy(solver_settings.get_pdlp_warm_start_data().sum_dual_solutions_, stream); std::vector h_last_restart_duality_gap_dual_solution = cuopt::host_copy( - solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_); + solver_settings.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_, stream); EXPECT_EQ(h_current_dual_solution.size(), dual_expected.size()); EXPECT_EQ(h_initial_dual_average.size(), dual_expected.size()); diff --git a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh index de3e82fdb..11dbdba4b 100644 --- a/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh +++ b/cpp/tests/linear_programming/utilities/pdlp_test_utilities.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -38,7 +38,7 @@ static void test_objective_sanity( double objective_value, double epsilon = tolerance) { - const auto primal_vars = host_copy(primal_solution); + const auto primal_vars = host_copy(primal_solution, primal_solution.stream()); const auto& c_vector = op_problem.get_objective_coefficients(); std::vector out(primal_vars.size()); std::transform(primal_vars.cbegin(), @@ -62,7 +62,8 @@ static void test_constraint_sanity( double epsilon = tolerance, bool presolve_enabled = false) { - const std::vector primal_vars = host_copy(solution.get_primal_solution()); + const std::vector primal_vars = + host_copy(solution.get_primal_solution(), solution.get_primal_solution().stream()); const std::vector& values = op_problem.get_constraint_matrix_values(); const std::vector& indices = op_problem.get_constraint_matrix_indices(); const std::vector& offsets = op_problem.get_constraint_matrix_offsets(); diff --git a/cpp/tests/mip/elim_var_remap_test.cu b/cpp/tests/mip/elim_var_remap_test.cu index 2b2f3f576..e1d66ac21 100644 --- a/cpp/tests/mip/elim_var_remap_test.cu +++ b/cpp/tests/mip/elim_var_remap_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -120,8 +120,8 @@ void test_elim_var_remap(std::string test_instance) sub_problem.post_process_solution(sol); - auto golden_full_assignment = host_copy(full_assignment); - auto fixed_sub_problem_assignment = host_copy(sol.assignment); + auto golden_full_assignment = host_copy(full_assignment, handle_.get_stream()); + auto fixed_sub_problem_assignment = host_copy(sol.assignment, handle_.get_stream()); EXPECT_EQ(op_problem.get_n_variables(), fixed_sub_problem_assignment.size()); diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 20f359fcb..019585d90 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -49,8 +49,9 @@ std::tuple, std::vector, std::vector> select_k_ auto seed = std::random_device{}(); std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); - auto v_bnd = host_copy(problem.variable_bounds); - auto int_var_id = host_copy(problem.integer_indices); + auto stream = problem.handle_ptr->get_stream(); + auto v_bnd = host_copy(problem.variable_bounds, stream); + auto int_var_id = host_copy(problem.integer_indices, stream); int_var_id.erase( std::remove_if( int_var_id.begin(), @@ -106,10 +107,11 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, bnd_prb_1.solve(problem, probe_second); bnd_prb_1.set_updated_bounds(problem.handle_ptr, make_span(b_lb_1), make_span(b_ub_1)); - auto h_lb_0 = host_copy(b_lb_0); - auto h_ub_0 = host_copy(b_ub_0); - auto h_lb_1 = host_copy(b_lb_1); - auto h_ub_1 = host_copy(b_ub_1); + auto stream = problem.handle_ptr->get_stream(); + auto h_lb_0 = host_copy(b_lb_0, stream); + auto h_ub_0 = host_copy(b_ub_0, stream); + auto h_lb_1 = host_copy(b_lb_1, stream); + auto h_ub_1 = host_copy(b_ub_1, stream); return std::make_tuple( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } @@ -151,12 +153,13 @@ void test_multi_probe(std::string path) rmm::device_uvector b_ub(problem.n_variables, problem.handle_ptr->get_stream()); bnd_prb.set_updated_bounds(problem.handle_ptr, make_span(b_lb), make_span(b_ub)); - auto h_lb = host_copy(b_lb); - auto h_ub = host_copy(b_ub); + auto stream = problem.handle_ptr->get_stream(); + auto h_lb = host_copy(b_lb, stream); + auto h_ub = host_copy(b_ub, stream); lb_prs.solve(probe_first); - auto bnds = host_copy(lb_prs.vars_bnd); + auto bnds = host_copy(lb_prs.vars_bnd, stream); for (int i = 0; i < (int)h_lb.size(); ++i) { EXPECT_DOUBLE_EQ(bnds[2 * i], h_lb[i]); EXPECT_DOUBLE_EQ(bnds[2 * i + 1], h_ub[i]); diff --git a/cpp/tests/mip/mip_utils.cuh b/cpp/tests/mip/mip_utils.cuh index 7d5683998..19c44b2fd 100644 --- a/cpp/tests/mip/mip_utils.cuh +++ b/cpp/tests/mip/mip_utils.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -21,7 +21,7 @@ static void test_variable_bounds( { const double* lower_bound_ptr = problem.get_variable_lower_bounds().data(); const double* upper_bound_ptr = problem.get_variable_upper_bounds().data(); - auto host_assignment = cuopt::host_copy(solution); + auto host_assignment = cuopt::host_copy(solution, solution.stream()); double* assignment_ptr = host_assignment.data(); cuopt_assert(host_assignment.size() == problem.get_variable_lower_bounds().size(), ""); cuopt_assert(host_assignment.size() == problem.get_variable_upper_bounds().size(), ""); @@ -81,7 +81,7 @@ static void test_constraint_sanity_per_row( const std::vector& variable_upper_bounds = op_problem.get_variable_upper_bounds(); std::vector residual(constraint_lower_bounds.size(), 0.0); std::vector viol(constraint_lower_bounds.size(), 0.0); - auto h_solution = cuopt::host_copy(solution); + auto h_solution = cuopt::host_copy(solution, solution.stream()); // CSR SpMV for (size_t i = 0; i < offsets.size() - 1; ++i) { for (int j = offsets[i]; j < offsets[i + 1]; ++j) { diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index cb960425f..9a933c054 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -49,7 +49,7 @@ std::tuple, std::vector, std::vector> select_k_ std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); auto [v_lb, v_ub] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); - auto int_var_id = host_copy(problem.integer_indices); + auto int_var_id = host_copy(problem.integer_indices, problem.handle_ptr->get_stream()); int_var_id.erase( std::remove_if(int_var_id.begin(), int_var_id.end(), @@ -106,10 +106,11 @@ bounds_probe_results(detail::bound_presolve_t& bnd_prb_0, bnd_prb_1.solve(problem, probe_second); bnd_prb_1.set_updated_bounds(problem.handle_ptr, make_span(b_lb_1), make_span(b_ub_1)); - auto h_lb_0 = host_copy(b_lb_0); - auto h_ub_0 = host_copy(b_ub_0); - auto h_lb_1 = host_copy(b_lb_1); - auto h_ub_1 = host_copy(b_ub_1); + auto stream = problem.handle_ptr->get_stream(); + auto h_lb_0 = host_copy(b_lb_0, stream); + auto h_ub_0 = host_copy(b_ub_0, stream); + auto h_lb_1 = host_copy(b_lb_1, stream); + auto h_ub_1 = host_copy(b_ub_1, stream); return std::make_tuple( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } @@ -121,17 +122,18 @@ multi_probe_results( const std::tuple, std::vector, std::vector>& probe_tuple) { prb.solve(problem, probe_tuple); - rmm::device_uvector m_lb_0(problem.n_variables, problem.handle_ptr->get_stream()); - rmm::device_uvector m_ub_0(problem.n_variables, problem.handle_ptr->get_stream()); - rmm::device_uvector m_lb_1(problem.n_variables, problem.handle_ptr->get_stream()); - rmm::device_uvector m_ub_1(problem.n_variables, problem.handle_ptr->get_stream()); + auto stream = problem.handle_ptr->get_stream(); + rmm::device_uvector m_lb_0(problem.n_variables, stream); + rmm::device_uvector m_ub_0(problem.n_variables, stream); + rmm::device_uvector m_lb_1(problem.n_variables, stream); + rmm::device_uvector m_ub_1(problem.n_variables, stream); prb.set_updated_bounds(problem.handle_ptr, 
make_span(m_lb_0), make_span(m_ub_0), 0); prb.set_updated_bounds(problem.handle_ptr, make_span(m_lb_1), make_span(m_ub_1), 1); - auto h_lb_0 = host_copy(m_lb_0); - auto h_ub_0 = host_copy(m_ub_0); - auto h_lb_1 = host_copy(m_lb_1); - auto h_ub_1 = host_copy(m_ub_1); + auto h_lb_0 = host_copy(m_lb_0, stream); + auto h_ub_0 = host_copy(m_ub_0, stream); + auto h_lb_1 = host_copy(m_lb_1, stream); + auto h_ub_1 = host_copy(m_ub_1, stream); return std::make_tuple( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } @@ -170,15 +172,16 @@ void test_multi_probe(std::string path) auto [m_lb_0, m_ub_0, m_lb_1, m_ub_1] = multi_probe_results(multi_probe_prs, problem, probe_tuple); - auto bnd_min_act_0 = host_copy(bnd_prb_0.upd.min_activity); - auto bnd_max_act_0 = host_copy(bnd_prb_0.upd.max_activity); - auto bnd_min_act_1 = host_copy(bnd_prb_1.upd.min_activity); - auto bnd_max_act_1 = host_copy(bnd_prb_1.upd.max_activity); + auto stream = problem.handle_ptr->get_stream(); + auto bnd_min_act_0 = host_copy(bnd_prb_0.upd.min_activity, stream); + auto bnd_max_act_0 = host_copy(bnd_prb_0.upd.max_activity, stream); + auto bnd_min_act_1 = host_copy(bnd_prb_1.upd.min_activity, stream); + auto bnd_max_act_1 = host_copy(bnd_prb_1.upd.max_activity, stream); - auto mlp_min_act_0 = host_copy(multi_probe_prs.upd_0.min_activity); - auto mlp_max_act_0 = host_copy(multi_probe_prs.upd_0.max_activity); - auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity); - auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity); + auto mlp_min_act_0 = host_copy(multi_probe_prs.upd_0.min_activity, stream); + auto mlp_max_act_0 = host_copy(multi_probe_prs.upd_0.max_activity, stream); + auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity, stream); + auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity, stream); for (int i = 0; i < (int)bnd_min_act_0.size(); ++i) { EXPECT_DOUBLE_EQ(bnd_min_act_0[i], mlp_min_act_0[i]); diff --git a/cpp/tests/mip/presolve_test.cu b/cpp/tests/mip/presolve_test.cu index d27dd1db9..893602e20 100644 --- a/cpp/tests/mip/presolve_test.cu +++ b/cpp/tests/mip/presolve_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -44,7 +44,7 @@ TEST(problem, find_implied_integers) auto problem = detail::problem_t(result->reduced_problem); problem.set_implied_integers(result->implied_integer_indices); ASSERT_TRUE(result->implied_integer_indices.size() > 0); - auto var_types = host_copy(problem.variable_types); + auto var_types = host_copy(problem.variable_types, handle_.get_stream()); // Find the index of the one continuous variable auto it = std::find_if(var_types.begin(), var_types.end(), [](var_t var_type) { return var_type == var_t::CONTINUOUS; diff --git a/cpp/tests/mip/problem_test.cu b/cpp/tests/mip/problem_test.cu index 7113e265b..f884fda10 100644 --- a/cpp/tests/mip/problem_test.cu +++ b/cpp/tests/mip/problem_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -234,8 +234,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) fill_problem(op_problem); cuopt::linear_programming::detail::problem_t problem(op_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, handle.get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, handle.get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 1.0); EXPECT_EQ(constraints_upper_bounds[0], 1.0); @@ -252,8 +254,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) op_problem.set_constraint_upper_bounds(upper, 1); cuopt::linear_programming::detail::problem_t problem(op_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, handle.get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, handle.get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 2.0); EXPECT_EQ(constraints_upper_bounds[0], 3.0); } @@ -270,8 +274,10 @@ TEST(problem, setting_both_rhs_and_constraints_bounds) fill_problem(op_problem); cuopt::linear_programming::detail::problem_t problem(op_problem); - const auto constraints_lower_bounds = host_copy(problem.constraint_lower_bounds); - const auto constraints_upper_bounds = host_copy(problem.constraint_upper_bounds); + const auto constraints_lower_bounds = + host_copy(problem.constraint_lower_bounds, handle.get_stream()); + const auto constraints_upper_bounds = + host_copy(problem.constraint_upper_bounds, handle.get_stream()); EXPECT_EQ(constraints_lower_bounds[0], 2.0); EXPECT_EQ(constraints_upper_bounds[0], 3.0); } diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index eb7e4bb3b..f9d76611d 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -180,7 +180,7 @@ TEST(LPTest, TestSampleLP2) ASSERT_EQ(result.get_primal_solution().size(), 1); // Copy solution to host to access values - auto primal_host = cuopt::host_copy(result.get_primal_solution()); + auto primal_host = cuopt::host_copy(result.get_primal_solution(), handle.get_stream()); EXPECT_NEAR(primal_host[0], 0.0, 1e-6); EXPECT_NEAR(result.get_additional_termination_information().primal_objective, 0.0, 1e-6); diff --git a/cpp/tests/qp/CMakeLists.txt b/cpp/tests/qp/CMakeLists.txt index bbbba2717..eefa843c7 100644 --- a/cpp/tests/qp/CMakeLists.txt +++ b/cpp/tests/qp/CMakeLists.txt @@ -1,8 +1,9 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # cmake-format: on ConfigureTest(QP_UNIT_TEST + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/no_constraints.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/two_variable_test.cu ) diff --git a/cpp/tests/qp/unit_tests/no_constraints.cu b/cpp/tests/qp/unit_tests/no_constraints.cu new file mode 100644 index 000000000..8c9ab2b91 --- /dev/null +++ b/cpp/tests/qp/unit_tests/no_constraints.cu @@ -0,0 +1,66 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +TEST(no_constraints_test, simple_test) +{ + raft::handle_t handle; + + // optimize: x1^2 + x2^2 + // Constraints set through row types + auto op_problem = optimization_problem_t(&handle); + + double A_values_host[] = {}; + int A_indices_host[] = {}; + int A_offsets_host[] = {0}; + op_problem.set_csr_constraint_matrix(A_values_host, 0, A_indices_host, 0, A_offsets_host, 1); + + double lb_host[] = {0.0, 0.0}; + double ub_host[] = {std::numeric_limits::infinity(), + std::numeric_limits::infinity()}; + op_problem.set_variable_lower_bounds(lb_host, 2); + op_problem.set_variable_upper_bounds(ub_host, 2); + + double c_host[] = {0.0, 0.0}; + op_problem.set_objective_coefficients(c_host, 2); + + double Q_values_host[] = {1.0, 1.0}; + int Q_indices_host[] = {0, 1}; + int Q_offsets_host[] = {0, 1, 2}; + op_problem.set_quadratic_objective_matrix(Q_values_host, 2, Q_indices_host, 2, Q_offsets_host, 3); + + auto settings = cuopt::linear_programming::pdlp_solver_settings_t(); + auto solution = cuopt::linear_programming::solve_lp(op_problem, settings); + + EXPECT_EQ(solution.get_termination_status(), + cuopt::linear_programming::pdlp_termination_status_t::Optimal); + EXPECT_NEAR(solution.get_objective_value(), 0.0, 1e-6); + + auto sol_vec = cuopt::host_copy(solution.get_primal_solution(), handle.get_stream()); + + EXPECT_NEAR(sol_vec[0], 0.0, 1e-6); + EXPECT_NEAR(sol_vec[1], 0.0, 1e-6); +} +} // namespace cuopt::linear_programming diff --git a/cpp/tests/qp/unit_tests/two_variable_test.cu b/cpp/tests/qp/unit_tests/two_variable_test.cu index b18150899..e27c16624 100644 --- a/cpp/tests/qp/unit_tests/two_variable_test.cu +++ b/cpp/tests/qp/unit_tests/two_variable_test.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights * reserved. SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -65,7 +65,7 @@ TEST(two_variable_test, simple_test) cuopt::linear_programming::pdlp_termination_status_t::Optimal); EXPECT_NEAR(solution.get_objective_value(), -32.0, 1e-6); - auto sol_vec = cuopt::host_copy(solution.get_primal_solution()); + auto sol_vec = cuopt::host_copy(solution.get_primal_solution(), handle.get_stream()); EXPECT_NEAR(sol_vec[0], 4.0, 1e-6); EXPECT_NEAR(sol_vec[1], 2.0, 1e-6); } diff --git a/cpp/tests/routing/CMakeLists.txt b/cpp/tests/routing/CMakeLists.txt index 0ee757c3e..99cfdb9de 100644 --- a/cpp/tests/routing/CMakeLists.txt +++ b/cpp/tests/routing/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -32,4 +32,5 @@ ConfigureTest(ROUTING_UNIT_TEST ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/prize_collection.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/objective_function.cu ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/top_k.cu + ${CMAKE_CURRENT_SOURCE_DIR}/unit_tests/batch_tsp.cu ) diff --git a/cpp/tests/routing/level0/l0_ges_test.cu b/cpp/tests/routing/level0/l0_ges_test.cu index 22373f704..b3e72c56e 100644 --- a/cpp/tests/routing/level0/l0_ges_test.cu +++ b/cpp/tests/routing/level0/l0_ges_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -55,7 +55,7 @@ class routing_ges_test_t : public ::testing::TestWithParam>, this->populate_device_vectors(); } - void TearDown() {} + void TearDown() override {} assignment_t solve(const cuopt::routing::data_model_view_t& data_model, const cuopt::routing::solver_settings_t& solver_settings, @@ -163,7 +163,7 @@ class simple_routes_ges_test_t : public ::testing::TestWithParampopulate_device_vectors(); } - void TearDown() {} + void TearDown() override {} assignment_t solve(const cuopt::routing::data_model_view_t& data_model, const cuopt::routing::solver_settings_t& solver_settings, diff --git a/cpp/tests/routing/level0/l0_objective_function_test.cu b/cpp/tests/routing/level0/l0_objective_function_test.cu index 935575026..53217807c 100644 --- a/cpp/tests/routing/level0/l0_objective_function_test.cu +++ b/cpp/tests/routing/level0/l0_objective_function_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -25,7 +25,7 @@ template class objective_function_test_t : public base_test_t, public ::testing::TestWithParam> { public: - objective_function_test_t() : base_test_t(512, 5E-2, 0) {} + objective_function_test_t() : base_test_t(512, 0, 0) {} void SetUp() override { auto p = GetParam(); diff --git a/cpp/tests/routing/level0/l0_routing_test.cu b/cpp/tests/routing/level0/l0_routing_test.cu index 4d7bbad02..e078a3499 100644 --- a/cpp/tests/routing/level0/l0_routing_test.cu +++ b/cpp/tests/routing/level0/l0_routing_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -320,7 +320,7 @@ class routing_retail_test_t : public base_test_t, this->populate_device_vectors(); } - void TearDown() {} + void TearDown() override {} void test_cvrptw() { diff --git a/cpp/tests/routing/level0/l0_vehicle_order_match.cu b/cpp/tests/routing/level0/l0_vehicle_order_match.cu index 6c4d40ab7..782f93edc 100644 --- a/cpp/tests/routing/level0/l0_vehicle_order_match.cu +++ b/cpp/tests/routing/level0/l0_vehicle_order_match.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -24,7 +24,7 @@ namespace test { template class vehicle_order_test_t : public base_test_t, public ::testing::TestWithParam { public: - vehicle_order_test_t() : base_test_t(512, 5E-2, 0) {} + vehicle_order_test_t() : base_test_t(512, 0, 0) {} void SetUp() override { this->not_matching_constraints_fraction = GetParam(); diff --git a/cpp/tests/routing/level0/l0_vehicle_types_test.cu b/cpp/tests/routing/level0/l0_vehicle_types_test.cu index f7f247683..2fa3c559f 100644 --- a/cpp/tests/routing/level0/l0_vehicle_types_test.cu +++ b/cpp/tests/routing/level0/l0_vehicle_types_test.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,7 +23,7 @@ namespace test { template class vehicle_types_test_t : public base_test_t, public ::testing::Test { public: - vehicle_types_test_t() : base_test_t(512, 5E-2, 0) {} + vehicle_types_test_t() : base_test_t(512, 0, 0) {} void SetUp() override { this->n_locations = input_.n_locations; diff --git a/cpp/tests/routing/routing_test.cuh b/cpp/tests/routing/routing_test.cuh index 31ad78332..cdafbbf1f 100644 --- a/cpp/tests/routing/routing_test.cuh +++ b/cpp/tests/routing/routing_test.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -482,7 +482,7 @@ class base_test_t { vehicle_max_times_d.data(), vehicle_max_times_d.size(), stream_view_); - fleet_order_constraints_h = fleet_order_constraints_d.to_host(); + fleet_order_constraints_h = fleet_order_constraints_d.to_host(stream_view_); } void check_time_windows(host_assignment_t const& routing_solution, bool is_soft_tw = false) @@ -492,7 +492,7 @@ class base_test_t { auto truck_id = routing_solution.truck_id; auto locations = routing_solution.locations; auto node_types = routing_solution.node_types; - fleet_order_constraints_h = fleet_order_constraints_d.to_host(); + fleet_order_constraints_h = fleet_order_constraints_d.to_host(stream_view_); std::vector temp_truck_ids(truck_id); auto end_it = std::unique(temp_truck_ids.begin(), temp_truck_ids.end()); diff --git a/cpp/tests/routing/unit_tests/batch_tsp.cu b/cpp/tests/routing/unit_tests/batch_tsp.cu new file mode 100644 index 000000000..01ae36e70 --- /dev/null +++ b/cpp/tests/routing/unit_tests/batch_tsp.cu @@ -0,0 +1,89 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include + +#include + +#include + +#include + +namespace cuopt { +namespace routing { +namespace test { + +using i_t = int; +using f_t = float; + +/** + * @brief Creates a small symmetric cost matrix for TSP + * @param n_locations Number of locations + * @return Cost matrix as a flattened vector + */ +std::vector create_small_tsp_cost_matrix(i_t n_locations) +{ + std::vector cost_matrix(n_locations * n_locations, 0.0f); + + // Create a simple distance matrix based on coordinates on a line + for (i_t i = 0; i < n_locations; ++i) { + for (i_t j = 0; j < n_locations; ++j) { + cost_matrix[i * n_locations + j] = static_cast(std::abs(i - j)); + } + } + return cost_matrix; +} + +/** + * @brief Test running TSPs of varying sizes in parallel using call_batch_solve API + */ +TEST(batch_tsp, varying_sizes) +{ + std::vector tsp_sizes = {5, 8, 10, 6, 7, 9}; + const i_t n_problems = static_cast(tsp_sizes.size()); + + // Create handles and cost matrices for each problem + std::vector> handles; + std::vector> cost_matrices_d; + std::vector>> data_models; + std::vector*> data_model_ptrs; + + for (i_t i = 0; i < n_problems; ++i) { + handles.push_back(std::make_unique()); + auto& handle = *handles.back(); + + auto cost_matrix_h = create_small_tsp_cost_matrix(tsp_sizes[i]); + cost_matrices_d.push_back(cuopt::device_copy(cost_matrix_h, handle.get_stream())); + + data_models.push_back(std::make_unique>( + &handle, tsp_sizes[i], 1, tsp_sizes[i])); + data_models.back()->add_cost_matrix(cost_matrices_d.back().data()); + data_model_ptrs.push_back(data_models.back().get()); + } + + // Configure solver settings + cuopt::routing::solver_settings_t settings; + settings.set_time_limit(5); + + // Call batch solve + auto solutions = cuopt::cython::call_batch_solve(data_model_ptrs, &settings); + + // Verify all solutions + ASSERT_EQ(solutions.size(), n_problems); + for (i_t i = 0; i < n_problems; ++i) { + EXPECT_EQ(solutions[i]->status_, cuopt::routing::solution_status_t::SUCCESS) + << "TSP " << i << " (size " << tsp_sizes[i] << ") failed"; + EXPECT_EQ(solutions[i]->vehicle_count_, 1) + << "TSP " << i << " (size " << tsp_sizes[i] << ") used multiple vehicles"; + } +} + +} // namespace test +} // namespace routing +} // namespace cuopt diff --git 
a/cpp/tests/routing/unit_tests/vehicle_order_match.cu b/cpp/tests/routing/unit_tests/vehicle_order_match.cu index ba7dbbf0d..22691b3b8 100644 --- a/cpp/tests/routing/unit_tests/vehicle_order_match.cu +++ b/cpp/tests/routing/unit_tests/vehicle_order_match.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -48,8 +48,9 @@ TEST(vehicle_order_match, two_vehicle_four_orders) EXPECT_EQ(routing_solution.get_status(), cuopt::routing::solution_status_t::SUCCESS); - auto route_id = cuopt::host_copy(routing_solution.get_route()); - auto truck_id = cuopt::host_copy(routing_solution.get_truck_id()); + auto stream = handle.get_stream(); + auto route_id = cuopt::host_copy(routing_solution.get_route(), stream); + auto truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream); for (size_t i = 0; i < route_id.size(); ++i) { if (route_id[i] == 3 || route_id[i] == 1) { EXPECT_EQ(truck_id[i], 0); } } @@ -71,12 +72,13 @@ TEST(vehicle_order_match, one_order_per_vehicle) raft::handle_t handle; cuopt::routing::data_model_view_t data_model(&handle, n_locations, n_vehicles); - auto time_mat_d = cuopt::device_copy(time_mat, handle.get_stream()); + auto stream = handle.get_stream(); + auto time_mat_d = cuopt::device_copy(time_mat, stream); data_model.add_cost_matrix(time_mat_d.data()); std::unordered_map> vehicle_order_match_d; for (const auto& [id, orders] : vehicle_order_match) { - vehicle_order_match_d.emplace(id, cuopt::device_copy(orders, handle.get_stream())); + vehicle_order_match_d.emplace(id, cuopt::device_copy(orders, stream)); } for (const auto& [id, orders] : vehicle_order_match_d) { @@ -87,8 +89,8 @@ TEST(vehicle_order_match, one_order_per_vehicle) EXPECT_EQ(routing_solution.get_status(), cuopt::routing::solution_status_t::SUCCESS); - auto route_id = cuopt::host_copy(routing_solution.get_route()); - auto truck_id = cuopt::host_copy(routing_solution.get_truck_id()); + auto route_id = cuopt::host_copy(routing_solution.get_route(), stream); + auto truck_id = cuopt::host_copy(routing_solution.get_truck_id(), stream); for (size_t i = 0; i < route_id.size(); ++i) { auto order = route_id[i]; auto vehicle = truck_id[i]; diff --git a/datasets/cuopt_service_data/lpmip_configs.json b/datasets/cuopt_service_data/lpmip_configs.json index cb920de50..974d29568 100644 --- a/datasets/cuopt_service_data/lpmip_configs.json +++ b/datasets/cuopt_service_data/lpmip_configs.json @@ -66,19 +66,7 @@ "mip_absolute_gap": 0.0001, "mip_relative_gap": 0.0001, "mip_absolute_tolerance": 0.0001, - "mip_relative_tolerance": 0.0001, - - "absolute_primal": 0.0001, - "absolute_dual": 0.0001, - "absolute_gap": 0.0001, - "relative_primal": 0.0001, - "relative_dual": 0.0001, - "relative_gap": 0.0001, - "primal_infeasible": 0.0001, - "dual_infeasible": 0.0001, - "integrality_tolerance": 0.0001, - "absolute_mip_gap": 0.0001, - "relative_mip_gap": 0.0001 + "mip_relative_tolerance": 0.0001 }, "infeasibility_detection": true, "time_limit": 5, @@ -96,8 +84,6 @@ "save_best_primal_so_far": true, "first_primal_feasible": true, "log_file": "bill", - "solution_file": "barry", - "solver_mode": 3, - "heuristics_only": false + "solution_file": "barry" } } diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh index a7ef2759a..528455e13 100755 --- 
a/datasets/get_test_data.sh +++ b/datasets/get_test_data.sh @@ -1,10 +1,87 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -e set -o pipefail +################################################################################ +# S3 Dataset Download Support +################################################################################ +# Set CUOPT_DATASET_S3_URI to base S3 path +# AWS credentials should be configured via: +# - Environment variables (CUOPT_AWS_ACCESS_KEY_ID, CUOPT_AWS_SECRET_ACCESS_KEY) +# - Standard AWS variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) +# - AWS CLI configuration (~/.aws/credentials) +# - IAM role (for EC2 instances) + +function try_download_from_s3() { + local s3_dirs=("$@") # Array of directories to sync from S3 + + if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then + echo "CUOPT_DATASET_S3_URI not set, skipping S3 download..." + return 1 + fi + + # Require explicit CUOPT credentials to avoid accidentally using generic AWS credentials + if [ -z "${CUOPT_AWS_ACCESS_KEY_ID:-}" ]; then + echo "CUOPT_AWS_ACCESS_KEY_ID not set, skipping S3 download..." + return 1 + fi + + if ! command -v aws &> /dev/null; then + echo "AWS CLI not found, skipping S3 download..." + return 1 + fi + + # Append routing subdirectory to base S3 URI + local s3_uri="${CUOPT_DATASET_S3_URI}routing/" + echo "Downloading datasets from S3..." + + # Use CUOPT-specific credentials only + local region="${CUOPT_AWS_REGION:-us-east-1}" + + # Export credentials for AWS CLI + export AWS_ACCESS_KEY_ID="$CUOPT_AWS_ACCESS_KEY_ID" + export AWS_SECRET_ACCESS_KEY="$CUOPT_AWS_SECRET_ACCESS_KEY" + # Unset session token to avoid mixing credentials + unset AWS_SESSION_TOKEN + export AWS_DEFAULT_REGION="$region" + + # Test AWS credentials + if ! aws sts get-caller-identity &> /dev/null 2>&1; then + echo "AWS credentials invalid, skipping S3 download..." + return 1 + fi + + # Try to sync from S3 + local success=true + if [ ${#s3_dirs[@]} -eq 0 ]; then + # No specific directories - download everything + if ! aws s3 sync "$s3_uri" . --exclude "tmp/*" --exclude "get_test_data.sh" --exclude "*.sh" --exclude "*.md" --only-show-errors; then + success=false + fi + else + # Download specific directories only + for dir in "${s3_dirs[@]}"; do + if ! aws s3 sync "${s3_uri}${dir}/" "${dir}/" --exclude "*.sh" --exclude "*.md" --only-show-errors; then + success=false + fi + done + fi + + if $success; then + echo "✓ Downloaded datasets from S3" + return 0 + else + echo "S3 download failed, falling back to HTTP..." + return 1 + fi +} + +################################################################################ +# HTTP Dataset Download Configuration +################################################################################ # Update this to add/remove/change a dataset, using the following format: # # comment about the dataset @@ -107,7 +184,13 @@ URLS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 3) print $0}')) # extract 3rd fi # shellcheck disable=SC2207 DESTDIRS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 0) print $0}')) # extract 4th fields to a bash array -echo Downloading ... +# Try S3 download first with selected directories +if try_download_from_s3 "${DESTDIRS[@]}"; then + echo "Datasets successfully retrieved from S3, skipping HTTP download." + exit 0 +fi + +echo "Downloading from HTTP sources..." 
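
The S3 path in get_test_data.sh above is gated entirely by CUOPT-specific environment variables and silently falls back to HTTP when the bucket URI, credentials, or the AWS CLI are missing. A minimal sketch of driving the script from Python (for example from a CI wrapper), run from the repository root; the variable names come from the script itself, while the bucket URI and key values below are placeholders only:

    import os
    import subprocess

    env = os.environ.copy()
    # Base URI is concatenated with "routing/" in the script, so it should end with "/".
    env["CUOPT_DATASET_S3_URI"] = "s3://example-bucket/cuopt-datasets/"   # placeholder
    env["CUOPT_AWS_ACCESS_KEY_ID"] = "AKIA..."                            # placeholder
    env["CUOPT_AWS_SECRET_ACCESS_KEY"] = "..."                            # placeholder
    env["CUOPT_AWS_REGION"] = "us-east-1"        # optional; the script defaults to us-east-1

    # With the variables set the script tries S3 first and falls back to the
    # existing HTTP download path if anything is missing or invalid.
    subprocess.run(["bash", "datasets/get_test_data.sh"], check=True, env=env)
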
# Download all tarfiles to a tmp dir rm -rf tmp diff --git a/datasets/linear_programming/download_pdlp_test_dataset.sh b/datasets/linear_programming/download_pdlp_test_dataset.sh index faf18d788..a0c75d590 100755 --- a/datasets/linear_programming/download_pdlp_test_dataset.sh +++ b/datasets/linear_programming/download_pdlp_test_dataset.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -20,6 +20,87 @@ datasets=( "square41" ) +BASEDIR=$(dirname "$0") + +################################################################################ +# S3 Download Support +################################################################################ +# Requires explicit CUOPT credentials to avoid using unintended AWS credentials: +# - CUOPT_DATASET_S3_URI: Base S3 path +# - CUOPT_AWS_ACCESS_KEY_ID: AWS access key +# - CUOPT_AWS_SECRET_ACCESS_KEY: AWS secret key +# - CUOPT_AWS_REGION (optional): AWS region, defaults to us-east-1 + +function try_download_from_s3() { + if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then + return 1 + fi + + # Require explicit CUOPT credentials to avoid accidentally using generic AWS credentials + if [ -z "${CUOPT_AWS_ACCESS_KEY_ID:-}" ]; then + echo "CUOPT_AWS_ACCESS_KEY_ID not set, skipping S3 download..." + return 1 + fi + + if [ -z "${CUOPT_AWS_SECRET_ACCESS_KEY:-}" ]; then + echo "CUOPT_AWS_SECRET_ACCESS_KEY not set, skipping S3 download..." + return 1 + fi + + if ! command -v aws &> /dev/null; then + echo "AWS CLI not found, skipping S3 download..." + return 1 + fi + + # Append linear_programming/pdlp subdirectory to base S3 URI + local s3_uri="${CUOPT_DATASET_S3_URI}linear_programming/pdlp/" + echo "Downloading PDLP datasets from S3..." + + # Use CUOPT-specific credentials only + local region="${CUOPT_AWS_REGION:-us-east-1}" + + # Export credentials for AWS CLI + export AWS_ACCESS_KEY_ID="$CUOPT_AWS_ACCESS_KEY_ID" + export AWS_SECRET_ACCESS_KEY="$CUOPT_AWS_SECRET_ACCESS_KEY" + # Unset session token to avoid mixing credentials + unset AWS_SESSION_TOKEN + export AWS_DEFAULT_REGION="$region" + + # Test AWS credentials + if ! aws sts get-caller-identity &> /dev/null 2>&1; then + echo "AWS credentials invalid, skipping S3 download..." + return 1 + fi + + # Try to sync from S3 (downloads from pdlp/ subdirectory) + local success=true + local total=${#datasets[@]} + local count=0 + for dataset in "${datasets[@]}"; do + count=$((count + 1)) + if ! aws s3 sync "${s3_uri}${dataset}/" "$BASEDIR/${dataset}/" --exclude "*.sh" --only-show-errors; then + success=false + fi + printf "\rProgress: %d/%d" "$count" "$total" + done + echo "" + + if $success; then + echo "✓ Downloaded PDLP datasets from S3" + return 0 + else + echo "S3 download failed, falling back to HTTP..." + return 1 + fi +} + +# Try S3 first +if try_download_from_s3; then + exit 0 +fi + +# HTTP fallback using Python script +echo "Downloading PDLP datasets using Python script..." 
for dataset in "${datasets[@]}"; do python benchmarks/linear_programming/utils/get_datasets.py -d "$dataset" done diff --git a/datasets/mip/download_miplib_test_dataset.sh b/datasets/mip/download_miplib_test_dataset.sh index d0ffe45d2..dc2dd7966 100755 --- a/datasets/mip/download_miplib_test_dataset.sh +++ b/datasets/mip/download_miplib_test_dataset.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 INSTANCES=( @@ -29,6 +29,85 @@ INSTANCES=( BASE_URL="https://miplib.zib.de/WebData/instances" BASEDIR=$(dirname "$0") +################################################################################ +# S3 Download Support +################################################################################ +# Requires explicit CUOPT credentials to avoid using unintended AWS credentials: +# - CUOPT_DATASET_S3_URI: Base S3 path +# - CUOPT_AWS_ACCESS_KEY_ID: AWS access key +# - CUOPT_AWS_SECRET_ACCESS_KEY: AWS secret key +# - CUOPT_AWS_REGION (optional): AWS region, defaults to us-east-1 + +function try_download_from_s3() { + if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then + return 1 + fi + + # Require explicit CUOPT credentials to avoid accidentally using generic AWS credentials + if [ -z "${CUOPT_AWS_ACCESS_KEY_ID:-}" ]; then + echo "CUOPT_AWS_ACCESS_KEY_ID not set, skipping S3 download..." + return 1 + fi + + if [ -z "${CUOPT_AWS_SECRET_ACCESS_KEY:-}" ]; then + echo "CUOPT_AWS_SECRET_ACCESS_KEY not set, skipping S3 download..." + return 1 + fi + + if ! command -v aws &> /dev/null; then + echo "AWS CLI not found, skipping S3 download..." + return 1 + fi + + # Append linear_programming/miplib subdirectory to base S3 URI + local s3_uri="${CUOPT_DATASET_S3_URI}linear_programming/miplib/" + echo "Downloading MIPLIB datasets from S3..." + + # Use CUOPT-specific credentials only + local region="${CUOPT_AWS_REGION:-us-east-1}" + + # Export credentials for AWS CLI + export AWS_ACCESS_KEY_ID="$CUOPT_AWS_ACCESS_KEY_ID" + export AWS_SECRET_ACCESS_KEY="$CUOPT_AWS_SECRET_ACCESS_KEY" + # Unset session token to avoid mixing credentials + unset AWS_SESSION_TOKEN + export AWS_DEFAULT_REGION="$region" + + # Test AWS credentials + if ! aws sts get-caller-identity &> /dev/null 2>&1; then + echo "AWS credentials invalid, skipping S3 download..." + return 1 + fi + + # Try to sync from S3 (downloads from miplib/ subdirectory) + local success=true + local total=${#INSTANCES[@]} + local count=0 + for instance in "${INSTANCES[@]}"; do + count=$((count + 1)) + if ! aws s3 cp "${s3_uri}${instance}.mps" "$BASEDIR/${instance}.mps" --only-show-errors; then + success=false + fi + printf "\rProgress: %d/%d" "$count" "$total" + done + echo "" + + if $success; then + echo "✓ Downloaded MIPLIB datasets from S3" + return 0 + else + echo "S3 download failed, falling back to HTTP..." + return 1 + fi +} + +# Try S3 first +if try_download_from_s3; then + exit 0 +fi + +# HTTP fallback +echo "Downloading MIPLIB datasets from HTTP..." 
for INSTANCE in "${INSTANCES[@]}"; do URL="${BASE_URL}/${INSTANCE}.mps.gz" OUTFILE="${BASEDIR}/${INSTANCE}.mps.gz" diff --git a/dependencies.yaml b/dependencies.yaml index db059451a..7dc6b9490 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -7,7 +7,7 @@ files: all: output: conda matrix: - cuda: ["12.9", "13.0"] + cuda: ["12.9", "13.1"] arch: [x86_64, aarch64] includes: - build_common @@ -349,6 +349,7 @@ dependencies: - numba-cuda>=0.22.1,<0.23.0 - numba>=0.60.0 - &pandas pandas>=2.0 + - &pyyaml pyyaml>=6.0.0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -672,6 +673,10 @@ dependencies: cuda: "13.0" packages: - cuda-version=13.0 + - matrix: + cuda: "13.1" + packages: + - cuda-version=13.1 cuda: common: - output_types: [conda] diff --git a/docs/cuopt/source/cuopt-c/index.rst b/docs/cuopt/source/cuopt-c/index.rst index 15414db50..9f424cdcb 100644 --- a/docs/cuopt/source/cuopt-c/index.rst +++ b/docs/cuopt/source/cuopt-c/index.rst @@ -13,8 +13,8 @@ NVIDIA cuOpt supports a C API for GPU-accelerated optimization that enables user .. toctree:: :maxdepth: 3 - :caption: LP and MILP Optimization - :name: LP and MILP Optimization + :caption: LP, QP and MILP Optimization + :name: LP, QP and MILP Optimization :titlesonly: - LP and MILP + LP, QP and MILP diff --git a/docs/cuopt/source/cuopt-c/lp-milp/index.rst b/docs/cuopt/source/cuopt-c/lp-milp/index.rst deleted file mode 100644 index 086bddfc0..000000000 --- a/docs/cuopt/source/cuopt-c/lp-milp/index.rst +++ /dev/null @@ -1,15 +0,0 @@ -Linear Programming -================== - -This section contains details on the cuOpt LP/MILP C API. - -.. toctree:: - :maxdepth: 3 - :caption: LP/MILP - :name: LP/MILP - :titlesonly: - - lp-milp-c-api.rst - ../../lp-milp-settings.rst - lp-example.rst - milp-examples.rst diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/Makefile b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/Makefile similarity index 100% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/Makefile rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/Makefile diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/milp_mps_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/milp_mps_example.c similarity index 100% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/milp_mps_example.c rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/milp_mps_example.c diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/mip_sample.mps b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mip_sample.mps similarity index 100% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/mip_sample.mps rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mip_sample.mps diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/mps_file_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c similarity index 100% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/mps_file_example.c rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/sample.mps b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/sample.mps similarity index 100% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/sample.mps rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/sample.mps diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/simple_lp_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_lp_example.c similarity index 100% rename from 
docs/cuopt/source/cuopt-c/lp-milp/examples/simple_lp_example.c rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_lp_example.c diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/simple_milp_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_milp_example.c similarity index 100% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/simple_milp_example.c rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_milp_example.c diff --git a/docs/cuopt/source/cuopt-c/lp-milp/examples/simple_qp_example.c b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c similarity index 97% rename from docs/cuopt/source/cuopt-c/lp-milp/examples/simple_qp_example.c rename to docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c index 01b4a89f6..c88890115 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/examples/simple_qp_example.c +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/examples/simple_qp_example.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* @@ -130,7 +130,6 @@ cuopt_int_t test_simple_qp() constraint_rhs, var_lower_bounds, var_upper_bounds, - variable_types, &problem); if (status != CUOPT_SUCCESS) { printf("Error creating problem: %d\n", status); diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/index.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/index.rst new file mode 100644 index 000000000..57de6053a --- /dev/null +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/index.rst @@ -0,0 +1,15 @@ +Linear Programming +================== + +This section contains details on the cuOpt LP/QP/MILP C API. + +.. toctree:: + :maxdepth: 3 + :caption: LP/QP/MILP + :name: LP/QP/MILP + :titlesonly: + + lp-qp-milp-c-api.rst + ../../lp-qp-milp-settings.rst + lp-qp-example.rst + milp-examples.rst diff --git a/docs/cuopt/source/cuopt-c/lp-milp/lp-example.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst similarity index 97% rename from docs/cuopt/source/cuopt-c/lp-milp/lp-example.rst rename to docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst index be99873e5..fc382bbbb 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/lp-example.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-example.rst @@ -1,11 +1,12 @@ -LP C API Examples -================= +==================== +LP/QP C API Examples +==================== Example With Data ----------------- -This example demonstrates how to use the LP solver in C. More details on the API can be found in :doc:`C API `. +This example demonstrates how to use the LP solver in C. More details on the API can be found in :doc:`C API `. The example code is available at ``examples/cuopt-c/lp/simple_lp_example.c`` (:download:`download `): diff --git a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst similarity index 97% rename from docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst rename to docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst index 0207f6406..43d15eca6 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst @@ -1,7 +1,7 @@ -cuOpt LP/MILP C API Reference +cuOpt LP/QP/MILP C API Reference ======================================== -This section contains the cuOpt LP/MILP C API reference. +This section contains the cuOpt LP/QP/MILP C API reference. 
Integer and Floating-Point Types --------------------------------- @@ -130,7 +130,7 @@ When you are done with a solve you should destroy a `cuOptSolverSettings` object Setting Parameters ------------------ -The following functions are used to set and get parameters. You can find more details on the available parameters in the :doc:`LP/MILP settings <../../lp-milp-settings>` section. +The following functions are used to set and get parameters. You can find more details on the available parameters in the :doc:`LP/MILP settings <../../lp-qp-milp-settings>` section. .. doxygenfunction:: cuOptSetParameter .. doxygenfunction:: cuOptGetParameter @@ -144,7 +144,7 @@ The following functions are used to set and get parameters. You can find more de Parameter Constants ------------------- -These constants are used as parameter names in the :c:func:`cuOptSetParameter`, :c:func:`cuOptGetParameter`, and similar functions. For more details on the available parameters, see the :doc:`LP/MILP settings <../../lp-milp-settings>` section. +These constants are used as parameter names in the :c:func:`cuOptSetParameter`, :c:func:`cuOptGetParameter`, and similar functions. For more details on the available parameters, see the :doc:`LP/MILP settings <../../lp-qp-milp-settings>` section. .. LP/MIP parameter string constants .. doxygendefine:: CUOPT_ABSOLUTE_DUAL_TOLERANCE diff --git a/docs/cuopt/source/cuopt-c/lp-milp/milp-examples.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst similarity index 98% rename from docs/cuopt/source/cuopt-c/lp-milp/milp-examples.rst rename to docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst index a4117ac82..3389b53d4 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/milp-examples.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/milp-examples.rst @@ -5,7 +5,7 @@ MILP C API Examples Example With Data ----------------- -This example demonstrates how to use the MILP solver in C. More details on the API can be found in :doc:`C API `. +This example demonstrates how to use the MILP solver in C. More details on the API can be found in :doc:`C API `. The example code is available at ``../lp-milp/examples/simple_milp_example.c`` (:download:`download `): diff --git a/docs/cuopt/source/cuopt-c/quick-start.rst b/docs/cuopt/source/cuopt-c/quick-start.rst index 63b536aaa..286cd0b7b 100644 --- a/docs/cuopt/source/cuopt-c/quick-start.rst +++ b/docs/cuopt/source/cuopt-c/quick-start.rst @@ -2,7 +2,7 @@ Quickstart Guide ================= -NVIDIA cuOpt provides C API for LP and MILP. This section will show you how to install cuOpt C API and how to use it to solve LP and MILP problems. +NVIDIA cuOpt provides C API for LP, QP and MILP. This section will show you how to install cuOpt C API and how to use it to solve LP, QP and MILP problems. Installation diff --git a/docs/cuopt/source/cuopt-cli/quick-start.rst b/docs/cuopt/source/cuopt-cli/quick-start.rst index 4939aa788..527c01223 100644 --- a/docs/cuopt/source/cuopt-cli/quick-start.rst +++ b/docs/cuopt/source/cuopt-cli/quick-start.rst @@ -17,4 +17,4 @@ This will display the complete list of command-line arguments and their usage: :language: shell :linenos: -Please refer to :doc:`../lp-milp-settings` for more details on default values and other options. +Please refer to :doc:`../lp-qp-milp-settings` for more details on default values and other options. 
diff --git a/docs/cuopt/source/cuopt-python/index.rst b/docs/cuopt/source/cuopt-python/index.rst index 7351162d0..49b805a80 100644 --- a/docs/cuopt/source/cuopt-python/index.rst +++ b/docs/cuopt/source/cuopt-python/index.rst @@ -2,7 +2,7 @@ Python API ======================================== -NVIDIA cuOpt supports Python API for routing optimization. +NVIDIA cuOpt supports Python API for routing optimization and LP/QP/MILP optimization This section contains details on the cuOpt Python package. @@ -26,8 +26,8 @@ This section contains details on the cuOpt Python package. .. toctree:: :maxdepth: 3 - :caption: Linear Programming and Mixed Integer Linear Programming - :name: LP and MILP API + :caption: Linear Programming, Quadratic Programming and Mixed Integer Linear Programming + :name: LP, QP and MILP API :titlesonly: - Linear Programming and Mixed Integer Linear Programming + LP, QP and MILP diff --git a/docs/cuopt/source/cuopt-python/lp-milp/index.rst b/docs/cuopt/source/cuopt-python/lp-milp/index.rst deleted file mode 100644 index 358ddb6ad..000000000 --- a/docs/cuopt/source/cuopt-python/lp-milp/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -======================================================= -Linear Programming and Mixed Integer Linear Programming -======================================================= - -This section contains details on the cuOpt linear programming and mixed integer linear programming Python API. - -.. toctree:: - :maxdepth: 3 - :caption: LP and MILP - :name: LP and MILP - :titlesonly: - - lp-milp-api.rst - lp-milp-examples.rst diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/expressions_constraints_example.py b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/expressions_constraints_example.py similarity index 100% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/expressions_constraints_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/expressions_constraints_example.py diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/incumbent_solutions_example.py b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/incumbent_solutions_example.py similarity index 100% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/incumbent_solutions_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/incumbent_solutions_example.py diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/pdlp_warmstart_example.py b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/pdlp_warmstart_example.py similarity index 100% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/pdlp_warmstart_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/pdlp_warmstart_example.py diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/production_planning_example.py b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/production_planning_example.py similarity index 100% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/production_planning_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/production_planning_example.py diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/simple_lp_example.py b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_lp_example.py similarity index 100% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/simple_lp_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_lp_example.py diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/simple_milp_example.py 
b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_milp_example.py similarity index 100% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/simple_milp_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_milp_example.py diff --git a/docs/cuopt/source/cuopt-python/lp-milp/examples/simple_qp_example.py b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_qp_example.py similarity index 82% rename from docs/cuopt/source/cuopt-python/lp-milp/examples/simple_qp_example.py rename to docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_qp_example.py index 3ec85ac27..ec70b0bdc 100644 --- a/docs/cuopt/source/cuopt-python/lp-milp/examples/simple_qp_example.py +++ b/docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_qp_example.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 """ @@ -14,10 +14,11 @@ Problem: minimize x^2 + y^2 subject to x + y >= 1 + 0.75 * x + y <= 1 x, y >= 0 This is a convex QP that minimizes the squared distance from the origin -while requiring the sum of x and y to be at least 1. +while satisfying other constraints. """ from cuopt.linear_programming.problem import ( @@ -31,11 +32,12 @@ def main(): prob = Problem("Simple QP") # Add variables with non-negative bounds - x = prob.addVariable(lb=0, name="x") - y = prob.addVariable(lb=0, name="y") + x = prob.addVariable(lb=0) + y = prob.addVariable(lb=0) # Add constraint: x + y >= 1 prob.addConstraint(x + y >= 1) + prob.addConstraint(0.75 * x + y <= 1) # Set quadratic objective: minimize x^2 + y^2 # Using Variable * Variable to create quadratic terms diff --git a/docs/cuopt/source/cuopt-python/lp-qp-milp/index.rst b/docs/cuopt/source/cuopt-python/lp-qp-milp/index.rst new file mode 100644 index 000000000..e7fb2f644 --- /dev/null +++ b/docs/cuopt/source/cuopt-python/lp-qp-milp/index.rst @@ -0,0 +1,14 @@ +============================================================================== +Linear Programming, Quadratic Programming and Mixed Integer Linear Programming +============================================================================== + +This section contains details on the cuOpt linear programming, quadratic programming and mixed integer linear programming Python API. + +.. toctree:: + :maxdepth: 3 + :caption: LP, QP and MILP + :name: LP, QP and MILP + :titlesonly: + + lp-qp-milp-api.rst + lp-qp-milp-examples.rst diff --git a/docs/cuopt/source/cuopt-python/lp-milp/lp-milp-api.rst b/docs/cuopt/source/cuopt-python/lp-qp-milp/lp-qp-milp-api.rst similarity index 95% rename from docs/cuopt/source/cuopt-python/lp-milp/lp-milp-api.rst rename to docs/cuopt/source/cuopt-python/lp-qp-milp/lp-qp-milp-api.rst index 75a279e9b..cf87fa381 100644 --- a/docs/cuopt/source/cuopt-python/lp-milp/lp-milp-api.rst +++ b/docs/cuopt/source/cuopt-python/lp-qp-milp/lp-qp-milp-api.rst @@ -1,6 +1,8 @@ -========================= -LP and MILP API Reference -========================= +.. _problem_modeling : + +============================= +LP, QP and MILP API Reference +============================= .. 
autoclass:: cuopt.linear_programming.problem.Problem :members: diff --git a/docs/cuopt/source/cuopt-python/lp-milp/lp-milp-examples.rst b/docs/cuopt/source/cuopt-python/lp-qp-milp/lp-qp-milp-examples.rst similarity index 92% rename from docs/cuopt/source/cuopt-python/lp-milp/lp-milp-examples.rst rename to docs/cuopt/source/cuopt-python/lp-qp-milp/lp-qp-milp-examples.rst index 134602182..bb6f428ac 100644 --- a/docs/cuopt/source/cuopt-python/lp-milp/lp-milp-examples.rst +++ b/docs/cuopt/source/cuopt-python/lp-qp-milp/lp-qp-milp-examples.rst @@ -1,12 +1,12 @@ -==================== -LP and MILP Examples -==================== +======================== +LP, QP and MILP Examples +======================== -This section contains examples of how to use the cuOpt linear programming and mixed integer linear programming Python API. +This section contains examples of how to use the cuOpt linear programming, quadratic programming and mixed integer linear programming Python API. .. note:: - The examples in this section are not exhaustive. They are provided to help you get started with the cuOpt linear programming and mixed integer linear programming Python API. For more examples, please refer to the `cuopt-examples GitHub repository `_. + The examples in this section are not exhaustive. They are provided to help you get started with the cuOpt linear programming, quadratic programming and mixed integer linear programming Python API. For more examples, please refer to the `cuopt-examples GitHub repository `_. Simple Linear Programming Example @@ -27,6 +27,7 @@ The response is as follows: y = 0.0 Objective value = 10.0 + .. _simple-qp-example-python: Simple Quadratic Programming Example @@ -50,6 +51,7 @@ The response is as follows: y = 0.5 Objective value = 0.5 + Mixed Integer Linear Programming Example ---------------------------------------- diff --git a/docs/cuopt/source/cuopt-python/quick-start.rst b/docs/cuopt/source/cuopt-python/quick-start.rst index 042e7b932..e7ac1a7e7 100644 --- a/docs/cuopt/source/cuopt-python/quick-start.rst +++ b/docs/cuopt/source/cuopt-python/quick-start.rst @@ -2,7 +2,7 @@ Quickstart Guide ================= -NVIDIA cuOpt provides a Python API for routing optimization that enables users to solve complex optimization problems efficiently. For now, cuOpt python API supports routing optimization only. +NVIDIA cuOpt provides a Python API for routing optimization and LP/QP/MILP that enables users to solve complex optimization problems efficiently. Installation ============ diff --git a/docs/cuopt/source/cuopt-server/examples/lp-examples.rst b/docs/cuopt/source/cuopt-server/examples/lp-examples.rst index 85caae2c1..52d401281 100644 --- a/docs/cuopt/source/cuopt-server/examples/lp-examples.rst +++ b/docs/cuopt/source/cuopt-server/examples/lp-examples.rst @@ -261,7 +261,7 @@ The response is: Generate Datamodel from MPS Parser ---------------------------------- -Use a datamodel generated from mps file as input; this yields a solution object in response. For more details please refer to :doc:`LP/MILP parameters <../../lp-milp-settings>`. +Use a datamodel generated from mps file as input; this yields a solution object in response. For more details please refer to :doc:`LP/QP/MILP parameters <../../lp-qp-milp-settings>`. 
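
For the updated Python QP example above, it may help to note why the documented result (objective value 0.5 at y = 0.5) is unchanged after the new inequality is added: the extra constraint is inactive at the optimum. A small self-contained hand check in plain Python, no cuOpt calls involved:

    # minimize x^2 + y^2  s.t.  x + y >= 1,  0.75*x + y <= 1,  x, y >= 0
    # The minimizer of x^2 + y^2 on the active set x + y = 1 is the projection
    # of the origin onto that line, i.e. (0.5, 0.5).
    x, y = 0.5, 0.5
    assert x + y >= 1                         # original constraint tight at the optimum
    assert 0.75 * x + y <= 1                  # added constraint: 0.875 <= 1, inactive
    assert abs(x * x + y * y - 0.5) < 1e-12   # objective matches the documented value
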
:download:`mps_datamodel_example.py ` diff --git a/docs/cuopt/source/cuopt-server/server-api/index.rst b/docs/cuopt/source/cuopt-server/server-api/index.rst index cc702ae03..23340843b 100644 --- a/docs/cuopt/source/cuopt-server/server-api/index.rst +++ b/docs/cuopt/source/cuopt-server/server-api/index.rst @@ -12,4 +12,4 @@ This section contains details on Server options supported and open-api specifica server-cli.rst ../../open-api.rst - ../../lp-milp-settings.rst + ../../lp-qp-milp-settings.rst diff --git a/docs/cuopt/source/index.rst b/docs/cuopt/source/index.rst index b40db1f7e..fc51649b2 100644 --- a/docs/cuopt/source/index.rst +++ b/docs/cuopt/source/index.rst @@ -18,7 +18,7 @@ NVIDIA cuOpt :name: Features routing-features.rst - lp-features.rst + lp-qp-features.rst milp-features.rst ========================== diff --git a/docs/cuopt/source/lp-features.rst b/docs/cuopt/source/lp-qp-features.rst similarity index 99% rename from docs/cuopt/source/lp-features.rst rename to docs/cuopt/source/lp-qp-features.rst index 96778b207..fde57017e 100644 --- a/docs/cuopt/source/lp-features.rst +++ b/docs/cuopt/source/lp-qp-features.rst @@ -1,5 +1,5 @@ ================== -LP Features +LP/QP Features ================== Availability diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst similarity index 98% rename from docs/cuopt/source/lp-milp-settings.rst rename to docs/cuopt/source/lp-qp-milp-settings.rst index 5b65285dd..592cdd025 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -1,10 +1,10 @@ ================================= -LP and MILP Settings +LP, QP and MILP Settings ================================= -This page describes the parameter settings available for cuOpt's LP and MILP solvers. These parameters are set as :ref:`parameter constants ` in case of C API and in case of Server Thin client as raw strings. -Please refer to examples in :doc:`C ` and :doc:`Server Thin client ` for more details. +This page describes the parameter settings available for cuOpt's LP, QP and MILP solvers. These parameters are set as :ref:`parameter constants ` in case of C API and in case of Server Thin client as raw strings. +Please refer to examples in :doc:`C ` and :doc:`Server Thin client ` for more details. .. note:: When setting parameters in thin client solver settings, remove ``CUOPT_`` from the parameter name and convert to lowercase. For example, ``CUOPT_TIME_LIMIT`` would be set as ``time_limit``. diff --git a/docs/cuopt/source/transition.rst b/docs/cuopt/source/transition.rst index 28e4cad13..3c6e0aa9c 100644 --- a/docs/cuopt/source/transition.rst +++ b/docs/cuopt/source/transition.rst @@ -16,7 +16,7 @@ Parameter/option statuses are listed below, they express how each of these optio **Removed** - These features were deprecated in a previous release and completely removed in this one. -For all solver_configs fields, see the LP/MILP settings guide :doc:`lp-milp-settings` or the service openapi spec :doc:`open-api`. +For all solver_configs fields, see the LP/QP/MILP settings guide :doc:`lp-qp-milp-settings` or the service openapi spec :doc:`open-api`. 
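
Because the deprecated tolerance aliases are dropped in this patch, existing solver_configs payloads need to use only the current names. An illustrative sketch using the surviving keys that appear in the updated tests and sample configs elsewhere in this patch; the numeric values are placeholders, not recommendations:

    solver_config = {
        "time_limit": 5,
        "mip_heuristics_only": False,
        "tolerances": {
            "absolute_primal_tolerance": 1e-4,
            "absolute_dual_tolerance": 1e-4,
            "absolute_gap_tolerance": 1e-4,
            "relative_primal_tolerance": 1e-4,
            "relative_dual_tolerance": 1e-4,
            "relative_gap_tolerance": 1e-4,
            "primal_infeasible_tolerance": 1e-8,
            "dual_infeasible_tolerance": 1e-8,
            "mip_integrality_tolerance": 1e-5,
        },
    }
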
Changes to solver_configs.tolerances ------------------------------------ diff --git a/docs/cuopt/source/versions1.json b/docs/cuopt/source/versions1.json index 2ee27e353..12bebcd03 100644 --- a/docs/cuopt/source/versions1.json +++ b/docs/cuopt/source/versions1.json @@ -1,32 +1,32 @@ [ { "version": "26.02.00", - "url": "../26.02.00/", + "url": "https://docs.nvidia.com/cuopt/user-guide/26.02.00/", "name": "latest", "preferred": true }, { "version": "25.12.00", - "url": "../25.12.00/" + "url": "https://docs.nvidia.com/cuopt/user-guide/25.12.00/" }, { "version": "25.10.00", - "url": "../25.10.00/" + "url": "https://docs.nvidia.com/cuopt/user-guide/25.10.00/" }, { "version": "25.08.00", - "url": "../25.08.00/" + "url": "https://docs.nvidia.com/cuopt/user-guide/25.08.00/" }, { "version": "25.05", - "url": "../25.05/" + "url": "https://docs.nvidia.com/cuopt/user-guide/25.05/" }, { "version": "25.02", - "url": "../25.02/" + "url": "https://docs.nvidia.com/cuopt/user-guide/25.02/" }, { "version": "24.11", - "url": "../24.11/" + "url": "https://docs.nvidia.com/cuopt/user-guide/24.11/" } ] diff --git a/python/cuopt/cuopt/distance_engine/waypoint_matrix_wrapper.pyx b/python/cuopt/cuopt/distance_engine/waypoint_matrix_wrapper.pyx index 1c959a593..68222a133 100644 --- a/python/cuopt/cuopt/distance_engine/waypoint_matrix_wrapper.pyx +++ b/python/cuopt/cuopt/distance_engine/waypoint_matrix_wrapper.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 # cython: profile=False @@ -20,7 +20,6 @@ import numpy as np from numba import cuda import cudf -from cudf.core.column_accessor import ColumnAccessor from cuopt.utilities import series_from_buf diff --git a/python/cuopt/cuopt/routing/__init__.py b/python/cuopt/cuopt/routing/__init__.py index efa61b477..081d58f99 100644 --- a/python/cuopt/cuopt/routing/__init__.py +++ b/python/cuopt/cuopt/routing/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from cuopt.routing.assignment import Assignment, SolutionStatus @@ -9,5 +9,5 @@ update_routes_and_vehicles, ) from cuopt.routing.utils_wrapper import DatasetDistribution -from cuopt.routing.vehicle_routing import DataModel, Solve, SolverSettings +from cuopt.routing.vehicle_routing import BatchSolve, DataModel, Solve, SolverSettings from cuopt.routing.vehicle_routing_wrapper import ErrorStatus, Objective diff --git a/python/cuopt/cuopt/routing/utils_wrapper.pyx b/python/cuopt/cuopt/routing/utils_wrapper.pyx index 659ac7a73..aa0800cf1 100644 --- a/python/cuopt/cuopt/routing/utils_wrapper.pyx +++ b/python/cuopt/cuopt/routing/utils_wrapper.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# noqa # SPDX-License-Identifier: Apache-2.0 @@ -21,7 +21,6 @@ import numpy as np from numba import cuda import cudf -from cudf.core.buffer import as_buffer from libcpp.utility cimport move @@ -106,9 +105,7 @@ def generate_dataset(locations=100, asymmetric=True, min_demand=cudf.Series(), coordinates['x'] = series_from_buf(x_pos, pa.float32()) coordinates['y'] = series_from_buf(y_pos, pa.float32()) - matrices_buf = as_buffer( - DeviceBuffer.c_from_unique_ptr(move(g_ret.d_matrices_)) - ) + matrices_buf = DeviceBuffer.c_from_unique_ptr(move(g_ret.d_matrices_)) desc = matrices_buf.__cuda_array_interface__ desc["shape"] = (n_vehicle_types, n_matrix_types, locations, locations) desc["typestr"] = "f4" @@ -140,9 +137,7 @@ def generate_dataset(locations=100, asymmetric=True, min_demand=cudf.Series(), ) fleet_size = vehicles["earliest_time"].shape[0] - capacities_buf = as_buffer( - DeviceBuffer.c_from_unique_ptr(move(g_ret.d_caps_)) - ) + capacities_buf = DeviceBuffer.c_from_unique_ptr(move(g_ret.d_caps_)) desc = capacities_buf.__cuda_array_interface__ desc["shape"] = (dim, fleet_size) desc["typestr"] = "u2" @@ -152,9 +147,7 @@ def generate_dataset(locations=100, asymmetric=True, min_demand=cudf.Series(), vehicles["capacity_" + str(i)] = capacities[i] # Fleet order constraints - service_times_buf = as_buffer( - DeviceBuffer.c_from_unique_ptr(move(g_ret.d_service_time_)) - ) + service_times_buf = DeviceBuffer.c_from_unique_ptr(move(g_ret.d_service_time_)) desc = service_times_buf.__cuda_array_interface__ desc["shape"] = (fleet_size, locations) desc["typestr"] = "i4" @@ -175,9 +168,7 @@ def generate_dataset(locations=100, asymmetric=True, min_demand=cudf.Series(), orders["earliest_time"] = series_from_buf(earliest_time, pa.int32()) orders["latest_time"] = series_from_buf(latest_time, pa.int32()) - demands_buf = as_buffer( - DeviceBuffer.c_from_unique_ptr(move(g_ret.d_demands_)) - ) + demands_buf = DeviceBuffer.c_from_unique_ptr(move(g_ret.d_demands_)) desc = demands_buf.__cuda_array_interface__ desc["shape"] = (dim, locations) desc["typestr"] = "i2" diff --git a/python/cuopt/cuopt/routing/vehicle_routing.pxd b/python/cuopt/cuopt/routing/vehicle_routing.pxd index 4638f8ae7..7f89d33ff 100644 --- a/python/cuopt/cuopt/routing/vehicle_routing.pxd +++ b/python/cuopt/cuopt/routing/vehicle_routing.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 @@ -9,6 +9,7 @@ from libcpp cimport bool from libcpp.string cimport string +from libcpp.vector cimport vector from pylibraft.common.handle cimport * @@ -133,3 +134,8 @@ cdef extern from "cuopt/routing/cython/cython.hpp" namespace "cuopt::cython": # data_model_view_t[int, float]* data_model, solver_settings_t[int, float]* solver_settings ) except + + + cdef vector[unique_ptr[vehicle_routing_ret_t]] call_batch_solve( + vector[data_model_view_t[int, float] *] data_models, + solver_settings_t[int, float]* solver_settings + ) except + diff --git a/python/cuopt/cuopt/routing/vehicle_routing.py b/python/cuopt/cuopt/routing/vehicle_routing.py index 365709147..990283667 100644 --- a/python/cuopt/cuopt/routing/vehicle_routing.py +++ b/python/cuopt/cuopt/routing/vehicle_routing.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import numpy as np @@ -1540,3 +1540,52 @@ def Solve(data_model, solver_settings=None): solver_settings.get_config_file_name(), ) return solution + + +@catch_cuopt_exception +def BatchSolve(data_model_list, solver_settings=None): + """ + Solves multiple routing problems in batch mode using parallel execution. + + Parameters + ---------- + data_model_list: list of DataModel + List of data model objects representing routing problems to solve. + solver_settings: SolverSettings + Settings to configure solver configurations. + By default, it uses default solver settings to solve. + + Returns + ------- + tuple + A tuple containing: + - list of Assignment: Solutions for each routing problem + + Examples + -------- + >>> from cuopt import routing + >>> import cudf + >>> # Create multiple data models + >>> data_models = [] + >>> for i in range(5): + ... cost_matrix = cudf.DataFrame([[0, 1, 2], [1, 0, 3], [2, 3, 0]]) + ... dm = routing.DataModel(3, 1) + ... dm.add_cost_matrix(cost_matrix) + ... data_models.append(dm) + >>> settings = routing.SolverSettings() + >>> settings.set_time_limit(1.0) + >>> solutions, solve_time = routing.BatchSolve(data_models, settings) + """ + + if not isinstance(data_model_list, list): + raise ValueError("data_model_list must be a list of DataModel objects") + if len(data_model_list) == 0: + raise ValueError("data_model_list cannot be empty") + if not all(isinstance(dm, DataModel) for dm in data_model_list): + raise ValueError( + "All elements in data_model_list must be DataModel instances" + ) + if solver_settings is None: + solver_settings = SolverSettings() + + return vehicle_routing_wrapper.BatchSolve(data_model_list, solver_settings) diff --git a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx index bf4a2570c..c1d4bd01a 100644 --- a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx +++ b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
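
As exercised by the new Python test added later in this patch, routing.BatchSolve returns the list of Assignment objects directly, one per data model and in the same order as the input list. A minimal usage sketch mirroring that test, with a tiny illustrative cost matrix:

    import cudf

    from cuopt import routing

    data_models = []
    for _ in range(3):
        # Tiny TSP on a line: cost(i, j) = |i - j|.
        cost_matrix = cudf.DataFrame([[0.0, 1.0, 2.0],
                                      [1.0, 0.0, 1.0],
                                      [2.0, 1.0, 0.0]])
        dm = routing.DataModel(3, 1)   # 3 locations, 1 vehicle
        dm.add_cost_matrix(cost_matrix)
        data_models.append(dm)

    settings = routing.SolverSettings()
    settings.set_time_limit(1.0)

    # One Assignment per data model, in input order.
    solutions = routing.BatchSolve(data_models, settings)
    for i, solution in enumerate(solutions):
        assert solution.get_status() == 0, f"problem {i} failed"
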
# noqa # SPDX-License-Identifier: Apache-2.0 @@ -11,6 +11,7 @@ from pylibraft.common.handle cimport * from cuopt.routing.structure.routing_utilities cimport * from cuopt.routing.vehicle_routing cimport ( + call_batch_solve, call_solve, data_model_view_t, node_type_t, @@ -32,8 +33,10 @@ from libc.stdlib cimport free, malloc from libc.string cimport memcpy, strcpy, strlen from libcpp cimport bool from libcpp.memory cimport unique_ptr +from libcpp.pair cimport pair from libcpp.string cimport string from libcpp.utility cimport move +from libcpp.vector cimport vector from rmm.pylibrmm.device_buffer cimport DeviceBuffer @@ -834,3 +837,126 @@ def Solve(DataModel data_model, SolverSettings solver_settings): error_message, unserviced_nodes ) + + +cdef create_assignment_from_vr_ret(vehicle_routing_ret_t& vr_ret): + """Helper function to create an Assignment from a vehicle_routing_ret_t""" + vehicle_count = vr_ret.vehicle_count_ + total_objective_value = vr_ret.total_objective_value_ + + objective_values = {} + for k in vr_ret.objective_values_: + obj = Objective(int(k.first)) + objective_values[obj] = k.second + + status = vr_ret.status_ + cdef char* c_sol_string = c_get_string(vr_ret.solution_string_) + try: + solver_status_string = \ + c_sol_string[:vr_ret.solution_string_.length()].decode('UTF-8') + finally: + free(c_sol_string) + + route = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_route_)) + route_locations = DeviceBuffer.c_from_unique_ptr( + move(vr_ret.d_route_locations_) + ) + arrival_stamp = DeviceBuffer.c_from_unique_ptr( + move(vr_ret.d_arrival_stamp_) + ) + truck_id = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_truck_id_)) + node_types = DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_node_types_)) + unserviced_nodes_buf = \ + DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_unserviced_nodes_)) + accepted_buf = \ + DeviceBuffer.c_from_unique_ptr(move(vr_ret.d_accepted_)) + + route_df = cudf.DataFrame() + route_df['route'] = series_from_buf(route, pa.int32()) + route_df['arrival_stamp'] = series_from_buf(arrival_stamp, pa.float64()) + route_df['truck_id'] = series_from_buf(truck_id, pa.int32()) + route_df['location'] = series_from_buf(route_locations, pa.int32()) + route_df['type'] = series_from_buf(node_types, pa.int32()) + + unserviced_nodes = cudf.Series._from_column( + series_from_buf(unserviced_nodes_buf, pa.int32()) + ) + accepted = cudf.Series._from_column( + series_from_buf(accepted_buf, pa.int32()) + ) + + def get_type_from_int(type_in_int): + if type_in_int == int(NodeType.DEPOT): + return "Depot" + elif type_in_int == int(NodeType.PICKUP): + return "Pickup" + elif type_in_int == int(NodeType.DELIVERY): + return "Delivery" + elif type_in_int == int(NodeType.BREAK): + return "Break" + + node_types_string = [ + get_type_from_int(type_in_int) + for type_in_int in route_df['type'].to_pandas()] + route_df['type'] = node_types_string + error_status = vr_ret.error_status_ + error_message = vr_ret.error_message_ + + return Assignment( + vehicle_count, + total_objective_value, + objective_values, + route_df, + accepted, + status, + solver_status_string, + error_status, + error_message, + unserviced_nodes + ) + + +def BatchSolve(py_data_model_list, SolverSettings solver_settings): + """ + Solve multiple routing problems in batch mode using parallel execution. + + Parameters + ---------- + py_data_model_list : list of DataModel + List of data model objects representing routing problems to solve. + solver_settings : SolverSettings + Solver settings to use for all problems. 
+ + Returns + ------- + tuple + A tuple containing: + - list of Assignment: Solutions for each routing problem + - float: Total solve time in seconds + """ + cdef solver_settings_t[int, float]* c_solver_settings = ( + solver_settings.c_solver_settings.get() + ) + + cdef vector[data_model_view_t[int, float] *] data_model_views + + for data_model_obj in py_data_model_list: + data_model_views.push_back( + (data_model_obj).c_data_model_view.get() + ) + + cdef vector[unique_ptr[vehicle_routing_ret_t]] batch_solve_result = ( + move(call_batch_solve(data_model_views, c_solver_settings)) + ) + + cdef vector[unique_ptr[vehicle_routing_ret_t]] c_solutions = ( + move(batch_solve_result) + ) + + solutions = [] + for i in range(c_solutions.size()): + solutions.append( + create_assignment_from_vr_ret(c_solutions[i].get()[0]) + ) + + return solutions diff --git a/python/cuopt/cuopt/tests/routing/test_batch_solve.py b/python/cuopt/cuopt/tests/routing/test_batch_solve.py new file mode 100644 index 000000000..31d09c202 --- /dev/null +++ b/python/cuopt/cuopt/tests/routing/test_batch_solve.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import cudf +import numpy as np + +from cuopt import routing + + +def create_tsp_cost_matrix(n_locations): + """Creates a simple symmetric cost matrix for TSP.""" + cost_matrix = np.zeros((n_locations, n_locations), dtype=np.float32) + for i in range(n_locations): + for j in range(n_locations): + cost_matrix[i, j] = abs(i - j) + return cudf.DataFrame(cost_matrix) + + +def test_batch_solve_varying_sizes(): + """Test batch solving TSPs of varying sizes.""" + tsp_sizes = [ + 5, + 8, + 10, + 6, + 7, + 9, + 12, + 15, + 11, + 4, + 13, + 14, + 8, + 6, + 10, + 9, + 7, + 11, + 5, + 12, + ] + + # Create data models for each TSP + data_models = [] + for n_locations in tsp_sizes: + cost_matrix = create_tsp_cost_matrix(n_locations) + dm = routing.DataModel(n_locations, 1) + dm.add_cost_matrix(cost_matrix) + data_models.append(dm) + + # Configure solver settings + settings = routing.SolverSettings() + settings.set_time_limit(5.0) + + # Call batch solve + solutions = routing.BatchSolve(data_models, settings) + + # Verify results + assert len(solutions) == len(tsp_sizes) + for i, solution in enumerate(solutions): + assert solution.get_status() == 0, ( + f"TSP {i} (size {tsp_sizes[i]}) failed" + ) + assert solution.get_vehicle_count() == 1, ( + f"TSP {i} (size {tsp_sizes[i]}) used multiple vehicles" + ) diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml index 36560e46a..05ca7ffa8 100644 --- a/python/cuopt/pyproject.toml +++ b/python/cuopt/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "numpy>=1.23.5,<3.0", "pandas>=2.0", "pylibraft==26.2.*,>=0.0.0a0", + "pyyaml>=6.0.0", "rapids-logger==0.2.*,>=0.0.0a0", "rmm==26.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. diff --git a/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py b/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py index 4236586b1..28447bf2e 100644 --- a/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py +++ b/python/cuopt_self_hosted/cuopt_sh_client/cuopt_self_host_client.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import json @@ -257,8 +257,6 @@ class CuOptServiceSelfHostClient: polling_interval : int The duration in seconds between consecutive polling attempts. Defaults to 1. - request_excess_timeout : int - Note: Deprecated, Use polling_timeout instead only_validate : boolean Only validates input. Defaults to False. polling_timeout : int @@ -303,7 +301,6 @@ def __init__( use_https: bool = False, self_signed_cert="", polling_interval=1, - request_excess_timeout=None, only_validate=False, polling_timeout=600, timeout_exception=True, @@ -361,11 +358,7 @@ def __init__( self.solution_url = f"{self.protocol}://{self.ip}/cuopt/solution" # noqa self.polling_interval = polling_interval - self.timeout = ( - request_excess_timeout - if request_excess_timeout is not None - else polling_timeout - ) + self.timeout = polling_timeout def _get_response(self, response): if response.headers["content-type"] == mime_type.JSON.value: diff --git a/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py b/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py index 0a5ad87f2..fca85b33c 100644 --- a/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py +++ b/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from enum import IntEnum, auto @@ -167,18 +167,6 @@ def toDict(self): "mip_relative_gap", "mip_absolute_tolerance", "mip_relative_tolerance", - # deprecated parameters - "absolute_primal", - "absolute_dual", - "absolute_gap", - "relative_primal", - "relative_dual", - "relative_gap", - "primal_infeasible", - "dual_infeasible", - "integrality_tolerance", - "absolute_mip_gap", - "relative_mip_gap", ] # Grab everything that is not a tolerance diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index 054f3776c..7b8589935 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 import pytest @@ -70,15 +70,15 @@ def get_std_data_for_lp(): "time_limit": 5, "tolerances": { "optimality": 0.0001, - "absolute_primal": 0.0001, - "absolute_dual": 0.0001, - "absolute_gap": 0.0001, - "relative_primal": 0.0001, - "relative_dual": 0.0001, - "relative_gap": 0.0001, - "primal_infeasible": 0.00000001, - "dual_infeasible": 0.00000001, - "integrality_tolerance": 0.00001, + "absolute_primal_tolerance": 0.0001, + "absolute_dual_tolerance": 0.0001, + "absolute_gap_tolerance": 0.0001, + "relative_primal_tolerance": 0.0001, + "relative_dual_tolerance": 0.0001, + "relative_gap_tolerance": 0.0001, + "primal_infeasible_tolerance": 0.00000001, + "dual_infeasible_tolerance": 0.00000001, + "mip_integrality_tolerance": 0.00001, }, }, } @@ -123,7 +123,7 @@ def test_sample_milp( data = get_std_data_for_milp() data["maximize"] = maximize data["solver_config"]["mip_scaling"] = scaling - data["solver_config"]["heuristics_only"] = heuristics_only + data["solver_config"]["mip_heuristics_only"] = heuristics_only data["solver_config"]["num_cpu_threads"] = 4 res = get_lp(client, data) diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index b6f84e54d..6be53bade 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import copy @@ -372,68 +372,6 @@ class Tolerances(StrictModel): mip_relative_tolerance: float = Field( default=None, description="MIP relative tolerance" ) - absolute_primal: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. " - "Use absolute_primal_tolerance instead", - ) - absolute_dual: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. Use absolute_dual_tolerance instead", - ) - absolute_gap: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. Use absolute_gap_tolerance instead", - ) - relative_primal: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. " - "Use relative_primal_tolerance instead", - ) - relative_dual: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. Use relative_dual_tolerance instead", - ) - relative_gap: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. Use relative_gap_tolerance instead", - ) - primal_infeasible: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. " - "Use primal_infeasible_tolerance instead", - ) - dual_infeasible: float = Field( - default=None, - deprecated=True, - description="Deprecated in 25.08. " - "Use dual_infeasible_tolerance instead", - ) - integrality_tolerance: float = Field( - default=None, - deprecated=True, - description="Deprecated starting in 25.05. " - "Use mip_integratlity_tolerance instead.", - ) - absolute_mip_gap: float = Field( - default=None, - deprecated=True, - description="Deprecated starting in 25.05. 
" - "Use mip_absolute_gap instead.", - ) - relative_mip_gap: float = Field( - default=None, - deprecated=True, - description="Deprecated starting in 25.05. " - "Use mip_relative_gap instead.", - ) class SolverConfig(StrictModel): @@ -629,18 +567,6 @@ class SolverConfig(StrictModel): description="Ignored by the service but included " "for dataset compatibility", ) - solver_mode: Optional[int] = Field( - default=None, - deprecated=True, - description="Deprecated starting in 25.05. " - "Use pdlp_solver_mode instead.", - ) - heuristics_only: Optional[bool] = Field( - default=None, - deprecated=True, - description="Deprecated starting in 25.05. " - "Use mip_heuristics_only instead.", - ) class LPData(StrictModel): diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py index 096eb4642..81d146bc3 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import logging @@ -167,15 +167,7 @@ def create_solver(LP_data, warmstart_data): CUOPT_INFEASIBILITY_DETECTION, solver_config.infeasibility_detection, ) - if solver_config.solver_mode is not None: - solver_settings.set_parameter( - CUOPT_PDLP_SOLVER_MODE, - linear_programming.solver_settings.PDLPSolverMode( - solver_config.solver_mode - ), - ) - warnings.append(dep_warning("solver_mode")) - elif solver_config.pdlp_solver_mode is not None: + if solver_config.pdlp_solver_mode is not None: solver_settings.set_parameter( CUOPT_PDLP_SOLVER_MODE, linear_programming.solver_settings.PDLPSolverMode( @@ -236,111 +228,54 @@ def create_solver(LP_data, warmstart_data): CUOPT_ABSOLUTE_DUAL_TOLERANCE, tolerance.absolute_dual_tolerance, ) - elif tolerance.absolute_dual is not None: - solver_settings.set_parameter( - CUOPT_ABSOLUTE_DUAL_TOLERANCE, tolerance.absolute_dual - ) - warnings.append(dep_warning("absolute_dual")) if tolerance.absolute_primal_tolerance is not None: solver_settings.set_parameter( CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, tolerance.absolute_primal_tolerance, ) - elif tolerance.absolute_primal is not None: - solver_settings.set_parameter( - CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, tolerance.absolute_primal - ) - warnings.append(dep_warning("absolute_primal")) if tolerance.absolute_gap_tolerance is not None: solver_settings.set_parameter( CUOPT_ABSOLUTE_GAP_TOLERANCE, tolerance.absolute_gap_tolerance, ) - elif tolerance.absolute_gap is not None: - solver_settings.set_parameter( - CUOPT_ABSOLUTE_GAP_TOLERANCE, tolerance.absolute_gap - ) - warnings.append(dep_warning("absolute_gap")) if tolerance.relative_dual_tolerance is not None: solver_settings.set_parameter( CUOPT_RELATIVE_DUAL_TOLERANCE, tolerance.relative_dual_tolerance, ) - elif tolerance.relative_dual is not None: - solver_settings.set_parameter( - CUOPT_RELATIVE_DUAL_TOLERANCE, tolerance.relative_dual - ) - warnings.append(dep_warning("relative_dual")) if tolerance.relative_primal_tolerance is not None: solver_settings.set_parameter( CUOPT_RELATIVE_PRIMAL_TOLERANCE, tolerance.relative_primal_tolerance, ) - elif tolerance.relative_primal is not None: - solver_settings.set_parameter( - CUOPT_RELATIVE_PRIMAL_TOLERANCE, tolerance.relative_primal - ) - 
warnings.append(dep_warning("relative_primal")) if tolerance.relative_gap_tolerance is not None: solver_settings.set_parameter( CUOPT_RELATIVE_GAP_TOLERANCE, tolerance.relative_gap_tolerance, ) - elif tolerance.relative_gap is not None: - solver_settings.set_parameter( - CUOPT_RELATIVE_GAP_TOLERANCE, tolerance.relative_gap - ) - warnings.append(dep_warning("relative_gap")) if tolerance.primal_infeasible_tolerance is not None: solver_settings.set_parameter( CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, tolerance.primal_infeasible_tolerance, ) - elif tolerance.primal_infeasible is not None: - solver_settings.set_parameter( - CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, - tolerance.primal_infeasible, - ) - warnings.append(dep_warning("primal_infeasible")) if tolerance.dual_infeasible_tolerance is not None: solver_settings.set_parameter( CUOPT_DUAL_INFEASIBLE_TOLERANCE, tolerance.dual_infeasible_tolerance, ) - elif tolerance.dual_infeasible is not None: - solver_settings.set_parameter( - CUOPT_DUAL_INFEASIBLE_TOLERANCE, tolerance.dual_infeasible - ) - warnings.append(dep_warning("dual_infeasible")) if tolerance.mip_integrality_tolerance is not None: solver_settings.set_parameter( CUOPT_MIP_INTEGRALITY_TOLERANCE, tolerance.mip_integrality_tolerance, ) - elif tolerance.integrality_tolerance is not None: - solver_settings.set_parameter( - CUOPT_MIP_INTEGRALITY_TOLERANCE, - tolerance.integrality_tolerance, - ) - warnings.append(dep_warning("integrality_tolerance")) if tolerance.mip_absolute_gap is not None: solver_settings.set_parameter( CUOPT_MIP_ABSOLUTE_GAP, tolerance.mip_absolute_gap ) - elif tolerance.absolute_mip_gap is not None: - solver_settings.set_parameter( - CUOPT_MIP_ABSOLUTE_GAP, tolerance.absolute_mip_gap - ) - warnings.append(dep_warning("absolute_mip_gap")) if tolerance.mip_relative_gap is not None: solver_settings.set_parameter( CUOPT_MIP_RELATIVE_GAP, tolerance.mip_relative_gap ) - elif tolerance.relative_mip_gap is not None: - solver_settings.set_parameter( - CUOPT_MIP_RELATIVE_GAP, tolerance.relative_mip_gap - ) - warnings.append(dep_warning("relative_mip_gap")) if tolerance.mip_absolute_tolerance is not None: solver_settings.set_parameter( CUOPT_MIP_ABSOLUTE_TOLERANCE, @@ -357,12 +292,7 @@ def create_solver(LP_data, warmstart_data): solver_settings.set_parameter( CUOPT_MIP_SCALING, solver_config.mip_scaling ) - if solver_config.heuristics_only is not None: - solver_settings.set_parameter( - CUOPT_MIP_HEURISTICS_ONLY, solver_config.heuristics_only - ) - warnings.append(dep_warning("heuristics_only")) - elif solver_config.mip_heuristics_only is not None: + if solver_config.mip_heuristics_only is not None: solver_settings.set_parameter( CUOPT_MIP_HEURISTICS_ONLY, solver_config.mip_heuristics_only ) From 8635cc3dad95539c2eb871084b7045725a44c23d Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Wed, 21 Jan 2026 17:54:08 -0800 Subject: [PATCH 37/45] Fix an issue with crossover. 
Add options to turn off individual cuts --- .../cuopt/linear_programming/constants.h | 92 ++++++++++--------- .../mip/solver_settings.hpp | 4 + cpp/src/dual_simplex/branch_and_bound.cpp | 52 ++++++----- cpp/src/dual_simplex/cuts.cpp | 46 ++++++---- .../dual_simplex/simplex_solver_settings.hpp | 10 ++ cpp/src/math_optimization/solver_settings.cu | 4 + cpp/src/mip/diversity/lns/rins.cu | 1 + cpp/src/mip/diversity/recombiners/sub_mip.cuh | 1 + cpp/src/mip/solver.cu | 4 + 9 files changed, 127 insertions(+), 87 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 2a4a781c1..faaf1b9a3 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -20,50 +20,54 @@ #define CUOPT_INSTANTIATE_INT64 0 /* @brief LP/MIP parameter string constants */ -#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" -#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" -#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" -#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" -#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" -#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" -#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" -#define CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" -#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" -#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" -#define CUOPT_ITERATION_LIMIT "iteration_limit" -#define CUOPT_TIME_LIMIT "time_limit" -#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" -#define CUOPT_METHOD "method" -#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" -#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" -#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" -#define CUOPT_LOG_FILE "log_file" -#define CUOPT_LOG_TO_CONSOLE "log_to_console" -#define CUOPT_CROSSOVER "crossover" -#define CUOPT_FOLDING "folding" -#define CUOPT_AUGMENTED "augmented" -#define CUOPT_DUALIZE "dualize" -#define CUOPT_ORDERING "ordering" -#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" -#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" -#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" -#define CUOPT_PRESOLVE "presolve" -#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" -#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" -#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" -#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" -#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" -#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" -#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" -#define CUOPT_MIP_SCALING "mip_scaling" -#define CUOPT_MIP_PRESOLVE "mip_presolve" -#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" -#define CUOPT_MIP_NODE_LIMIT "mip_node_limit" -#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" -#define CUOPT_SOLUTION_FILE "solution_file" -#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" -#define CUOPT_NUM_GPUS "num_gpus" -#define CUOPT_USER_PROBLEM_FILE "user_problem_file" +#define CUOPT_ABSOLUTE_DUAL_TOLERANCE "absolute_dual_tolerance" +#define CUOPT_RELATIVE_DUAL_TOLERANCE "relative_dual_tolerance" +#define CUOPT_ABSOLUTE_PRIMAL_TOLERANCE "absolute_primal_tolerance" +#define CUOPT_RELATIVE_PRIMAL_TOLERANCE "relative_primal_tolerance" +#define CUOPT_ABSOLUTE_GAP_TOLERANCE "absolute_gap_tolerance" 
+#define CUOPT_RELATIVE_GAP_TOLERANCE "relative_gap_tolerance" +#define CUOPT_INFEASIBILITY_DETECTION "infeasibility_detection" +#define CUOPT_STRICT_INFEASIBILITY "strict_infeasibility" +#define CUOPT_PRIMAL_INFEASIBLE_TOLERANCE "primal_infeasible_tolerance" +#define CUOPT_DUAL_INFEASIBLE_TOLERANCE "dual_infeasible_tolerance" +#define CUOPT_ITERATION_LIMIT "iteration_limit" +#define CUOPT_TIME_LIMIT "time_limit" +#define CUOPT_PDLP_SOLVER_MODE "pdlp_solver_mode" +#define CUOPT_METHOD "method" +#define CUOPT_PER_CONSTRAINT_RESIDUAL "per_constraint_residual" +#define CUOPT_SAVE_BEST_PRIMAL_SO_FAR "save_best_primal_so_far" +#define CUOPT_FIRST_PRIMAL_FEASIBLE "first_primal_feasible" +#define CUOPT_LOG_FILE "log_file" +#define CUOPT_LOG_TO_CONSOLE "log_to_console" +#define CUOPT_CROSSOVER "crossover" +#define CUOPT_FOLDING "folding" +#define CUOPT_AUGMENTED "augmented" +#define CUOPT_DUALIZE "dualize" +#define CUOPT_ORDERING "ordering" +#define CUOPT_BARRIER_DUAL_INITIAL_POINT "barrier_dual_initial_point" +#define CUOPT_ELIMINATE_DENSE_COLUMNS "eliminate_dense_columns" +#define CUOPT_CUDSS_DETERMINISTIC "cudss_deterministic" +#define CUOPT_PRESOLVE "presolve" +#define CUOPT_DUAL_POSTSOLVE "dual_postsolve" +#define CUOPT_MIP_ABSOLUTE_TOLERANCE "mip_absolute_tolerance" +#define CUOPT_MIP_RELATIVE_TOLERANCE "mip_relative_tolerance" +#define CUOPT_MIP_INTEGRALITY_TOLERANCE "mip_integrality_tolerance" +#define CUOPT_MIP_ABSOLUTE_GAP "mip_absolute_gap" +#define CUOPT_MIP_RELATIVE_GAP "mip_relative_gap" +#define CUOPT_MIP_HEURISTICS_ONLY "mip_heuristics_only" +#define CUOPT_MIP_SCALING "mip_scaling" +#define CUOPT_MIP_PRESOLVE "mip_presolve" +#define CUOPT_MIP_CUT_PASSES "mip_cut_passes" +#define CUOPT_MIP_MIR_CUTS "mip_mir_cuts" +#define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" +#define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" +#define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" +#define CUOPT_MIP_NODE_LIMIT "mip_node_limit" +#define CUOPT_MIP_RELIABILITY_BRANCHING "mip_reliability_branching" +#define CUOPT_SOLUTION_FILE "solution_file" +#define CUOPT_NUM_CPU_THREADS "num_cpu_threads" +#define CUOPT_NUM_GPUS "num_gpus" +#define CUOPT_USER_PROBLEM_FILE "user_problem_file" /* @brief LP/MIP termination status constants */ #define CUOPT_TERIMINATION_STATUS_NO_TERMINATION 0 diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6da848b40..34d472ca7 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -84,6 +84,10 @@ class mip_solver_settings_t { bool heuristics_only = false; i_t num_cpu_threads = -1; // -1 means use default number of threads in branch and bound i_t max_cut_passes = 0; // number of cut passes to make + i_t mir_cuts = -1; + i_t mixed_integer_gomory_cuts = -1; + i_t knapsack_cuts = -1; + i_t strong_chvatal_gomory_cuts = -1; i_t num_gpus = 1; bool log_to_console = true; std::string log_file; diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 59f94c9a9..9a98321d2 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -610,28 +610,31 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) { solver_status_ = mip_status_t::OPTIMAL; #if 1 - FILE* fid = NULL; - fid = 
fopen("solution.dat", "w"); - if (fid != NULL) { - printf("Writing solution.dat\n"); - - std::vector residual = original_lp_.rhs; - matrix_vector_multiply(original_lp_.A, 1.0, incumbent_.x, -1.0, residual); - printf("|| A*x - b ||_inf %e\n", vector_norm_inf(residual)); - auto hash_combine_f = [](size_t seed, f_t x) { - seed ^= std::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); - return seed; - }; - printf("incumbent size %ld original lp cols %d\n", incumbent_.x.size(), original_lp_.num_cols); - i_t n = original_lp_.num_cols; - size_t seed = n; - fprintf(fid, "%d\n", n); - for (i_t j = 0; j < n; ++j) { - fprintf(fid, "%.17g\n", incumbent_.x[j]); - seed = hash_combine_f(seed, incumbent_.x[j]); + if (settings_.sub_mip == 0) { + FILE* fid = NULL; + fid = fopen("solution.dat", "w"); + if (fid != NULL) { + printf("Writing solution.dat\n"); + + std::vector residual = original_lp_.rhs; + matrix_vector_multiply(original_lp_.A, 1.0, incumbent_.x, -1.0, residual); + printf("|| A*x - b ||_inf %e\n", vector_norm_inf(residual)); + auto hash_combine_f = [](size_t seed, f_t x) { + seed ^= std::hash{}(x) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; + }; + printf( + "incumbent size %ld original lp cols %d\n", incumbent_.x.size(), original_lp_.num_cols); + i_t n = original_lp_.num_cols; + size_t seed = n; + fprintf(fid, "%d\n", n); + for (i_t j = 0; j < n; ++j) { + fprintf(fid, "%.17g\n", incumbent_.x[j]); + seed = hash_combine_f(seed, incumbent_.x[j]); + } + printf("Solution hash: %20x\n", seed); + fclose(fid); } - printf("Solution hash: %20x\n", seed); - fclose(fid); } #endif if (gap > 0 && gap <= settings_.absolute_mip_gap_tol) { @@ -1567,7 +1570,9 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( root_relax_soln = root_crossover_soln_; root_vstatus = crossover_vstatus_; root_status = lp_status_t::OPTIMAL; - + basic_list.clear(); + nonbasic_list.reserve(original_lp_.num_cols - original_lp_.num_rows); + nonbasic_list.clear(); // Get the basic list and nonbasic list from the vstatus for (i_t j = 0; j < original_lp_.num_cols; j++) { if (crossover_vstatus_[j] == variable_status_t::BASIC) { @@ -1587,7 +1592,6 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( original_lp_.num_cols - original_lp_.num_rows); assert(nonbasic_list.size() == original_lp_.num_cols - original_lp_.num_rows); } - root_crossover_settings.max_cut_passes = 3; // Populate the basis_update from the crossover vstatus basis_update.refactor_basis(original_lp_.A, root_crossover_settings, @@ -1976,7 +1980,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); if (cut_status != dual::status_t::OPTIMAL) { - settings_.log.printf("Cut status %d\n", cut_status); + settings_.log.printf("Cut status %s\n", dual::status_to_string(cut_status).c_str()); return mip_status_t::NUMERICAL; } diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 57431edfb..f2e9b83c2 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -605,28 +605,34 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, const std::vector& nonbasic_list) { // Generate Gomory and CG Cuts - f_t cut_start_time = tic(); - generate_gomory_cuts( - lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); - f_t cut_generation_time = toc(cut_start_time); - if (cut_generation_time > 1.0) { - settings.log.printf("Gomory and CG cut generation time %.2f seconds\n", cut_generation_time); + if 
(settings.mixed_integer_gomory_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { + f_t cut_start_time = tic(); + generate_gomory_cuts( + lp, settings, Arow, new_slacks, var_types, basis_update, xstar, basic_list, nonbasic_list); + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.printf("Gomory and CG cut generation time %.2f seconds\n", cut_generation_time); + } } // Generate Knapsack cuts - cut_start_time = tic(); - generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); - cut_generation_time = toc(cut_start_time); - if (cut_generation_time > 1.0) { - settings.log.printf("Knapsack cut generation time %.2f seconds\n", cut_generation_time); + if (settings.knapsack_cuts != 0) { + f_t cut_start_time = tic(); + generate_knapsack_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.printf("Knapsack cut generation time %.2f seconds\n", cut_generation_time); + } } // Generate MIR and CG cuts - cut_start_time = tic(); - generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); - cut_generation_time = toc(cut_start_time); - if (cut_generation_time > 1.0) { - settings.log.printf("MIR and CG cut generation time %.2f seconds\n", cut_generation_time); + if (settings.mir_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { + f_t cut_start_time = tic(); + generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.printf("MIR and CG cut generation time %.2f seconds\n", cut_generation_time); + } } } @@ -744,7 +750,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& sparse_vector_t inequality(Arow, i); f_t inequality_rhs = lp.rhs[i]; - const bool generate_cg_cut = true; + const bool generate_cg_cut = settings.strong_chvatal_gomory_cuts != 0; f_t fractional_part_rhs = fractional_part(inequality_rhs); if (generate_cg_cut && fractional_part_rhs > 1e-6 && fractional_part_rhs < (1-1e-6)) { @@ -892,7 +898,9 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& if (num_aggregated > 0) { settings.log.printf("MIR cut with aggregation %d\n", num_aggregated); } - cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); + if (settings.mir_cuts != 0) { + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); + } break; } else { // Perform aggregation to try and find a cut @@ -1045,7 +1053,7 @@ void cut_generation_t::generate_gomory_cuts( inequality_rhs); if (tableau_status == 0) { // Generate a CG cut - const bool generate_cg_cut = false; + const bool generate_cg_cut = settings.strong_chvatal_gomory_cuts != 0; if (generate_cg_cut) { // Try to generate a CG cut diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 4248197c7..61fb79d9a 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -88,9 +88,14 @@ struct simplex_solver_settings_t { first_iteration_log(2), num_threads(omp_get_max_threads() - 1), max_cut_passes(0), + mir_cuts(-1), + mixed_integer_gomory_cuts(-1), + knapsack_cuts(-1), + strong_chvatal_gomory_cuts(-1), num_bfs_workers(std::max(num_threads / 4, 1)), random_seed(0), inside_mip(0), + sub_mip(0), reliability_branching(-1), solution_callback(nullptr), heuristic_preemption_callback(nullptr), @@ -157,11 +162,16 @@ struct 
simplex_solver_settings_t { i_t num_threads; // number of threads to use i_t random_seed; // random seed i_t max_cut_passes; // number of cut passes to make + i_t mir_cuts; // -1 automatic, 0 to disable, >0 to enable MIR cuts + i_t mixed_integer_gomory_cuts; // -1 automatic, 0 to disable, >0 to enable mixed integer Gomory cuts + i_t knapsack_cuts; // -1 automatic, 0 to disable, >0 to enable knapsack cuts + i_t strong_chvatal_gomory_cuts; // -1 automatic, 0 to disable, >0 to enable strong Chvatal Gomory cuts i_t num_bfs_workers; // number of threads dedicated to the best-first search diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node + i_t sub_mip; // 0 if in regular MIP solve, 1 if in sub-MIP solve i_t reliability_branching; // -1 automatic, 0 to disable, >0 to enable reliability branching std::function&, f_t)> solution_callback; std::function&, f_t)> node_processed_callback; diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 0198e119d..7126273be 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -90,6 +90,10 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_CUT_PASSES, &mip_settings.max_cut_passes, -1, std::numeric_limits::max(), 0}, {CUOPT_MIP_NODE_LIMIT, &mip_settings.node_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1}, + {CUOPT_MIP_MIR_CUTS, &mip_settings.mir_cuts, -1, 1, -1}, + {CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS, &mip_settings.mixed_integer_gomory_cuts, -1, 1, -1}, + {CUOPT_MIP_KNAPSACK_CUTS, &mip_settings.knapsack_cuts, -1, 1, -1}, + {CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS, &mip_settings.strong_chvatal_gomory_cuts, -1, 1, -1}, {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, {CUOPT_NUM_GPUS, &mip_settings.num_gpus, 1, 2, 1} }; diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index dea6d57fa..c886dc156 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -262,6 +262,7 @@ void rins_t::run_rins() branch_and_bound_settings.num_threads = 2; branch_and_bound_settings.num_bfs_workers = 1; branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.sub_mip = 1; // In the future, let RINS use all the diving heuristics. For now, // restricting to guided diving. diff --git a/cpp/src/mip/diversity/recombiners/sub_mip.cuh b/cpp/src/mip/diversity/recombiners/sub_mip.cuh index 65c3f0143..e252745b7 100644 --- a/cpp/src/mip/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip/diversity/recombiners/sub_mip.cuh @@ -106,6 +106,7 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.num_threads = 2; branch_and_bound_settings.num_bfs_workers = 1; branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.sub_mip = 1; // In the future, let SubMIP use all the diving heuristics. For now, // restricting to guided diving. 
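(Editorial note, not part of the patch.) The new cut controls introduced above follow the same -1 (automatic) / 0 (disable) / >0 (enable) convention as reliability_branching and are exposed through the CUOPT_MIP_MIR_CUTS, CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS, CUOPT_MIP_KNAPSACK_CUTS and CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS parameter strings, while sub_mip marks a solve launched from RINS or the sub-MIP recombiner (which, among other things, suppresses the solution.dat debug dump). A minimal C++ sketch of how a caller holding a simplex_solver_settings_t might keep only mixed-integer Gomory cuts; the field names come from the diffs above, the instantiation itself is illustrative:

    simplex_solver_settings_t<int, double> bb_settings;
    bb_settings.max_cut_passes             = 3;  // allow a few rounds of cut generation at the root
    bb_settings.mixed_integer_gomory_cuts  = 1;  // keep mixed-integer Gomory cuts
    bb_settings.mir_cuts                   = 0;  // disable MIR cuts
    bb_settings.knapsack_cuts              = 0;  // disable knapsack cuts
    bb_settings.strong_chvatal_gomory_cuts = 0;  // disable strong Chvatal-Gomory cuts
    bb_settings.sub_mip                    = 0;  // top-level solve, not a RINS/sub-MIP solve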
diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index 549030873..48989f26f 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -171,6 +171,10 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.relative_mip_gap_tol = context.settings.tolerances.relative_mip_gap; branch_and_bound_settings.integer_tol = context.settings.tolerances.integrality_tolerance; branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; + branch_and_bound_settings.mir_cuts = context.settings.mir_cuts; + branch_and_bound_settings.mixed_integer_gomory_cuts = context.settings.mixed_integer_gomory_cuts; + branch_and_bound_settings.knapsack_cuts = context.settings.knapsack_cuts; + branch_and_bound_settings.strong_chvatal_gomory_cuts = context.settings.strong_chvatal_gomory_cuts; if (context.settings.num_cpu_threads < 0) { branch_and_bound_settings.num_threads = omp_get_max_threads() - 1; From 61e5a2c9a4b8b8ef854548ff7ea7711b53e5ee2d Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 22 Jan 2026 12:48:31 -0800 Subject: [PATCH 38/45] Fix bugs in concurrent root relaxation and phase2 --- cpp/src/dual_simplex/basis_solves.cpp | 4 ++-- cpp/src/dual_simplex/basis_updates.cpp | 18 ++++++++++----- cpp/src/dual_simplex/branch_and_bound.cpp | 28 +++++++++++++++-------- cpp/src/dual_simplex/cuts.cpp | 6 ++--- cpp/src/dual_simplex/phase2.cpp | 19 +++++++++++---- 5 files changed, 50 insertions(+), 25 deletions(-) diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index f5cd54053..7b0eb150a 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -363,7 +363,7 @@ i_t factorize_basis(const csc_matrix_t& A, S_perm_inv); if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); - return -1; + return -2; } if (Srank != Sdim) { // Get the rank deficient columns @@ -582,7 +582,7 @@ i_t factorize_basis(const csc_matrix_t& A, } if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { settings.log.printf("Concurrent halt\n"); - return -1; + return -2; } if (verbose) { printf("Right Lnz+Unz %d t %.3f\n", L.col_start[m] + U.col_start[m], toc(fact_start)); diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index ddc8fb5a8..64d08d87a 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -1145,7 +1145,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts CBT_col_sparse.to_dense(CBT_col_dense); for (i_t h = 0; h < m; h++) { if (std::abs(CBT_col_dense[h] - CBT_col[h]) > 1e-6) { - printf("col %d CBT_col_dense[%d] = %e CBT_col[%d] = %e\n", k, h, CBT_col_dense[h], h, CBT_col[h]); + printf("W: col %d CBT_col_dense[%d] = %e CBT_col[%d] = %e\n", k, h, CBT_col_dense[h], h, CBT_col[h]); exit(1); } } @@ -1225,7 +1225,7 @@ i_t basis_update_mpf_t::append_cuts(const csr_matrix_t& cuts CB_col.load_a_column(k, CB_col_dense); for (i_t l = 0; l < cuts_basic.m; l++) { if (std::abs(CB_col_dense[l] - CB_column[l]) > 1e-6) { - printf("col %d CB_col_dense[%d] = %e CB_column[%d] = %e\n", k, l, CB_col_dense[l], l, CB_column[l]); + printf("V: col %d CB_col_dense[%d] = %e CB_column[%d] = %e\n", k, l, CB_col_dense[l], l, CB_column[l]); exit(1); } } @@ -2263,7 +2263,7 @@ int basis_update_mpf_t::refactor_basis( if (L0_.m != A.m) { resize(A.m); } std::vector q; - if (factorize_basis(A, + i_t status = factorize_basis(A, settings, basic_list, L0_, @@ -2272,7 
+2272,11 @@ int basis_update_mpf_t::refactor_basis( inverse_row_permutation_, q, deficient, - slacks_needed) == -1) { + slacks_needed); + if (status == -2) { + return -2; + } + if (status == -1) { settings.log.debug("Initial factorization failed\n"); basis_repair( A, settings, lower, upper, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); @@ -2294,7 +2298,7 @@ int basis_update_mpf_t::refactor_basis( } #endif - if (factorize_basis(A, + status = factorize_basis(A, settings, basic_list, L0_, @@ -2303,7 +2307,9 @@ int basis_update_mpf_t::refactor_basis( inverse_row_permutation_, q, deficient, - slacks_needed) == -1) { + slacks_needed); + if (status == -2) { return -2; } + if (status == -1) { #ifdef CHECK_L_FACTOR if (L0_.check_matrix() == -1) { settings.log.printf("Bad L after basis repair\n"); } #endif diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 9a98321d2..6373068c3 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1565,7 +1565,8 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( // Check if crossover was stopped by dual simplex if (crossover_status == crossover_status_t::OPTIMAL) { set_root_concurrent_halt(1); // Stop dual simplex - root_status = root_status_future.get(); + root_status = root_status_future.get(); // Wait for dual simplex to finish + set_root_concurrent_halt(0); // Clear the concurrent halt flag // Override the root relaxation solution with the crossover solution root_relax_soln = root_crossover_soln_; root_vstatus = crossover_vstatus_; @@ -1593,13 +1594,18 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( assert(nonbasic_list.size() == original_lp_.num_cols - original_lp_.num_rows); } // Populate the basis_update from the crossover vstatus - basis_update.refactor_basis(original_lp_.A, - root_crossover_settings, - original_lp_.lower, - original_lp_.upper, - basic_list, - nonbasic_list, - crossover_vstatus_); + i_t refactor_status = basis_update.refactor_basis(original_lp_.A, + root_crossover_settings, + original_lp_.lower, + original_lp_.upper, + basic_list, + nonbasic_list, + crossover_vstatus_); + if (refactor_status != 0) { + settings_.log.printf("Failed to refactor basis. 
%d deficient columns.\n", refactor_status); + assert(refactor_status == 0); + root_status = lp_status_t::NUMERICAL_ISSUES; + } // Set the edge norms to a default value edge_norms.resize(original_lp_.num_cols, -1.0); @@ -1724,12 +1730,16 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } return mip_status_t::UNBOUNDED; } - if (root_status == lp_status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; set_final_solution(solution, -inf); return solver_status_; } + if (root_status == lp_status_t::NUMERICAL_ISSUES) { + solver_status_ = mip_status_t::NUMERICAL; + set_final_solution(solution, -inf); + return solver_status_; + } assert(root_vstatus_.size() == original_lp_.num_cols); set_uninitialized_steepest_edge_norms(edge_norms_); diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index f2e9b83c2..3b1042769 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -768,7 +768,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& i_t cg_status = cg.generate_strong_cg_cut( lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); if (cg_status == 0) { - printf("Adding CG cut nz %ld status %d row %d rhs %e inequality nz %d\n", cg_cut.i.size(), cg_status, i, cg_inequality_rhs, cg_inequality.i.size()); + //printf("Adding CG cut nz %ld status %d row %d rhs %e inequality nz %d\n", cg_cut.i.size(), cg_status, i, cg_inequality_rhs, cg_inequality.i.size()); cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); } } @@ -1069,7 +1069,7 @@ void cut_generation_t::generate_gomory_cuts( i_t cg_status = cg.generate_strong_cg_cut( lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); if (cg_status == 0) { - printf("Adding CG cut nz %ld\n", cg_cut.i.size()); + //printf("Adding CG cut nz %ld\n", cg_cut.i.size()); cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); } } @@ -2417,7 +2417,7 @@ i_t strong_cg_cut_t::generate_strong_cg_cut( f_t violation = dot - cut_rhs; const f_t min_violation_threshold = 1e-6; if (violation > min_violation_threshold) { - printf("CG violation %e nz %ld\n", violation, cut.i.size()); + //printf("CG violation %e nz %ld\n", violation, cut.i.size()); // Note that no slacks are currently present. Since slacks are currently treated as continuous. // However, this may change. 
We may need to substitute out the slacks here diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index d1c63b49d..08496ca42 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -1231,8 +1231,14 @@ i_t initialize_steepest_edge_norms(const lp_problem_t& lp, last_log = tic(); settings.log.printf("Initialized %d of %d steepest edge norms in %.2fs\n", k, m, now); } - if (toc(start_time) > settings.time_limit) { printf("initialize_steepest_edge time limit\n"); return -1; } - if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { printf("initialize_steepest_edge concurrent_halt\n"); return -1; } + if (toc(start_time) > settings.time_limit) { + printf("initialize_steepest_edge time limit\n"); + return -1; + } + if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) { + printf("initialize_steepest_edge concurrent_halt\n"); + return -2; + } } return 0; } @@ -2413,9 +2419,12 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, basic_list, nonbasic_list, delta_y_steepest_edge); } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); - if (phase2::initialize_steepest_edge_norms( - lp, settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) { - printf("Bad return from initialize steepest edge norms\n"); + i_t status = phase2::initialize_steepest_edge_norms( + lp, settings, start_time, basic_list, ft, delta_y_steepest_edge); + if (status == -2) { + return dual::status_t::CONCURRENT_LIMIT; + } + if (status == -1) { return dual::status_t::TIME_LIMIT; } } From 2c68d94ce1d5bb5df2e641baea1a64d042638b74 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 22 Jan 2026 16:04:16 -0800 Subject: [PATCH 39/45] Fix issue in strong CG where rocI-4-11 was incorrectly declared infeasible with a bad cut --- cpp/src/dual_simplex/branch_and_bound.cpp | 1 - cpp/src/dual_simplex/cuts.cpp | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index 73e88962f..e9983ce32 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -1686,7 +1686,6 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basis_update_mpf_t basis_update(original_lp_.num_rows, settings_.refactor_frequency); lp_status_t root_status; if (!enable_concurrent_lp_root_solve()) { - printf("Non concurrent LP root solve\n"); // RINS/SUBMIP path root_status = solve_linear_program_with_advanced_basis(original_lp_, exploration_stats_.start_time, diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 3b1042769..799bd0ce1 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -1074,6 +1074,10 @@ void cut_generation_t::generate_gomory_cuts( } } + if (settings.mixed_integer_gomory_cuts == 0) { + continue; + } + // Given the base inequality, generate a MIR cut sparse_vector_t cut_A(lp.num_cols, 0); f_t cut_A_rhs; @@ -2279,6 +2283,12 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( const bool verbose = false; const i_t nz = indicies.size(); const f_t f_a_0 = fractional_part(rhs); + + const f_t min_fractional_part = 1e-2; + if (f_a_0 < min_fractional_part) { + return -1; + } + // We will try to generat a strong CG cut. 
// Find the unique integer k such that // 1/(k+1) <= f(a_0) < 1/k @@ -2317,6 +2327,10 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( // Need to compute the p such that // f(a_0) + (p-1)/k * alpha < f(a_j) <= f(a_0) + p/k * alpha const f_t value = static_cast(k) * (f_a_j - f_a_0) / alpha; + if (value < 1e-6) { + return -1; // Safegaurd to prevent numerical issues when f(a_j) is very close to f(a_0) + // You might also be able to adjust p here to avoid this issue + } i_t p = static_cast(std::ceil(value)); if (fractional_part(value) < 1e-12) { //printf("Warning: p %d value %.16e is close to an integer\n", p, value, p + 1); From 89d73e76c7e4e55c79d2b8fdb22954c8cbd08ab6 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 22 Jan 2026 17:35:19 -0800 Subject: [PATCH 40/45] Declare optimal in cut passes if abs/rel gap satisified. Clean up cut reporting --- cpp/src/dual_simplex/branch_and_bound.cpp | 130 ++++++++++------------ cpp/src/dual_simplex/branch_and_bound.hpp | 4 + cpp/src/dual_simplex/cuts.hpp | 21 ++++ 3 files changed, 86 insertions(+), 69 deletions(-) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index e9983ce32..676cdface 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -575,6 +575,35 @@ void branch_and_bound_t::repair_heuristic_solutions() } } +template +void branch_and_bound_t::set_solution_at_root(mip_solution_t& solution, + const cut_info_t& cut_info) +{ + mutex_upper_.lock(); + incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); + upper_bound_ = root_objective_; + mutex_upper_.unlock(); + + print_cut_info(settings_, cut_info); + + // We should be done here + uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); + solution.objective = incumbent_.objective; + solution.lower_bound = root_objective_; + solution.nodes_explored = 0; + solution.simplex_iterations = root_relax_soln_.iterations; + settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", + compute_user_objective(original_lp_, root_objective_), + toc(exploration_stats_.start_time)); + + if (settings_.solution_callback != nullptr) { + settings_.solution_callback(solution.x, solution.objective); + } + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } +} + template void branch_and_bound_t::set_final_solution(mip_solution_t& solution, f_t lower_bound) @@ -1761,27 +1790,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut i_t num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); + cut_info_t cut_info; if (num_fractional == 0) { - mutex_upper_.lock(); - incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); - upper_bound_ = root_objective_; - mutex_upper_.unlock(); - // We should be done here - uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); - solution.objective = incumbent_.objective; - solution.lower_bound = root_objective_; - solution.nodes_explored = 0; - solution.simplex_iterations = root_relax_soln_.iterations; - settings_.log.printf("Optimal solution found at root node. Objective %.16e. 
Time %.2f.\n", - compute_user_objective(original_lp_, root_objective_), - toc(exploration_stats_.start_time)); - - if (settings_.solution_callback != nullptr) { - settings_.solution_callback(solution.x, solution.objective); - } - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); - } + set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; } @@ -1802,41 +1813,11 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut read_saved_solution_for_cut_verification(original_lp_, settings_, saved_solution); #endif - i_t num_gomory_cuts = 0; - i_t num_mir_cuts = 0; - i_t num_knapsack_cuts = 0; - i_t num_cg_cuts = 0; + i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { - mutex_upper_.lock(); - incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); - upper_bound_ = root_objective_; - mutex_upper_.unlock(); - if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts > 0) { - settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); - settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); - settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts); - settings_.log.printf("CG cuts : %d\n", num_cg_cuts); - settings_.log.printf("Cut pool size : %d\n", cut_pool_size); - settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]); - } - // We should be done here - uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); - solution.objective = incumbent_.objective; - solution.lower_bound = root_objective_; - solution.nodes_explored = 0; - solution.simplex_iterations = root_relax_soln_.iterations; - settings_.log.printf("Optimal solution found at root node. Objective %.16e. Time %.2f.\n", - compute_user_objective(original_lp_, root_objective_), - toc(exploration_stats_.start_time)); - - if (settings_.solution_callback != nullptr) { - settings_.solution_callback(solution.x, solution.objective); - } - if (settings_.heuristic_preemption_callback != nullptr) { - settings_.heuristic_preemption_callback(); - } + set_solution_at_root(solution, cut_info); return mip_status_t::OPTIMAL; } else { #ifdef PRINT_FRACTIONAL_INFO @@ -1867,13 +1848,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } for (i_t k = 0; k < cut_types.size(); k++) { if (cut_types[k] == cut_type_t::MIXED_INTEGER_GOMORY) { - num_gomory_cuts++; + cut_info.num_gomory_cuts++; } else if (cut_types[k] == cut_type_t::MIXED_INTEGER_ROUNDING) { - num_mir_cuts++; + cut_info.num_mir_cuts++; } else if (cut_types[k] == cut_type_t::KNAPSACK) { - num_knapsack_cuts++; + cut_info.num_knapsack_cuts++; } else if (cut_types[k] == cut_type_t::CHVATAL_GOMORY) { - num_cg_cuts++; + cut_info.num_cg_cuts++; } } #ifdef PRINT_CUT_INFO @@ -1977,17 +1958,18 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_relax_soln_, iter, edge_norms_); - - settings_.log.debug("Cut LP iterations %d. 
A nz %d\n", - iter, - original_lp_.A.col_start[original_lp_.A.n]); - exploration_stats_.total_lp_iters += root_relax_soln_.iterations; - root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + if (cut_status == dual::status_t::TIME_LIMIT) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } if (cut_status != dual::status_t::OPTIMAL) { settings_.log.printf("Cut status %s\n", dual::status_to_string(cut_status).c_str()); return mip_status_t::NUMERICAL; } + exploration_stats_.total_lp_iters += root_relax_soln_.iterations; + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); local_lower_bounds_.assign(settings_.num_bfs_workers, root_objective_); @@ -2010,8 +1992,13 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut fractional.clear(); num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); - // TODO: Get upper bound from heuristics - f_t obj = num_fractional != 0 ? upper_bound_.load() : root_objective_; + if (num_fractional == 0) { + upper_bound_ = root_objective_; + mutex_upper_.lock(); + incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); + mutex_upper_.unlock(); + } + f_t obj = upper_bound_.load(); f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, root_objective_); std::string gap = num_fractional != 0 ? user_mip_gap(user_obj, user_lower) : "0.0%"; @@ -2027,14 +2014,19 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut static_cast(iter), gap.c_str(), toc(exploration_stats_.start_time)); + + f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), root_objective_); + f_t abs_gap = upper_bound_.load() - root_objective_; + if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) { + set_final_solution(solution, root_objective_); + return mip_status_t::OPTIMAL; + } } } - if (num_gomory_cuts + num_mir_cuts + num_knapsack_cuts + num_cg_cuts > 0) { - settings_.log.printf("Gomory cuts : %d\n", num_gomory_cuts); - settings_.log.printf("MIR cuts : %d\n", num_mir_cuts); - settings_.log.printf("Knapsack cuts : %d\n", num_knapsack_cuts); - settings_.log.printf("CG cuts : %d\n", num_cg_cuts); + print_cut_info(settings_, cut_info); + + if (cut_info.has_cuts()) { settings_.log.printf("Cut pool size : %d\n", cut_pool_size); settings_.log.printf("Size with cuts: %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, original_lp_.num_cols, original_lp_.A.col_start[original_lp_.A.n]); } diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 4a46db1da..1a947b2a2 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -201,6 +202,9 @@ class branch_and_bound_t { void report_heuristic(f_t obj); void report(char symbol, f_t obj, f_t lower_bound, i_t node_depth, i_t node_int_infeas); + // Set the solution when found at the root node + void set_solution_at_root(mip_solution_t& solution, const cut_info_t& cut_info); + // Set the final solution. 
void set_final_solution(mip_solution_t& solution, f_t lower_bound); diff --git a/cpp/src/dual_simplex/cuts.hpp b/cpp/src/dual_simplex/cuts.hpp index a914e189c..a02cd807b 100644 --- a/cpp/src/dual_simplex/cuts.hpp +++ b/cpp/src/dual_simplex/cuts.hpp @@ -25,6 +25,27 @@ enum cut_type_t : int8_t { CHVATAL_GOMORY = 3 }; +template +struct cut_info_t { + bool has_cuts() const { return num_gomory_cuts + num_mir_cuts + num_knapsack_cuts + num_cg_cuts > 0; } + i_t num_gomory_cuts = 0; + i_t num_mir_cuts = 0; + i_t num_knapsack_cuts = 0; + i_t num_cg_cuts = 0; +}; + + +template +void print_cut_info(const simplex_solver_settings_t& settings, const cut_info_t& cut_info) +{ + if (cut_info.has_cuts()) { + settings.log.printf("Gomory cuts : %d\n", cut_info.num_gomory_cuts); + settings.log.printf("MIR cuts : %d\n", cut_info.num_mir_cuts); + settings.log.printf("Knapsack cuts : %d\n", cut_info.num_knapsack_cuts); + settings.log.printf("CG cuts : %d\n", cut_info.num_cg_cuts); + } +} + template void print_cut_types(const std::string& prefix, const std::vector& cut_types, From 5954aa0ebb756a04598a44cd567c199597a9455b Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Thu, 22 Jan 2026 18:38:26 -0800 Subject: [PATCH 41/45] Be more conservative with the strong-CG cut to avoid wrong answer on istanbul-no-cutoff --- cpp/src/dual_simplex/cuts.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 799bd0ce1..1e6a490bb 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -2288,6 +2288,9 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( if (f_a_0 < min_fractional_part) { return -1; } + if (f_a_0 > 1 - min_fractional_part) { + return -1; + } // We will try to generat a strong CG cut. 
// Find the unique integer k such that From 350e0a71e27baa9a2165a6e26a758c1cdcbd3675 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 23 Jan 2026 10:03:00 -0800 Subject: [PATCH 42/45] Silence warning about negative lower bounds --- cpp/src/dual_simplex/cuts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 1e6a490bb..101c4241f 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -1349,7 +1349,7 @@ mixed_integer_rounding_cut_t::mixed_integer_rounding_cut_t( needs_complement_ = false; for (i_t j = 0; j < num_vars_; j++) { if (lp.lower[j] < 0) { - settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]); + //settings_.log.printf("Variable %d has negative lower bound %e\n", j, lp.lower[j]); } const f_t uj = lp.upper[j]; const f_t lj = lp.lower[j]; From 121af294993084d54899c50eca4d2074bda07612 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 23 Jan 2026 10:06:03 -0800 Subject: [PATCH 43/45] Silence MIR aggregation count --- cpp/src/dual_simplex/cuts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 101c4241f..5a48cec86 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -896,7 +896,7 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& if (add_cut) { if (num_aggregated > 0) { - settings.log.printf("MIR cut with aggregation %d\n", num_aggregated); + //settings.log.printf("MIR cut with aggregation %d\n", num_aggregated); } if (settings.mir_cuts != 0) { cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); From 59d4ac04f7fd86d691197c89fda95be3558d4af0 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 23 Jan 2026 15:17:37 -0800 Subject: [PATCH 44/45] Fix bug in MIR cut generation when slack variable had negative coefficient --- cpp/src/dual_simplex/cuts.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index 5a48cec86..aa041edb6 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -774,15 +774,24 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } // Remove the slack from the equality to get an inequality + bool negate_inequality = true; for (i_t k = 0; k < inequality.i.size(); k++) { const i_t j = inequality.i[k]; - if (j == slack) { inequality.x[k] = 0.0; } + if (j == slack) { + if (inequality.x[k] != 1.0) { + negate_inequality = false; + } + inequality.x[k] = 0.0; + } } + if (negate_inequality) { // inequaility'*x <= inequality_rhs // But for MIR we need: inequality'*x >= inequality_rhs - inequality_rhs *= -1; - inequality.negate(); + inequality_rhs *= -1; + inequality.negate(); + } + // We should now have: inequality'*x >= inequality_rhs // Transform the relaxation solution std::vector transformed_xstar; @@ -798,7 +807,8 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& while (!add_cut && num_aggregated < max_aggregated) { //printf("\t add_cut %d num_aggregated %d nz %ld\n", static_cast(add_cut), num_aggregated, inequality.i.size()); - sparse_vector_t transformed_inequality = inequality; + sparse_vector_t transformed_inequality; + inequality.squeeze(transformed_inequality); f_t transformed_rhs = inequality_rhs; mir.to_nonnegative(lp, transformed_inequality, transformed_rhs); @@ -1353,15 +1363,16 @@ 
mixed_integer_rounding_cut_t::mixed_integer_rounding_cut_t( } const f_t uj = lp.upper[j]; const f_t lj = lp.lower[j]; - if (uj != inf || lj != 0.0) { needs_complement_ = true; } const f_t xstar_j = xstar[j]; if (uj < inf) { if (uj - xstar_j <= xstar_j - lj) { has_upper_[j] = 1; bound_info_[j] = 1; - } else { + needs_complement_ = true; + } else if (lj != 0.0) { has_lower_[j] = 1; bound_info_[j] = -1; + needs_complement_ = true; } continue; } @@ -1369,6 +1380,7 @@ mixed_integer_rounding_cut_t::mixed_integer_rounding_cut_t( if (lj > -inf && lj != 0.0) { has_lower_[j] = 1; bound_info_[j] = -1; + needs_complement_ = true; } } } From c52b0658b002f023d7afef8b1cbbaeec868566eb Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Fri, 23 Jan 2026 18:10:37 -0800 Subject: [PATCH 45/45] Same bug with MIR inequalities and ranged rows. Another fix --- cpp/src/dual_simplex/cuts.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cpp/src/dual_simplex/cuts.cpp b/cpp/src/dual_simplex/cuts.cpp index aa041edb6..a75644ec0 100644 --- a/cpp/src/dual_simplex/cuts.cpp +++ b/cpp/src/dual_simplex/cuts.cpp @@ -774,17 +774,27 @@ void cut_generation_t::generate_mir_cuts(const lp_problem_t& } // Remove the slack from the equality to get an inequality - bool negate_inequality = true; + i_t negate_inequality = 1; for (i_t k = 0; k < inequality.i.size(); k++) { const i_t j = inequality.i[k]; if (j == slack) { if (inequality.x[k] != 1.0) { - negate_inequality = false; + if (inequality.x[k] == -1.0 && lp.lower[j] >= 0.0) { + negate_inequality = 0; + } else { + printf("Bad slack %d in inequality: aj %e lo %e up %e\n", j, inequality.x[k], lp.lower[j], lp.upper[j]); + negate_inequality = -1; + break; + } } inequality.x[k] = 0.0; } } + if (negate_inequality == -1) { + break; // TODO: this stops us from generating further MIR cuts for other rows. + } + if (negate_inequality) { // inequaility'*x <= inequality_rhs // But for MIR we need: inequality'*x >= inequality_rhs
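    // (Editorial note, not part of the patch.) Intent of the slack handling above:
    // the base row is a'x + c_s*s = b with a nonnegative slack s. When c_s = +1 the
    // row implies a'x <= b and is negated here into the a'x >= b form MIR expects;
    // when c_s = -1 and the slack's lower bound is nonnegative the row already reads
    // a'x >= b, so no negation is performed; any other case is treated as unsupported
    // and, per the TODO above, stops MIR generation for the remaining rows.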