diff --git a/ortools/algorithms/BUILD.bazel b/ortools/algorithms/BUILD.bazel index 970599bb97..b773091dc1 100644 --- a/ortools/algorithms/BUILD.bazel +++ b/ortools/algorithms/BUILD.bazel @@ -321,7 +321,7 @@ cc_library( "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", - "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/types:span", ], ) @@ -348,7 +348,6 @@ cc_library( deps = [ ":set_cover_model", "//ortools/base:file", - "//ortools/base:strong_vector", "//ortools/util:filelineiter", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", @@ -368,6 +367,7 @@ cc_test( ":set_cover_invariant", ":set_cover_mip", ":set_cover_model", + "//ortools/base:parse_text_proto", "//ortools/base:gmock_main", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", diff --git a/ortools/algorithms/CMakeLists.txt b/ortools/algorithms/CMakeLists.txt index 8419297312..4e7a926eda 100644 --- a/ortools/algorithms/CMakeLists.txt +++ b/ortools/algorithms/CMakeLists.txt @@ -46,8 +46,9 @@ if(BUILD_TESTING) ${_FILE_NAME} LINK_LIBRARIES benchmark::benchmark - GTest::gmock + GTest::gtest GTest::gtest_main + GTest::gmock ) endforeach() endif() diff --git a/ortools/algorithms/binary_search_test.cc b/ortools/algorithms/binary_search_test.cc index be670d2646..643ab7d7f0 100644 --- a/ortools/algorithms/binary_search_test.cc +++ b/ortools/algorithms/binary_search_test.cc @@ -26,6 +26,7 @@ #include "absl/random/distributions.h" #include "absl/random/random.h" #include "absl/strings/str_format.h" +#include "absl/time/clock.h" #include "absl/time/time.h" #include "benchmark/benchmark.h" #include "gtest/gtest.h" @@ -36,7 +37,7 @@ namespace operations_research { // Correctly picking the midpoint of two integers in all cases isn't trivial! 
template <> -int BinarySearchMidpoint(int x, int y) { +inline int BinarySearchMidpoint(int x, int y) { if (x > y) std::swap(x, y); if (x >= 0 || y < 0) return x + (y - x) / 2; return (x + y) / 2; @@ -182,7 +183,7 @@ TEST(BinarySearchDeathTest, DiesIfEitherBoundaryConditionViolatedInFastbuild) { // hence the presence of these tests outside the unnamed namespace. template <> -absl::Time BinarySearchMidpoint(absl::Time x, absl::Time y) { +inline absl::Time BinarySearchMidpoint(absl::Time x, absl::Time y) { return x + (y - x) / 2; } diff --git a/ortools/algorithms/find_graph_symmetries_test.cc b/ortools/algorithms/find_graph_symmetries_test.cc index 93933158af..bf287e2924 100644 --- a/ortools/algorithms/find_graph_symmetries_test.cc +++ b/ortools/algorithms/find_graph_symmetries_test.cc @@ -63,7 +63,7 @@ using ::util::GraphIsSymmetric; // Shortcut that calls RecursivelyRefinePartitionByAdjacency() on all nodes // of a graph, and outputs the resulting partition. -std::string FullyRefineGraph(const std::vector>& arcs) { +std::string FullyRefineGraph(absl::Span> arcs) { Graph graph; for (const std::pair& arc : arcs) { graph.AddArc(arc.first, arc.second); diff --git a/ortools/algorithms/knapsack_solver_test.cc b/ortools/algorithms/knapsack_solver_test.cc index 05efb92bd1..962e35a386 100644 --- a/ortools/algorithms/knapsack_solver_test.cc +++ b/ortools/algorithms/knapsack_solver_test.cc @@ -123,7 +123,7 @@ int64_t SolveKnapsackProblem( return kInvalidSolution; } -#ifdef USE_SCIP +#if defined(USE_SCIP) const int64_t scip_profit = SolveKnapsackProblemUsingSpecificSolver( profit_array, number_of_items, weight_array, capacity_array, number_of_dimensions, @@ -131,9 +131,9 @@ int64_t SolveKnapsackProblem( if (scip_profit != generic_profit) { return kInvalidSolution; } -#else +#else // !defined(USE_SCIP) #warning SCIP support disable -#endif +#endif // !defined(USE_SCIP) const int64_t cpsat_profit = SolveKnapsackProblemUsingSpecificSolver( profit_array, number_of_items, 
weight_array, capacity_array, diff --git a/ortools/algorithms/python/set_cover.cc b/ortools/algorithms/python/set_cover.cc index a4263078c1..2261c039f8 100644 --- a/ortools/algorithms/python/set_cover.cc +++ b/ortools/algorithms/python/set_cover.cc @@ -37,6 +37,7 @@ using ::operations_research::ElementIndex; using ::operations_research::GreedySolutionGenerator; using ::operations_research::GuidedLocalSearch; using ::operations_research::GuidedTabuSearch; +using ::operations_research::LazyElementDegreeSolutionGenerator; using ::operations_research::Preprocessor; using ::operations_research::RandomSolutionGenerator; using ::operations_research::ReadBeasleySetCoverProblem; @@ -127,44 +128,46 @@ PYBIND11_MODULE(set_cover, m) { [](SetCoverModel& model) -> const std::vector& { return model.subset_costs().get(); }) - .def("columns", - [](SetCoverModel& model) -> std::vector> { - // Due to the inner StrongVector, make a deep copy. Anyway, - // columns() returns a const ref, so this keeps the semantics, not - // the efficiency. - std::vector> columns; - std::transform( - model.columns().begin(), model.columns().end(), - columns.begin(), - [](const SparseColumn& column) -> std::vector { - std::vector col(column.size()); - std::transform(column.begin(), column.end(), col.begin(), - [](ElementIndex element) -> BaseInt { - return element.value(); - }); - return col; - }); - return columns; - }) - .def("rows", - [](SetCoverModel& model) -> std::vector> { - // Due to the inner StrongVector, make a deep copy. Anyway, - // rows() returns a const ref, so this keeps the semantics, not - // the efficiency. 
- std::vector> rows; - std::transform( - model.rows().begin(), model.rows().end(), rows.begin(), - [](const SparseRow& row) -> std::vector { - std::vector r(row.size()); - std::transform(row.begin(), row.end(), r.begin(), - [](SubsetIndex element) -> BaseInt { - return element.value(); - }); - return r; - }); - return rows; - }) - .def("row_view_is_valid", &SetCoverModel::row_view_is_valid) + .def_property_readonly( + "columns", + [](SetCoverModel& model) -> std::vector> { + // Due to the inner StrongVector, make a deep copy. Anyway, + // columns() returns a const ref, so this keeps the semantics, not + // the efficiency. + std::vector> columns(model.columns().size()); + std::transform( + model.columns().begin(), model.columns().end(), columns.begin(), + [](const SparseColumn& column) -> std::vector { + std::vector col(column.size()); + std::transform(column.begin(), column.end(), col.begin(), + [](ElementIndex element) -> BaseInt { + return element.value(); + }); + return col; + }); + return columns; + }) + .def_property_readonly( + "rows", + [](SetCoverModel& model) -> std::vector> { + // Due to the inner StrongVector, make a deep copy. Anyway, + // rows() returns a const ref, so this keeps the semantics, not + // the efficiency. 
+ std::vector> rows(model.rows().size()); + std::transform(model.rows().begin(), model.rows().end(), + rows.begin(), + [](const SparseRow& row) -> std::vector { + std::vector r(row.size()); + std::transform(row.begin(), row.end(), r.begin(), + [](SubsetIndex element) -> BaseInt { + return element.value(); + }); + return r; + }); + return rows; + }) + .def_property_readonly("row_view_is_valid", + &SetCoverModel::row_view_is_valid) .def("SubsetRange", [](SetCoverModel& model) { return make_iterator<>(IntIterator::begin(model.num_subsets()), @@ -242,11 +245,17 @@ PYBIND11_MODULE(set_cover, m) { }) .def("decision", &SetCoverDecision::decision); + py::enum_(m, "consistency_level") + .value("COST_AND_COVERAGE", + SetCoverInvariant::ConsistencyLevel::kCostAndCoverage) + .value("FREE_AND_UNCOVERED", + SetCoverInvariant::ConsistencyLevel::kFreeAndUncovered) + .value("REDUNDANCY", SetCoverInvariant::ConsistencyLevel::kRedundancy); + py::class_(m, "SetCoverInvariant") .def(py::init()) .def("initialize", &SetCoverInvariant::Initialize) .def("clear", &SetCoverInvariant::Clear) - .def("recompute_invariant", &SetCoverInvariant::RecomputeInvariant) .def("model", &SetCoverInvariant::model) .def_property( "model", @@ -295,9 +304,10 @@ PYBIND11_MODULE(set_cover, m) { .def("clear_trace", &SetCoverInvariant::ClearTrace) .def("clear_removability_information", &SetCoverInvariant::ClearRemovabilityInformation) - .def("new_removable_subsets", &SetCoverInvariant::new_removable_subsets) - .def("new_non_removable_subsets", - &SetCoverInvariant::new_non_removable_subsets) + .def("newly_removable_subsets", + &SetCoverInvariant::newly_removable_subsets) + .def("newly_non_removable_subsets", + &SetCoverInvariant::newly_non_removable_subsets) .def("compress_trace", &SetCoverInvariant::CompressTrace) .def("load_solution", [](SetCoverInvariant& invariant, @@ -312,43 +322,28 @@ PYBIND11_MODULE(set_cover, m) { return invariant.ComputeIsRedundant(SubsetIndex(subset)); }, arg("subset")) - 
.def("make_fully_updated", &SetCoverInvariant::MakeFullyUpdated) + .def("recompute", &SetCoverInvariant::Recompute) .def( "flip", - [](SetCoverInvariant& invariant, BaseInt subset) { - invariant.Flip(SubsetIndex(subset)); + [](SetCoverInvariant& invariant, BaseInt subset, + SetCoverInvariant::ConsistencyLevel consistency) { + invariant.Flip(SubsetIndex(subset), consistency); }, - arg("subset")) - .def( - "flip_and_fully_update", - [](SetCoverInvariant& invariant, BaseInt subset) { - invariant.FlipAndFullyUpdate(SubsetIndex(subset)); - }, - arg("subset")) + arg("subset"), arg("consistency")) .def( "select", - [](SetCoverInvariant& invariant, BaseInt subset) { - invariant.Select(SubsetIndex(subset)); + [](SetCoverInvariant& invariant, BaseInt subset, + SetCoverInvariant::ConsistencyLevel consistency) { + invariant.Select(SubsetIndex(subset), consistency); }, - arg("subset")) - .def( - "select_and_fully_update", - [](SetCoverInvariant& invariant, BaseInt subset) { - invariant.SelectAndFullyUpdate(SubsetIndex(subset)); - }, - arg("subset")) + arg("subset"), arg("consistency")) .def( "deselect", - [](SetCoverInvariant& invariant, BaseInt subset) { - invariant.Deselect(SubsetIndex(subset)); + [](SetCoverInvariant& invariant, BaseInt subset, + SetCoverInvariant::ConsistencyLevel consistency) { + invariant.Deselect(SubsetIndex(subset), consistency); }, - arg("subset")) - .def( - "deselect_and_fully_update", - [](SetCoverInvariant& invariant, BaseInt subset) { - invariant.DeselectAndFullyUpdate(SubsetIndex(subset)); - }, - arg("subset")) + arg("subset"), arg("consistency")) .def("export_solution_as_proto", &SetCoverInvariant::ExportSolutionAsProto) .def("import_solution_from_proto", @@ -434,6 +429,27 @@ PYBIND11_MODULE(set_cover, m) { VectorDoubleToSubsetCostVector(costs)); }); + py::class_( + m, "LazyElementDegreeSolutionGenerator") + .def(py::init()) + .def("next_solution", + [](LazyElementDegreeSolutionGenerator& heuristic) -> bool { + return heuristic.NextSolution(); + 
}) + .def("next_solution", + [](LazyElementDegreeSolutionGenerator& heuristic, + const std::vector& focus) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus)); + }) + .def("next_solution", + [](LazyElementDegreeSolutionGenerator& heuristic, + const std::vector& focus, + const std::vector& costs) -> bool { + return heuristic.NextSolution( + VectorIntToVectorSubsetIndex(focus), + VectorDoubleToSubsetCostVector(costs)); + }); + py::class_(m, "SteepestSearch") .def(py::init()) .def("next_solution", diff --git a/ortools/algorithms/python/set_cover_test.py b/ortools/algorithms/python/set_cover_test.py index 73624eaa8c..6fbb375720 100644 --- a/ortools/algorithms/python/set_cover_test.py +++ b/ortools/algorithms/python/set_cover_test.py @@ -62,9 +62,10 @@ class SetCoverTest(absltest.TestCase): self.assertEqual(model.num_subsets, reloaded.num_subsets) self.assertEqual(model.num_elements, reloaded.num_elements) - # TODO(user): these methods are not yet wrapped. - # self.assertEqual(model.subset_costs, reloaded.subset_costs) - # self.assertEqual(model.columns, reloaded.columns) + self.assertEqual(model.subset_costs, reloaded.subset_costs) + self.assertEqual(model.columns, reloaded.columns) + if model.row_view_is_valid and reloaded.row_view_is_valid: + self.assertEqual(model.rows, reloaded.rows) def test_save_reload_twice(self): model = create_knights_cover_model(3, 3) @@ -72,17 +73,23 @@ class SetCoverTest(absltest.TestCase): greedy = set_cover.GreedySolutionGenerator(inv) self.assertTrue(greedy.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) greedy_proto = inv.export_solution_as_proto() steepest = set_cover.SteepestSearch(inv) self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) steepest_proto = 
inv.export_solution_as_proto() inv.import_solution_from_proto(greedy_proto) self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) reloaded_proto = inv.export_solution_as_proto() self.assertEqual(str(steepest_proto), str(reloaded_proto)) @@ -93,16 +100,22 @@ class SetCoverTest(absltest.TestCase): inv = set_cover.SetCoverInvariant(model) trivial = set_cover.TrivialSolutionGenerator(inv) self.assertTrue(trivial.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE) + ) greedy = set_cover.GreedySolutionGenerator(inv) self.assertTrue(greedy.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) self.assertEqual(inv.num_uncovered_elements(), 0) steepest = set_cover.SteepestSearch(inv) self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE) + ) def test_preprocessor(self): model = create_initial_cover_model() @@ -111,11 +124,15 @@ class SetCoverTest(absltest.TestCase): inv = set_cover.SetCoverInvariant(model) preprocessor = set_cover.Preprocessor(inv) self.assertTrue(preprocessor.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE) + ) greedy = set_cover.GreedySolutionGenerator(inv) self.assertTrue(greedy.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) def test_infeasible(self): model = set_cover.SetCoverModel() @@ -136,11 +153,15 @@ class SetCoverTest(absltest.TestCase): greedy = set_cover.GreedySolutionGenerator(inv) 
self.assertTrue(greedy.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) steepest = set_cover.SteepestSearch(inv) self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) def test_knights_cover_degree(self): model = create_knights_cover_model(16, 16) @@ -149,11 +170,15 @@ class SetCoverTest(absltest.TestCase): degree = set_cover.ElementDegreeSolutionGenerator(inv) self.assertTrue(degree.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE) + ) steepest = set_cover.SteepestSearch(inv) self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) def test_knights_cover_gls(self): model = create_knights_cover_model(16, 16) @@ -162,11 +187,15 @@ class SetCoverTest(absltest.TestCase): greedy = set_cover.GreedySolutionGenerator(inv) self.assertTrue(greedy.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) gls = set_cover.GuidedLocalSearch(inv) self.assertTrue(gls.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) def test_knights_cover_random(self): model = create_knights_cover_model(16, 16) @@ -175,11 +204,15 @@ class SetCoverTest(absltest.TestCase): random = set_cover.RandomSolutionGenerator(inv) self.assertTrue(random.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE) + ) steepest = set_cover.SteepestSearch(inv) 
self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) def test_knights_cover_trivial(self): model = create_knights_cover_model(16, 16) @@ -188,11 +221,15 @@ class SetCoverTest(absltest.TestCase): trivial = set_cover.TrivialSolutionGenerator(inv) self.assertTrue(trivial.next_solution()) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE) + ) steepest = set_cover.SteepestSearch(inv) self.assertTrue(steepest.next_solution(500)) - self.assertTrue(inv.check_consistency()) + self.assertTrue( + inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED) + ) # TODO(user): KnightsCoverGreedyAndTabu, KnightsCoverGreedyRandomClear, # KnightsCoverElementDegreeRandomClear, KnightsCoverRandomClearMip, diff --git a/ortools/algorithms/set_cover_heuristics.cc b/ortools/algorithms/set_cover_heuristics.cc index dc6377b893..6e19eaaaa8 100644 --- a/ortools/algorithms/set_cover_heuristics.cc +++ b/ortools/algorithms/set_cover_heuristics.cc @@ -14,13 +14,13 @@ #include "ortools/algorithms/set_cover_heuristics.h" #include -#include #include #include #include #include #include "absl/log/check.h" +#include "absl/random/distributions.h" #include "absl/random/random.h" #include "absl/types/span.h" #include "ortools/algorithms/adjustable_k_ary_heap.h" @@ -45,6 +45,8 @@ SubsetBoolVector MakeBoolVector(absl::Span focus, } } // anonymous namespace +using CL = SetCoverInvariant::ConsistencyLevel; + // Preprocessor. 
bool Preprocessor::NextSolution() { @@ -54,7 +56,6 @@ bool Preprocessor::NextSolution() { bool Preprocessor::NextSolution(absl::Span focus) { DVLOG(1) << "Entering Preprocessor::NextSolution"; const SubsetIndex num_subsets(inv_->model()->num_subsets()); - SubsetBoolVector choices(num_subsets, false); const ElementIndex num_elements(inv_->model()->num_elements()); const SparseRowView& rows = inv_->model()->rows(); SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets); @@ -62,12 +63,13 @@ bool Preprocessor::NextSolution(absl::Span focus) { if (rows[element].size() == 1) { const SubsetIndex subset = rows[element][RowEntryIndex(0)]; if (in_focus[subset] && !inv_->is_selected()[subset]) { - inv_->Select(subset); + inv_->Select(subset, CL::kCostAndCoverage); ++num_columns_fixed_by_singleton_row_; } } } inv_->CompressTrace(); + inv_->Recompute(CL::kCostAndCoverage); return true; } @@ -85,6 +87,7 @@ bool TrivialSolutionGenerator::NextSolution( choices[subset] = true; } inv_->LoadSolution(choices); + inv_->Recompute(CL::kCostAndCoverage); return true; } @@ -102,11 +105,11 @@ bool RandomSolutionGenerator::NextSolution( for (const SubsetIndex subset : shuffled) { if (inv_->is_selected()[subset]) continue; if (inv_->num_free_elements()[subset] != 0) { - inv_->Select(subset); + inv_->Select(subset, CL::kFreeAndUncovered); } } inv_->CompressTrace(); - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered)); return true; } @@ -124,9 +127,10 @@ bool GreedySolutionGenerator::NextSolution( bool GreedySolutionGenerator::NextSolution(absl::Span focus, const SubsetCostVector& costs) { - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage)); + inv_->Recompute(CL::kFreeAndUncovered); inv_->ClearTrace(); - SubsetCostVector elements_per_cost(costs.size(), 0.0); + SubsetCostVector elements_per_cost(costs.size(), 0); for (const SubsetIndex subset : focus) { elements_per_cost[subset] = 1.0 / costs[subset]; } @@ 
-150,7 +154,7 @@ bool GreedySolutionGenerator::NextSolution(absl::Span focus, while (!pq.IsEmpty()) { const SubsetIndex best_subset(pq.TopIndex()); pq.Pop(); - inv_->Select(best_subset); + inv_->Select(best_subset, CL::kFreeAndUncovered); // NOMUTANTS -- reason, for C++ if (inv_->num_uncovered_elements() == 0) break; for (IntersectingSubsetsIterator it(*inv_->model(), best_subset); @@ -170,10 +174,83 @@ bool GreedySolutionGenerator::NextSolution(absl::Span focus, inv_->CompressTrace(); // Don't expect the queue to be empty, because of the break in the while // loop. - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered)); return true; } +namespace { +// This class gathers statistics about the usefulness of the ratio computation. +class ComputationUsefulnessStats { + public: + // If is_active is true, the stats are gathered, otherwise there is no + // overhead, in particular no memory allocation. + explicit ComputationUsefulnessStats(const SetCoverInvariant* inv, + bool is_active) + : inv_(inv), + is_active_(is_active), + num_ratio_computations_(), + num_useless_computations_(), + num_free_elements_() { + if (is_active) { + BaseInt num_subsets = inv_->model()->num_subsets(); + num_ratio_computations_.assign(num_subsets, 0); + num_useless_computations_.assign(num_subsets, 0); + num_free_elements_.assign(num_subsets, -1); // -1 means not computed yet. + } + } + + // To be called each time a num_free_elements is computed. + void Update(SubsetIndex subset, BaseInt new_num_free_elements) { + if (is_active_) { + if (new_num_free_elements == num_free_elements_[subset]) { + ++num_useless_computations_[subset]; + } + ++num_ratio_computations_[subset]; + num_free_elements_[subset] = new_num_free_elements; + } + } + + // To be called at the end of the algorithm. 
+ void PrintStats() { + if (is_active_) { + BaseInt num_subsets_considered = 0; + BaseInt num_ratio_updates = 0; + BaseInt num_wasted_ratio_updates = 0; + for (const SubsetIndex subset : inv_->model()->SubsetRange()) { + if (num_ratio_computations_[subset] > 0) { + ++num_subsets_considered; + if (num_ratio_computations_[subset] > 1) { + num_ratio_updates += num_ratio_computations_[subset] - 1; + } + } + num_wasted_ratio_updates += num_useless_computations_[subset]; + } + LOG(INFO) << "num_subsets_considered = " << num_subsets_considered; + LOG(INFO) << "num_ratio_updates = " << num_ratio_updates; + LOG(INFO) << "num_wasted_ratio_updates = " << num_wasted_ratio_updates; + } + } + + private: + // The invariant on which the stats are performed. + const SetCoverInvariant* inv_; + + // Whether the stats are active or not. + bool is_active_; + + // Number of times the ratio was computed for a subset. + SubsetToIntVector num_ratio_computations_; + + // Number of times the ratio was computed for a subset and was the same as the + // previous one. + SubsetToIntVector num_useless_computations_; + + // The value num_free_elements_ for the subset the last time it was computed. + // Used to detect useless computations. + SubsetToIntVector num_free_elements_; +}; +} // namespace + // ElementDegreeSolutionGenerator. // There is no need to use a priority queue here, as the ratios are computed // on-demand. Also elements are sorted based on degree once and for all and @@ -201,7 +278,7 @@ bool ElementDegreeSolutionGenerator::NextSolution( bool ElementDegreeSolutionGenerator::NextSolution( const SubsetBoolVector& in_focus, const SubsetCostVector& costs) { DVLOG(1) << "Entering ElementDegreeSolutionGenerator::NextSolution"; - DCHECK(inv_->CheckConsistency()); + inv_->Recompute(CL::kFreeAndUncovered); // Create the list of all the indices in the problem. 
const BaseInt num_elements = inv_->model()->num_elements(); std::vector degree_sorted_elements(num_elements); @@ -216,6 +293,7 @@ bool ElementDegreeSolutionGenerator::NextSolution( if (rows[a].size() == rows[b].size()) return a < b; return false; }); + ComputationUsefulnessStats stats(inv_, false); for (const ElementIndex element : degree_sorted_elements) { // No need to cover an element that is already covered. if (inv_->coverage()[element] != 0) continue; @@ -223,19 +301,114 @@ bool ElementDegreeSolutionGenerator::NextSolution( SubsetIndex best_subset(-1); for (const SubsetIndex subset : rows[element]) { if (!in_focus[subset]) continue; - const Cost ratio = costs[subset] / inv_->num_free_elements()[subset]; + const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset); + const Cost ratio = costs[subset] / num_free_elements; + stats.Update(subset, num_free_elements); if (ratio < min_ratio) { min_ratio = ratio; best_subset = subset; + } else if (ratio == min_ratio) { + // If the ratio is the same, we choose the subset with the most free + // elements. + // TODO(user): What about adding a tolerance for equality, which could + // further favor larger columns? + if (inv_->num_free_elements()[subset] > + inv_->num_free_elements()[best_subset]) { + best_subset = subset; + } } } - DCHECK_NE(best_subset, SubsetIndex(-1)); - inv_->Select(best_subset); + if (best_subset.value() == -1) { + LOG(WARNING) << "Best subset not found. Algorithmic error or invalid " + "input."; + continue; + } + DCHECK_NE(best_subset.value(), -1); + inv_->Select(best_subset, CL::kFreeAndUncovered); DVLOG(1) << "Cost = " << inv_->cost() << " num_uncovered_elements = " << inv_->num_uncovered_elements(); } inv_->CompressTrace(); - DCHECK(inv_->CheckConsistency()); + stats.PrintStats(); + DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered)); + return true; +} + +// LazyElementDegreeSolutionGenerator. 
+// There is no need to use a priority queue here, as the ratios are computed +// on-demand. Also elements are sorted based on degree once and for all and +// moved past when the elements become already covered. +bool LazyElementDegreeSolutionGenerator::NextSolution() { + const SubsetIndex num_subsets(inv_->model()->num_subsets()); + const SubsetBoolVector in_focus(num_subsets, true); + return NextSolution(in_focus, inv_->model()->subset_costs()); +} + +bool LazyElementDegreeSolutionGenerator::NextSolution( + absl::Span focus) { + const SubsetIndex num_subsets(inv_->model()->num_subsets()); + const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets); + return NextSolution(in_focus, inv_->model()->subset_costs()); +} + +bool LazyElementDegreeSolutionGenerator::NextSolution( + absl::Span focus, const SubsetCostVector& costs) { + const SubsetIndex num_subsets(inv_->model()->num_subsets()); + const SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets); + return NextSolution(in_focus, costs); +} + +bool LazyElementDegreeSolutionGenerator::NextSolution( + const SubsetBoolVector& in_focus, const SubsetCostVector& costs) { + DVLOG(1) << "Entering LazyElementDegreeSolutionGenerator::NextSolution"; + DCHECK(inv_->CheckConsistency( + SetCoverInvariant::ConsistencyLevel::kCostAndCoverage)); + // Create the list of all the indices in the problem. + const BaseInt num_elements = inv_->model()->num_elements(); + std::vector degree_sorted_elements(num_elements); + std::iota(degree_sorted_elements.begin(), degree_sorted_elements.end(), + ElementIndex(0)); + const SparseRowView& rows = inv_->model()->rows(); + // Sort indices by degree i.e. the size of the row corresponding to an + // element. 
+ std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(), + [&rows](const ElementIndex a, const ElementIndex b) { + if (rows[a].size() < rows[b].size()) return true; + if (rows[a].size() == rows[b].size()) return a < b; + return false; + }); + ComputationUsefulnessStats stats(inv_, false); + for (const ElementIndex element : degree_sorted_elements) { + // No need to cover an element that is already covered. + if (inv_->coverage()[element] != 0) continue; + Cost min_ratio = std::numeric_limits::max(); + SubsetIndex best_subset(-1); + for (const SubsetIndex subset : rows[element]) { + if (!in_focus[subset]) continue; + const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset); + const Cost ratio = costs[subset] / num_free_elements; + stats.Update(subset, num_free_elements); + if (ratio < min_ratio) { + min_ratio = ratio; + best_subset = subset; + } else if (ratio == min_ratio) { + // If the ratio is the same, we choose the subset with the most free + // elements. + // TODO(user): What about adding a tolerance for equality, which could + // further favor larger columns? 
+ if (num_free_elements > inv_->num_free_elements()[best_subset]) { + best_subset = subset; + } + } + } + DCHECK_NE(best_subset, SubsetIndex(-1)); + inv_->Select(best_subset, CL::kCostAndCoverage); + DVLOG(1) << "Cost = " << inv_->cost() + << " num_uncovered_elements = " << inv_->num_uncovered_elements(); + } + inv_->CompressTrace(); + DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage)); + stats.PrintStats(); return true; } @@ -267,7 +440,8 @@ bool SteepestSearch::NextSolution(absl::Span focus, bool SteepestSearch::NextSolution(const SubsetBoolVector& in_focus, const SubsetCostVector& costs, int num_iterations) { - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage)); + inv_->Recompute(CL::kFreeAndUncovered); DVLOG(1) << "Entering SteepestSearch::NextSolution, num_iterations = " << num_iterations; // Return false if inv_ contains no solution. @@ -298,7 +472,7 @@ bool SteepestSearch::NextSolution(const SubsetBoolVector& in_focus, DCHECK(inv_->is_selected()[best_subset]); DCHECK(inv_->ComputeIsRedundant(best_subset)); DCHECK_GT(costs[best_subset], 0.0); - inv_->Deselect(best_subset); + inv_->Deselect(best_subset, CL::kFreeAndUncovered); for (IntersectingSubsetsIterator it(*inv_->model(), best_subset); !it.at_end(); ++it) { @@ -312,7 +486,7 @@ bool SteepestSearch::NextSolution(const SubsetBoolVector& in_focus, inv_->CompressTrace(); // TODO(user): change this to enable working on partial solutions. 
DCHECK_EQ(inv_->num_uncovered_elements(), 0); - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered)); return true; } @@ -364,7 +538,7 @@ bool GuidedTabuSearch::NextSolution(int num_iterations) { bool GuidedTabuSearch::NextSolution(absl::Span focus, int num_iterations) { - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered)); DVLOG(1) << "Entering GuidedTabuSearch::NextSolution, num_iterations = " << num_iterations; const SubsetCostVector& subset_costs = inv_->model()->subset_costs(); @@ -419,7 +593,7 @@ bool GuidedTabuSearch::NextSolution(absl::Span focus, UpdatePenalties(focus); tabu_list_.Add(best_subset); - inv_->Flip(best_subset); + inv_->Flip(best_subset, CL::kFreeAndUncovered); // TODO(user): make the cost computation incremental. augmented_cost = std::accumulate(augmented_costs_.begin(), augmented_costs_.end(), 0.0); @@ -437,7 +611,7 @@ bool GuidedTabuSearch::NextSolution(absl::Span focus, } inv_->LoadSolution(best_choices); inv_->CompressTrace(); - DCHECK(inv_->CheckConsistency()); + DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered)); return true; } @@ -474,7 +648,7 @@ Cost GuidedLocalSearch::ComputeDelta(SubsetIndex subset) const { bool GuidedLocalSearch::NextSolution(absl::Span focus, int num_iterations) { - inv_->MakeFullyUpdated(); + inv_->Recompute(CL::kRedundancy); Cost best_cost = inv_->cost(); SubsetBoolVector best_choices = inv_->is_selected(); @@ -485,7 +659,8 @@ bool GuidedLocalSearch::NextSolution(absl::Span focus, } } - for (int iteration = 0; iteration < num_iterations; ++iteration) { + for (int iteration = 0; + !priority_heap_.IsEmpty() && iteration < num_iterations; ++iteration) { // Improve current solution respective to the current penalties. 
const SubsetIndex best_subset(priority_heap_.TopIndex()); if (inv_->is_selected()[best_subset]) { @@ -496,9 +671,11 @@ bool GuidedLocalSearch::NextSolution(absl::Span focus, (1 + penalties_[best_subset])), best_subset.value()}); } - inv_->FlipAndFullyUpdate(best_subset); // Flip the best subset. + inv_->Flip(best_subset, CL::kRedundancy); // Flip the best subset. + DCHECK(!utility_heap_.IsEmpty()); - // Getting the subset with highest utility. + // Getting the subset with highest utility. utility_heap_ is not empty, + // because we just inserted a pair. const SubsetIndex penalized_subset(utility_heap_.TopIndex()); utility_heap_.Pop(); ++penalties_[penalized_subset]; @@ -506,13 +683,15 @@ bool GuidedLocalSearch::NextSolution(absl::Span focus, {static_cast(inv_->model()->subset_costs()[penalized_subset] / (1 + penalties_[penalized_subset])), penalized_subset.value()}); + DCHECK(!utility_heap_.IsEmpty()); // Get removable subsets (Add them to the heap). - for (const SubsetIndex subset : inv_->new_removable_subsets()) { + for (const SubsetIndex subset : inv_->newly_removable_subsets()) { const float delta_selected = (penalization_factor_ * penalties_[subset] + inv_->model()->subset_costs()[subset]); priority_heap_.Insert({delta_selected, subset.value()}); } + DCHECK(!priority_heap_.IsEmpty()); for (const SubsetIndex subset : {penalized_subset, best_subset}) { const float delta = ComputeDelta(subset); @@ -520,10 +699,12 @@ bool GuidedLocalSearch::NextSolution(absl::Span focus, priority_heap_.Insert({delta, subset.value()}); } } + DCHECK(!priority_heap_.IsEmpty()); - // Get new non removable subsets. - // (Delete them from the heap) - for (const SubsetIndex subset : inv_->new_non_removable_subsets()) { + // Get new non removable subsets and remove them from the heap. + // This is when the priority_heap_ can become empty and end the outer loop + // early. 
+ for (const SubsetIndex subset : inv_->newly_non_removable_subsets()) { priority_heap_.Remove(subset.value()); } @@ -537,7 +718,7 @@ bool GuidedLocalSearch::NextSolution(absl::Span focus, // Improve the solution by removing redundant subsets. for (const SubsetIndex& subset : focus) { if (inv_->is_selected()[subset] && inv_->ComputeIsRedundant(subset)) - inv_->DeselectAndFullyUpdate(subset); + inv_->Deselect(subset, CL::kRedundancy); } DCHECK_EQ(inv_->num_uncovered_elements(), 0); return true; @@ -571,12 +752,12 @@ std::vector ClearRandomSubsets(absl::Span focus, SampleSubsets(&chosen_indices, num_subsets); BaseInt num_deselected = 0; for (const SubsetIndex subset : chosen_indices) { - inv->Deselect(subset); + inv->Deselect(subset, CL::kCostAndCoverage); ++num_deselected; for (IntersectingSubsetsIterator it(*inv->model(), subset); !it.at_end(); ++it) { if (!inv->is_selected()[subset]) continue; - inv->Deselect(subset); + inv->Deselect(subset, CL::kCostAndCoverage); ++num_deselected; } // Note that num_deselected may exceed num_subsets by more than 1. @@ -631,7 +812,7 @@ std::vector ClearMostCoveredElements( // Testing has shown that sorting sampled_subsets is not necessary. // Now, un-select the subset in sampled_subsets. for (const SubsetIndex subset : sampled_subsets) { - inv->Deselect(subset); + inv->Deselect(subset, CL::kCostAndCoverage); } return sampled_subsets; } diff --git a/ortools/algorithms/set_cover_heuristics.h b/ortools/algorithms/set_cover_heuristics.h index c8440bf64e..9a5221ab72 100644 --- a/ortools/algorithms/set_cover_heuristics.h +++ b/ortools/algorithms/set_cover_heuristics.h @@ -14,7 +14,6 @@ #ifndef OR_TOOLS_ALGORITHMS_SET_COVER_HEURISTICS_H_ #define OR_TOOLS_ALGORITHMS_SET_COVER_HEURISTICS_H_ -#include #include #include "absl/base/nullability.h" @@ -25,28 +24,6 @@ namespace operations_research { -// Priority aggregate for the subset priority queue. 
-class SubsetIndexWithPriority { - public: - using Index = SubsetIndex::ValueType; - using Priority = float; - SubsetIndexWithPriority() : index_(-1), priority_(0) {} - SubsetIndexWithPriority(Priority priority, Index index) - : index_(index), priority_(priority) {} - Priority priority() const { return priority_; } - Index index() const { return index_; } - inline bool operator<(const SubsetIndexWithPriority other) const { - if (other.priority() != priority()) { - return priority() < other.priority(); - } - return index() < other.index(); - } - - private: - Index index_; - Priority priority_; -}; - // Solver classes for the weighted set covering problem. // // The solution procedure is based on the general scheme known as local search. @@ -71,6 +48,8 @@ class SubsetIndexWithPriority { // The preprocessor finds the elements that can only be covered by one subset. // Obviously, such subsets which are the only ones that can cover a given // element are chosen. + +// The consistency level is maintained up to kFreeAndUncovered. class Preprocessor { public: explicit Preprocessor(absl::Nonnull inv) @@ -102,6 +81,8 @@ class Preprocessor { // An obvious idea is to take all the S_j's (or equivalently to set all the // x_j's to 1). It's a bit silly but fast, and we can improve on it later using // local search. + +// The consistency level is maintained up to kFreeAndUncovered. class TrivialSolutionGenerator { public: explicit TrivialSolutionGenerator(SetCoverInvariant* inv) : inv_(inv) {} @@ -125,6 +106,8 @@ class TrivialSolutionGenerator { // much better results. // TODO(user): make it possible to use other random generators. Idea: bias the // generator towards the columns with the least marginal costs. + +// The consistency level is maintained up to kFreeAndUncovered. 
class RandomSolutionGenerator { public: explicit RandomSolutionGenerator(SetCoverInvariant* inv) : inv_(inv) {} @@ -161,6 +144,7 @@ // Algorithms for Very Large Datasets.” In CIKM ’10. ACM Press. // https://doi.org/10.1145/1871437.1871501. +// The consistency level is maintained up to kFreeAndUncovered. class GreedySolutionGenerator { public: explicit GreedySolutionGenerator(SetCoverInvariant* inv) : inv_(inv) {} @@ -185,8 +169,12 @@ // Solution generator based on the degree of elements. // The degree of an element is the number of subsets covering it. // The generator consists in iteratively choosing a non-covered element with the -// smallest degree, and selecting a subset that covers it with the least cost. -// The newly-covered elements degree are also updated. +// smallest degree, and selecting a subset that covers it with the least +// ratio cost / number of uncovered elements. The number of uncovered elements +// is updated for each impacted subset. The newly-covered elements' degrees +// are also updated and set to zero. + +// The consistency level is maintained up to kFreeAndUncovered. class ElementDegreeSolutionGenerator { public: explicit ElementDegreeSolutionGenerator(SetCoverInvariant* inv) : inv_(inv) {} @@ -213,12 +201,50 @@ SetCoverInvariant* inv_; }; +// Solution generator based on the degree of elements. +// The heuristic is the same as ElementDegreeSolutionGenerator, but the number +// of uncovered elements for a subset is computed on-demand. In empirical +// tests, this seems to be faster than ElementDegreeSolutionGenerator because +// a very small percentage of them need to be computed, and even fewer among +// them need to be computed again later on. + +// Because the number of uncovered elements is computed on-demand, the +// consistency level only needs to be set to kCostAndCoverage. 
+class LazyElementDegreeSolutionGenerator { + public: + explicit LazyElementDegreeSolutionGenerator(SetCoverInvariant* inv) + : inv_(inv) {} + + // Returns true if a solution was found. + // TODO(user): Add time-outs and exit with a partial solution. + bool NextSolution(); + + // Computes the next partial solution considering only the subsets whose + // indices are in focus. + bool NextSolution(absl::Span focus); + + // Same with a different set of costs. + bool NextSolution(absl::Span focus, + const SubsetCostVector& costs); + + private: + // Same with a different set of costs, and the focus defined as a vector of + // Booleans. This is the actual implementation of NextSolution. + bool NextSolution(const SubsetBoolVector& in_focus, + const SubsetCostVector& costs); + + // The data structure that will maintain the invariant for the model. + SetCoverInvariant* inv_; +}; + // Once we have a first solution to the problem, there may be (most often, // there are) elements in E that are covered several times. To decrease the // total cost, SteepestSearch tries to eliminate some redundant S_j's from // the solution or equivalently, to flip some x_j's from 1 to 0. the algorithm // gets its name because it goes in the steepest immediate direction, taking // the S_j with the largest total cost. + +// The consistency level is maintained up to kFreeAndUncovered. class SteepestSearch { public: explicit SteepestSearch(SetCoverInvariant* inv) : inv_(inv) {} @@ -318,6 +344,8 @@ class TabuList { // 1 (2):190–206. doi:10.1287/ijoc.1.3.190. // F. Glover (1990) "Tabu Search – Part 2". ORSA Journal on Computing. // 2 (1): 4–32. doi:10.1287/ijoc.2.1.4. + +// The consistency level is maintained up to kFreeAndUncovered. class GuidedTabuSearch { public: explicit GuidedTabuSearch(SetCoverInvariant* inv) @@ -410,6 +438,8 @@ class GuidedTabuSearch { // by the number of subsets in the focus times a tunable factor alpha_. // [1] C. 
Voudouris (1997) "Guided local search for combinatorial optimisation // problems", PhD Thesis, University of Essex, Colchester, UK, July, 1997. + +// The consistency level is maintained up to kRedundancy. class GuidedLocalSearch { public: explicit GuidedLocalSearch(SetCoverInvariant* inv) @@ -477,6 +507,8 @@ class GuidedLocalSearch { // solution. There can be more than num_subsets variables cleared because the // intersecting subsets are also removed from the solution. Returns a list of // subset indices that can be reused as a focus. + +// The consistency level is maintained up to kCostAndCoverage. std::vector ClearRandomSubsets(BaseInt num_subsets, SetCoverInvariant* inv); @@ -490,6 +522,8 @@ std::vector ClearRandomSubsets(absl::Span focus, // randomly. // Returns the list of the chosen subset indices. // This indices can then be used ax a focus. + +// The consistency level is maintained up to kCostAndCoverage. std::vector ClearMostCoveredElements(BaseInt num_subsets, SetCoverInvariant* inv); diff --git a/ortools/algorithms/set_cover_invariant.cc b/ortools/algorithms/set_cover_invariant.cc index 128dff78a8..c0f41468cd 100644 --- a/ortools/algorithms/set_cover_invariant.cc +++ b/ortools/algorithms/set_cover_invariant.cc @@ -25,9 +25,7 @@ namespace operations_research { -namespace { -bool SupportsAvx512() { return false; } -} // namespace +using CL = SetCoverInvariant::ConsistencyLevel; // Note: in many of the member functions, variables have "crypterse" names // to avoid confusing them with member data. 
For example mrgnl_impcts is used @@ -35,7 +33,10 @@ bool SupportsAvx512() { return false; } void SetCoverInvariant::Initialize() { DCHECK(model_->ComputeFeasibility()); model_->CreateSparseRowView(); + Clear(); +} +void SetCoverInvariant::Clear() { cost_ = 0.0; const BaseInt num_subsets = model_->num_subsets(); @@ -56,52 +57,108 @@ void SetCoverInvariant::Initialize() { // No need to reserve for trace_ and other vectors as extending with // push_back is fast enough. + trace_.clear(); + newly_removable_subsets_.clear(); + newly_non_removable_subsets_.clear(); num_uncovered_elements_ = num_elements; - supports_avx512_ = SupportsAvx512(); - is_fully_updated_ = true; + consistency_level_ = CL::kRedundancy; } -bool SetCoverInvariant::CheckConsistency() const { +bool SetCoverInvariant::CheckConsistency(ConsistencyLevel consistency) const { + CHECK(consistency <= CL::kRedundancy); + if (consistency == CL::kInconsistent) { + return true; + } auto [cst, cvrg] = ComputeCostAndCoverage(is_selected_); CHECK_EQ(cost_, cst); for (const ElementIndex element : model_->ElementRange()) { CHECK_EQ(cvrg[element], coverage_[element]); } + if (consistency == CL::kCostAndCoverage) { + return true; + } auto [num_uncvrd_elts, num_free_elts] = - ComputeNumUncoveredAndFreeElements(cvrg); - auto [num_non_ovrcvrd_elts, is_rdndnt] = - ComputeNumNonOvercoveredElementsAndIsRedundant(cvrg); + ComputeNumUncoveredAndFreeElements(coverage_); for (const SubsetIndex subset : model_->SubsetRange()) { CHECK_EQ(num_free_elts[subset], num_free_elements_[subset]); - if (is_fully_updated_) { - CHECK_EQ(is_rdndnt[subset], is_redundant_[subset]); - CHECK_EQ(is_rdndnt[subset], num_non_ovrcvrd_elts[subset] == 0); - } } - return true; + if (consistency == CL::kFreeAndUncovered) { + return true; + } + auto [num_non_ovrcvrd_elts, is_rdndnt] = ComputeRedundancyInfo(coverage_); + for (const SubsetIndex subset : model_->SubsetRange()) { + CHECK_EQ(is_rdndnt[subset], is_redundant_[subset]); + 
CHECK_EQ(is_rdndnt[subset], num_non_ovrcvrd_elts[subset] == 0); + } + if (consistency == CL::kRedundancy) { + return true; + } + LOG(FATAL) << "Consistency level not supported: " + << static_cast(consistency); + return false; } void SetCoverInvariant::LoadSolution(const SubsetBoolVector& solution) { is_selected_ = solution; - RecomputeInvariant(); + ClearTrace(); + ClearRemovabilityInformation(); + for (SubsetIndex subset(0); bool b : solution) { + if (b) { + trace_.push_back(SetCoverDecision(subset, true)); + } + ++subset; + } + consistency_level_ = CL::kInconsistent; + Recompute(CL::kCostAndCoverage); } -void SetCoverInvariant::RecomputeInvariant() { - std::tie(cost_, coverage_) = ComputeCostAndCoverage(is_selected_); - std::tie(num_uncovered_elements_, num_free_elements_) = - ComputeNumUncoveredAndFreeElements(coverage_); - std::tie(num_non_overcovered_elements_, is_redundant_) = - ComputeNumNonOvercoveredElementsAndIsRedundant(coverage_); - is_fully_updated_ = true; +bool SetCoverInvariant::NeedToRecompute(ConsistencyLevel checked_consistency, + ConsistencyLevel target_consistency) { + return consistency_level_ < checked_consistency && + checked_consistency <= target_consistency; } -void SetCoverInvariant::MakeFullyUpdated() { - std::tie(num_non_overcovered_elements_, is_redundant_) = - ComputeNumNonOvercoveredElementsAndIsRedundant(coverage_); - is_fully_updated_ = true; +void SetCoverInvariant::Recompute(ConsistencyLevel target_consistency) { + CHECK(target_consistency >= CL::kCostAndCoverage); + CHECK(target_consistency <= CL::kRedundancy); + DCHECK(CheckConsistency(consistency_level_)); + if (NeedToRecompute(CL::kCostAndCoverage, target_consistency)) { + std::tie(cost_, coverage_) = ComputeCostAndCoverage(is_selected_); + } + if (NeedToRecompute(CL::kFreeAndUncovered, target_consistency)) { + std::tie(num_uncovered_elements_, num_free_elements_) = + ComputeNumUncoveredAndFreeElements(coverage_); + } + if (NeedToRecompute(CL::kRedundancy, 
target_consistency)) { + std::tie(num_non_overcovered_elements_, is_redundant_) = + ComputeRedundancyInfo(coverage_); + } + consistency_level_ = target_consistency; } +// NOTE(user): This piece of code is for reference because it seems to be +// faster to update the invariant. const BaseInt num_subsets = +// model_->num_subsets(); is_redundant_.assign(num_subsets, false); +// num_non_overcovered_elements_.assign(num_subsets, 0); +// const SparseColumnView& columns = model_->columns(); +// for (const ElementIndex element : model_->ElementRange()) { +// if (coverage_[element] >= 1) { +// --num_uncovered_elements_; +// } +// } +// for (const SubsetIndex subset : model_->SubsetRange()) { +// for (const ElementIndex element : columns[subset]) { +// if (coverage_[element] <= 1) { +// ++num_non_overcovered_elements_[subset]; +// } +// if (coverage_[element] >= 1) { +// --num_free_elements_[subset]; +// } +// } +// is_redundant_[subset] = (num_non_overcovered_elements_[subset] == 0); +// } + std::tuple SetCoverInvariant::ComputeCostAndCoverage( const SubsetBoolVector& choices) const { Cost cst = 0.0; @@ -162,8 +219,7 @@ SetCoverInvariant::ComputeNumUncoveredAndFreeElements( } std::tuple -SetCoverInvariant::ComputeNumNonOvercoveredElementsAndIsRedundant( - const ElementToIntVector& cvrg) const { +SetCoverInvariant::ComputeRedundancyInfo(const ElementToIntVector& cvrg) const { const BaseInt num_subsets(model_->num_subsets()); SubsetToIntVector num_cvrg_le_1_elts(num_subsets, 0); SubsetBoolVector is_rdndnt(num_subsets, false); @@ -212,7 +268,7 @@ void SetCoverInvariant::CompressTrace() { } bool SetCoverInvariant::ComputeIsRedundant(SubsetIndex subset) const { - if (is_fully_updated_) { + if (consistency_level_ >= CL::kRedundancy) { return is_redundant_[subset]; } if (is_selected_[subset]) { @@ -231,34 +287,48 @@ bool SetCoverInvariant::ComputeIsRedundant(SubsetIndex subset) const { return true; } -void SetCoverInvariant::Flip(SubsetIndex subset, bool incremental_full_update) 
{ +BaseInt SetCoverInvariant::ComputeNumFreeElements(SubsetIndex subset) const { + BaseInt num_free_elements = model_->columns()[subset].size(); + for (const ElementIndex element : model_->columns()[subset]) { + if (coverage_[element] != 0) { + --num_free_elements; + } + } + return num_free_elements; +} + +void SetCoverInvariant::Flip(SubsetIndex subset, + ConsistencyLevel target_consistency) { if (!is_selected_[subset]) { - Select(subset, incremental_full_update); + Select(subset, target_consistency); } else { - Deselect(subset, incremental_full_update); + Deselect(subset, target_consistency); } } void SetCoverInvariant::Select(SubsetIndex subset, - bool incremental_full_update) { - if (incremental_full_update) { + ConsistencyLevel target_consistency) { + const bool update_redundancy_info = target_consistency >= CL::kRedundancy; + if (update_redundancy_info) { ClearRemovabilityInformation(); - } else { - is_fully_updated_ = false; } + consistency_level_ = std::min(consistency_level_, target_consistency); DVLOG(1) << "Selecting subset " << subset; DCHECK(!is_selected_[subset]); - DCHECK(CheckConsistency()); + DCHECK(CheckConsistency(target_consistency)); trace_.push_back(SetCoverDecision(subset, true)); is_selected_[subset] = true; const SubsetCostVector& subset_costs = model_->subset_costs(); cost_ += subset_costs[subset]; - if (supports_avx512_) { - SelectAvx512(subset); - return; - } const SparseColumnView& columns = model_->columns(); const SparseRowView& rows = model_->rows(); + // Fast path for kCostAndCoverage. + if (target_consistency == CL::kCostAndCoverage) { + for (const ElementIndex element : columns[subset]) { + ++coverage_[element]; + } + return; + } for (const ElementIndex element : columns[subset]) { if (coverage_[element] == 0) { // `element` will be newly covered. 
@@ -266,7 +336,7 @@ void SetCoverInvariant::Select(SubsetIndex subset, for (const SubsetIndex impacted_subset : rows[element]) { --num_free_elements_[impacted_subset]; } - } else if (incremental_full_update && coverage_[element] == 1) { + } else if (update_redundancy_info && coverage_[element] == 1) { // `element` will be newly overcovered. for (const SubsetIndex impacted_subset : rows[element]) { --num_non_overcovered_elements_[impacted_subset]; @@ -276,49 +346,52 @@ void SetCoverInvariant::Select(SubsetIndex subset, // of impacted_subset becomes overcovered. DCHECK(!is_redundant_[impacted_subset]); if (is_selected_[impacted_subset]) { - new_removable_subsets_.push_back(impacted_subset); + newly_removable_subsets_.push_back(impacted_subset); } is_redundant_[impacted_subset] = true; } } } // Update coverage. Notice the asymmetry with Deselect where coverage is - // **decremented** before being tested. This allows to have more symmetrical - // code for conditions. + // **decremented** before being tested. This allows to have more + // symmetrical code for conditions. 
++coverage_[element]; } - if (incremental_full_update) { + if (update_redundancy_info) { if (is_redundant_[subset]) { - new_removable_subsets_.push_back(subset); + newly_removable_subsets_.push_back(subset); } else { - new_non_removable_subsets_.push_back(subset); + newly_non_removable_subsets_.push_back(subset); } } - DCHECK(CheckConsistency()); + DCHECK(CheckConsistency(target_consistency)); } void SetCoverInvariant::Deselect(SubsetIndex subset, - bool incremental_full_update) { - if (incremental_full_update) { + ConsistencyLevel target_consistency) { + DCHECK(CheckConsistency(target_consistency)); + const bool update_redundancy_info = target_consistency >= CL::kRedundancy; + if (update_redundancy_info) { ClearRemovabilityInformation(); - } else { - is_fully_updated_ = false; } + consistency_level_ = std::min(consistency_level_, target_consistency); DVLOG(1) << "Deselecting subset " << subset; // If already selected, then num_free_elements == 0. DCHECK(is_selected_[subset]); DCHECK_EQ(num_free_elements_[subset], 0); - DCHECK(CheckConsistency()); trace_.push_back(SetCoverDecision(subset, false)); is_selected_[subset] = false; const SubsetCostVector& subset_costs = model_->subset_costs(); cost_ -= subset_costs[subset]; - if (supports_avx512_) { - DeselectAvx512(subset); - return; - } const SparseColumnView& columns = model_->columns(); const SparseRowView& rows = model_->rows(); + // Fast path for kCostAndCoverage. + if (target_consistency == CL::kCostAndCoverage) { + for (const ElementIndex element : columns[subset]) { + --coverage_[element]; + } + return; + } for (const ElementIndex element : columns[subset]) { // Update coverage. Notice the asymmetry with Select where coverage is // incremented after being tested. 
@@ -329,7 +402,7 @@ void SetCoverInvariant::Deselect(SubsetIndex subset, for (const SubsetIndex impacted_subset : rows[element]) { ++num_free_elements_[impacted_subset]; } - } else if (incremental_full_update && coverage_[element] == 1) { + } else if (update_redundancy_info && coverage_[element] == 1) { // `element` will be no longer overcovered. for (const SubsetIndex impacted_subset : rows[element]) { if (num_non_overcovered_elements_[impacted_subset] == 0) { @@ -337,7 +410,7 @@ void SetCoverInvariant::Deselect(SubsetIndex subset, // impacted_subset has just become non-removable. DCHECK(is_redundant_[impacted_subset]); if (is_selected_[impacted_subset]) { - new_non_removable_subsets_.push_back(impacted_subset); + newly_non_removable_subsets_.push_back(impacted_subset); } is_redundant_[impacted_subset] = false; } @@ -348,15 +421,7 @@ void SetCoverInvariant::Deselect(SubsetIndex subset, // Since subset is now deselected, there is no need // nor meaning in adding it a list of removable or non-removable subsets. // This is a dissymmetry with Select. 
- DCHECK(CheckConsistency());
-}
-
-void SetCoverInvariant::SelectAvx512(SubsetIndex) {
-  LOG(FATAL) << "SelectAvx512 is not implemented";
-}
-
-void SetCoverInvariant::DeselectAvx512(SubsetIndex) {
-  LOG(FATAL) << "DeselectAvx512 is not implemented";
+ DCHECK(CheckConsistency(target_consistency)); } SetCoverSolutionResponse SetCoverInvariant::ExportSolutionAsProto() const { @@ -380,9 +445,7 @@ void SetCoverInvariant::ImportSolutionFromProto( for (auto s : message.subset()) { is_selected_[SubsetIndex(s)] = true; } - RecomputeInvariant(); Cost cost = message.cost(); CHECK_EQ(cost, cost_); } - } // namespace operations_research diff --git a/ortools/algorithms/set_cover_invariant.h b/ortools/algorithms/set_cover_invariant.h index 15d90f9a6a..8c4381c4b6 100644 --- a/ortools/algorithms/set_cover_invariant.h +++ b/ortools/algorithms/set_cover_invariant.h @@ -18,9 +18,9 @@ #include #include "absl/log/check.h" +#include "absl/types/span.h" #include "ortools/algorithms/set_cover.pb.h" #include "ortools/algorithms/set_cover_model.h" -#include "ortools/base/logging.h" namespace operations_research { @@ -66,6 +66,24 @@ class SetCoverInvariant { class SetCoverInvariant { public: + // The consistency level of the invariant. + // The values denote the level of consistency of the invariant. + // There is an order between the levels, and the invariant is consistent at + // level k if it is consistent at all levels lower than k. + // The most natural consistency level to use is kFreeAndUncovered, + // since it makes it possible to implement most heuristics. + // kCostAndCoverage is used by LazyElementDegree, a fast greedy heuristic. + // kRedundancy is used for GuidedLocalSearch, because knowing whether a + // subset is redundant incrementally is faster than recomputing the + // information over and over again. + // Below, the quantities that are maintained at each level are listed. + enum class ConsistencyLevel { + kInconsistent = 0, // The invariant is not consistent. 
+ kCostAndCoverage, // cost_ and coverage_. + kFreeAndUncovered, // num_free_elements_ and num_uncovered_elements_. + kRedundancy // is_redundant_ and num_non_overcovered_elements_. + }; + // Constructs an empty weighted set covering solver state. // The model may not change after the invariant was built. explicit SetCoverInvariant(SetCoverModel* m) : model_(m) { Initialize(); } @@ -74,13 +92,8 @@ class SetCoverInvariant { // afterwards. void Initialize(); - void Clear() { - is_selected_.assign(model_->num_subsets(), false); - RecomputeInvariant(); - } - - // Recomputes all the invariants for the current solution. - void RecomputeInvariant(); + // Clears the invariant. Also called by Initialize. + void Clear(); // Returns the weighted set covering model to which the state applies. SetCoverModel* model() const { return model_; } @@ -119,7 +132,7 @@ class SetCoverInvariant { // the solution. const SubsetBoolVector& is_redundant() const { return is_redundant_; } - // Returns the vector of the decisions which has led to the current solution. + // Returns the vector of the decisions which have led to the current solution. const std::vector& trace() const { return trace_; } // Clears the trace. @@ -127,18 +140,18 @@ class SetCoverInvariant { // Clear the removability information. void ClearRemovabilityInformation() { - new_removable_subsets_.clear(); - new_non_removable_subsets_.clear(); + newly_removable_subsets_.clear(); + newly_non_removable_subsets_.clear(); } // Returns the subsets that become removable after the last update. - const std::vector& new_removable_subsets() const { - return new_removable_subsets_; + const std::vector& newly_removable_subsets() const { + return newly_removable_subsets_; } // Returns the subsets that become non removable after the last update. 
- const std::vector& new_non_removable_subsets() const { - return new_non_removable_subsets_; + const std::vector& newly_non_removable_subsets() const { + return newly_non_removable_subsets_; } // Compresses the trace so that: @@ -152,10 +165,11 @@ class SetCoverInvariant { // Loads the solution and recomputes the data in the invariant. void LoadSolution(const SubsetBoolVector& solution); - // Returns true if the data stored in the invariant is consistent. - // The body of the function will CHECK-fail the first time an inconsistency - // is encountered. - bool CheckConsistency() const; + // Checks the consistency of the invariant at the given consistency level. + bool CheckConsistency(ConsistencyLevel consistency) const; + + // Recomputes the invariant to the given consistency level. + void Recompute(ConsistencyLevel target_consistency); // Returns true if the subset is redundant within the current solution, i.e. // when all its elements are already covered twice. Note that the set need @@ -163,33 +177,27 @@ class SetCoverInvariant { // TODO(user): Implement this using AVX-512? bool ComputeIsRedundant(SubsetIndex subset) const; - // Updates the invariant fully, so that is_redundant_ can be updated - // incrementally later with SelectAndFullyUpdate and - // DeselectSelectAndFullyUpdate. - void MakeFullyUpdated(); - - // Flips is_selected_[subset] to its negation, by calling Select or Deselect - // depending on value. Updates the invariant incrementally. - // FlipAndFullyUpdate performs a full incremental update of the invariant, - // including num_non_overcovered_elements_, is_redundant_, - // new_removable_subsets_, new_non_removable_subsets_. This is useful for some - // meta-heuristics. - void Flip(SubsetIndex subset) { Flip(subset, false); } - void FlipAndFullyUpdate(SubsetIndex subset) { Flip(subset, true); } + // Computes the number of free (uncovered) elements in the given subset. 
+ BaseInt ComputeNumFreeElements(SubsetIndex subset) const; // Includes subset in the solution by setting is_selected_[subset] to true - // and incrementally updating the invariant. - // SelectAndFullyUpdate also updates the invariant in a more thorough way as - // explained with FlipAndFullyUpdate. - void Select(SubsetIndex subset) { Select(subset, false); } - void SelectAndFullyUpdate(SubsetIndex subset) { Select(subset, true); } + // without updating the invariant. Only updates the cost and the coverage. + // TODO(user): Merge with Select. Introduce consistency levels and maybe split + // the invariant into three. + void SelectNoUpdate(SubsetIndex subset); + + // Flips is_selected_[subset] to its negation, by calling Select or Deselect + // depending on value. Updates the invariant incrementally to the given + // consistency level. + void Flip(SubsetIndex subset, ConsistencyLevel consistency); + + // Includes subset in the solution by setting is_selected_[subset] to true + // and incrementally updating the invariant to the given consistency level. + void Select(SubsetIndex subset, ConsistencyLevel consistency); // Excludes subset from the solution by setting is_selected_[subset] to false - // and incrementally updating the invariant. - // DeselectAndFullyUpdate also updates the invariant in a more thorough way as - // explained with FlipAndFullyUpdate. - void Deselect(SubsetIndex subset) { Deselect(subset, false); } - void DeselectAndFullyUpdate(SubsetIndex subset) { Deselect(subset, true); } + // and incrementally updating the invariant to the given consistency level. + void Deselect(SubsetIndex subset, ConsistencyLevel consistency); // Returns the current solution as a proto. SetCoverSolutionResponse ExportSolutionAsProto() const; @@ -217,30 +225,13 @@ class SetCoverInvariant { // Temporarily uses |S| BaseInts. std::tuple // Redundancy for each of the subsets. 
- ComputeNumNonOvercoveredElementsAndIsRedundant( - const ElementToIntVector& cvrg) const; + ComputeRedundancyInfo(const ElementToIntVector& cvrg) const; - // Flips is_selected_[subset] to its negation, by calling Select or Deselect - // depending on value. Updates the invariant incrementally. - // When incremental_full_update is true, the following fields are also - // updated: num_non_overcovered_elements_, is_redundant_, - // new_removable_subsets_, new_non_removable_subsets_. This is useful for some - // meta-heuristics. - void Flip(SubsetIndex, bool incremental_full_update); - - // Sets is_selected_[subset] to true and incrementally updates the invariant. - // Parameter incremental_full_update has the same meaning as with Flip. - void Select(SubsetIndex subset, bool incremental_full_update); - - // Sets is_selected_[subset] to false and incrementally updates the invariant. - // Parameter incremental_full_update has the same meaning as with Flip. - void Deselect(SubsetIndex subset, bool incremental_full_update); - - // Helper function for Select when AVX-512 is supported by the processor. - void SelectAvx512(SubsetIndex subset); - - // Helper function for Deselect when AVX-512 is supported by the processor. - void DeselectAvx512(SubsetIndex subset); + // Returns true if the current consistency level consistency_ is lower than + // checked_consistency and the desired consistency is higher than + // checked_consistency. + bool NeedToRecompute(ConsistencyLevel checked_consistency, + ConsistencyLevel target_consistency); // The weighted set covering model on which the solver is run. SetCoverModel* model_; @@ -281,23 +272,20 @@ class SetCoverInvariant { // Takes |S| bits. SubsetBoolVector is_redundant_; - // Subsets that become removable after the last update. + // Subsets that became removable after the last update. // Takes at most |S| BaseInts. (More likely a few percent of that). 
- std::vector new_removable_subsets_; + std::vector newly_removable_subsets_; - // Subsets that become non removable after the last update. + // Subsets that became non removable after the last update. // Takes at most |S| BaseInts. (More likely a few percent of that). - std::vector new_non_removable_subsets_; + std::vector newly_non_removable_subsets_; - // Denotes whether is_redundant_ and num_non_overcovered_elements_ have been - // updated. Initially true, it becomes false as soon as Flip, - // Select and Deselect are called with incremental_full_update = false. The - // fully updated status can be achieved again with a call to FullUpdate(), - // which can be expensive, - bool is_fully_updated_; - - // True if the CPU supports the AVX-512 instruction set. - bool supports_avx512_; + // Denotes the consistency level of the invariant. + // Some algorithms may need to recompute the invariant to a higher consistency + // level. + // TODO(user): think of making the enforcement of the consistency level + // automatic at the constructor level of the heuristic algorithms. + ConsistencyLevel consistency_level_; }; } // namespace operations_research diff --git a/ortools/algorithms/set_cover_mip.cc b/ortools/algorithms/set_cover_mip.cc index 00dec965d2..32433301a4 100644 --- a/ortools/algorithms/set_cover_mip.cc +++ b/ortools/algorithms/set_cover_mip.cc @@ -110,8 +110,7 @@ bool SetCoverMip::NextSolution(absl::Span focus, for (const ElementIndex element : model->columns()[subset]) { // The model should only contain elements that are not forcibly covered by // subsets outside the focus. 
- if (coverage_outside_focus[element] == 0) continue; - + if (coverage_outside_focus[element] != 0) continue; if (constraints[element] == nullptr) { constexpr double kInfinity = std::numeric_limits<double>::infinity(); constraints[element] = solver.MakeRowConstraint(1.0, kInfinity); @@ -140,10 +139,12 @@ bool SetCoverMip::NextSolution(absl::Span<const SubsetIndex> focus, return false; } if (use_integers) { + using CL = SetCoverInvariant::ConsistencyLevel; for (const SubsetIndex subset : focus) { - choices[subset] = (vars[subset]->solution_value() > 0.9); + if (vars[subset]->solution_value() > 0.9) { + inv_->Select(subset, CL::kCostAndCoverage); + } } - inv_->LoadSolution(choices); } else { lower_bound_ = solver.Objective().Value(); } diff --git a/ortools/algorithms/set_cover_model.cc b/ortools/algorithms/set_cover_model.cc index e67718b197..dc159f6fc7 100644 --- a/ortools/algorithms/set_cover_model.cc +++ b/ortools/algorithms/set_cover_model.cc @@ -14,6 +14,7 @@ #include "ortools/algorithms/set_cover_model.h" #include +#include #include #include #include @@ -169,6 +170,7 @@ SetCoverModel SetCoverModel::GenerateRandomModelFrom( for (Cost& cost : model.subset_costs_) { cost = min_cost + absl::Uniform(bitgen, 0, cost_range); } + model.CreateSparseRowView(); return model; } @@ -207,12 +209,14 @@ void SetCoverModel::AddElementToLastSubset(ElementIndex element) { void SetCoverModel::SetSubsetCost(BaseInt subset, Cost cost) { CHECK(std::isfinite(cost)); DCHECK_GE(subset, 0); - num_subsets_ = std::max(num_subsets_, subset + 1); - columns_.resize(num_subsets_, SparseColumn()); - subset_costs_.resize(num_subsets_, 0.0); + if (subset >= num_subsets()) { + num_subsets_ = std::max(num_subsets_, subset + 1); + columns_.resize(num_subsets_, SparseColumn()); + subset_costs_.resize(num_subsets_, 0.0); + row_view_is_valid_ = false; + UpdateAllSubsetsList(); + } subset_costs_[SubsetIndex(subset)] = cost; - UpdateAllSubsetsList(); - row_view_is_valid_ = false; // Probably overkill, but better safe than sorry. 
} void SetCoverModel::SetSubsetCost(SubsetIndex subset, Cost cost) { @@ -220,10 +224,12 @@ void SetCoverModel::SetSubsetCost(SubsetIndex subset, Cost cost) { } void SetCoverModel::AddElementToSubset(BaseInt element, BaseInt subset) { - num_subsets_ = std::max(num_subsets_, subset + 1); - subset_costs_.resize(num_subsets_, 0.0); - columns_.resize(num_subsets_, SparseColumn()); - UpdateAllSubsetsList(); + if (subset >= num_subsets()) { + num_subsets_ = subset + 1; + subset_costs_.resize(num_subsets_, 0.0); + columns_.resize(num_subsets_, SparseColumn()); + UpdateAllSubsetsList(); + } columns_[SubsetIndex(subset)].push_back(ElementIndex(element)); num_elements_ = std::max(num_elements_, element + 1); ++num_nonzeros_; @@ -240,6 +246,7 @@ void SetCoverModel::ReserveNumSubsets(BaseInt num_subsets) { num_subsets_ = std::max(num_subsets_, num_subsets); columns_.resize(num_subsets_, SparseColumn()); subset_costs_.resize(num_subsets_, 0.0); + UpdateAllSubsetsList(); } void SetCoverModel::ReserveNumSubsets(SubsetIndex num_subsets) { @@ -363,9 +370,52 @@ double StandardDeviation(const std::vector& values) { sum += sample; ++n; } + // Since we know all the values, we can compute the standard deviation + // exactly. return n == 0.0 ? 0.0 : sqrt((sum_of_squares - sum * sum / n) / n); } +// Statistics accumulation class used to compute statistics on the deltas of +// the row and column elements and their sizes in bytes. +// Since the values are not all stored, it's not possible to compute the median +// exactly. It is returned as 0.0. NaN would be a better choice, but it's just +// not a good idea as NaNs can propagate and cause problems. 
+class StatsAccumulator { + public: + StatsAccumulator() + : count_(0), + min_(kInfinity), + max_(-kInfinity), + sum_(0.0), + sum_of_squares_(0.0) {} + + void Register(double value) { + ++count_; + min_ = std::min(min_, value); + max_ = std::max(max_, value); + sum_ += value; + sum_of_squares_ += value * value; + } + + SetCoverModel::Stats ComputeStats() const { + const BaseInt n = count_; + // Since the code is used on a known number of values, we can compute the + // standard deviation exactly, even if the values are not all stored. + const double stddev = + n == 0 ? 0.0 : sqrt((sum_of_squares_ - sum_ * sum_ / n) / n); + return SetCoverModel::Stats{min_, max_, 0.0, sum_ / n, stddev}; + } + + private: + static constexpr double kInfinity = std::numeric_limits::infinity(); + int64_t count_; + double min_; + double max_; + double sum_; + double sum_of_squares_; +}; +} // namespace + template SetCoverModel::Stats ComputeStats(std::vector sizes) { SetCoverModel::Stats stats; @@ -391,7 +441,6 @@ std::vector ComputeDeciles(std::vector values) { } return deciles; } -} // namespace SetCoverModel::Stats SetCoverModel::ComputeCostStats() { std::vector subset_costs(num_subsets()); @@ -435,4 +484,38 @@ std::vector SetCoverModel::ComputeColumnDeciles() const { return ComputeDeciles(std::move(column_sizes)); } +namespace { +// Returns the number of bytes needed to store x with a base-128 encoding. +BaseInt Base128SizeInBytes(BaseInt x) { + const uint64_t u = x == 0 ? 
1 : static_cast<uint64_t>(x); + return (64 - absl::countl_zero(u) + 6) / 7; +} +} // namespace + +SetCoverModel::Stats SetCoverModel::ComputeColumnDeltaSizeStats() const { + StatsAccumulator acc; + for (const SparseColumn& column : columns_) { + BaseInt previous = 0; + for (const ElementIndex element : column) { + const BaseInt delta = element.value() - previous; + previous = element.value(); + acc.Register(Base128SizeInBytes(delta)); + } + } + return acc.ComputeStats(); +} + +SetCoverModel::Stats SetCoverModel::ComputeRowDeltaSizeStats() const { + StatsAccumulator acc; + for (const SparseRow& row : rows_) { + BaseInt previous = 0; + for (const SubsetIndex subset : row) { + const BaseInt delta = subset.value() - previous; + previous = subset.value(); + acc.Register(Base128SizeInBytes(delta)); + } + } + return acc.ComputeStats(); +} + } // namespace operations_research diff --git a/ortools/algorithms/set_cover_model.h b/ortools/algorithms/set_cover_model.h index ea21f82444..684fb3153b 100644 --- a/ortools/algorithms/set_cover_model.h +++ b/ortools/algorithms/set_cover_model.h @@ -258,6 +258,14 @@ class SetCoverModel { // Computes deciles on columns and returns a vector of deciles. std::vector<BaseInt> ComputeColumnDeciles() const; + // Computes basic statistics on the deltas of the row and column elements and + // returns a Stats structure. The deltas are computed as the difference + // between two consecutive indices in rows or columns. The number of bytes + // is computed assuming a variable-length base-128 encoding. + // TODO(user): actually use this to compress the rows and columns. 
+ Stats ComputeRowDeltaSizeStats() const; + Stats ComputeColumnDeltaSizeStats() const; + private: // Updates the all_subsets_ vector so that it always contains 0 to // columns.size() - 1 diff --git a/ortools/algorithms/set_cover_reader.cc b/ortools/algorithms/set_cover_reader.cc index d4c9283d42..d773f2ddf5 100644 --- a/ortools/algorithms/set_cover_reader.cc +++ b/ortools/algorithms/set_cover_reader.cc @@ -109,6 +109,7 @@ SetCoverModel ReadBeasleySetCoverProblem(absl::string_view filename) { } } file->Close(file::Defaults()).IgnoreError(); + model.CreateSparseRowView(); return model; } @@ -117,7 +118,7 @@ SetCoverModel ReadRailSetCoverProblem(absl::string_view filename) { File* file(file::OpenOrDie(filename, "r", file::Defaults())); SetCoverReader reader(file); const ElementIndex num_rows(reader.ParseNextInteger()); - const int num_cols(reader.ParseNextInteger()); + const BaseInt num_cols(reader.ParseNextInteger()); model.ReserveNumSubsets(num_cols); for (int i(0); i < num_cols; ++i) { const double cost(reader.ParseNextDouble()); @@ -130,6 +131,7 @@ SetCoverModel ReadRailSetCoverProblem(absl::string_view filename) { } } file->Close(file::Defaults()).IgnoreError(); + model.CreateSparseRowView(); return model; } diff --git a/ortools/algorithms/set_cover_test.cc b/ortools/algorithms/set_cover_test.cc index c1da8e0bde..2124fd9a87 100644 --- a/ortools/algorithms/set_cover_test.cc +++ b/ortools/algorithms/set_cover_test.cc @@ -25,9 +25,29 @@ #include "ortools/algorithms/set_cover_model.h" #include "ortools/base/gmock.h" #include "ortools/base/logging.h" +#include "ortools/base/parse_text_proto.h" namespace operations_research { namespace { +using google::protobuf::contrib::parse_proto::ParseTextProtoOrDie; +using CL = SetCoverInvariant::ConsistencyLevel; + +TEST(SetCoverTest, GuidedLocalSearchVerySmall) { + SetCoverProto proto = ParseTextProtoOrDie(R"pb( + subset { cost: 1 element: 1 element: 2 } + subset { cost: 1 element: 0 })pb"); + + SetCoverModel model; + 
model.ImportModelFromProto(proto); + CHECK(model.ComputeFeasibility()); + SetCoverInvariant inv(&model); + GreedySolutionGenerator greedy_search(&inv); + CHECK(greedy_search.NextSolution()); + CHECK(inv.CheckConsistency(CL::kFreeAndUncovered)); + GuidedLocalSearch search(&inv); + CHECK(search.NextSolution(100)); + CHECK(inv.CheckConsistency(CL::kRedundancy)); +} SetCoverModel CreateKnightsCoverModel(int num_rows, int num_cols) { SetCoverModel model; @@ -85,15 +105,15 @@ TEST(SolutionProtoTest, SaveReloadTwice) { SetCoverInvariant inv(&model); GreedySolutionGenerator greedy(&inv); CHECK(greedy.NextSolution()); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); SetCoverSolutionResponse greedy_proto = inv.ExportSolutionAsProto(); SteepestSearch steepest(&inv); CHECK(steepest.NextSolution(500)); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kRedundancy)); SetCoverSolutionResponse steepest_proto = inv.ExportSolutionAsProto(); inv.ImportSolutionFromProto(greedy_proto); CHECK(steepest.NextSolution(500)); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kRedundancy)); } TEST(SetCoverTest, InitialValues) { @@ -113,18 +133,18 @@ TEST(SetCoverTest, InitialValues) { TrivialSolutionGenerator trivial(&inv); CHECK(trivial.NextSolution()); LOG(INFO) << "TrivialSolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kCostAndCoverage)); GreedySolutionGenerator greedy(&inv); EXPECT_TRUE(greedy.NextSolution()); LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); EXPECT_EQ(inv.num_uncovered_elements(), 0); SteepestSearch steepest(&inv); CHECK(steepest.NextSolution(500)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + 
EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); } TEST(SetCoverTest, Preprocessor) { @@ -143,11 +163,11 @@ TEST(SetCoverTest, Preprocessor) { SetCoverInvariant inv(&model); Preprocessor preprocessor(&inv); preprocessor.NextSolution(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kCostAndCoverage)); GreedySolutionGenerator greedy(&inv); EXPECT_TRUE(greedy.NextSolution()); LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); } TEST(SetCoverTest, Infeasible) { @@ -178,19 +198,19 @@ TEST(SetCoverTest, KnightsCoverTrivalAndGreedy) { TrivialSolutionGenerator trivial(&inv); CHECK(trivial.NextSolution()); LOG(INFO) << "TrivialSolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kCostAndCoverage)); // Reinitialize before using Greedy, to start from scratch. inv.Initialize(); GreedySolutionGenerator greedy(&inv); CHECK(greedy.NextSolution()); LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); SteepestSearch steepest(&inv); CHECK(steepest.NextSolution(100'000)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); } TEST(SetCoverTest, KnightsCoverGreedy) { @@ -202,7 +222,7 @@ TEST(SetCoverTest, KnightsCoverGreedy) { LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(100'000)); + CHECK(steepest.NextSolution(100)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); } @@ -215,7 +235,7 @@ TEST(SetCoverTest, KnightsCoverDegree) { LOG(INFO) << "ElementDegreeSolutionGenerator cost: " << inv.cost(); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(100'000)); + 
CHECK(steepest.NextSolution(100)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); } @@ -226,7 +246,7 @@ TEST(SetCoverTest, KnightsCoverGLS) { CHECK(greedy.NextSolution()); LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); GuidedLocalSearch gls(&inv); - CHECK(gls.NextSolution(100000)); + CHECK(gls.NextSolution(100)); LOG(INFO) << "GuidedLocalSearch cost: " << inv.cost(); } @@ -238,12 +258,12 @@ TEST(SetCoverTest, KnightsCoverRandom) { RandomSolutionGenerator random(&inv); CHECK(random.NextSolution()); LOG(INFO) << "RandomSolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kCostAndCoverage)); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(100'000)); + CHECK(steepest.NextSolution(100)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); } TEST(SetCoverTest, KnightsCoverTrivial) { @@ -254,12 +274,12 @@ TEST(SetCoverTest, KnightsCoverTrivial) { TrivialSolutionGenerator trivial(&inv); CHECK(trivial.NextSolution()); LOG(INFO) << "TrivialSolutionGenerator cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kCostAndCoverage)); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(100'000)); + CHECK(steepest.NextSolution(100)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); } TEST(SetCoverTest, KnightsCoverGreedyAndTabu) { @@ -276,14 +296,14 @@ TEST(SetCoverTest, KnightsCoverGreedyAndTabu) { LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(10'000)); + CHECK(steepest.NextSolution(100)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); GuidedTabuSearch 
gts(&inv); - CHECK(gts.NextSolution(10'000)); + CHECK(gts.NextSolution(1'000)); LOG(INFO) << "GuidedTabuSearch cost: " << inv.cost(); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kFreeAndUncovered)); DisplayKnightsCoverSolution(inv.is_selected(), BoardSize, BoardSize); } @@ -297,7 +317,7 @@ TEST(SetCoverTest, KnightsCoverGreedyRandomClear) { SetCoverInvariant inv(&model); Cost best_cost = std::numeric_limits::max(); SubsetBoolVector best_choices = inv.is_selected(); - for (int i = 0; i < 10'000; ++i) { + for (int i = 0; i < 100; ++i) { inv.LoadSolution(best_choices); ClearRandomSubsets(0.1 * inv.trace().size(), &inv); @@ -332,22 +352,22 @@ TEST(SetCoverTest, KnightsCoverElementDegreeRandomClear) { SetCoverModel model = CreateKnightsCoverModel(BoardSize, BoardSize); SetCoverInvariant inv(&model); Cost best_cost = std::numeric_limits::max(); - SubsetBoolVector best_choices = inv.is_selected(); - for (int i = 0; i < 1'000; ++i) { - inv.LoadSolution(best_choices); - ClearRandomSubsets(0.1 * inv.trace().size(), &inv); - - ElementDegreeSolutionGenerator degree(&inv); + SubsetBoolVector best_choices; + for (int i = 0; i < 10000; ++i) { + LazyElementDegreeSolutionGenerator degree(&inv); CHECK(degree.NextSolution()); + CHECK(inv.CheckConsistency(CL::kCostAndCoverage)); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(10'000)); + CHECK(steepest.NextSolution(100)); if (inv.cost() < best_cost) { best_cost = inv.cost(); best_choices = inv.is_selected(); LOG(INFO) << "Best cost: " << best_cost << " at iteration = " << i; } + inv.LoadSolution(best_choices); + ClearRandomSubsets(0.1 * inv.trace().size(), &inv); } inv.LoadSolution(best_choices); DisplayKnightsCoverSolution(best_choices, BoardSize, BoardSize); @@ -372,24 +392,23 @@ TEST(SetCoverTest, KnightsCoverRandomClearMip) { LOG(INFO) << "GreedySolutionGenerator cost: " << inv.cost(); SteepestSearch steepest(&inv); - CHECK(steepest.NextSolution(10'000)); + 
CHECK(steepest.NextSolution(100)); LOG(INFO) << "SteepestSearch cost: " << inv.cost(); Cost best_cost = inv.cost(); SubsetBoolVector best_choices = inv.is_selected(); - for (int i = 0; i < 100; ++i) { - inv.LoadSolution(best_choices); - auto focus = ClearRandomSubsets(0.1 * model.num_subsets(), &inv); + for (int i = 0; i < 1'000; ++i) { + auto focus = ClearRandomSubsets(0.1 * inv.trace().size(), &inv); SetCoverMip mip(&inv); mip.NextSolution(focus, true, 1); - EXPECT_TRUE(inv.CheckConsistency()); + EXPECT_TRUE(inv.CheckConsistency(CL::kCostAndCoverage)); if (inv.cost() < best_cost) { best_cost = inv.cost(); best_choices = inv.is_selected(); LOG(INFO) << "Best cost: " << best_cost << " at iteration = " << i; } + inv.LoadSolution(best_choices); } - inv.LoadSolution(best_choices); DisplayKnightsCoverSolution(best_choices, BoardSize, BoardSize); LOG(INFO) << "RandomClearMip cost: " << best_cost; // The best solution found until 2023-08 has a cost of 350. @@ -408,10 +427,12 @@ TEST(SetCoverTest, KnightsCoverMip) { SetCoverModel model = CreateKnightsCoverModel(BoardSize, BoardSize); SetCoverInvariant inv(&model); SetCoverMip mip(&inv); - mip.NextSolution(true, 10); - SubsetBoolVector best_choices = inv.is_selected(); - DisplayKnightsCoverSolution(best_choices, BoardSize, BoardSize); + mip.NextSolution(true, .5); LOG(INFO) << "Mip cost: " << inv.cost(); + DisplayKnightsCoverSolution(inv.is_selected(), BoardSize, BoardSize); + if (BoardSize == 50) { + CHECK_GE(inv.cost(), 350); + } } void BM_Steepest(benchmark::State& state) {