diff --git a/ortools/algorithms/BUILD.bazel b/ortools/algorithms/BUILD.bazel index be5f372620..3d7f2284f3 100644 --- a/ortools/algorithms/BUILD.bazel +++ b/ortools/algorithms/BUILD.bazel @@ -275,6 +275,7 @@ cc_library( ":set_cover_model", "//ortools/base:threadpool", "@com_google_absl//absl/log:check", + "@com_google_absl//absl/synchronization", ], ) @@ -286,9 +287,10 @@ cc_library( ":set_cover_cc_proto", "//ortools/base:intops", "//ortools/base:strong_vector", - "//ortools/util:aligned_memory", "@com_google_absl//absl/log", "@com_google_absl//absl/log:check", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/strings", ], ) diff --git a/ortools/algorithms/python/set_cover.cc b/ortools/algorithms/python/set_cover.cc index 2d107fdfc0..a4263078c1 100644 --- a/ortools/algorithms/python/set_cover.cc +++ b/ortools/algorithms/python/set_cover.cc @@ -13,91 +13,235 @@ // A pybind11 wrapper for set_cover_*. +#include +#include +#include #include +#include #include "absl/base/nullability.h" #include "ortools/algorithms/set_cover_heuristics.h" #include "ortools/algorithms/set_cover_invariant.h" #include "ortools/algorithms/set_cover_model.h" #include "ortools/algorithms/set_cover_reader.h" +#include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/pytypes.h" #include "pybind11/stl.h" #include "pybind11_protobuf/native_proto_caster.h" +using ::operations_research::BaseInt; +using ::operations_research::ClearRandomSubsets; using ::operations_research::ElementDegreeSolutionGenerator; +using ::operations_research::ElementIndex; using ::operations_research::GreedySolutionGenerator; using ::operations_research::GuidedLocalSearch; +using ::operations_research::GuidedTabuSearch; using ::operations_research::Preprocessor; using ::operations_research::RandomSolutionGenerator; using ::operations_research::ReadBeasleySetCoverProblem; using ::operations_research::ReadRailSetCoverProblem; +using ::operations_research::SetCoverDecision; using ::operations_research::SetCoverInvariant; using ::operations_research::SetCoverModel; +using ::operations_research::SparseColumn; +using ::operations_research::SparseRow; using ::operations_research::SteepestSearch; +using ::operations_research::SubsetBoolVector; +using ::operations_research::SubsetCostVector; using ::operations_research::SubsetIndex; +using ::operations_research::TabuList; using ::operations_research::TrivialSolutionGenerator; namespace py = pybind11; using ::py::arg; +using ::py::make_iterator; -// General note about TODOs: the corresponding functions/classes/methods are -// more complex to wrap, as they use nonstandard types, and are less important, -// as they are not as useful to most users (mostly useful to write some custom -// Python heuristics). +std::vector VectorIntToVectorSubsetIndex( + const std::vector& ints) { + std::vector subs; + std::transform(ints.begin(), ints.end(), subs.begin(), + [](int subset) -> SubsetIndex { return SubsetIndex(subset); }); + return subs; +} + +SubsetCostVector VectorDoubleToSubsetCostVector( + const std::vector& doubles) { + SubsetCostVector costs(doubles.begin(), doubles.end()); + return costs; +} + +class IntIterator { + public: + using value_type = int; + using difference_type = std::ptrdiff_t; + using pointer = int*; + using reference = int&; + using iterator_category = std::input_iterator_tag; + + explicit IntIterator(int max_value) + : max_value_(max_value), current_value_(0) {} + + int operator*() const { return current_value_; } + IntIterator& operator++() { + ++current_value_; + return *this; + } + + static IntIterator begin(int max_value) { return IntIterator{max_value}; } + static IntIterator end(int max_value) { return {max_value, max_value}; } + + friend bool operator==(const IntIterator& lhs, const IntIterator& rhs) { + return lhs.max_value_ == rhs.max_value_ && + lhs.current_value_ == rhs.current_value_; + } + + private: + IntIterator(int max_value, int current_value) + : max_value_(max_value), current_value_(current_value) {} + + const int max_value_; + int current_value_; +}; PYBIND11_MODULE(set_cover, m) { pybind11_protobuf::ImportNativeProtoCasters(); // set_cover_model.h + py::class_(m, "SetCoverModelStats") + .def_readwrite("min", &SetCoverModel::Stats::min) + .def_readwrite("max", &SetCoverModel::Stats::max) + .def_readwrite("median", &SetCoverModel::Stats::median) + .def_readwrite("mean", &SetCoverModel::Stats::mean) + .def_readwrite("stddev", &SetCoverModel::Stats::stddev) + .def("debug_string", &SetCoverModel::Stats::DebugString); + py::class_(m, "SetCoverModel") .def(py::init<>()) .def_property_readonly("num_elements", &SetCoverModel::num_elements) .def_property_readonly("num_subsets", &SetCoverModel::num_subsets) .def_property_readonly("num_nonzeros", &SetCoverModel::num_nonzeros) .def_property_readonly("fill_rate", &SetCoverModel::FillRate) + .def_property_readonly( + "subset_costs", + [](SetCoverModel& model) -> const std::vector& { + return model.subset_costs().get(); + }) + .def("columns", + [](SetCoverModel& model) -> std::vector> { + // Due to the inner StrongVector, make a deep copy. Anyway, + // columns() returns a const ref, so this keeps the semantics, not + // the efficiency. + std::vector> columns; + std::transform( + model.columns().begin(), model.columns().end(), + columns.begin(), + [](const SparseColumn& column) -> std::vector { + std::vector col(column.size()); + std::transform(column.begin(), column.end(), col.begin(), + [](ElementIndex element) -> BaseInt { + return element.value(); + }); + return col; + }); + return columns; + }) + .def("rows", + [](SetCoverModel& model) -> std::vector> { + // Due to the inner StrongVector, make a deep copy. Anyway, + // rows() returns a const ref, so this keeps the semantics, not + // the efficiency. + std::vector> rows; + std::transform( + model.rows().begin(), model.rows().end(), rows.begin(), + [](const SparseRow& row) -> std::vector { + std::vector r(row.size()); + std::transform(row.begin(), row.end(), r.begin(), + [](SubsetIndex element) -> BaseInt { + return element.value(); + }); + return r; + }); + return rows; + }) + .def("row_view_is_valid", &SetCoverModel::row_view_is_valid) + .def("SubsetRange", + [](SetCoverModel& model) { + return make_iterator<>(IntIterator::begin(model.num_subsets()), + IntIterator::end(model.num_subsets())); + }) + .def("ElementRange", + [](SetCoverModel& model) { + return make_iterator<>(IntIterator::begin(model.num_elements()), + IntIterator::end(model.num_elements())); + }) + .def_property_readonly("all_subsets", + [](SetCoverModel& model) -> std::vector { + std::vector subsets; + std::transform( + model.all_subsets().begin(), + model.all_subsets().end(), subsets.begin(), + [](const SubsetIndex element) -> BaseInt { + return element.value(); + }); + return subsets; + }) .def("add_empty_subset", &SetCoverModel::AddEmptySubset, arg("cost")) .def( "add_element_to_last_subset", - [](SetCoverModel& model, int element) { + [](SetCoverModel& model, BaseInt element) { model.AddElementToLastSubset(element); }, arg("element")) .def( "set_subset_cost", - [](SetCoverModel& model, int subset, double cost) { + [](SetCoverModel& model, BaseInt subset, double cost) { model.SetSubsetCost(subset, cost); }, arg("subset"), arg("cost")) .def( "add_element_to_subset", - [](SetCoverModel& model, int element, int subset) { + [](SetCoverModel& model, BaseInt element, BaseInt subset) { model.AddElementToSubset(element, subset); }, arg("subset"), arg("cost")) + .def("create_sparse_row_view", &SetCoverModel::CreateSparseRowView) .def("compute_feasibility", &SetCoverModel::ComputeFeasibility) .def( "reserve_num_subsets", - [](SetCoverModel& model, int num_subsets) { + [](SetCoverModel& model, BaseInt num_subsets) { model.ReserveNumSubsets(num_subsets); }, arg("num_subsets")) .def( "reserve_num_elements_in_subset", - [](SetCoverModel& model, int num_elements, int subset) { + [](SetCoverModel& model, BaseInt num_elements, BaseInt subset) { model.ReserveNumElementsInSubset(num_elements, subset); }, arg("num_elements"), arg("subset")) .def("export_model_as_proto", &SetCoverModel::ExportModelAsProto) - .def("import_model_from_proto", &SetCoverModel::ImportModelFromProto); - // TODO(user): add support for subset_costs, columns, rows, - // row_view_is_valid, SubsetRange, ElementRange, all_subsets, - // CreateSparseRowView, ComputeCostStats, ComputeRowStats, - // ComputeColumnStats, ComputeRowDeciles, ComputeColumnDeciles. + .def("import_model_from_proto", &SetCoverModel::ImportModelFromProto) + .def("compute_cost_stats", &SetCoverModel::ComputeCostStats) + .def("compute_row_stats", &SetCoverModel::ComputeRowStats) + .def("compute_column_stats", &SetCoverModel::ComputeColumnStats) + .def("compute_row_deciles", &SetCoverModel::ComputeRowDeciles) + .def("compute_column_deciles", &SetCoverModel::ComputeRowDeciles); // TODO(user): wrap IntersectingSubsetsIterator. // set_cover_invariant.h + py::class_(m, "SetCoverDecision") + .def(py::init<>()) + .def(py::init([](BaseInt subset, bool value) -> SetCoverDecision* { + return new SetCoverDecision(SubsetIndex(subset), value); + }), + arg("subset"), arg("value")) + .def("subset", + [](const SetCoverDecision& decision) -> BaseInt { + return decision.subset().value(); + }) + .def("decision", &SetCoverDecision::decision); + py::class_(m, "SetCoverInvariant") .def(py::init()) .def("initialize", &SetCoverInvariant::Initialize) @@ -118,44 +262,90 @@ PYBIND11_MODULE(set_cover, m) { }) .def("cost", &SetCoverInvariant::cost) .def("num_uncovered_elements", &SetCoverInvariant::num_uncovered_elements) + .def("is_selected", + [](SetCoverInvariant& invariant) -> std::vector { + return invariant.is_selected().get(); + }) + .def("num_free_elements", + [](SetCoverInvariant& invariant) -> std::vector { + return invariant.num_free_elements().get(); + }) + .def("num_coverage_le_1_elements", + [](SetCoverInvariant& invariant) -> std::vector { + return invariant.num_coverage_le_1_elements().get(); + }) + .def("coverage", + [](SetCoverInvariant& invariant) -> std::vector { + return invariant.coverage().get(); + }) + .def( + "compute_coverage_in_focus", + [](SetCoverInvariant& invariant, + const std::vector& focus) -> std::vector { + return invariant + .ComputeCoverageInFocus(VectorIntToVectorSubsetIndex(focus)) + .get(); + }, + arg("focus")) + .def("is_redundant", + [](SetCoverInvariant& invariant) -> std::vector { + return invariant.is_redundant().get(); + }) + .def("trace", &SetCoverInvariant::trace) .def("clear_trace", &SetCoverInvariant::ClearTrace) .def("clear_removability_information", &SetCoverInvariant::ClearRemovabilityInformation) + .def("new_removable_subsets", &SetCoverInvariant::new_removable_subsets) + .def("new_non_removable_subsets", + &SetCoverInvariant::new_non_removable_subsets) .def("compress_trace", &SetCoverInvariant::CompressTrace) + .def("load_solution", + [](SetCoverInvariant& invariant, + const std::vector& solution) -> void { + SubsetBoolVector sol(solution.begin(), solution.end()); + return invariant.LoadSolution(sol); + }) .def("check_consistency", &SetCoverInvariant::CheckConsistency) + .def( + "compute_is_redundant", + [](SetCoverInvariant& invariant, BaseInt subset) -> bool { + return invariant.ComputeIsRedundant(SubsetIndex(subset)); + }, + arg("subset")) + .def("make_fully_updated", &SetCoverInvariant::MakeFullyUpdated) .def( "flip", - [](SetCoverInvariant& invariant, int subset) { + [](SetCoverInvariant& invariant, BaseInt subset) { invariant.Flip(SubsetIndex(subset)); }, arg("subset")) .def( "flip_and_fully_update", - [](SetCoverInvariant& invariant, int subset) { + [](SetCoverInvariant& invariant, BaseInt subset) { invariant.FlipAndFullyUpdate(SubsetIndex(subset)); }, arg("subset")) .def( "select", - [](SetCoverInvariant& invariant, int subset) { + [](SetCoverInvariant& invariant, BaseInt subset) { invariant.Select(SubsetIndex(subset)); }, arg("subset")) .def( "select_and_fully_update", - [](SetCoverInvariant& invariant, int subset) { + [](SetCoverInvariant& invariant, BaseInt subset) { invariant.SelectAndFullyUpdate(SubsetIndex(subset)); }, arg("subset")) .def( "deselect", - [](SetCoverInvariant& invariant, int subset) { + [](SetCoverInvariant& invariant, BaseInt subset) { invariant.Deselect(SubsetIndex(subset)); }, arg("subset")) .def( "deselect_and_fully_update", - [](SetCoverInvariant& invariant, int subset) { + [](SetCoverInvariant& invariant, BaseInt subset) { invariant.DeselectAndFullyUpdate(SubsetIndex(subset)); }, arg("subset")) @@ -163,10 +353,6 @@ PYBIND11_MODULE(set_cover, m) { &SetCoverInvariant::ExportSolutionAsProto) .def("import_solution_from_proto", &SetCoverInvariant::ImportSolutionFromProto); - // TODO(user): add support for is_selected, num_free_elements, - // num_coverage_le_1_elements, coverage, ComputeCoverageInFocus, - // is_redundant, trace, new_removable_subsets, new_non_removable_subsets, - // LoadSolution, ComputeIsRedundant. // set_cover_heuristics.h py::class_(m, "Preprocessor") @@ -175,30 +361,57 @@ PYBIND11_MODULE(set_cover, m) { [](Preprocessor& heuristic) -> bool { return heuristic.NextSolution(); }) + .def("next_solution", + [](Preprocessor& heuristic, + const std::vector& focus) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus)); + }) .def("num_columns_fixed_by_singleton_row", &Preprocessor::num_columns_fixed_by_singleton_row); - // TODO(user): add support for focus argument. py::class_(m, "TrivialSolutionGenerator") .def(py::init()) - .def("next_solution", [](TrivialSolutionGenerator& heuristic) -> bool { - return heuristic.NextSolution(); - }); - // TODO(user): add support for focus argument. + .def("next_solution", + [](TrivialSolutionGenerator& heuristic) -> bool { + return heuristic.NextSolution(); + }) + .def("next_solution", + [](TrivialSolutionGenerator& heuristic, + const std::vector& focus) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus)); + }); py::class_(m, "RandomSolutionGenerator") .def(py::init()) - .def("next_solution", [](RandomSolutionGenerator& heuristic) -> bool { - return heuristic.NextSolution(); - }); - // TODO(user): add support for focus argument. + .def("next_solution", + [](RandomSolutionGenerator& heuristic) -> bool { + return heuristic.NextSolution(); + }) + .def("next_solution", + [](RandomSolutionGenerator& heuristic, + const std::vector& focus) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus)); + }); py::class_(m, "GreedySolutionGenerator") .def(py::init()) - .def("next_solution", [](GreedySolutionGenerator& heuristic) -> bool { - return heuristic.NextSolution(); - }); - // TODO(user): add support for focus and cost arguments. + .def("next_solution", + [](GreedySolutionGenerator& heuristic) -> bool { + return heuristic.NextSolution(); + }) + .def("next_solution", + [](GreedySolutionGenerator& heuristic, + const std::vector& focus) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus)); + }) + .def("next_solution", + [](GreedySolutionGenerator& heuristic, + const std::vector& focus, + const std::vector& costs) -> bool { + return heuristic.NextSolution( + VectorIntToVectorSubsetIndex(focus), + VectorDoubleToSubsetCostVector(costs)); + }); py::class_(m, "ElementDegreeSolutionGenerator") @@ -206,16 +419,40 @@ PYBIND11_MODULE(set_cover, m) { .def("next_solution", [](ElementDegreeSolutionGenerator& heuristic) -> bool { return heuristic.NextSolution(); + }) + .def("next_solution", + [](ElementDegreeSolutionGenerator& heuristic, + const std::vector& focus) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus)); + }) + .def("next_solution", + [](ElementDegreeSolutionGenerator& heuristic, + const std::vector& focus, + const std::vector& costs) -> bool { + return heuristic.NextSolution( + VectorIntToVectorSubsetIndex(focus), + VectorDoubleToSubsetCostVector(costs)); }); - // TODO(user): add support for focus and cost arguments. py::class_(m, "SteepestSearch") .def(py::init()) .def("next_solution", [](SteepestSearch& heuristic, int num_iterations) -> bool { return heuristic.NextSolution(num_iterations); + }) + .def("next_solution", + [](SteepestSearch& heuristic, const std::vector& focus, + int num_iterations) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus), + num_iterations); + }) + .def("next_solution", + [](SteepestSearch& heuristic, const std::vector& focus, + const std::vector& costs, int num_iterations) -> bool { + return heuristic.NextSolution( + VectorIntToVectorSubsetIndex(focus), + VectorDoubleToSubsetCostVector(costs), num_iterations); }); - // TODO(user): add support for focus and cost arguments. py::class_(m, "GuidedLocalSearch") .def(py::init()) @@ -223,12 +460,92 @@ PYBIND11_MODULE(set_cover, m) { .def("next_solution", [](GuidedLocalSearch& heuristic, int num_iterations) -> bool { return heuristic.NextSolution(num_iterations); + }) + .def("next_solution", + [](GuidedLocalSearch& heuristic, const std::vector& focus, + int num_iterations) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus), + num_iterations); }); - // TODO(user): add support for focus and cost arguments. - // TODO(user): add support for ClearRandomSubsets, ClearRandomSubsets, - // ClearMostCoveredElements, ClearMostCoveredElements, TabuList, - // GuidedTabuSearch. + // Specialization for T = SubsetIndex ~= BaseInt (aka int for Python, whatever + // the size of BaseInt). + // A base type doesn't work, because TabuList uses `T::value` in the + // constructor. + py::class_>(m, "TabuList") + .def(py::init([](int size) -> TabuList* { + return new TabuList(SubsetIndex(size)); + }), + arg("size")) + .def("size", &TabuList::size) + .def("init", &TabuList::Init, arg("size")) + .def( + "add", + [](TabuList& list, BaseInt t) -> void { + return list.Add(SubsetIndex(t)); + }, + arg("t")) + .def( + "contains", + [](TabuList& list, BaseInt t) -> bool { + return list.Contains(SubsetIndex(t)); + }, + arg("t")); + + py::class_(m, "GuidedTabuSearch") + .def(py::init()) + .def("initialize", &GuidedTabuSearch::Initialize) + .def("next_solution", + [](GuidedTabuSearch& heuristic, int num_iterations) -> bool { + return heuristic.NextSolution(num_iterations); + }) + .def("next_solution", + [](GuidedTabuSearch& heuristic, const std::vector& focus, + int num_iterations) -> bool { + return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus), + num_iterations); + }) + .def("get_lagrangian_factor", &GuidedTabuSearch::SetLagrangianFactor, + arg("factor")) + .def("set_lagrangian_factor", &GuidedTabuSearch::GetLagrangianFactor) + .def("set_epsilon", &GuidedTabuSearch::SetEpsilon, arg("r")) + .def("get_epsilon", &GuidedTabuSearch::GetEpsilon) + .def("set_penalty_factor", &GuidedTabuSearch::SetPenaltyFactor, + arg("factor")) + .def("get_penalty_factor", &GuidedTabuSearch::GetPenaltyFactor) + .def("set_tabu_list_size", &GuidedTabuSearch::SetTabuListSize, + arg("size")) + .def("get_tabu_list_size", &GuidedTabuSearch::GetTabuListSize); + + m.def( + "clear_random_subsets", + [](BaseInt num_subsets, SetCoverInvariant* inv) -> std::vector { + const std::vector cleared = + ClearRandomSubsets(num_subsets, inv); + return {cleared.begin(), cleared.end()}; + }); + m.def("clear_random_subsets", + [](const std::vector& focus, BaseInt num_subsets, + SetCoverInvariant* inv) -> std::vector { + const std::vector cleared = ClearRandomSubsets( + VectorIntToVectorSubsetIndex(focus), num_subsets, inv); + return {cleared.begin(), cleared.end()}; + }); + + m.def( + "clear_most_covered_elements", + [](BaseInt num_subsets, SetCoverInvariant* inv) -> std::vector { + const std::vector cleared = + ClearMostCoveredElements(num_subsets, inv); + return {cleared.begin(), cleared.end()}; + }); + m.def("clear_most_covered_elements", + [](const std::vector& focus, BaseInt num_subsets, + SetCoverInvariant* inv) -> std::vector { + const std::vector cleared = ClearMostCoveredElements( + VectorIntToVectorSubsetIndex(focus), num_subsets, inv); + return {cleared.begin(), cleared.end()}; + }); // set_cover_reader.h m.def("read_beasly_set_cover_problem", &ReadBeasleySetCoverProblem); diff --git a/ortools/algorithms/samples/set_cover.cc b/ortools/algorithms/samples/set_cover.cc new file mode 100644 index 0000000000..c9feae012d --- /dev/null +++ b/ortools/algorithms/samples/set_cover.cc @@ -0,0 +1,65 @@ +// Copyright 2010-2024 Google LLC +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// [START program] +// [START import] +#include + +#include "ortools/algorithms/set_cover_heuristics.h" +#include "ortools/algorithms/set_cover_invariant.h" +#include "ortools/algorithms/set_cover_model.h" +#include "ortools/base/logging.h" +// [END import] + +namespace operations_research { + +void SimpleSetCoverProgram() { + // [START data] + SetCoverModel model; + model.AddEmptySubset(2.0); + model.AddElementToLastSubset(0); + model.AddEmptySubset(2.0); + model.AddElementToLastSubset(1); + model.AddEmptySubset(1.0); + model.AddElementToLastSubset(0); + model.AddElementToLastSubset(1); + // [END data] + + // [START solve] + SetCoverInvariant inv(&model); + GreedySolutionGenerator greedy(&inv); + bool found_solution = greedy.NextSolution(); + if (!found_solution) { + LOG(INFO) << "No solution found by the greedy heuristic."; + return; + } + SetCoverSolutionResponse solution = inv.ExportSolutionAsProto(); + // [END solve] + + // [START print_solution] + LOG(INFO) << "Total cost: " << solution.cost(); // == inv.cost() + LOG(INFO) << "Total number of selected subsets: " << solution.num_subsets(); + LOG(INFO) << "Chosen subsets:"; + for (int i = 0; i < solution.subset_size(); ++i) { + LOG(INFO) << " " << solution.subset(i); + } + // [END print_solution] +} + +} // namespace operations_research + +int main() { + operations_research::SimpleSetCoverProgram(); + return EXIT_SUCCESS; +} +// [END program] diff --git a/ortools/algorithms/samples/set_cover.py b/ortools/algorithms/samples/set_cover.py new file mode 100755 index 0000000000..aab98c6b0f --- /dev/null +++ b/ortools/algorithms/samples/set_cover.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# Copyright 2010-2024 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A simple set-covering problem.""" + +# [START program] +# [START import] +from ortools.algorithms.python import set_cover +# [END import] + + +def main(): + # [START data] + model = set_cover.SetCoverModel() + model.add_empty_subset(2.0) + model.add_element_to_last_subset(0) + model.add_empty_subset(2.0) + model.add_element_to_last_subset(1) + model.add_empty_subset(1.0) + model.add_element_to_last_subset(0) + model.add_element_to_last_subset(1) + # [END data] + + # [START solve] + inv = set_cover.SetCoverInvariant(model) + greedy = set_cover.GreedySolutionGenerator(inv) + has_found = greedy.next_solution() + if not has_found: + print("No solution found by the greedy heuristic.") + return + solution = inv.export_solution_as_proto() + # [END solve] + + # [START print_solution] + print(f"Total cost: {solution.cost}") # == inv.cost() + print(f"Total number of selected subsets: {solution.num_subsets}") + print("Chosen subsets:") + for subset in solution.subset: + print(f" {subset}") + # [END print_solution] + + +if __name__ == "__main__": + main() +# [END program] diff --git a/ortools/algorithms/samples/simple_knapsack_program.cc b/ortools/algorithms/samples/simple_knapsack_program.cc index 64a80d3a46..d9fe9e7091 100644 --- a/ortools/algorithms/samples/simple_knapsack_program.cc +++ b/ortools/algorithms/samples/simple_knapsack_program.cc @@ -15,12 +15,14 @@ // [START import] #include #include +#include #include #include #include #include #include "ortools/algorithms/knapsack_solver.h" +#include "ortools/base/logging.h" // [END import] namespace operations_research { diff --git a/ortools/algorithms/set_cover_heuristics.cc b/ortools/algorithms/set_cover_heuristics.cc index 63c7fe3066..4208c90e17 100644 --- a/ortools/algorithms/set_cover_heuristics.cc +++ b/ortools/algorithms/set_cover_heuristics.cc @@ -544,23 +544,23 @@ bool GuidedLocalSearch::NextSolution(absl::Span focus, } namespace { -void SampleSubsets(std::vector* list, std::size_t num_subsets) { - num_subsets = std::min(num_subsets, list->size()); +void SampleSubsets(std::vector* list, BaseInt num_subsets) { + num_subsets = std::min(num_subsets, static_cast(list->size())); CHECK_GE(num_subsets, 0); std::shuffle(list->begin(), list->end(), absl::BitGen()); list->resize(num_subsets); } } // namespace -std::vector ClearRandomSubsets(std::size_t num_subsets, +std::vector ClearRandomSubsets(BaseInt num_subsets, SetCoverInvariant* inv) { return ClearRandomSubsets(inv->model()->all_subsets(), num_subsets, inv); } std::vector ClearRandomSubsets(absl::Span focus, - std::size_t num_subsets, + BaseInt num_subsets, SetCoverInvariant* inv) { - num_subsets = std::min(num_subsets, focus.size()); + num_subsets = std::min(num_subsets, static_cast(focus.size())); CHECK_GE(num_subsets, 0); std::vector chosen_indices; for (const SubsetIndex subset : focus) { @@ -569,7 +569,7 @@ std::vector ClearRandomSubsets(absl::Span focus, } } SampleSubsets(&chosen_indices, num_subsets); - std::size_t num_deselected = 0; + BaseInt num_deselected = 0; for (const SubsetIndex subset : chosen_indices) { inv->Deselect(subset); ++num_deselected; @@ -585,14 +585,14 @@ std::vector ClearRandomSubsets(absl::Span focus, return chosen_indices; } -std::vector ClearMostCoveredElements(std::size_t max_num_subsets, +std::vector ClearMostCoveredElements(BaseInt max_num_subsets, SetCoverInvariant* inv) { return ClearMostCoveredElements(inv->model()->all_subsets(), max_num_subsets, inv); } std::vector ClearMostCoveredElements( - absl::Span focus, std::size_t max_num_subsets, + absl::Span focus, BaseInt max_num_subsets, SetCoverInvariant* inv) { // This is the vector we will return. std::vector sampled_subsets; @@ -625,7 +625,8 @@ std::vector ClearMostCoveredElements( // Actually *sample* sampled_subset. // TODO(user): find another algorithm if necessary. std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen()); - sampled_subsets.resize(std::min(sampled_subsets.size(), max_num_subsets)); + sampled_subsets.resize( + std::min(static_cast(sampled_subsets.size()), max_num_subsets)); // Testing has shown that sorting sampled_subsets is not necessary. // Now, un-select the subset in sampled_subsets. diff --git a/ortools/algorithms/set_cover_heuristics.h b/ortools/algorithms/set_cover_heuristics.h index 04d3b31b9f..154724cf61 100644 --- a/ortools/algorithms/set_cover_heuristics.h +++ b/ortools/algorithms/set_cover_heuristics.h @@ -477,12 +477,12 @@ class GuidedLocalSearch { // solution. There can be more than num_subsets variables cleared because the // intersecting subsets are also removed from the solution. Returns a list of // subset indices that can be reused as a focus. -std::vector ClearRandomSubsets(std::size_t num_subsets, +std::vector ClearRandomSubsets(BaseInt num_subsets, SetCoverInvariant* inv); // Same as above, but clears the subset indices in focus. std::vector ClearRandomSubsets(absl::Span focus, - std::size_t num_subsets, + BaseInt num_subsets, SetCoverInvariant* inv); // Clears the variables (subsets) that cover the most covered elements. This is @@ -490,12 +490,12 @@ std::vector ClearRandomSubsets(absl::Span focus, // randomly. // Returns the list of the chosen subset indices. // This indices can then be used ax a focus. -std::vector ClearMostCoveredElements(std::size_t num_subsets, +std::vector ClearMostCoveredElements(BaseInt num_subsets, SetCoverInvariant* inv); // Same as above, but clears the subset indices in focus. std::vector ClearMostCoveredElements( - absl::Span focus, std::size_t num_subsets, + absl::Span focus, BaseInt num_subsets, SetCoverInvariant* inv); } // namespace operations_research diff --git a/ortools/algorithms/set_cover_lagrangian.cc b/ortools/algorithms/set_cover_lagrangian.cc index 18dea932df..f20a98a051 100644 --- a/ortools/algorithms/set_cover_lagrangian.cc +++ b/ortools/algorithms/set_cover_lagrangian.cc @@ -20,6 +20,7 @@ #include #include "absl/log/check.h" +#include "absl/synchronization/blocking_counter.h" #include "ortools/algorithms/adjustable_k_ary_heap.h" #include "ortools/algorithms/set_cover_invariant.h" #include "ortools/algorithms/set_cover_model.h" @@ -74,7 +75,7 @@ namespace { // TODO(user): Investigate. Cost ScalarProduct(const SparseColumn& column, const ElementCostVector& dual) { Cost result = 0.0; - for (ColumnEntryIndex pos(0); pos.value() < column.size(); ++pos) { + for (const ColumnEntryIndex pos : column.index_range()) { result += dual[column[pos]]; } return result; @@ -82,52 +83,49 @@ Cost ScalarProduct(const SparseColumn& column, const ElementCostVector& dual) { // Computes the reduced costs for a subset of subsets. // This is a helper function for ParallelComputeReducedCosts(). -// It is called on a slice of subsets, defined by start and end. +// It is called on a slice of subsets, defined by slice_start and slice_end. // The reduced costs are computed using the multipliers vector. // The columns of the subsets are given by the columns view. // The result is stored in reduced_costs. -void FillReducedCostsSlice(SubsetIndex start, SubsetIndex end, +void FillReducedCostsSlice(SubsetIndex slice_start, SubsetIndex slice_end, const SubsetCostVector& costs, const ElementCostVector& multipliers, const SparseColumnView& columns, SubsetCostVector* reduced_costs) { - for (SubsetIndex subset = start; subset < end; ++subset) { + for (SubsetIndex subset = slice_start; subset < slice_end; ++subset) { (*reduced_costs)[subset] = costs[subset] - ScalarProduct(columns[subset], multipliers); } } + +BaseInt BlockSize(BaseInt size, int num_threads) { + return 1 + (size - 1) / num_threads; +} } // namespace // Computes the reduced costs for all subsets in parallel using ThreadPool. SubsetCostVector SetCoverLagrangian::ParallelComputeReducedCosts( const SubsetCostVector& costs, const ElementCostVector& multipliers) const { const SubsetIndex num_subsets(model_.num_subsets()); - // TODO(user): compute a close-to-optimal k-subset partitioning. - const SubsetIndex block_size = - SubsetIndex(1) + num_subsets / num_threads_; // [***] Arbitrary choice. const SparseColumnView& columns = model_.columns(); SubsetCostVector reduced_costs(num_subsets); - ThreadPool thread_pool("ParallelComputeReducedCosts", num_threads_); - thread_pool.StartWorkers(); - { - // TODO(user): check how costly it is to create a new ThreadPool. - // TODO(user): use a queue of subsets to process? instead of a fixed range. - - // This parallelization is not very efficient, because all the threads - // use the same costs vector. Maybe it should be local to the thread. - // It's unclear whether sharing columns and costs is better than having - // each thread use its own partial copy. - // Finally, it might be better to use a queue of subsets to process, instead - // of a fixed range. - for (SubsetIndex start(0); start < num_subsets; start += block_size) { - thread_pool.Schedule([start, block_size, num_subsets, &costs, - &multipliers, &columns, &reduced_costs]() { - const SubsetIndex end = std::min(start + block_size, num_subsets); - FillReducedCostsSlice(start, end, costs, multipliers, columns, - &reduced_costs); - }); - } - } // Synchronize all the threads. This is equivalent to a wait. + // TODO(user): compute a close-to-optimal k-subset partitioning of the columns + // based on their sizes. [***] + const SubsetIndex block_size(BlockSize(num_subsets.value(), num_threads_)); + absl::BlockingCounter num_threads_running(num_threads_); + SubsetIndex slice_start(0); + for (int thread_index = 0; thread_index < num_threads_; ++thread_index) { + const SubsetIndex slice_end = + std::min(slice_start + block_size, num_subsets); + thread_pool_->Schedule([&num_threads_running, slice_start, slice_end, + &costs, &multipliers, &columns, &reduced_costs]() { + FillReducedCostsSlice(slice_start, slice_end, costs, multipliers, columns, + &reduced_costs); + num_threads_running.DecrementCount(); + }); + slice_start = slice_end; + } + num_threads_running.Wait(); return reduced_costs; } @@ -147,14 +145,14 @@ SubsetCostVector SetCoverLagrangian::ComputeReducedCosts( namespace { // Helper function to compute the subgradient. -// It fills a slice of the subgradient vector from indices start to end. -// This is a helper function for ParallelComputeSubgradient(). -// The subgradient is computed using the reduced costs vector. -void FillSubgradientSlice(SubsetIndex start, SubsetIndex end, +// It fills a slice of the subgradient vector from indices slice_start to +// slice_end. This is a helper function for ParallelComputeSubgradient(). The +// subgradient is computed using the reduced costs vector. +void FillSubgradientSlice(SubsetIndex slice_start, SubsetIndex slice_end, const SparseColumnView& columns, const SubsetCostVector& reduced_costs, ElementCostVector* subgradient) { - for (SubsetIndex subset(start); subset < end; ++subset) { + for (SubsetIndex subset(slice_start); subset < slice_end; ++subset) { if (reduced_costs[subset] < 0.0) { for (const ElementIndex element : columns[subset]) { (*subgradient)[element] -= 1.0; @@ -181,8 +179,6 @@ ElementCostVector SetCoverLagrangian::ComputeSubgradient( ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient( const SubsetCostVector& reduced_costs) const { const SubsetIndex num_subsets(model_.num_subsets()); - const SubsetIndex block_size = - SubsetIndex(1) + num_subsets / num_threads_; // [***] const SparseColumnView& columns = model_.columns(); ElementCostVector subgradient(model_.num_elements(), 1.0); // The subgradient has one component per element, each thread processes @@ -191,20 +187,22 @@ ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient( // although this might be less well-balanced. std::vector subgradients( num_threads_, ElementCostVector(model_.num_elements())); - ThreadPool thread_pool("ParallelComputeSubgradient", num_threads_); - thread_pool.StartWorkers(); - { - int thread_index = 0; - for (SubsetIndex start(0); start < num_subsets; - start += block_size, ++thread_index) { - thread_pool.Schedule([start, block_size, num_subsets, &reduced_costs, - &columns, &subgradients, thread_index]() { - const SubsetIndex end = std::min(start + block_size, num_subsets); - FillSubgradientSlice(start, end, columns, reduced_costs, - &subgradients[thread_index]); - }); - } - } // Synchronize all the threads. + absl::BlockingCounter num_threads_running(num_threads_); + const SubsetIndex block_size(BlockSize(num_subsets.value(), num_threads_)); + SubsetIndex slice_start(0); + for (int thread_index = 0; thread_index < num_threads_; ++thread_index) { + const SubsetIndex slice_end = + std::min(slice_start + block_size, num_subsets); + thread_pool_->Schedule([&num_threads_running, slice_start, slice_end, + &reduced_costs, &columns, &subgradients, + thread_index]() { + FillSubgradientSlice(slice_start, slice_end, columns, reduced_costs, + &subgradients[thread_index]); + num_threads_running.DecrementCount(); + }); + slice_start = slice_end; + } + num_threads_running.Wait(); for (int thread_index = 0; thread_index < num_threads_; ++thread_index) { for (const ElementIndex element : model_.ElementRange()) { subgradient[element] += subgradients[thread_index][element]; @@ -216,17 +214,17 @@ ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient( namespace { // Helper function to compute the value of the Lagrangian. // This is a helper function for ParallelComputeLagrangianValue(). -// It is called on a slice of elements, defined by start and end. +// It is called on a slice of elements, defined by slice_start and slice_end. // The value of the Lagrangian is computed using the reduced costs vector and // the multipliers vector. // The result is stored in lagrangian_value. -void FillLagrangianValueSlice(SubsetIndex start, SubsetIndex end, +void FillLagrangianValueSlice(SubsetIndex slice_start, SubsetIndex slice_end, const SubsetCostVector& reduced_costs, Cost* lagrangian_value) { - // This is min \sum_{j \in N} c_j(u) x_j. This captures the remark above (**), - // taking into account the possible values for x_j, and using them to minimize - // the terms. - for (SubsetIndex subset(start); subset < end; ++subset) { + // This is min \sum_{j \in N} c_j(u) x_j. This captures the remark above + // (**), taking into account the possible values for x_j, and using them to + // minimize the terms. + for (SubsetIndex subset(slice_start); subset < slice_end; ++subset) { if (reduced_costs[subset] < 0.0) { *lagrangian_value += reduced_costs[subset]; } @@ -258,30 +256,31 @@ Cost SetCoverLagrangian::ComputeLagrangianValue( Cost SetCoverLagrangian::ParallelComputeLagrangianValue( const SubsetCostVector& reduced_costs, const ElementCostVector& multipliers) const { - const SubsetIndex num_subsets(model_.num_subsets()); - const SubsetIndex block_size = - SubsetIndex(1) + num_subsets / num_threads_; // [***] Arbitrary. Cost lagrangian_value = 0.0; // This is \sum{i \in M} u_i. - for (const Cost u : multipliers) { lagrangian_value += u; } std::vector lagrangian_values(num_threads_, 0.0); - ThreadPool thread_pool("ParallelComputeLagrangianValue", num_threads_); - thread_pool.StartWorkers(); - { - int thread_index = 0; - for (SubsetIndex start(0); start < num_subsets; start += block_size) { - thread_pool.Schedule([start, block_size, num_subsets, thread_index, - &reduced_costs, &lagrangian_values]() { - const SubsetIndex end = std::min(start + block_size, num_subsets); - FillLagrangianValueSlice(start, end, reduced_costs, - &lagrangian_values[thread_index]); - }); - ++thread_index; - } - } // Synchronize all the threads. + absl::BlockingCounter num_threads_running(num_threads_); + const SubsetIndex block_size(BlockSize(model_.num_subsets(), num_threads_)); + const SubsetIndex num_subsets(model_.num_subsets()); + SubsetIndex slice_start(0); + for (int thread_index = 0; thread_index < num_threads_; ++thread_index) { + const SubsetIndex slice_end = + std::min(slice_start + block_size, num_subsets); + thread_pool_->Schedule([&num_threads_running, slice_start, block_size, + num_subsets, thread_index, &reduced_costs, + &lagrangian_values]() { + const SubsetIndex slice_end = + std::min(slice_start + block_size, num_subsets); + FillLagrangianValueSlice(slice_start, slice_end, reduced_costs, + &lagrangian_values[thread_index]); + num_threads_running.DecrementCount(); + }); + slice_start = slice_end; + } + num_threads_running.Wait(); for (const Cost l : lagrangian_values) { lagrangian_value += l; } @@ -290,8 +289,8 @@ Cost SetCoverLagrangian::ParallelComputeLagrangianValue( // Perform a subgradient step. // In the general case, for an Integer Program A.x <=b, the Lagragian -// multipliers vector at step k+1 is defined as: u^{k+1} = u^k + t_k (A x^k - b) -// with term t_k = lambda_k * (UB - L(u^k)) / |A x^k - b|^2. +// multipliers vector at step k+1 is defined as: u^{k+1} = u^k + t_k (A x^k - +// b) with term t_k = lambda_k * (UB - L(u^k)) / |A x^k - b|^2. // |.| is the 2-norm (i.e. Euclidean) // In our case, the problem A x <= b is in the form A x >= 1. We need to // replace A x - b by s_i(u) = 1 - sum_{j \in J_i} x_j(u). @@ -343,9 +342,9 @@ void SetCoverLagrangian::ParallelUpdateMultipliers( step_size * (upper_bound - lagrangian_value) / subgradient_square_norm; for (const ElementIndex element : model_.ElementRange()) { // Avoid multipliers to go negative and to go through the roof. 1e6 chosen - // arbitrarily. [***] + const Cost kRoof = 1e6; // Arbitrary value, from [1]. (*multipliers)[element] = std::clamp( - (*multipliers)[element] + factor * subgradient[element], 0.0, 1e6); + (*multipliers)[element] + factor * subgradient[element], 0.0, kRoof); } } @@ -503,9 +502,9 @@ SetCoverLagrangian::ComputeLowerBound(const SubsetCostVector& costs, for (int iter = 0; iter < 1000; ++iter) { reduced_costs = ParallelComputeReducedCosts(costs, multipliers); const Cost lagrangian_value = - ComputeLagrangianValue(reduced_costs, multipliers); - UpdateMultipliers(step_size, lagrangian_value, upper_bound, reduced_costs, - &multipliers); + ParallelComputeLagrangianValue(reduced_costs, multipliers); + ParallelUpdateMultipliers(step_size, lagrangian_value, upper_bound, + reduced_costs, &multipliers); lower_bound = std::max(lower_bound, lagrangian_value); // step_size should be updated like this. For the time besing, we keep the // step size, because the implementation of the rest is not adequate yet diff --git a/ortools/algorithms/set_cover_lagrangian.h b/ortools/algorithms/set_cover_lagrangian.h index aa63627ad1..9e946b173a 100644 --- a/ortools/algorithms/set_cover_lagrangian.h +++ b/ortools/algorithms/set_cover_lagrangian.h @@ -15,6 +15,7 @@ #define OR_TOOLS_ALGORITHMS_SET_COVER_LAGRANGIAN_H_ #include +#include #include #include @@ -44,7 +45,12 @@ namespace operations_research { class SetCoverLagrangian { public: explicit SetCoverLagrangian(SetCoverInvariant* inv, int num_threads = 1) - : inv_(inv), model_(*inv->model()), num_threads_(num_threads) {} + : inv_(inv), + model_(*inv->model()), + num_threads_(num_threads), + thread_pool_(new ThreadPool(num_threads)) { + thread_pool_->StartWorkers(); + } // Returns true if a solution was found. // TODO(user): Add time-outs and exit with a partial solution. This seems @@ -137,6 +143,9 @@ class SetCoverLagrangian { // The number of threads to use for parallelization. int num_threads_; + // The thread pool used for parallelization. + std::unique_ptr thread_pool_; + // Total (scalar) Lagrangian cost. Cost lagrangian_; diff --git a/ortools/algorithms/set_cover_model.cc b/ortools/algorithms/set_cover_model.cc index f7b5faf24c..e67718b197 100644 --- a/ortools/algorithms/set_cover_model.cc +++ b/ortools/algorithms/set_cover_model.cc @@ -17,17 +17,161 @@ #include #include #include +#include #include #include +#include #include #include #include "absl/log/check.h" +#include "absl/random/discrete_distribution.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "ortools/algorithms/set_cover.pb.h" #include "ortools/base/logging.h" namespace operations_research { +namespace { + +// Returns a value in [min, min + scaling_factor * (raw_value - min + +// random_term)], where raw_value is drawn from a discrete distribution, and +// random_term is a double drawn uniformly in [0, 1]. +BaseInt DiscreteAffine(absl::BitGen& bitgen, + absl::discrete_distribution& dist, BaseInt min, + double scaling_factor) { + const BaseInt raw_value = dist(bitgen); + const double random_term = absl::Uniform(bitgen, 0, 1.0); + const BaseInt affine_value = + static_cast( + std::floor((raw_value - min + random_term) * scaling_factor)) + + min; + return affine_value; +} + +// For a given view (SparseColumnView or SparseRowView), returns the +// distribution of the sizes of the vectors in the view, which can be used in +// an absl::discrete_distribution. +template +std::tuple> ComputeSizeHistogram( + const View& view) { + BaseInt max_size = 0; + BaseInt min_size = std::numeric_limits::max(); + for (const auto& vec : view) { + const BaseInt size = vec.size(); + min_size = std::min(min_size, size); + max_size = std::max(max_size, size); + } + std::vector weights(max_size + 1, 0); + for (const auto& vec : view) { + const BaseInt size = vec.size(); + ++weights[size]; + } + return {min_size, weights}; +} + +template +std::tuple> +ComputeSizeDistribution(const View& view) { + const auto [min_size, weights] = ComputeSizeHistogram(view); + absl::discrete_distribution dist(weights.begin(), weights.end()); + return {min_size, dist}; +} +} // namespace + +SetCoverModel SetCoverModel::GenerateRandomModelFrom( + const SetCoverModel& seed_model, BaseInt num_elements, BaseInt num_subsets, + double row_scale, double column_scale, double cost_scale) { + SetCoverModel model; + DCHECK_GT(row_scale, 0.0); + DCHECK_GT(column_scale, 0.0); + DCHECK_GT(cost_scale, 0.0); + model.num_elements_ = num_elements; + model.num_nonzeros_ = 0; + model.ReserveNumSubsets(num_subsets); + model.UpdateAllSubsetsList(); + absl::BitGen bitgen; + + // Create the distribution of the cardinalities of the subsets based on the + // histogram of column sizes in the seed model. + auto [min_column_size, column_dist] = + ComputeSizeDistribution(seed_model.columns()); + + // Create the distribution of the degrees of the elements based on the + // histogram of row sizes in the seed model. + auto [min_row_size, row_dist] = ComputeSizeDistribution(seed_model.rows()); + + // Prepare the degrees of the elements in the generated model, and use them + // in a distribution to generate the columns. This ponderates the columns + // towards the elements with higher degrees. ??? + ElementToIntVector degrees(num_elements); + for (ElementIndex element(0); element.value() < num_elements; ++element) { + degrees[element] = + DiscreteAffine(bitgen, row_dist, min_row_size, row_scale); + } + absl::discrete_distribution degree_dist(degrees.begin(), + degrees.end()); + + // Vector indicating whether the generated model covers an element. + ElementBoolVector contains_element(num_elements, false); + + // Number of elements in the generated model, using the above vector. + BaseInt num_elements_covered(0); + + // Loop-local vector indicating whether the currently generated subset + // contains an element. + ElementBoolVector subset_contains_element(num_elements, false); + + for (SubsetIndex subset(0); subset.value() < num_subsets; ++subset) { + const BaseInt cardinality = + DiscreteAffine(bitgen, column_dist, min_column_size, column_scale); + model.columns_[subset].reserve(cardinality); + for (BaseInt iter = 0; iter < cardinality; ++iter) { + int num_tries = 0; + ElementIndex element; + // Choose an element that is not yet in the subset at random with a + // distribution that is proportional to the degree of the element. + do { + element = ElementIndex(degree_dist(bitgen)); + CHECK_LT(element.value(), num_elements); + ++num_tries; + if (num_tries > 10) { + return SetCoverModel(); + } + } while (subset_contains_element[element]); + ++model.num_nonzeros_; + model.columns_[subset].push_back(element); + subset_contains_element[element] = true; + if (!contains_element[element]) { + contains_element[element] = true; + ++num_elements_covered; + } + } + for (const ElementIndex element : model.columns_[subset]) { + subset_contains_element[element] = false; + } + } + CHECK_EQ(num_elements_covered, num_elements); + + // TODO(user): if necessary, use a better distribution for the costs. + // The generation of the costs is done in two steps. First, compute the + // minimum and maximum costs. + Cost min_cost = std::numeric_limits::infinity(); + Cost max_cost = -min_cost; + for (const Cost cost : seed_model.subset_costs()) { + min_cost = std::min(min_cost, cost); + max_cost = std::max(max_cost, cost); + } + // Then, generate random numbers in [min_cost, min_cost + cost_range], where + // cost_range is defined as: + const Cost cost_range = cost_scale * (max_cost - min_cost); + for (Cost& cost : model.subset_costs_) { + cost = min_cost + absl::Uniform(bitgen, 0, cost_range); + } + return model; +} + void SetCoverModel::UpdateAllSubsetsList() { const BaseInt old_size = all_subsets_.size(); DCHECK_LE(old_size, num_subsets()); @@ -92,8 +236,8 @@ void SetCoverModel::AddElementToSubset(ElementIndex element, } // Reserves num_subsets columns in the model. -void SetCoverModel::ReserveNumSubsets(BaseInt number_of_subsets) { - num_subsets_ = std::max(num_subsets_, number_of_subsets); +void SetCoverModel::ReserveNumSubsets(BaseInt num_subsets) { + num_subsets_ = std::max(num_subsets_, num_subsets); columns_.resize(num_subsets_, SparseColumn()); subset_costs_.resize(num_subsets_, 0.0); } @@ -121,8 +265,8 @@ void SetCoverModel::CreateSparseRowView() { rows_.resize(num_elements_, SparseRow()); ElementToIntVector row_sizes(num_elements_, 0); for (const SubsetIndex subset : SubsetRange()) { - // Sort the columns. It's not super-critical to improve performance here as - // this needs to be done only once. + // Sort the columns. It's not super-critical to improve performance here + // as this needs to be done only once. std::sort(columns_[subset].begin(), columns_[subset].end()); for (const ElementIndex element : columns_[subset]) { ++row_sizes[element]; @@ -256,7 +400,7 @@ SetCoverModel::Stats SetCoverModel::ComputeCostStats() { } SetCoverModel::Stats SetCoverModel::ComputeRowStats() { - std::vector row_sizes(num_elements(), 0); + std::vector row_sizes(num_elements(), 0); for (const SparseColumn& column : columns_) { for (const ElementIndex element : column) { ++row_sizes[element.value()]; @@ -266,15 +410,15 @@ SetCoverModel::Stats SetCoverModel::ComputeRowStats() { } SetCoverModel::Stats SetCoverModel::ComputeColumnStats() { - std::vector column_sizes(columns_.size()); + std::vector column_sizes(columns_.size()); for (const SubsetIndex subset : SubsetRange()) { column_sizes[subset.value()] = columns_[subset].size(); } return ComputeStats(std::move(column_sizes)); } -std::vector SetCoverModel::ComputeRowDeciles() const { - std::vector row_sizes(num_elements(), 0); +std::vector SetCoverModel::ComputeRowDeciles() const { + std::vector row_sizes(num_elements(), 0); for (const SparseColumn& column : columns_) { for (const ElementIndex element : column) { ++row_sizes[element.value()]; @@ -283,8 +427,8 @@ std::vector SetCoverModel::ComputeRowDeciles() const { return ComputeDeciles(std::move(row_sizes)); } -std::vector SetCoverModel::ComputeColumnDeciles() const { - std::vector column_sizes(columns_.size()); +std::vector SetCoverModel::ComputeColumnDeciles() const { + std::vector column_sizes(columns_.size()); for (const SubsetIndex subset : SubsetRange()) { column_sizes[subset.value()] = columns_[subset].size(); } diff --git a/ortools/algorithms/set_cover_model.h b/ortools/algorithms/set_cover_model.h index fa3f55430b..ea21f82444 100644 --- a/ortools/algorithms/set_cover_model.h +++ b/ortools/algorithms/set_cover_model.h @@ -14,13 +14,7 @@ #ifndef OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_ #define OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_ -#if defined(_MSC_VER) -#include -typedef SSIZE_T ssize_t; -#else -#include -#endif // defined(_MSC_VER) - +#include #include #include @@ -29,7 +23,6 @@ typedef SSIZE_T ssize_t; #include "ortools/algorithms/set_cover.pb.h" #include "ortools/base/strong_int.h" #include "ortools/base/strong_vector.h" -#include "ortools/util/aligned_memory.h" // Representation class for the weighted set-covering problem. // @@ -65,7 +58,7 @@ using Cost = double; // (2e9) elements and subsets. If need arises one day, BaseInt can be split // into SubsetBaseInt and ElementBaseInt. // Quick testing has shown a slowdown of about 20-25% when using int64_t. -using BaseInt = int; +using BaseInt = int32_t; // We make heavy use of strong typing to avoid obvious mistakes. // Subset index. @@ -84,32 +77,14 @@ using SubsetRange = util_intops::StrongIntRange; using ElementRange = util_intops::StrongIntRange; using ColumnEntryRange = util_intops::StrongIntRange; -// SIMD operations require vectors to be aligned at 64-bytes on x86-64 -// processors as of 2024-05-03. -// TODO(user): improve the code to make it possible to use unaligned memory. -constexpr int kSetCoverAlignmentInBytes = 64; +using SubsetCostVector = util_intops::StrongVector; +using ElementCostVector = util_intops::StrongVector; -using CostAllocator = AlignedAllocator; -using ElementAllocator = - AlignedAllocator; -using SubsetAllocator = - AlignedAllocator; +using SparseColumn = util_intops::StrongVector; +using SparseRow = util_intops::StrongVector; -using SubsetCostVector = - util_intops::StrongVector; -using ElementCostVector = - util_intops::StrongVector; - -using SparseColumn = - util_intops::StrongVector; -using SparseRow = - util_intops::StrongVector; - -using IntAllocator = AlignedAllocator; -using ElementToIntVector = - util_intops::StrongVector; -using SubsetToIntVector = - util_intops::StrongVector; +using ElementToIntVector = util_intops::StrongVector; +using SubsetToIntVector = util_intops::StrongVector; // Views of the sparse vectors. These need not be aligned as it's their contents // that need to be aligned. @@ -117,6 +92,13 @@ using SparseColumnView = util_intops::StrongVector; using SparseRowView = util_intops::StrongVector; using SubsetBoolVector = util_intops::StrongVector; +using ElementBoolVector = util_intops::StrongVector; + +// Useful for representing permutations, +using ElementToElementVector = + util_intops::StrongVector; +using SubsetToSubsetVector = + util_intops::StrongVector; // Main class for describing a weighted set-covering problem. class SetCoverModel { @@ -132,6 +114,34 @@ class SetCoverModel { rows_(), all_subsets_() {} + // Constructs a weighted set-covering problem from a seed model, with + // num_elements elements and num_subsets subsets. + // - The distributions of the degrees of the elements and the cardinalities of + // the subsets are based on those of the seed model. They are scaled + // affininely by row_scale and column_scale respectively. + // - By affine scaling, we mean that the minimum value of the distribution is + // not scaled, but the variation above this minimum value is. + // - For a given subset with a given cardinality in the generated model, its + // elements are sampled from the distribution of the degrees as computed + // above. + // - The costs of the subsets in the new model are sampled from the + // distribution of the costs of the subsets in the seed model, scaled by + // cost_scale. + // IMPORTANT NOTICE: The algorithm may not succeed in generating a model + // where all the elements can be covered. In that case, the model will be + // empty. + + static SetCoverModel GenerateRandomModelFrom(const SetCoverModel& seed_model, + BaseInt num_elements, + BaseInt num_subsets, + double row_scale, + double column_scale, + double cost_scale); + + // Returns true if the model is empty, i.e. has no elements, no subsets, and + // no nonzeros. + bool IsEmpty() const { return rows_.empty() || columns_.empty(); } + // Current number of elements to be covered in the model, i.e. the number of // elements in S. In matrix terms, this is the number of rows. BaseInt num_elements() const { return num_elements_; } @@ -141,7 +151,7 @@ class SetCoverModel { BaseInt num_subsets() const { return num_subsets_; } // Current number of nonzeros in the matrix. - ssize_t num_nonzeros() const { return num_nonzeros_; } + int64_t num_nonzeros() const { return num_nonzeros_; } double FillRate() const { return 1.0 * num_nonzeros() / (1.0 * num_elements() * num_subsets()); @@ -243,10 +253,10 @@ class SetCoverModel { Stats ComputeColumnStats(); // Computes deciles on rows and returns a vector of deciles. - std::vector ComputeRowDeciles() const; + std::vector ComputeRowDeciles() const; // Computes deciles on columns and returns a vector of deciles. - std::vector ComputeColumnDeciles() const; + std::vector ComputeColumnDeciles() const; private: // Updates the all_subsets_ vector so that it always contains 0 to @@ -259,8 +269,9 @@ class SetCoverModel { // Number of subsets. Maintained for ease of access. BaseInt num_subsets_; - // Number of nonzeros in the matrix. - ssize_t num_nonzeros_; + // Number of nonzeros in the matrix. The value is an int64_t because there can + // be more than 1 << 31 nonzeros even with BaseInt = int32_t. + int64_t num_nonzeros_; // True when the SparseRowView is up-to-date. bool row_view_is_valid_; diff --git a/ortools/algorithms/set_cover_orlib_test.cc b/ortools/algorithms/set_cover_orlib_test.cc index 8dd153e44b..849e2b76ad 100644 --- a/ortools/algorithms/set_cover_orlib_test.cc +++ b/ortools/algorithms/set_cover_orlib_test.cc @@ -149,7 +149,7 @@ void ComputeLagrangianLowerBound(std::string name, SetCoverInvariant* inv) { const SetCoverModel* model = inv->model(); WallTimer timer; timer.Start(); - SetCoverLagrangian lagrangian(inv, /*num_threads=*/4); + SetCoverLagrangian lagrangian(inv, /*num_threads=*/8); const auto [lower_bound, reduced_costs, multipliers] = lagrangian.ComputeLowerBound(model->subset_costs(), inv->cost()); LogCostAndTiming(name, "LagrangianLowerBound", lower_bound, @@ -197,11 +197,11 @@ double RunSolver(std::string name, SetCoverModel* model) { global_timer.Start(); RunChvatalAndSteepest(name, model); // SetCoverInvariant inv = ComputeLPLowerBound(name, model); - RunMip(name, model); + // RunMip(name, model); RunChvatalAndGLS(name, model); SetCoverInvariant inv = RunElementDegreeGreedyAndSteepest(name, model); ComputeLagrangianLowerBound(name, &inv); - // IterateClearAndMip(name, inv); + // IterateClearAndMip(name, inv); IterateClearElementDegreeAndSteepest(name, &inv); return inv.cost(); } @@ -407,4 +407,22 @@ RAIL_TEST("rail4872.txt", 1527, 1861, MANYSECONDS); // [2] #undef SCP_TEST #undef RAIL_TEST +TEST(SetCoverHugeTest, GenerateProblem) { + SetCoverModel seed_model = + ReadRailSetCoverProblem(file::JoinPathRespectAbsolute( + ::testing::SrcDir(), data_dir, "rail4284.txt")); + seed_model.CreateSparseRowView(); + const BaseInt num_wanted_subsets(100'000'000); + const BaseInt num_wanted_elements(40'000); + const double row_scale = 1.1; + const double column_scale = 1.1; + const double cost_scale = 10.0; + SetCoverModel model = SetCoverModel::GenerateRandomModelFrom( + seed_model, num_wanted_elements, num_wanted_subsets, row_scale, + column_scale, cost_scale); + SetCoverInvariant inv = + RunElementDegreeGreedyAndSteepest("rail4284_huge.txt", &model); + LOG(INFO) << "Cost: " << inv.cost(); +} + } // namespace operations_research