algorithms: export from google3

This commit is contained in:
Corentin Le Molgat
2024-09-25 07:57:28 +02:00
parent 1b03cf25eb
commit 6feb7c8575
12 changed files with 813 additions and 189 deletions

View File

@@ -275,6 +275,7 @@ cc_library(
":set_cover_model",
"//ortools/base:threadpool",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/synchronization",
],
)
@@ -286,9 +287,10 @@ cc_library(
":set_cover_cc_proto",
"//ortools/base:intops",
"//ortools/base:strong_vector",
"//ortools/util:aligned_memory",
"@com_google_absl//absl/log",
"@com_google_absl//absl/log:check",
"@com_google_absl//absl/random",
"@com_google_absl//absl/random:distributions",
"@com_google_absl//absl/strings",
],
)

View File

@@ -13,91 +13,235 @@
// A pybind11 wrapper for set_cover_*.
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <memory>
#include <vector>
#include "absl/base/nullability.h"
#include "ortools/algorithms/set_cover_heuristics.h"
#include "ortools/algorithms/set_cover_invariant.h"
#include "ortools/algorithms/set_cover_model.h"
#include "ortools/algorithms/set_cover_reader.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/pytypes.h"
#include "pybind11/stl.h"
#include "pybind11_protobuf/native_proto_caster.h"
using ::operations_research::BaseInt;
using ::operations_research::ClearRandomSubsets;
using ::operations_research::ElementDegreeSolutionGenerator;
using ::operations_research::ElementIndex;
using ::operations_research::GreedySolutionGenerator;
using ::operations_research::GuidedLocalSearch;
using ::operations_research::GuidedTabuSearch;
using ::operations_research::Preprocessor;
using ::operations_research::RandomSolutionGenerator;
using ::operations_research::ReadBeasleySetCoverProblem;
using ::operations_research::ReadRailSetCoverProblem;
using ::operations_research::SetCoverDecision;
using ::operations_research::SetCoverInvariant;
using ::operations_research::SetCoverModel;
using ::operations_research::SparseColumn;
using ::operations_research::SparseRow;
using ::operations_research::SteepestSearch;
using ::operations_research::SubsetBoolVector;
using ::operations_research::SubsetCostVector;
using ::operations_research::SubsetIndex;
using ::operations_research::TabuList;
using ::operations_research::TrivialSolutionGenerator;
namespace py = pybind11;
using ::py::arg;
using ::py::make_iterator;
// General note about TODOs: the corresponding functions/classes/methods are
// more complex to wrap, as they use nonstandard types, and are less important,
// as they are not as useful to most users (mostly useful to write some custom
// Python heuristics).
std::vector<SubsetIndex> VectorIntToVectorSubsetIndex(
const std::vector<BaseInt>& ints) {
std::vector<SubsetIndex> subs;
std::transform(ints.begin(), ints.end(), subs.begin(),
[](int subset) -> SubsetIndex { return SubsetIndex(subset); });
return subs;
}
// Wraps a plain vector of doubles (as seen from Python) into the
// strongly-typed SubsetCostVector used by the C++ set-cover API.
SubsetCostVector VectorDoubleToSubsetCostVector(
    const std::vector<double>& doubles) {
  return SubsetCostVector(doubles.begin(), doubles.end());
}
// Minimal input iterator over the integers [0, max_value). It is used to
// expose C++ index ranges (subsets, elements) to Python through pybind11's
// make_iterator without materializing a whole vector of indices.
class IntIterator {
 public:
  using value_type = int;
  using difference_type = std::ptrdiff_t;
  using pointer = int*;
  using reference = int&;
  using iterator_category = std::input_iterator_tag;

  // Begin-style constructor: starts at 0, ends at max_value.
  explicit IntIterator(int max_value)
      : max_value_(max_value), current_value_(0) {}

  int operator*() const { return current_value_; }

  IntIterator& operator++() {
    ++current_value_;
    return *this;
  }

  // Post-increment, required by the input-iterator named requirements.
  IntIterator operator++(int) {
    IntIterator copy = *this;
    ++current_value_;
    return copy;
  }

  static IntIterator begin(int max_value) { return IntIterator{max_value}; }
  static IntIterator end(int max_value) { return {max_value, max_value}; }

  friend bool operator==(const IntIterator& lhs, const IntIterator& rhs) {
    return lhs.max_value_ == rhs.max_value_ &&
           lhs.current_value_ == rhs.current_value_;
  }
  // Explicit operator!= so the type works pre-C++20 as well, where != is not
  // synthesized from ==; generic code (including pybind11) compares with !=.
  friend bool operator!=(const IntIterator& lhs, const IntIterator& rhs) {
    return !(lhs == rhs);
  }

 private:
  IntIterator(int max_value, int current_value)
      : max_value_(max_value), current_value_(current_value) {}

  const int max_value_;
  int current_value_;
};
PYBIND11_MODULE(set_cover, m) {
pybind11_protobuf::ImportNativeProtoCasters();
// set_cover_model.h
py::class_<SetCoverModel::Stats>(m, "SetCoverModelStats")
.def_readwrite("min", &SetCoverModel::Stats::min)
.def_readwrite("max", &SetCoverModel::Stats::max)
.def_readwrite("median", &SetCoverModel::Stats::median)
.def_readwrite("mean", &SetCoverModel::Stats::mean)
.def_readwrite("stddev", &SetCoverModel::Stats::stddev)
.def("debug_string", &SetCoverModel::Stats::DebugString);
py::class_<SetCoverModel>(m, "SetCoverModel")
.def(py::init<>())
.def_property_readonly("num_elements", &SetCoverModel::num_elements)
.def_property_readonly("num_subsets", &SetCoverModel::num_subsets)
.def_property_readonly("num_nonzeros", &SetCoverModel::num_nonzeros)
.def_property_readonly("fill_rate", &SetCoverModel::FillRate)
.def_property_readonly(
"subset_costs",
[](SetCoverModel& model) -> const std::vector<double>& {
return model.subset_costs().get();
})
.def("columns",
[](SetCoverModel& model) -> std::vector<std::vector<BaseInt>> {
// Due to the inner StrongVector, make a deep copy. Anyway,
// columns() returns a const ref, so this keeps the semantics, not
// the efficiency.
std::vector<std::vector<BaseInt>> columns;
std::transform(
model.columns().begin(), model.columns().end(),
std::back_inserter(columns),
[](const SparseColumn& column) -> std::vector<BaseInt> {
std::vector<BaseInt> col(column.size());
std::transform(column.begin(), column.end(), col.begin(),
[](ElementIndex element) -> BaseInt {
return element.value();
});
return col;
});
return columns;
})
.def("rows",
[](SetCoverModel& model) -> std::vector<std::vector<BaseInt>> {
// Due to the inner StrongVector, make a deep copy. Anyway,
// rows() returns a const ref, so this keeps the semantics, not
// the efficiency.
std::vector<std::vector<BaseInt>> rows;
std::transform(
model.rows().begin(), model.rows().end(), std::back_inserter(rows),
[](const SparseRow& row) -> std::vector<BaseInt> {
std::vector<BaseInt> r(row.size());
std::transform(row.begin(), row.end(), r.begin(),
[](SubsetIndex element) -> BaseInt {
return element.value();
});
return r;
});
return rows;
})
.def("row_view_is_valid", &SetCoverModel::row_view_is_valid)
.def("SubsetRange",
[](SetCoverModel& model) {
return make_iterator<>(IntIterator::begin(model.num_subsets()),
IntIterator::end(model.num_subsets()));
})
.def("ElementRange",
[](SetCoverModel& model) {
return make_iterator<>(IntIterator::begin(model.num_elements()),
IntIterator::end(model.num_elements()));
})
.def_property_readonly("all_subsets",
[](SetCoverModel& model) -> std::vector<BaseInt> {
std::vector<BaseInt> subsets;
std::transform(
model.all_subsets().begin(),
model.all_subsets().end(), std::back_inserter(subsets),
[](const SubsetIndex element) -> BaseInt {
return element.value();
});
return subsets;
})
.def("add_empty_subset", &SetCoverModel::AddEmptySubset, arg("cost"))
.def(
"add_element_to_last_subset",
[](SetCoverModel& model, int element) {
[](SetCoverModel& model, BaseInt element) {
model.AddElementToLastSubset(element);
},
arg("element"))
.def(
"set_subset_cost",
[](SetCoverModel& model, int subset, double cost) {
[](SetCoverModel& model, BaseInt subset, double cost) {
model.SetSubsetCost(subset, cost);
},
arg("subset"), arg("cost"))
.def(
"add_element_to_subset",
[](SetCoverModel& model, int element, int subset) {
[](SetCoverModel& model, BaseInt element, BaseInt subset) {
model.AddElementToSubset(element, subset);
},
arg("element"), arg("subset"))
.def("create_sparse_row_view", &SetCoverModel::CreateSparseRowView)
.def("compute_feasibility", &SetCoverModel::ComputeFeasibility)
.def(
"reserve_num_subsets",
[](SetCoverModel& model, int num_subsets) {
[](SetCoverModel& model, BaseInt num_subsets) {
model.ReserveNumSubsets(num_subsets);
},
arg("num_subsets"))
.def(
"reserve_num_elements_in_subset",
[](SetCoverModel& model, int num_elements, int subset) {
[](SetCoverModel& model, BaseInt num_elements, BaseInt subset) {
model.ReserveNumElementsInSubset(num_elements, subset);
},
arg("num_elements"), arg("subset"))
.def("export_model_as_proto", &SetCoverModel::ExportModelAsProto)
.def("import_model_from_proto", &SetCoverModel::ImportModelFromProto);
// TODO(user): add support for subset_costs, columns, rows,
// row_view_is_valid, SubsetRange, ElementRange, all_subsets,
// CreateSparseRowView, ComputeCostStats, ComputeRowStats,
// ComputeColumnStats, ComputeRowDeciles, ComputeColumnDeciles.
.def("import_model_from_proto", &SetCoverModel::ImportModelFromProto)
.def("compute_cost_stats", &SetCoverModel::ComputeCostStats)
.def("compute_row_stats", &SetCoverModel::ComputeRowStats)
.def("compute_column_stats", &SetCoverModel::ComputeColumnStats)
.def("compute_row_deciles", &SetCoverModel::ComputeRowDeciles)
.def("compute_column_deciles", &SetCoverModel::ComputeColumnDeciles);
// TODO(user): wrap IntersectingSubsetsIterator.
// set_cover_invariant.h
py::class_<SetCoverDecision>(m, "SetCoverDecision")
.def(py::init<>())
.def(py::init([](BaseInt subset, bool value) -> SetCoverDecision* {
return new SetCoverDecision(SubsetIndex(subset), value);
}),
arg("subset"), arg("value"))
.def("subset",
[](const SetCoverDecision& decision) -> BaseInt {
return decision.subset().value();
})
.def("decision", &SetCoverDecision::decision);
py::class_<SetCoverInvariant>(m, "SetCoverInvariant")
.def(py::init<SetCoverModel*>())
.def("initialize", &SetCoverInvariant::Initialize)
@@ -118,44 +262,90 @@ PYBIND11_MODULE(set_cover, m) {
})
.def("cost", &SetCoverInvariant::cost)
.def("num_uncovered_elements", &SetCoverInvariant::num_uncovered_elements)
.def("is_selected",
[](SetCoverInvariant& invariant) -> std::vector<bool> {
return invariant.is_selected().get();
})
.def("num_free_elements",
[](SetCoverInvariant& invariant) -> std::vector<BaseInt> {
return invariant.num_free_elements().get();
})
.def("num_coverage_le_1_elements",
[](SetCoverInvariant& invariant) -> std::vector<BaseInt> {
return invariant.num_coverage_le_1_elements().get();
})
.def("coverage",
[](SetCoverInvariant& invariant) -> std::vector<BaseInt> {
return invariant.coverage().get();
})
.def(
"compute_coverage_in_focus",
[](SetCoverInvariant& invariant,
const std::vector<BaseInt>& focus) -> std::vector<BaseInt> {
return invariant
.ComputeCoverageInFocus(VectorIntToVectorSubsetIndex(focus))
.get();
},
arg("focus"))
.def("is_redundant",
[](SetCoverInvariant& invariant) -> std::vector<bool> {
return invariant.is_redundant().get();
})
.def("trace", &SetCoverInvariant::trace)
.def("clear_trace", &SetCoverInvariant::ClearTrace)
.def("clear_removability_information",
&SetCoverInvariant::ClearRemovabilityInformation)
.def("new_removable_subsets", &SetCoverInvariant::new_removable_subsets)
.def("new_non_removable_subsets",
&SetCoverInvariant::new_non_removable_subsets)
.def("compress_trace", &SetCoverInvariant::CompressTrace)
.def("load_solution",
[](SetCoverInvariant& invariant,
const std::vector<bool>& solution) -> void {
SubsetBoolVector sol(solution.begin(), solution.end());
return invariant.LoadSolution(sol);
})
.def("check_consistency", &SetCoverInvariant::CheckConsistency)
.def(
"compute_is_redundant",
[](SetCoverInvariant& invariant, BaseInt subset) -> bool {
return invariant.ComputeIsRedundant(SubsetIndex(subset));
},
arg("subset"))
.def("make_fully_updated", &SetCoverInvariant::MakeFullyUpdated)
.def(
"flip",
[](SetCoverInvariant& invariant, int subset) {
[](SetCoverInvariant& invariant, BaseInt subset) {
invariant.Flip(SubsetIndex(subset));
},
arg("subset"))
.def(
"flip_and_fully_update",
[](SetCoverInvariant& invariant, int subset) {
[](SetCoverInvariant& invariant, BaseInt subset) {
invariant.FlipAndFullyUpdate(SubsetIndex(subset));
},
arg("subset"))
.def(
"select",
[](SetCoverInvariant& invariant, int subset) {
[](SetCoverInvariant& invariant, BaseInt subset) {
invariant.Select(SubsetIndex(subset));
},
arg("subset"))
.def(
"select_and_fully_update",
[](SetCoverInvariant& invariant, int subset) {
[](SetCoverInvariant& invariant, BaseInt subset) {
invariant.SelectAndFullyUpdate(SubsetIndex(subset));
},
arg("subset"))
.def(
"deselect",
[](SetCoverInvariant& invariant, int subset) {
[](SetCoverInvariant& invariant, BaseInt subset) {
invariant.Deselect(SubsetIndex(subset));
},
arg("subset"))
.def(
"deselect_and_fully_update",
[](SetCoverInvariant& invariant, int subset) {
[](SetCoverInvariant& invariant, BaseInt subset) {
invariant.DeselectAndFullyUpdate(SubsetIndex(subset));
},
arg("subset"))
@@ -163,10 +353,6 @@ PYBIND11_MODULE(set_cover, m) {
&SetCoverInvariant::ExportSolutionAsProto)
.def("import_solution_from_proto",
&SetCoverInvariant::ImportSolutionFromProto);
// TODO(user): add support for is_selected, num_free_elements,
// num_coverage_le_1_elements, coverage, ComputeCoverageInFocus,
// is_redundant, trace, new_removable_subsets, new_non_removable_subsets,
// LoadSolution, ComputeIsRedundant.
// set_cover_heuristics.h
py::class_<Preprocessor>(m, "Preprocessor")
@@ -175,30 +361,57 @@ PYBIND11_MODULE(set_cover, m) {
[](Preprocessor& heuristic) -> bool {
return heuristic.NextSolution();
})
.def("next_solution",
[](Preprocessor& heuristic,
const std::vector<BaseInt>& focus) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
})
.def("num_columns_fixed_by_singleton_row",
&Preprocessor::num_columns_fixed_by_singleton_row);
// TODO(user): add support for focus argument.
py::class_<TrivialSolutionGenerator>(m, "TrivialSolutionGenerator")
.def(py::init<SetCoverInvariant*>())
.def("next_solution", [](TrivialSolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
});
// TODO(user): add support for focus argument.
.def("next_solution",
[](TrivialSolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
})
.def("next_solution",
[](TrivialSolutionGenerator& heuristic,
const std::vector<BaseInt>& focus) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
});
py::class_<RandomSolutionGenerator>(m, "RandomSolutionGenerator")
.def(py::init<SetCoverInvariant*>())
.def("next_solution", [](RandomSolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
});
// TODO(user): add support for focus argument.
.def("next_solution",
[](RandomSolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
})
.def("next_solution",
[](RandomSolutionGenerator& heuristic,
const std::vector<BaseInt>& focus) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
});
py::class_<GreedySolutionGenerator>(m, "GreedySolutionGenerator")
.def(py::init<SetCoverInvariant*>())
.def("next_solution", [](GreedySolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
});
// TODO(user): add support for focus and cost arguments.
.def("next_solution",
[](GreedySolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
})
.def("next_solution",
[](GreedySolutionGenerator& heuristic,
const std::vector<BaseInt>& focus) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
})
.def("next_solution",
[](GreedySolutionGenerator& heuristic,
const std::vector<BaseInt>& focus,
const std::vector<double>& costs) -> bool {
return heuristic.NextSolution(
VectorIntToVectorSubsetIndex(focus),
VectorDoubleToSubsetCostVector(costs));
});
py::class_<ElementDegreeSolutionGenerator>(m,
"ElementDegreeSolutionGenerator")
@@ -206,16 +419,40 @@ PYBIND11_MODULE(set_cover, m) {
.def("next_solution",
[](ElementDegreeSolutionGenerator& heuristic) -> bool {
return heuristic.NextSolution();
})
.def("next_solution",
[](ElementDegreeSolutionGenerator& heuristic,
const std::vector<BaseInt>& focus) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
})
.def("next_solution",
[](ElementDegreeSolutionGenerator& heuristic,
const std::vector<BaseInt>& focus,
const std::vector<double>& costs) -> bool {
return heuristic.NextSolution(
VectorIntToVectorSubsetIndex(focus),
VectorDoubleToSubsetCostVector(costs));
});
// TODO(user): add support for focus and cost arguments.
py::class_<SteepestSearch>(m, "SteepestSearch")
.def(py::init<SetCoverInvariant*>())
.def("next_solution",
[](SteepestSearch& heuristic, int num_iterations) -> bool {
return heuristic.NextSolution(num_iterations);
})
.def("next_solution",
[](SteepestSearch& heuristic, const std::vector<BaseInt>& focus,
int num_iterations) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus),
num_iterations);
})
.def("next_solution",
[](SteepestSearch& heuristic, const std::vector<BaseInt>& focus,
const std::vector<double>& costs, int num_iterations) -> bool {
return heuristic.NextSolution(
VectorIntToVectorSubsetIndex(focus),
VectorDoubleToSubsetCostVector(costs), num_iterations);
});
// TODO(user): add support for focus and cost arguments.
py::class_<GuidedLocalSearch>(m, "GuidedLocalSearch")
.def(py::init<SetCoverInvariant*>())
@@ -223,12 +460,92 @@ PYBIND11_MODULE(set_cover, m) {
.def("next_solution",
[](GuidedLocalSearch& heuristic, int num_iterations) -> bool {
return heuristic.NextSolution(num_iterations);
})
.def("next_solution",
[](GuidedLocalSearch& heuristic, const std::vector<BaseInt>& focus,
int num_iterations) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus),
num_iterations);
});
// TODO(user): add support for focus and cost arguments.
// TODO(user): add support for ClearRandomSubsets, ClearRandomSubsets,
// ClearMostCoveredElements, ClearMostCoveredElements, TabuList,
// GuidedTabuSearch.
// Specialization for T = SubsetIndex ~= BaseInt (aka int for Python, whatever
// the size of BaseInt).
// A base type doesn't work, because TabuList uses `T::value` in the
// constructor.
py::class_<TabuList<SubsetIndex>>(m, "TabuList")
.def(py::init([](int size) -> TabuList<SubsetIndex>* {
return new TabuList<SubsetIndex>(SubsetIndex(size));
}),
arg("size"))
.def("size", &TabuList<SubsetIndex>::size)
.def("init", &TabuList<SubsetIndex>::Init, arg("size"))
.def(
"add",
[](TabuList<SubsetIndex>& list, BaseInt t) -> void {
return list.Add(SubsetIndex(t));
},
arg("t"))
.def(
"contains",
[](TabuList<SubsetIndex>& list, BaseInt t) -> bool {
return list.Contains(SubsetIndex(t));
},
arg("t"));
py::class_<GuidedTabuSearch>(m, "GuidedTabuSearch")
.def(py::init<SetCoverInvariant*>())
.def("initialize", &GuidedTabuSearch::Initialize)
.def("next_solution",
[](GuidedTabuSearch& heuristic, int num_iterations) -> bool {
return heuristic.NextSolution(num_iterations);
})
.def("next_solution",
[](GuidedTabuSearch& heuristic, const std::vector<BaseInt>& focus,
int num_iterations) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus),
num_iterations);
})
.def("set_lagrangian_factor", &GuidedTabuSearch::SetLagrangianFactor,
     arg("factor"))
.def("get_lagrangian_factor", &GuidedTabuSearch::GetLagrangianFactor)
.def("set_epsilon", &GuidedTabuSearch::SetEpsilon, arg("r"))
.def("get_epsilon", &GuidedTabuSearch::GetEpsilon)
.def("set_penalty_factor", &GuidedTabuSearch::SetPenaltyFactor,
arg("factor"))
.def("get_penalty_factor", &GuidedTabuSearch::GetPenaltyFactor)
.def("set_tabu_list_size", &GuidedTabuSearch::SetTabuListSize,
arg("size"))
.def("get_tabu_list_size", &GuidedTabuSearch::GetTabuListSize);
m.def(
"clear_random_subsets",
[](BaseInt num_subsets, SetCoverInvariant* inv) -> std::vector<BaseInt> {
const std::vector<SubsetIndex> cleared =
ClearRandomSubsets(num_subsets, inv);
return {cleared.begin(), cleared.end()};
});
m.def("clear_random_subsets",
[](const std::vector<BaseInt>& focus, BaseInt num_subsets,
SetCoverInvariant* inv) -> std::vector<BaseInt> {
const std::vector<SubsetIndex> cleared = ClearRandomSubsets(
VectorIntToVectorSubsetIndex(focus), num_subsets, inv);
return {cleared.begin(), cleared.end()};
});
m.def(
"clear_most_covered_elements",
[](BaseInt num_subsets, SetCoverInvariant* inv) -> std::vector<BaseInt> {
const std::vector<SubsetIndex> cleared =
ClearMostCoveredElements(num_subsets, inv);
return {cleared.begin(), cleared.end()};
});
m.def("clear_most_covered_elements",
[](const std::vector<BaseInt>& focus, BaseInt num_subsets,
SetCoverInvariant* inv) -> std::vector<BaseInt> {
const std::vector<SubsetIndex> cleared = ClearMostCoveredElements(
VectorIntToVectorSubsetIndex(focus), num_subsets, inv);
return {cleared.begin(), cleared.end()};
});
// set_cover_reader.h
m.def("read_beasley_set_cover_problem", &ReadBeasleySetCoverProblem);

View File

@@ -0,0 +1,65 @@
// Copyright 2010-2024 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// [START program]
// [START import]
#include <cstdlib>
#include "ortools/algorithms/set_cover_heuristics.h"
#include "ortools/algorithms/set_cover_invariant.h"
#include "ortools/algorithms/set_cover_model.h"
#include "ortools/base/logging.h"
// [END import]
namespace operations_research {
void SimpleSetCoverProgram() {
// [START data]
SetCoverModel model;
model.AddEmptySubset(2.0);
model.AddElementToLastSubset(0);
model.AddEmptySubset(2.0);
model.AddElementToLastSubset(1);
model.AddEmptySubset(1.0);
model.AddElementToLastSubset(0);
model.AddElementToLastSubset(1);
// [END data]
// [START solve]
SetCoverInvariant inv(&model);
GreedySolutionGenerator greedy(&inv);
bool found_solution = greedy.NextSolution();
if (!found_solution) {
LOG(INFO) << "No solution found by the greedy heuristic.";
return;
}
SetCoverSolutionResponse solution = inv.ExportSolutionAsProto();
// [END solve]
// [START print_solution]
LOG(INFO) << "Total cost: " << solution.cost(); // == inv.cost()
LOG(INFO) << "Total number of selected subsets: " << solution.num_subsets();
LOG(INFO) << "Chosen subsets:";
for (int i = 0; i < solution.subset_size(); ++i) {
LOG(INFO) << " " << solution.subset(i);
}
// [END print_solution]
}
} // namespace operations_research
// Entry point for the standalone example binary: runs the sample set-cover
// program and exits successfully.
int main() {
  operations_research::SimpleSetCoverProgram();
  return EXIT_SUCCESS;
}
// [END program]

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# Copyright 2010-2024 Google LLC
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A simple set-covering problem."""
# [START program]
# [START import]
from ortools.algorithms.python import set_cover
# [END import]
def main():
    # [START data]
    model = set_cover.SetCoverModel()
    # Three candidate subsets over elements {0, 1}:
    # {0} at cost 2.0, {1} at cost 2.0, and {0, 1} at cost 1.0.
    for cost, elements in ((2.0, (0,)), (2.0, (1,)), (1.0, (0, 1))):
        model.add_empty_subset(cost)
        for element in elements:
            model.add_element_to_last_subset(element)
    # [END data]

    # [START solve]
    invariant = set_cover.SetCoverInvariant(model)
    generator = set_cover.GreedySolutionGenerator(invariant)
    if not generator.next_solution():
        print("No solution found by the greedy heuristic.")
        return
    solution = invariant.export_solution_as_proto()
    # [END solve]

    # [START print_solution]
    print(f"Total cost: {solution.cost}")  # == invariant.cost()
    print(f"Total number of selected subsets: {solution.num_subsets}")
    print("Chosen subsets:")
    for subset in solution.subset:
        print(f" {subset}")
    # [END print_solution]


if __name__ == "__main__":
    main()
# [END program]

View File

@@ -15,12 +15,14 @@
// [START import]
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <numeric>
#include <sstream>
#include <vector>
#include "ortools/algorithms/knapsack_solver.h"
#include "ortools/base/logging.h"
// [END import]
namespace operations_research {

View File

@@ -544,23 +544,23 @@ bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus,
}
namespace {
void SampleSubsets(std::vector<SubsetIndex>* list, std::size_t num_subsets) {
num_subsets = std::min(num_subsets, list->size());
void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
num_subsets = std::min(num_subsets, static_cast<BaseInt>(list->size()));
CHECK_GE(num_subsets, 0);
std::shuffle(list->begin(), list->end(), absl::BitGen());
list->resize(num_subsets);
}
} // namespace
std::vector<SubsetIndex> ClearRandomSubsets(std::size_t num_subsets,
std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
SetCoverInvariant* inv) {
return ClearRandomSubsets(inv->model()->all_subsets(), num_subsets, inv);
}
std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
std::size_t num_subsets,
BaseInt num_subsets,
SetCoverInvariant* inv) {
num_subsets = std::min(num_subsets, focus.size());
num_subsets = std::min(num_subsets, static_cast<BaseInt>(focus.size()));
CHECK_GE(num_subsets, 0);
std::vector<SubsetIndex> chosen_indices;
for (const SubsetIndex subset : focus) {
@@ -569,7 +569,7 @@ std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
}
}
SampleSubsets(&chosen_indices, num_subsets);
std::size_t num_deselected = 0;
BaseInt num_deselected = 0;
for (const SubsetIndex subset : chosen_indices) {
inv->Deselect(subset);
++num_deselected;
@@ -585,14 +585,14 @@ std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
return chosen_indices;
}
std::vector<SubsetIndex> ClearMostCoveredElements(std::size_t max_num_subsets,
std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt max_num_subsets,
SetCoverInvariant* inv) {
return ClearMostCoveredElements(inv->model()->all_subsets(), max_num_subsets,
inv);
}
std::vector<SubsetIndex> ClearMostCoveredElements(
absl::Span<const SubsetIndex> focus, std::size_t max_num_subsets,
absl::Span<const SubsetIndex> focus, BaseInt max_num_subsets,
SetCoverInvariant* inv) {
// This is the vector we will return.
std::vector<SubsetIndex> sampled_subsets;
@@ -625,7 +625,8 @@ std::vector<SubsetIndex> ClearMostCoveredElements(
// Actually *sample* sampled_subset.
// TODO(user): find another algorithm if necessary.
std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
sampled_subsets.resize(std::min(sampled_subsets.size(), max_num_subsets));
sampled_subsets.resize(
std::min(static_cast<BaseInt>(sampled_subsets.size()), max_num_subsets));
// Testing has shown that sorting sampled_subsets is not necessary.
// Now, un-select the subset in sampled_subsets.

View File

@@ -477,12 +477,12 @@ class GuidedLocalSearch {
// solution. There can be more than num_subsets variables cleared because the
// intersecting subsets are also removed from the solution. Returns a list of
// subset indices that can be reused as a focus.
std::vector<SubsetIndex> ClearRandomSubsets(std::size_t num_subsets,
std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
SetCoverInvariant* inv);
// Same as above, but clears the subset indices in focus.
std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
std::size_t num_subsets,
BaseInt num_subsets,
SetCoverInvariant* inv);
// Clears the variables (subsets) that cover the most covered elements. This is
@@ -490,12 +490,12 @@ std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
// randomly.
// Returns the list of the chosen subset indices.
// This indices can then be used ax a focus.
std::vector<SubsetIndex> ClearMostCoveredElements(std::size_t num_subsets,
std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt num_subsets,
SetCoverInvariant* inv);
// Same as above, but clears the subset indices in focus.
std::vector<SubsetIndex> ClearMostCoveredElements(
absl::Span<const SubsetIndex> focus, std::size_t num_subsets,
absl::Span<const SubsetIndex> focus, BaseInt num_subsets,
SetCoverInvariant* inv);
} // namespace operations_research

View File

@@ -20,6 +20,7 @@
#include <vector>
#include "absl/log/check.h"
#include "absl/synchronization/blocking_counter.h"
#include "ortools/algorithms/adjustable_k_ary_heap.h"
#include "ortools/algorithms/set_cover_invariant.h"
#include "ortools/algorithms/set_cover_model.h"
@@ -74,7 +75,7 @@ namespace {
// TODO(user): Investigate.
Cost ScalarProduct(const SparseColumn& column, const ElementCostVector& dual) {
Cost result = 0.0;
for (ColumnEntryIndex pos(0); pos.value() < column.size(); ++pos) {
for (const ColumnEntryIndex pos : column.index_range()) {
result += dual[column[pos]];
}
return result;
@@ -82,52 +83,49 @@ Cost ScalarProduct(const SparseColumn& column, const ElementCostVector& dual) {
// Computes the reduced costs for a subset of subsets.
// This is a helper function for ParallelComputeReducedCosts().
// It is called on a slice of subsets, defined by start and end.
// It is called on a slice of subsets, defined by slice_start and slice_end.
// The reduced costs are computed using the multipliers vector.
// The columns of the subsets are given by the columns view.
// The result is stored in reduced_costs.
void FillReducedCostsSlice(SubsetIndex start, SubsetIndex end,
void FillReducedCostsSlice(SubsetIndex slice_start, SubsetIndex slice_end,
const SubsetCostVector& costs,
const ElementCostVector& multipliers,
const SparseColumnView& columns,
SubsetCostVector* reduced_costs) {
for (SubsetIndex subset = start; subset < end; ++subset) {
for (SubsetIndex subset = slice_start; subset < slice_end; ++subset) {
(*reduced_costs)[subset] =
costs[subset] - ScalarProduct(columns[subset], multipliers);
}
}
// Returns the per-thread block size needed for `num_threads` workers to cover
// `size` items, i.e. ceil(size / num_threads) for size >= 1.
// NOTE(review): returns 1 (not 0) when size == 0, since -1 / num_threads
// truncates to 0 — callers appear to rely on a strictly positive block size.
BaseInt BlockSize(BaseInt size, int num_threads) {
  return 1 + (size - 1) / num_threads;
}
} // namespace
// Computes the reduced costs for all subsets in parallel, splitting the
// subset range into one contiguous slice per thread and running each slice
// on the instance-wide thread pool.
SubsetCostVector SetCoverLagrangian::ParallelComputeReducedCosts(
    const SubsetCostVector& costs, const ElementCostVector& multipliers) const {
  const SubsetIndex num_subsets(model_.num_subsets());
  const SparseColumnView& columns = model_.columns();
  SubsetCostVector reduced_costs(num_subsets);
  // TODO(user): compute a close-to-optimal k-subset partitioning of the
  // columns based on their sizes, instead of the fixed-size slices below.
  const SubsetIndex block_size(BlockSize(num_subsets.value(), num_threads_));
  // Each worker decrements the counter when its slice is done; Wait() below
  // blocks until all the slices have been processed.
  absl::BlockingCounter num_threads_running(num_threads_);
  SubsetIndex slice_start(0);
  for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
    const SubsetIndex slice_end =
        std::min(slice_start + block_size, num_subsets);
    thread_pool_->Schedule([&num_threads_running, slice_start, slice_end,
                            &costs, &multipliers, &columns, &reduced_costs]() {
      FillReducedCostsSlice(slice_start, slice_end, costs, multipliers, columns,
                            &reduced_costs);
      num_threads_running.DecrementCount();
    });
    slice_start = slice_end;
  }
  num_threads_running.Wait();  // Synchronize all the threads.
  return reduced_costs;
}
@@ -147,14 +145,14 @@ SubsetCostVector SetCoverLagrangian::ComputeReducedCosts(
namespace {
// Helper function to compute the subgradient.
// It fills a slice of the subgradient vector from indices start to end.
// This is a helper function for ParallelComputeSubgradient().
// The subgradient is computed using the reduced costs vector.
void FillSubgradientSlice(SubsetIndex start, SubsetIndex end,
// It fills a slice of the subgradient vector from indices slice_start to
// slice_end. This is a helper function for ParallelComputeSubgradient(). The
// subgradient is computed using the reduced costs vector.
void FillSubgradientSlice(SubsetIndex slice_start, SubsetIndex slice_end,
const SparseColumnView& columns,
const SubsetCostVector& reduced_costs,
ElementCostVector* subgradient) {
for (SubsetIndex subset(start); subset < end; ++subset) {
for (SubsetIndex subset(slice_start); subset < slice_end; ++subset) {
if (reduced_costs[subset] < 0.0) {
for (const ElementIndex element : columns[subset]) {
(*subgradient)[element] -= 1.0;
@@ -181,8 +179,6 @@ ElementCostVector SetCoverLagrangian::ComputeSubgradient(
ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient(
const SubsetCostVector& reduced_costs) const {
const SubsetIndex num_subsets(model_.num_subsets());
const SubsetIndex block_size =
SubsetIndex(1) + num_subsets / num_threads_; // [***]
const SparseColumnView& columns = model_.columns();
ElementCostVector subgradient(model_.num_elements(), 1.0);
// The subgradient has one component per element, each thread processes
@@ -191,20 +187,22 @@ ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient(
// although this might be less well-balanced.
std::vector<ElementCostVector> subgradients(
num_threads_, ElementCostVector(model_.num_elements()));
ThreadPool thread_pool("ParallelComputeSubgradient", num_threads_);
thread_pool.StartWorkers();
{
int thread_index = 0;
for (SubsetIndex start(0); start < num_subsets;
start += block_size, ++thread_index) {
thread_pool.Schedule([start, block_size, num_subsets, &reduced_costs,
&columns, &subgradients, thread_index]() {
const SubsetIndex end = std::min(start + block_size, num_subsets);
FillSubgradientSlice(start, end, columns, reduced_costs,
&subgradients[thread_index]);
});
}
} // Synchronize all the threads.
absl::BlockingCounter num_threads_running(num_threads_);
const SubsetIndex block_size(BlockSize(num_subsets.value(), num_threads_));
SubsetIndex slice_start(0);
for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
const SubsetIndex slice_end =
std::min(slice_start + block_size, num_subsets);
thread_pool_->Schedule([&num_threads_running, slice_start, slice_end,
&reduced_costs, &columns, &subgradients,
thread_index]() {
FillSubgradientSlice(slice_start, slice_end, columns, reduced_costs,
&subgradients[thread_index]);
num_threads_running.DecrementCount();
});
slice_start = slice_end;
}
num_threads_running.Wait();
for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
for (const ElementIndex element : model_.ElementRange()) {
subgradient[element] += subgradients[thread_index][element];
@@ -216,17 +214,17 @@ ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient(
namespace {
// Helper function to compute the value of the Lagrangian.
// This is a helper function for ParallelComputeLagrangianValue().
// It is called on a slice of elements, defined by start and end.
// It is called on a slice of elements, defined by slice_start and slice_end.
// The value of the Lagrangian is computed using the reduced costs vector and
// the multipliers vector.
// The result is stored in lagrangian_value.
void FillLagrangianValueSlice(SubsetIndex start, SubsetIndex end,
void FillLagrangianValueSlice(SubsetIndex slice_start, SubsetIndex slice_end,
const SubsetCostVector& reduced_costs,
Cost* lagrangian_value) {
// This is min \sum_{j \in N} c_j(u) x_j. This captures the remark above (**),
// taking into account the possible values for x_j, and using them to minimize
// the terms.
for (SubsetIndex subset(start); subset < end; ++subset) {
// This is min \sum_{j \in N} c_j(u) x_j. This captures the remark above
// (**), taking into account the possible values for x_j, and using them to
// minimize the terms.
for (SubsetIndex subset(slice_start); subset < slice_end; ++subset) {
if (reduced_costs[subset] < 0.0) {
*lagrangian_value += reduced_costs[subset];
}
@@ -258,30 +256,31 @@ Cost SetCoverLagrangian::ComputeLagrangianValue(
Cost SetCoverLagrangian::ParallelComputeLagrangianValue(
const SubsetCostVector& reduced_costs,
const ElementCostVector& multipliers) const {
const SubsetIndex num_subsets(model_.num_subsets());
const SubsetIndex block_size =
SubsetIndex(1) + num_subsets / num_threads_; // [***] Arbitrary.
Cost lagrangian_value = 0.0;
// This is \sum{i \in M} u_i.
for (const Cost u : multipliers) {
lagrangian_value += u;
}
std::vector<Cost> lagrangian_values(num_threads_, 0.0);
ThreadPool thread_pool("ParallelComputeLagrangianValue", num_threads_);
thread_pool.StartWorkers();
{
int thread_index = 0;
for (SubsetIndex start(0); start < num_subsets; start += block_size) {
thread_pool.Schedule([start, block_size, num_subsets, thread_index,
&reduced_costs, &lagrangian_values]() {
const SubsetIndex end = std::min(start + block_size, num_subsets);
FillLagrangianValueSlice(start, end, reduced_costs,
&lagrangian_values[thread_index]);
});
++thread_index;
}
} // Synchronize all the threads.
absl::BlockingCounter num_threads_running(num_threads_);
const SubsetIndex block_size(BlockSize(model_.num_subsets(), num_threads_));
const SubsetIndex num_subsets(model_.num_subsets());
SubsetIndex slice_start(0);
for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
const SubsetIndex slice_end =
std::min(slice_start + block_size, num_subsets);
thread_pool_->Schedule([&num_threads_running, slice_start, block_size,
num_subsets, thread_index, &reduced_costs,
&lagrangian_values]() {
const SubsetIndex slice_end =
std::min(slice_start + block_size, num_subsets);
FillLagrangianValueSlice(slice_start, slice_end, reduced_costs,
&lagrangian_values[thread_index]);
num_threads_running.DecrementCount();
});
slice_start = slice_end;
}
num_threads_running.Wait();
for (const Cost l : lagrangian_values) {
lagrangian_value += l;
}
@@ -290,8 +289,8 @@ Cost SetCoverLagrangian::ParallelComputeLagrangianValue(
// Perform a subgradient step.
// In the general case, for an Integer Program A.x <= b, the Lagrangian
// multipliers vector at step k+1 is defined as: u^{k+1} = u^k + t_k (A x^k -
// b) with term t_k = lambda_k * (UB - L(u^k)) / |A x^k - b|^2.
// |.| is the 2-norm (i.e. Euclidean)
// In our case, the problem A x <= b is in the form A x >= 1. We need to
// replace A x - b by s_i(u) = 1 - sum_{j \in J_i} x_j(u).
@@ -343,9 +342,9 @@ void SetCoverLagrangian::ParallelUpdateMultipliers(
step_size * (upper_bound - lagrangian_value) / subgradient_square_norm;
for (const ElementIndex element : model_.ElementRange()) {
// Avoid multipliers to go negative and to go through the roof. 1e6 chosen
// arbitrarily. [***]
const Cost kRoof = 1e6; // Arbitrary value, from [1].
(*multipliers)[element] = std::clamp(
(*multipliers)[element] + factor * subgradient[element], 0.0, 1e6);
(*multipliers)[element] + factor * subgradient[element], 0.0, kRoof);
}
}
@@ -503,9 +502,9 @@ SetCoverLagrangian::ComputeLowerBound(const SubsetCostVector& costs,
for (int iter = 0; iter < 1000; ++iter) {
reduced_costs = ParallelComputeReducedCosts(costs, multipliers);
const Cost lagrangian_value =
ComputeLagrangianValue(reduced_costs, multipliers);
UpdateMultipliers(step_size, lagrangian_value, upper_bound, reduced_costs,
&multipliers);
ParallelComputeLagrangianValue(reduced_costs, multipliers);
ParallelUpdateMultipliers(step_size, lagrangian_value, upper_bound,
reduced_costs, &multipliers);
lower_bound = std::max(lower_bound, lagrangian_value);
    // step_size should be updated like this. For the time being, we keep the
// step size, because the implementation of the rest is not adequate yet

View File

@@ -15,6 +15,7 @@
#define OR_TOOLS_ALGORITHMS_SET_COVER_LAGRANGIAN_H_
#include <memory>
#include <new>
#include <tuple>
#include <vector>
@@ -44,7 +45,12 @@ namespace operations_research {
class SetCoverLagrangian {
public:
explicit SetCoverLagrangian(SetCoverInvariant* inv, int num_threads = 1)
: inv_(inv), model_(*inv->model()), num_threads_(num_threads) {}
: inv_(inv),
model_(*inv->model()),
num_threads_(num_threads),
thread_pool_(new ThreadPool(num_threads)) {
thread_pool_->StartWorkers();
}
// Returns true if a solution was found.
// TODO(user): Add time-outs and exit with a partial solution. This seems
@@ -137,6 +143,9 @@ class SetCoverLagrangian {
// The number of threads to use for parallelization.
int num_threads_;
// The thread pool used for parallelization.
std::unique_ptr<ThreadPool> thread_pool_;
// Total (scalar) Lagrangian cost.
Cost lagrangian_;

View File

@@ -17,17 +17,161 @@
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <numeric>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/random/discrete_distribution.h"
#include "absl/random/distributions.h"
#include "absl/random/random.h"
#include "ortools/algorithms/set_cover.pb.h"
#include "ortools/base/logging.h"
namespace operations_research {
namespace {
// Draws a raw value from the discrete distribution `dist`, then maps it
// affinely: the minimum `min` stays fixed, while the offset above it (plus a
// uniform jitter in [0, 1)) is multiplied by `scaling_factor`.
BaseInt DiscreteAffine(absl::BitGen& bitgen,
                       absl::discrete_distribution<BaseInt>& dist, BaseInt min,
                       double scaling_factor) {
  const BaseInt sampled = dist(bitgen);
  const double jitter = absl::Uniform<double>(bitgen, 0, 1.0);
  // Scale only the variation above the minimum, then shift back by `min`.
  const double scaled_offset = (sampled - min + jitter) * scaling_factor;
  return min + static_cast<BaseInt>(std::floor(scaled_offset));
}
// For a given view (SparseColumnView or SparseRowView), returns the
// distribution of the sizes of the vectors in the view, which can be used in
// an absl::discrete_distribution.
template <typename View>
std::tuple<BaseInt, std::vector<BaseInt>> ComputeSizeHistogram(
const View& view) {
BaseInt max_size = 0;
BaseInt min_size = std::numeric_limits<BaseInt>::max();
for (const auto& vec : view) {
const BaseInt size = vec.size();
min_size = std::min(min_size, size);
max_size = std::max(max_size, size);
}
std::vector<BaseInt> weights(max_size + 1, 0);
for (const auto& vec : view) {
const BaseInt size = vec.size();
++weights[size];
}
return {min_size, weights};
}
// Convenience wrapper around ComputeSizeHistogram(): returns the minimum size
// in `view` and a ready-to-use discrete distribution over the sizes.
template <typename View>
std::tuple<BaseInt, absl::discrete_distribution<BaseInt>>
ComputeSizeDistribution(const View& view) {
  const auto [minimum, histogram] = ComputeSizeHistogram(view);
  return {minimum, absl::discrete_distribution<BaseInt>(histogram.begin(),
                                                        histogram.end())};
}
} // namespace
// Generates a random model with num_elements elements and num_subsets
// subsets, whose column-size, row-degree and cost distributions mimic those
// of seed_model, affinely scaled by column_scale, row_scale and cost_scale
// respectively. Returns an empty (default-constructed) model if the sampler
// repeatedly fails to find an uncovered element for a subset (see the
// num_tries limit below).
SetCoverModel SetCoverModel::GenerateRandomModelFrom(
    const SetCoverModel& seed_model, BaseInt num_elements, BaseInt num_subsets,
    double row_scale, double column_scale, double cost_scale) {
  SetCoverModel model;
  DCHECK_GT(row_scale, 0.0);
  DCHECK_GT(column_scale, 0.0);
  DCHECK_GT(cost_scale, 0.0);
  model.num_elements_ = num_elements;
  model.num_nonzeros_ = 0;
  model.ReserveNumSubsets(num_subsets);
  model.UpdateAllSubsetsList();
  absl::BitGen bitgen;
  // Create the distribution of the cardinalities of the subsets based on the
  // histogram of column sizes in the seed model.
  auto [min_column_size, column_dist] =
      ComputeSizeDistribution(seed_model.columns());
  // Create the distribution of the degrees of the elements based on the
  // histogram of row sizes in the seed model.
  auto [min_row_size, row_dist] = ComputeSizeDistribution(seed_model.rows());
  // Prepare the degrees of the elements in the generated model, and use them
  // in a distribution to generate the columns. This weights the sampling
  // towards the elements with higher degrees.
  // NOTE(review): this weighting choice looks heuristic — confirm it matches
  // the intended element-degree distribution.
  ElementToIntVector degrees(num_elements);
  for (ElementIndex element(0); element.value() < num_elements; ++element) {
    degrees[element] =
        DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
  }
  absl::discrete_distribution<BaseInt> degree_dist(degrees.begin(),
                                                   degrees.end());
  // Vector indicating whether the generated model covers an element.
  ElementBoolVector contains_element(num_elements, false);
  // Number of elements in the generated model, using the above vector.
  BaseInt num_elements_covered(0);
  // Loop-local vector indicating whether the currently generated subset
  // contains an element. Reset in place after each subset (see below) to
  // avoid reallocating a vector of size num_elements per subset.
  ElementBoolVector subset_contains_element(num_elements, false);
  for (SubsetIndex subset(0); subset.value() < num_subsets; ++subset) {
    // Draw this subset's cardinality from the (scaled) seed distribution.
    const BaseInt cardinality =
        DiscreteAffine(bitgen, column_dist, min_column_size, column_scale);
    model.columns_[subset].reserve(cardinality);
    for (BaseInt iter = 0; iter < cardinality; ++iter) {
      int num_tries = 0;
      ElementIndex element;
      // Choose an element that is not yet in the subset at random with a
      // distribution that is proportional to the degree of the element.
      do {
        element = ElementIndex(degree_dist(bitgen));
        CHECK_LT(element.value(), num_elements);
        ++num_tries;
        // Give up and return an empty model after 10 unsuccessful draws.
        if (num_tries > 10) {
          return SetCoverModel();
        }
      } while (subset_contains_element[element]);
      ++model.num_nonzeros_;
      model.columns_[subset].push_back(element);
      subset_contains_element[element] = true;
      if (!contains_element[element]) {
        contains_element[element] = true;
        ++num_elements_covered;
      }
    }
    // Clear only the entries touched by this subset, for the next iteration.
    for (const ElementIndex element : model.columns_[subset]) {
      subset_contains_element[element] = false;
    }
  }
  // Every element must be covered by at least one generated subset.
  CHECK_EQ(num_elements_covered, num_elements);
  // TODO(user): if necessary, use a better distribution for the costs.
  // The generation of the costs is done in two steps. First, compute the
  // minimum and maximum costs.
  Cost min_cost = std::numeric_limits<Cost>::infinity();
  Cost max_cost = -min_cost;
  for (const Cost cost : seed_model.subset_costs()) {
    min_cost = std::min(min_cost, cost);
    max_cost = std::max(max_cost, cost);
  }
  // Then, generate random numbers in [min_cost, min_cost + cost_range], where
  // cost_range is defined as:
  const Cost cost_range = cost_scale * (max_cost - min_cost);
  for (Cost& cost : model.subset_costs_) {
    cost = min_cost + absl::Uniform<double>(bitgen, 0, cost_range);
  }
  return model;
}
void SetCoverModel::UpdateAllSubsetsList() {
const BaseInt old_size = all_subsets_.size();
DCHECK_LE(old_size, num_subsets());
@@ -92,8 +236,8 @@ void SetCoverModel::AddElementToSubset(ElementIndex element,
}
// Reserves num_subsets columns in the model. The model can only grow: if
// num_subsets is smaller than the current number of subsets, this is a no-op.
void SetCoverModel::ReserveNumSubsets(BaseInt num_subsets) {
  num_subsets_ = std::max(num_subsets_, num_subsets);
  // New columns are empty; new subsets get a zero cost by default.
  columns_.resize(num_subsets_, SparseColumn());
  subset_costs_.resize(num_subsets_, 0.0);
}
@@ -121,8 +265,8 @@ void SetCoverModel::CreateSparseRowView() {
rows_.resize(num_elements_, SparseRow());
ElementToIntVector row_sizes(num_elements_, 0);
for (const SubsetIndex subset : SubsetRange()) {
    // Sort the columns. It's not super-critical to improve performance here
    // as this needs to be done only once.
std::sort(columns_[subset].begin(), columns_[subset].end());
for (const ElementIndex element : columns_[subset]) {
++row_sizes[element];
@@ -256,7 +400,7 @@ SetCoverModel::Stats SetCoverModel::ComputeCostStats() {
}
SetCoverModel::Stats SetCoverModel::ComputeRowStats() {
std::vector<ssize_t> row_sizes(num_elements(), 0);
std::vector<BaseInt> row_sizes(num_elements(), 0);
for (const SparseColumn& column : columns_) {
for (const ElementIndex element : column) {
++row_sizes[element.value()];
@@ -266,15 +410,15 @@ SetCoverModel::Stats SetCoverModel::ComputeRowStats() {
}
SetCoverModel::Stats SetCoverModel::ComputeColumnStats() {
  // One entry per subset (column), holding the column's cardinality.
  std::vector<BaseInt> column_sizes(columns_.size());
  for (const SubsetIndex subset : SubsetRange()) {
    column_sizes[subset.value()] = columns_[subset].size();
  }
  return ComputeStats(std::move(column_sizes));
}
std::vector<ssize_t> SetCoverModel::ComputeRowDeciles() const {
std::vector<ssize_t> row_sizes(num_elements(), 0);
std::vector<BaseInt> SetCoverModel::ComputeRowDeciles() const {
std::vector<BaseInt> row_sizes(num_elements(), 0);
for (const SparseColumn& column : columns_) {
for (const ElementIndex element : column) {
++row_sizes[element.value()];
@@ -283,8 +427,8 @@ std::vector<ssize_t> SetCoverModel::ComputeRowDeciles() const {
return ComputeDeciles(std::move(row_sizes));
}
std::vector<ssize_t> SetCoverModel::ComputeColumnDeciles() const {
std::vector<ssize_t> column_sizes(columns_.size());
std::vector<BaseInt> SetCoverModel::ComputeColumnDeciles() const {
std::vector<BaseInt> column_sizes(columns_.size());
for (const SubsetIndex subset : SubsetRange()) {
column_sizes[subset.value()] = columns_[subset].size();
}

View File

@@ -14,13 +14,7 @@
#ifndef OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
#define OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
#if defined(_MSC_VER)
#include <BaseTsd.h>
typedef SSIZE_T ssize_t;
#else
#include <sys/types.h>
#endif // defined(_MSC_VER)
#include <cstdint>
#include <string>
#include <vector>
@@ -29,7 +23,6 @@ typedef SSIZE_T ssize_t;
#include "ortools/algorithms/set_cover.pb.h"
#include "ortools/base/strong_int.h"
#include "ortools/base/strong_vector.h"
#include "ortools/util/aligned_memory.h"
// Representation class for the weighted set-covering problem.
//
@@ -65,7 +58,7 @@ using Cost = double;
// (2e9) elements and subsets. If need arises one day, BaseInt can be split
// into SubsetBaseInt and ElementBaseInt.
// Quick testing has shown a slowdown of about 20-25% when using int64_t.
using BaseInt = int;
using BaseInt = int32_t;
// We make heavy use of strong typing to avoid obvious mistakes.
// Subset index.
@@ -84,32 +77,14 @@ using SubsetRange = util_intops::StrongIntRange<SubsetIndex>;
using ElementRange = util_intops::StrongIntRange<ElementIndex>;
using ColumnEntryRange = util_intops::StrongIntRange<ColumnEntryIndex>;
// SIMD operations require vectors to be aligned at 64-bytes on x86-64
// processors as of 2024-05-03.
// TODO(user): improve the code to make it possible to use unaligned memory.
constexpr int kSetCoverAlignmentInBytes = 64;
using SubsetCostVector = util_intops::StrongVector<SubsetIndex, Cost>;
using ElementCostVector = util_intops::StrongVector<ElementIndex, Cost>;
using CostAllocator = AlignedAllocator<Cost, kSetCoverAlignmentInBytes>;
using ElementAllocator =
AlignedAllocator<ElementIndex, kSetCoverAlignmentInBytes>;
using SubsetAllocator =
AlignedAllocator<SubsetIndex, kSetCoverAlignmentInBytes>;
using SparseColumn = util_intops::StrongVector<ColumnEntryIndex, ElementIndex>;
using SparseRow = util_intops::StrongVector<RowEntryIndex, SubsetIndex>;
using SubsetCostVector =
util_intops::StrongVector<SubsetIndex, Cost, CostAllocator>;
using ElementCostVector =
util_intops::StrongVector<ElementIndex, Cost, CostAllocator>;
using SparseColumn =
util_intops::StrongVector<ColumnEntryIndex, ElementIndex, ElementAllocator>;
using SparseRow =
util_intops::StrongVector<RowEntryIndex, SubsetIndex, SubsetAllocator>;
using IntAllocator = AlignedAllocator<BaseInt, kSetCoverAlignmentInBytes>;
using ElementToIntVector =
util_intops::StrongVector<ElementIndex, BaseInt, IntAllocator>;
using SubsetToIntVector =
util_intops::StrongVector<SubsetIndex, BaseInt, IntAllocator>;
using ElementToIntVector = util_intops::StrongVector<ElementIndex, BaseInt>;
using SubsetToIntVector = util_intops::StrongVector<SubsetIndex, BaseInt>;
// Views of the sparse vectors. These need not be aligned as it's their contents
// that need to be aligned.
@@ -117,6 +92,13 @@ using SparseColumnView = util_intops::StrongVector<SubsetIndex, SparseColumn>;
using SparseRowView = util_intops::StrongVector<ElementIndex, SparseRow>;
using SubsetBoolVector = util_intops::StrongVector<SubsetIndex, bool>;
using ElementBoolVector = util_intops::StrongVector<ElementIndex, bool>;
// Useful for representing permutations,
using ElementToElementVector =
util_intops::StrongVector<ElementIndex, ElementIndex>;
using SubsetToSubsetVector =
util_intops::StrongVector<SubsetIndex, SubsetIndex>;
// Main class for describing a weighted set-covering problem.
class SetCoverModel {
@@ -132,6 +114,34 @@ class SetCoverModel {
rows_(),
all_subsets_() {}
// Constructs a weighted set-covering problem from a seed model, with
// num_elements elements and num_subsets subsets.
// - The distributions of the degrees of the elements and the cardinalities of
// the subsets are based on those of the seed model. They are scaled
  // affinely by row_scale and column_scale respectively.
// - By affine scaling, we mean that the minimum value of the distribution is
// not scaled, but the variation above this minimum value is.
// - For a given subset with a given cardinality in the generated model, its
// elements are sampled from the distribution of the degrees as computed
// above.
// - The costs of the subsets in the new model are sampled from the
// distribution of the costs of the subsets in the seed model, scaled by
// cost_scale.
// IMPORTANT NOTICE: The algorithm may not succeed in generating a model
// where all the elements can be covered. In that case, the model will be
// empty.
static SetCoverModel GenerateRandomModelFrom(const SetCoverModel& seed_model,
BaseInt num_elements,
BaseInt num_subsets,
double row_scale,
double column_scale,
double cost_scale);
// Returns true if the model is empty, i.e. has no elements, no subsets, and
// no nonzeros.
bool IsEmpty() const { return rows_.empty() || columns_.empty(); }
// Current number of elements to be covered in the model, i.e. the number of
// elements in S. In matrix terms, this is the number of rows.
BaseInt num_elements() const { return num_elements_; }
@@ -141,7 +151,7 @@ class SetCoverModel {
BaseInt num_subsets() const { return num_subsets_; }
// Current number of nonzeros in the matrix.
ssize_t num_nonzeros() const { return num_nonzeros_; }
int64_t num_nonzeros() const { return num_nonzeros_; }
double FillRate() const {
return 1.0 * num_nonzeros() / (1.0 * num_elements() * num_subsets());
@@ -243,10 +253,10 @@ class SetCoverModel {
Stats ComputeColumnStats();
// Computes deciles on rows and returns a vector of deciles.
std::vector<ssize_t> ComputeRowDeciles() const;
std::vector<BaseInt> ComputeRowDeciles() const;
// Computes deciles on columns and returns a vector of deciles.
std::vector<ssize_t> ComputeColumnDeciles() const;
std::vector<BaseInt> ComputeColumnDeciles() const;
private:
// Updates the all_subsets_ vector so that it always contains 0 to
@@ -259,8 +269,9 @@ class SetCoverModel {
// Number of subsets. Maintained for ease of access.
BaseInt num_subsets_;
// Number of nonzeros in the matrix.
ssize_t num_nonzeros_;
// Number of nonzeros in the matrix. The value is an int64_t because there can
// be more than 1 << 31 nonzeros even with BaseInt = int32_t.
int64_t num_nonzeros_;
// True when the SparseRowView is up-to-date.
bool row_view_is_valid_;

View File

@@ -149,7 +149,7 @@ void ComputeLagrangianLowerBound(std::string name, SetCoverInvariant* inv) {
const SetCoverModel* model = inv->model();
WallTimer timer;
timer.Start();
SetCoverLagrangian lagrangian(inv, /*num_threads=*/4);
SetCoverLagrangian lagrangian(inv, /*num_threads=*/8);
const auto [lower_bound, reduced_costs, multipliers] =
lagrangian.ComputeLowerBound(model->subset_costs(), inv->cost());
LogCostAndTiming(name, "LagrangianLowerBound", lower_bound,
@@ -197,11 +197,11 @@ double RunSolver(std::string name, SetCoverModel* model) {
global_timer.Start();
RunChvatalAndSteepest(name, model);
// SetCoverInvariant inv = ComputeLPLowerBound(name, model);
RunMip(name, model);
// RunMip(name, model);
RunChvatalAndGLS(name, model);
SetCoverInvariant inv = RunElementDegreeGreedyAndSteepest(name, model);
ComputeLagrangianLowerBound(name, &inv);
// IterateClearAndMip(name, inv);
// IterateClearAndMip(name, inv);
IterateClearElementDegreeAndSteepest(name, &inv);
return inv.cost();
}
@@ -407,4 +407,22 @@ RAIL_TEST("rail4872.txt", 1527, 1861, MANYSECONDS); // [2]
#undef SCP_TEST
#undef RAIL_TEST
TEST(SetCoverHugeTest, GenerateProblem) {
  // Seed the random generator with a large rail instance.
  SetCoverModel seed_model =
      ReadRailSetCoverProblem(file::JoinPathRespectAbsolute(
          ::testing::SrcDir(), data_dir, "rail4284.txt"));
  seed_model.CreateSparseRowView();
  // Target dimensions and affine scaling factors for the generated model.
  const BaseInt num_wanted_elements = 40'000;
  const BaseInt num_wanted_subsets = 100'000'000;
  const double row_scale = 1.1;
  const double column_scale = 1.1;
  const double cost_scale = 10.0;
  SetCoverModel model = SetCoverModel::GenerateRandomModelFrom(
      seed_model, num_wanted_elements, num_wanted_subsets, row_scale,
      column_scale, cost_scale);
  // Solve the generated instance and log the resulting cost.
  SetCoverInvariant inv =
      RunElementDegreeGreedyAndSteepest("rail4284_huge.txt", &model);
  LOG(INFO) << "Cost: " << inv.cost();
}
} // namespace operations_research