diff --git a/ortools/algorithms/BUILD.bazel b/ortools/algorithms/BUILD.bazel
index be5f372620..3d7f2284f3 100644
--- a/ortools/algorithms/BUILD.bazel
+++ b/ortools/algorithms/BUILD.bazel
@@ -275,6 +275,7 @@ cc_library(
         ":set_cover_model",
         "//ortools/base:threadpool",
         "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/synchronization",
     ],
 )
 
@@ -286,9 +287,10 @@ cc_library(
         ":set_cover_cc_proto",
         "//ortools/base:intops",
         "//ortools/base:strong_vector",
-        "//ortools/util:aligned_memory",
         "@com_google_absl//absl/log",
         "@com_google_absl//absl/log:check",
+        "@com_google_absl//absl/random",
+        "@com_google_absl//absl/random:distributions",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/ortools/algorithms/python/set_cover.cc b/ortools/algorithms/python/set_cover.cc
index 2d107fdfc0..a4263078c1 100644
--- a/ortools/algorithms/python/set_cover.cc
+++ b/ortools/algorithms/python/set_cover.cc
@@ -13,91 +13,235 @@
 
 // A pybind11 wrapper for set_cover_*.
 
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
 #include <memory>
+#include <vector>
 
 #include "absl/base/nullability.h"
 #include "ortools/algorithms/set_cover_heuristics.h"
 #include "ortools/algorithms/set_cover_invariant.h"
 #include "ortools/algorithms/set_cover_model.h"
 #include "ortools/algorithms/set_cover_reader.h"
+#include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/pytypes.h"
 #include "pybind11/stl.h"
 #include "pybind11_protobuf/native_proto_caster.h"
 
+using ::operations_research::BaseInt;
+using ::operations_research::ClearRandomSubsets;
 using ::operations_research::ElementDegreeSolutionGenerator;
+using ::operations_research::ElementIndex;
 using ::operations_research::GreedySolutionGenerator;
 using ::operations_research::GuidedLocalSearch;
+using ::operations_research::GuidedTabuSearch;
 using ::operations_research::Preprocessor;
 using ::operations_research::RandomSolutionGenerator;
 using ::operations_research::ReadBeasleySetCoverProblem;
 using ::operations_research::ReadRailSetCoverProblem;
+using ::operations_research::SetCoverDecision;
 using ::operations_research::SetCoverInvariant;
 using ::operations_research::SetCoverModel;
+using ::operations_research::SparseColumn;
+using ::operations_research::SparseRow;
 using ::operations_research::SteepestSearch;
+using ::operations_research::SubsetBoolVector;
+using ::operations_research::SubsetCostVector;
 using ::operations_research::SubsetIndex;
+using ::operations_research::TabuList;
 using ::operations_research::TrivialSolutionGenerator;
 
 namespace py = pybind11;
 using ::py::arg;
+using ::py::make_iterator;
 
-// General note about TODOs: the corresponding functions/classes/methods are
-// more complex to wrap, as they use nonstandard types, and are less important,
-// as they are not as useful to most users (mostly useful to write some custom
-// Python heuristics).
+std::vector<SubsetIndex> VectorIntToVectorSubsetIndex(
+    const std::vector<BaseInt>& ints) {
+  std::vector<SubsetIndex> subs;
+  std::transform(ints.begin(), ints.end(), subs.begin(),
+                 [](int subset) -> SubsetIndex { return SubsetIndex(subset); });
+  return subs;
+}
+
+SubsetCostVector VectorDoubleToSubsetCostVector(
+    const std::vector<double>& doubles) {
+  SubsetCostVector costs(doubles.begin(), doubles.end());
+  return costs;
+}
+
+class IntIterator {
+ public:
+  using value_type = int;
+  using difference_type = std::ptrdiff_t;
+  using pointer = int*;
+  using reference = int&;
+  using iterator_category = std::input_iterator_tag;
+
+  explicit IntIterator(int max_value)
+      : max_value_(max_value), current_value_(0) {}
+
+  int operator*() const { return current_value_; }
+  IntIterator& operator++() {
+    ++current_value_;
+    return *this;
+  }
+
+  static IntIterator begin(int max_value) { return IntIterator{max_value}; }
+  static IntIterator end(int max_value) { return {max_value, max_value}; }
+
+  friend bool operator==(const IntIterator& lhs, const IntIterator& rhs) {
+    return lhs.max_value_ == rhs.max_value_ &&
+           lhs.current_value_ == rhs.current_value_;
+  }
+
+ private:
+  IntIterator(int max_value, int current_value)
+      : max_value_(max_value), current_value_(current_value) {}
+
+  const int max_value_;
+  int current_value_;
+};
 
 PYBIND11_MODULE(set_cover, m) {
   pybind11_protobuf::ImportNativeProtoCasters();
 
   // set_cover_model.h
+  py::class_<SetCoverModel::Stats>(m, "SetCoverModelStats")
+      .def_readwrite("min", &SetCoverModel::Stats::min)
+      .def_readwrite("max", &SetCoverModel::Stats::max)
+      .def_readwrite("median", &SetCoverModel::Stats::median)
+      .def_readwrite("mean", &SetCoverModel::Stats::mean)
+      .def_readwrite("stddev", &SetCoverModel::Stats::stddev)
+      .def("debug_string", &SetCoverModel::Stats::DebugString);
+
   py::class_<SetCoverModel>(m, "SetCoverModel")
       .def(py::init<>())
       .def_property_readonly("num_elements", &SetCoverModel::num_elements)
       .def_property_readonly("num_subsets", &SetCoverModel::num_subsets)
       .def_property_readonly("num_nonzeros", &SetCoverModel::num_nonzeros)
       .def_property_readonly("fill_rate", &SetCoverModel::FillRate)
+      .def_property_readonly(
+          "subset_costs",
+          [](SetCoverModel& model) -> const std::vector<double>& {
+            return model.subset_costs().get();
+          })
+      .def("columns",
+           [](SetCoverModel& model) -> std::vector<std::vector<BaseInt>> {
+             // Due to the inner StrongVector, make a deep copy. Anyway,
+             // columns() returns a const ref, so this keeps the semantics, not
+             // the efficiency.
+             std::vector<std::vector<BaseInt>> columns;
+             std::transform(
+                 model.columns().begin(), model.columns().end(),
+                 columns.begin(),
+                 [](const SparseColumn& column) -> std::vector<BaseInt> {
+                   std::vector<BaseInt> col(column.size());
+                   std::transform(column.begin(), column.end(), col.begin(),
+                                  [](ElementIndex element) -> BaseInt {
+                                    return element.value();
+                                  });
+                   return col;
+                 });
+             return columns;
+           })
+      .def("rows",
+           [](SetCoverModel& model) -> std::vector<std::vector<BaseInt>> {
+             // Due to the inner StrongVector, make a deep copy. Anyway,
+             // rows() returns a const ref, so this keeps the semantics, not
+             // the efficiency.
+             std::vector<std::vector<BaseInt>> rows;
+             std::transform(
+                 model.rows().begin(), model.rows().end(), rows.begin(),
+                 [](const SparseRow& row) -> std::vector<BaseInt> {
+                   std::vector<BaseInt> r(row.size());
+                   std::transform(row.begin(), row.end(), r.begin(),
+                                  [](SubsetIndex element) -> BaseInt {
+                                    return element.value();
+                                  });
+                   return r;
+                 });
+             return rows;
+           })
+      .def("row_view_is_valid", &SetCoverModel::row_view_is_valid)
+      .def("SubsetRange",
+           [](SetCoverModel& model) {
+             return make_iterator<>(IntIterator::begin(model.num_subsets()),
+                                    IntIterator::end(model.num_subsets()));
+           })
+      .def("ElementRange",
+           [](SetCoverModel& model) {
+             return make_iterator<>(IntIterator::begin(model.num_elements()),
+                                    IntIterator::end(model.num_elements()));
+           })
+      .def_property_readonly("all_subsets",
+                             [](SetCoverModel& model) -> std::vector<BaseInt> {
+                               std::vector<BaseInt> subsets;
+                               std::transform(
+                                   model.all_subsets().begin(),
+                                   model.all_subsets().end(), subsets.begin(),
+                                   [](const SubsetIndex element) -> BaseInt {
+                                     return element.value();
+                                   });
+                               return subsets;
+                             })
       .def("add_empty_subset", &SetCoverModel::AddEmptySubset, arg("cost"))
       .def(
           "add_element_to_last_subset",
-          [](SetCoverModel& model, int element) {
+          [](SetCoverModel& model, BaseInt element) {
             model.AddElementToLastSubset(element);
           },
           arg("element"))
       .def(
           "set_subset_cost",
-          [](SetCoverModel& model, int subset, double cost) {
+          [](SetCoverModel& model, BaseInt subset, double cost) {
             model.SetSubsetCost(subset, cost);
           },
           arg("subset"), arg("cost"))
       .def(
           "add_element_to_subset",
-          [](SetCoverModel& model, int element, int subset) {
+          [](SetCoverModel& model, BaseInt element, BaseInt subset) {
             model.AddElementToSubset(element, subset);
           },
           arg("subset"), arg("cost"))
+      .def("create_sparse_row_view", &SetCoverModel::CreateSparseRowView)
       .def("compute_feasibility", &SetCoverModel::ComputeFeasibility)
       .def(
           "reserve_num_subsets",
-          [](SetCoverModel& model, int num_subsets) {
+          [](SetCoverModel& model, BaseInt num_subsets) {
             model.ReserveNumSubsets(num_subsets);
           },
           arg("num_subsets"))
       .def(
           "reserve_num_elements_in_subset",
-          [](SetCoverModel& model, int num_elements, int subset) {
+          [](SetCoverModel& model, BaseInt num_elements, BaseInt subset) {
             model.ReserveNumElementsInSubset(num_elements, subset);
           },
           arg("num_elements"), arg("subset"))
       .def("export_model_as_proto", &SetCoverModel::ExportModelAsProto)
-      .def("import_model_from_proto", &SetCoverModel::ImportModelFromProto);
-  // TODO(user): add support for subset_costs, columns, rows,
-  // row_view_is_valid, SubsetRange, ElementRange, all_subsets,
-  // CreateSparseRowView, ComputeCostStats, ComputeRowStats,
-  // ComputeColumnStats, ComputeRowDeciles, ComputeColumnDeciles.
+      .def("import_model_from_proto", &SetCoverModel::ImportModelFromProto)
+      .def("compute_cost_stats", &SetCoverModel::ComputeCostStats)
+      .def("compute_row_stats", &SetCoverModel::ComputeRowStats)
+      .def("compute_column_stats", &SetCoverModel::ComputeColumnStats)
+      .def("compute_row_deciles", &SetCoverModel::ComputeRowDeciles)
+      .def("compute_column_deciles", &SetCoverModel::ComputeRowDeciles);
 
   // TODO(user): wrap IntersectingSubsetsIterator.
 
   // set_cover_invariant.h
+  py::class_<SetCoverDecision>(m, "SetCoverDecision")
+      .def(py::init<>())
+      .def(py::init([](BaseInt subset, bool value) -> SetCoverDecision* {
+             return new SetCoverDecision(SubsetIndex(subset), value);
+           }),
+           arg("subset"), arg("value"))
+      .def("subset",
+           [](const SetCoverDecision& decision) -> BaseInt {
+             return decision.subset().value();
+           })
+      .def("decision", &SetCoverDecision::decision);
+
   py::class_<SetCoverInvariant>(m, "SetCoverInvariant")
       .def(py::init<SetCoverModel*>())
       .def("initialize", &SetCoverInvariant::Initialize)
@@ -118,44 +262,90 @@ PYBIND11_MODULE(set_cover, m) {
           })
       .def("cost", &SetCoverInvariant::cost)
       .def("num_uncovered_elements", &SetCoverInvariant::num_uncovered_elements)
+      .def("is_selected",
+           [](SetCoverInvariant& invariant) -> std::vector<bool> {
+             return invariant.is_selected().get();
+           })
+      .def("num_free_elements",
+           [](SetCoverInvariant& invariant) -> std::vector<BaseInt> {
+             return invariant.num_free_elements().get();
+           })
+      .def("num_coverage_le_1_elements",
+           [](SetCoverInvariant& invariant) -> std::vector<BaseInt> {
+             return invariant.num_coverage_le_1_elements().get();
+           })
+      .def("coverage",
+           [](SetCoverInvariant& invariant) -> std::vector<BaseInt> {
+             return invariant.coverage().get();
+           })
+      .def(
+          "compute_coverage_in_focus",
+          [](SetCoverInvariant& invariant,
+             const std::vector<BaseInt>& focus) -> std::vector<BaseInt> {
+            return invariant
+                .ComputeCoverageInFocus(VectorIntToVectorSubsetIndex(focus))
+                .get();
+          },
+          arg("focus"))
+      .def("is_redundant",
+           [](SetCoverInvariant& invariant) -> std::vector<bool> {
+             return invariant.is_redundant().get();
+           })
+      .def("trace", &SetCoverInvariant::trace)
       .def("clear_trace", &SetCoverInvariant::ClearTrace)
       .def("clear_removability_information",
            &SetCoverInvariant::ClearRemovabilityInformation)
+      .def("new_removable_subsets", &SetCoverInvariant::new_removable_subsets)
+      .def("new_non_removable_subsets",
+           &SetCoverInvariant::new_non_removable_subsets)
       .def("compress_trace", &SetCoverInvariant::CompressTrace)
+      .def("load_solution",
+           [](SetCoverInvariant& invariant,
+              const std::vector<bool>& solution) -> void {
+             SubsetBoolVector sol(solution.begin(), solution.end());
+             return invariant.LoadSolution(sol);
+           })
       .def("check_consistency", &SetCoverInvariant::CheckConsistency)
+      .def(
+          "compute_is_redundant",
+          [](SetCoverInvariant& invariant, BaseInt subset) -> bool {
+            return invariant.ComputeIsRedundant(SubsetIndex(subset));
+          },
+          arg("subset"))
+      .def("make_fully_updated", &SetCoverInvariant::MakeFullyUpdated)
       .def(
           "flip",
-          [](SetCoverInvariant& invariant, int subset) {
+          [](SetCoverInvariant& invariant, BaseInt subset) {
             invariant.Flip(SubsetIndex(subset));
           },
           arg("subset"))
       .def(
           "flip_and_fully_update",
-          [](SetCoverInvariant& invariant, int subset) {
+          [](SetCoverInvariant& invariant, BaseInt subset) {
             invariant.FlipAndFullyUpdate(SubsetIndex(subset));
           },
           arg("subset"))
       .def(
           "select",
-          [](SetCoverInvariant& invariant, int subset) {
+          [](SetCoverInvariant& invariant, BaseInt subset) {
             invariant.Select(SubsetIndex(subset));
           },
           arg("subset"))
       .def(
           "select_and_fully_update",
-          [](SetCoverInvariant& invariant, int subset) {
+          [](SetCoverInvariant& invariant, BaseInt subset) {
             invariant.SelectAndFullyUpdate(SubsetIndex(subset));
           },
           arg("subset"))
       .def(
           "deselect",
-          [](SetCoverInvariant& invariant, int subset) {
+          [](SetCoverInvariant& invariant, BaseInt subset) {
             invariant.Deselect(SubsetIndex(subset));
           },
           arg("subset"))
       .def(
           "deselect_and_fully_update",
-          [](SetCoverInvariant& invariant, int subset) {
+          [](SetCoverInvariant& invariant, BaseInt subset) {
             invariant.DeselectAndFullyUpdate(SubsetIndex(subset));
           },
           arg("subset"))
@@ -163,10 +353,6 @@ PYBIND11_MODULE(set_cover, m) {
            &SetCoverInvariant::ExportSolutionAsProto)
       .def("import_solution_from_proto",
            &SetCoverInvariant::ImportSolutionFromProto);
-  // TODO(user): add support for is_selected, num_free_elements,
-  // num_coverage_le_1_elements, coverage, ComputeCoverageInFocus,
-  // is_redundant, trace, new_removable_subsets, new_non_removable_subsets,
-  // LoadSolution, ComputeIsRedundant.
 
   // set_cover_heuristics.h
   py::class_<Preprocessor>(m, "Preprocessor")
@@ -175,30 +361,57 @@ PYBIND11_MODULE(set_cover, m) {
            [](Preprocessor& heuristic) -> bool {
              return heuristic.NextSolution();
            })
+      .def("next_solution",
+           [](Preprocessor& heuristic,
+              const std::vector<BaseInt>& focus) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
+           })
       .def("num_columns_fixed_by_singleton_row",
            &Preprocessor::num_columns_fixed_by_singleton_row);
-  // TODO(user): add support for focus argument.
 
   py::class_<TrivialSolutionGenerator>(m, "TrivialSolutionGenerator")
       .def(py::init<SetCoverInvariant*>())
-      .def("next_solution", [](TrivialSolutionGenerator& heuristic) -> bool {
-        return heuristic.NextSolution();
-      });
-  // TODO(user): add support for focus argument.
+      .def("next_solution",
+           [](TrivialSolutionGenerator& heuristic) -> bool {
+             return heuristic.NextSolution();
+           })
+      .def("next_solution",
+           [](TrivialSolutionGenerator& heuristic,
+              const std::vector<BaseInt>& focus) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
+           });
 
   py::class_<RandomSolutionGenerator>(m, "RandomSolutionGenerator")
       .def(py::init<SetCoverInvariant*>())
-      .def("next_solution", [](RandomSolutionGenerator& heuristic) -> bool {
-        return heuristic.NextSolution();
-      });
-  // TODO(user): add support for focus argument.
+      .def("next_solution",
+           [](RandomSolutionGenerator& heuristic) -> bool {
+             return heuristic.NextSolution();
+           })
+      .def("next_solution",
+           [](RandomSolutionGenerator& heuristic,
+              const std::vector<BaseInt>& focus) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
+           });
 
   py::class_<GreedySolutionGenerator>(m, "GreedySolutionGenerator")
       .def(py::init<SetCoverInvariant*>())
-      .def("next_solution", [](GreedySolutionGenerator& heuristic) -> bool {
-        return heuristic.NextSolution();
-      });
-  // TODO(user): add support for focus and cost arguments.
+      .def("next_solution",
+           [](GreedySolutionGenerator& heuristic) -> bool {
+             return heuristic.NextSolution();
+           })
+      .def("next_solution",
+           [](GreedySolutionGenerator& heuristic,
+              const std::vector<BaseInt>& focus) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
+           })
+      .def("next_solution",
+           [](GreedySolutionGenerator& heuristic,
+              const std::vector<BaseInt>& focus,
+              const std::vector<double>& costs) -> bool {
+             return heuristic.NextSolution(
+                 VectorIntToVectorSubsetIndex(focus),
+                 VectorDoubleToSubsetCostVector(costs));
+           });
 
   py::class_<ElementDegreeSolutionGenerator>(m,
                                              "ElementDegreeSolutionGenerator")
@@ -206,16 +419,40 @@ PYBIND11_MODULE(set_cover, m) {
       .def("next_solution",
            [](ElementDegreeSolutionGenerator& heuristic) -> bool {
              return heuristic.NextSolution();
+           })
+      .def("next_solution",
+           [](ElementDegreeSolutionGenerator& heuristic,
+              const std::vector<BaseInt>& focus) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
+           })
+      .def("next_solution",
+           [](ElementDegreeSolutionGenerator& heuristic,
+              const std::vector<BaseInt>& focus,
+              const std::vector<double>& costs) -> bool {
+             return heuristic.NextSolution(
+                 VectorIntToVectorSubsetIndex(focus),
+                 VectorDoubleToSubsetCostVector(costs));
            });
-  // TODO(user): add support for focus and cost arguments.
 
   py::class_<SteepestSearch>(m, "SteepestSearch")
       .def(py::init<SetCoverInvariant*>())
       .def("next_solution",
            [](SteepestSearch& heuristic, int num_iterations) -> bool {
              return heuristic.NextSolution(num_iterations);
+           })
+      .def("next_solution",
+           [](SteepestSearch& heuristic, const std::vector<BaseInt>& focus,
+              int num_iterations) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus),
+                                           num_iterations);
+           })
+      .def("next_solution",
+           [](SteepestSearch& heuristic, const std::vector<BaseInt>& focus,
+              const std::vector<double>& costs, int num_iterations) -> bool {
+             return heuristic.NextSolution(
+                 VectorIntToVectorSubsetIndex(focus),
+                 VectorDoubleToSubsetCostVector(costs), num_iterations);
            });
-  // TODO(user): add support for focus and cost arguments.
 
   py::class_<GuidedLocalSearch>(m, "GuidedLocalSearch")
       .def(py::init<SetCoverInvariant*>())
@@ -223,12 +460,92 @@ PYBIND11_MODULE(set_cover, m) {
       .def("next_solution",
            [](GuidedLocalSearch& heuristic, int num_iterations) -> bool {
              return heuristic.NextSolution(num_iterations);
+           })
+      .def("next_solution",
+           [](GuidedLocalSearch& heuristic, const std::vector<BaseInt>& focus,
+              int num_iterations) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus),
+                                           num_iterations);
            });
-  // TODO(user): add support for focus and cost arguments.
 
-  // TODO(user): add support for ClearRandomSubsets, ClearRandomSubsets,
-  // ClearMostCoveredElements, ClearMostCoveredElements, TabuList,
-  // GuidedTabuSearch.
+  // Specialization for T = SubsetIndex ~= BaseInt (aka int for Python, whatever
+  // the size of BaseInt).
+  // A base type doesn't work, because TabuList uses `T::value` in the
+  // constructor.
+  py::class_<TabuList<SubsetIndex>>(m, "TabuList")
+      .def(py::init([](int size) -> TabuList<SubsetIndex>* {
+             return new TabuList<SubsetIndex>(SubsetIndex(size));
+           }),
+           arg("size"))
+      .def("size", &TabuList<SubsetIndex>::size)
+      .def("init", &TabuList<SubsetIndex>::Init, arg("size"))
+      .def(
+          "add",
+          [](TabuList<SubsetIndex>& list, BaseInt t) -> void {
+            return list.Add(SubsetIndex(t));
+          },
+          arg("t"))
+      .def(
+          "contains",
+          [](TabuList<SubsetIndex>& list, BaseInt t) -> bool {
+            return list.Contains(SubsetIndex(t));
+          },
+          arg("t"));
+
+  py::class_<GuidedTabuSearch>(m, "GuidedTabuSearch")
+      .def(py::init<SetCoverInvariant*>())
+      .def("initialize", &GuidedTabuSearch::Initialize)
+      .def("next_solution",
+           [](GuidedTabuSearch& heuristic, int num_iterations) -> bool {
+             return heuristic.NextSolution(num_iterations);
+           })
+      .def("next_solution",
+           [](GuidedTabuSearch& heuristic, const std::vector<BaseInt>& focus,
+              int num_iterations) -> bool {
+             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus),
+                                           num_iterations);
+           })
+      .def("get_lagrangian_factor", &GuidedTabuSearch::SetLagrangianFactor,
+           arg("factor"))
+      .def("set_lagrangian_factor", &GuidedTabuSearch::GetLagrangianFactor)
+      .def("set_epsilon", &GuidedTabuSearch::SetEpsilon, arg("r"))
+      .def("get_epsilon", &GuidedTabuSearch::GetEpsilon)
+      .def("set_penalty_factor", &GuidedTabuSearch::SetPenaltyFactor,
+           arg("factor"))
+      .def("get_penalty_factor", &GuidedTabuSearch::GetPenaltyFactor)
+      .def("set_tabu_list_size", &GuidedTabuSearch::SetTabuListSize,
+           arg("size"))
+      .def("get_tabu_list_size", &GuidedTabuSearch::GetTabuListSize);
+
+  m.def(
+      "clear_random_subsets",
+      [](BaseInt num_subsets, SetCoverInvariant* inv) -> std::vector<BaseInt> {
+        const std::vector<SubsetIndex> cleared =
+            ClearRandomSubsets(num_subsets, inv);
+        return {cleared.begin(), cleared.end()};
+      });
+  m.def("clear_random_subsets",
+        [](const std::vector<BaseInt>& focus, BaseInt num_subsets,
+           SetCoverInvariant* inv) -> std::vector<BaseInt> {
+          const std::vector<SubsetIndex> cleared = ClearRandomSubsets(
+              VectorIntToVectorSubsetIndex(focus), num_subsets, inv);
+          return {cleared.begin(), cleared.end()};
+        });
+
+  m.def(
+      "clear_most_covered_elements",
+      [](BaseInt num_subsets, SetCoverInvariant* inv) -> std::vector<BaseInt> {
+        const std::vector<SubsetIndex> cleared =
+            ClearMostCoveredElements(num_subsets, inv);
+        return {cleared.begin(), cleared.end()};
+      });
+  m.def("clear_most_covered_elements",
+        [](const std::vector<BaseInt>& focus, BaseInt num_subsets,
+           SetCoverInvariant* inv) -> std::vector<BaseInt> {
+          const std::vector<SubsetIndex> cleared = ClearMostCoveredElements(
+              VectorIntToVectorSubsetIndex(focus), num_subsets, inv);
+          return {cleared.begin(), cleared.end()};
+        });
 
   // set_cover_reader.h
   m.def("read_beasly_set_cover_problem", &ReadBeasleySetCoverProblem);
diff --git a/ortools/algorithms/samples/set_cover.cc b/ortools/algorithms/samples/set_cover.cc
new file mode 100644
index 0000000000..c9feae012d
--- /dev/null
+++ b/ortools/algorithms/samples/set_cover.cc
@@ -0,0 +1,65 @@
+// Copyright 2010-2024 Google LLC
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// [START program]
+// [START import]
+#include <cstdlib>
+
+#include "ortools/algorithms/set_cover_heuristics.h"
+#include "ortools/algorithms/set_cover_invariant.h"
+#include "ortools/algorithms/set_cover_model.h"
+#include "ortools/base/logging.h"
+// [END import]
+
+namespace operations_research {
+
+void SimpleSetCoverProgram() {
+  // [START data]
+  SetCoverModel model;
+  model.AddEmptySubset(2.0);
+  model.AddElementToLastSubset(0);
+  model.AddEmptySubset(2.0);
+  model.AddElementToLastSubset(1);
+  model.AddEmptySubset(1.0);
+  model.AddElementToLastSubset(0);
+  model.AddElementToLastSubset(1);
+  // [END data]
+
+  // [START solve]
+  SetCoverInvariant inv(&model);
+  GreedySolutionGenerator greedy(&inv);
+  bool found_solution = greedy.NextSolution();
+  if (!found_solution) {
+    LOG(INFO) << "No solution found by the greedy heuristic.";
+    return;
+  }
+  SetCoverSolutionResponse solution = inv.ExportSolutionAsProto();
+  // [END solve]
+
+  // [START print_solution]
+  LOG(INFO) << "Total cost: " << solution.cost();  // == inv.cost()
+  LOG(INFO) << "Total number of selected subsets: " << solution.num_subsets();
+  LOG(INFO) << "Chosen subsets:";
+  for (int i = 0; i < solution.subset_size(); ++i) {
+    LOG(INFO) << "  " << solution.subset(i);
+  }
+  // [END print_solution]
+}
+
+}  // namespace operations_research
+
+int main() {
+  operations_research::SimpleSetCoverProgram();
+  return EXIT_SUCCESS;
+}
+// [END program]
diff --git a/ortools/algorithms/samples/set_cover.py b/ortools/algorithms/samples/set_cover.py
new file mode 100755
index 0000000000..aab98c6b0f
--- /dev/null
+++ b/ortools/algorithms/samples/set_cover.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+# Copyright 2010-2024 Google LLC
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A simple set-covering problem."""
+
+# [START program]
+# [START import]
+from ortools.algorithms.python import set_cover
+# [END import]
+
+
+def main():
+    # [START data]
+    model = set_cover.SetCoverModel()
+    model.add_empty_subset(2.0)
+    model.add_element_to_last_subset(0)
+    model.add_empty_subset(2.0)
+    model.add_element_to_last_subset(1)
+    model.add_empty_subset(1.0)
+    model.add_element_to_last_subset(0)
+    model.add_element_to_last_subset(1)
+    # [END data]
+
+    # [START solve]
+    inv = set_cover.SetCoverInvariant(model)
+    greedy = set_cover.GreedySolutionGenerator(inv)
+    has_found = greedy.next_solution()
+    if not has_found:
+        print("No solution found by the greedy heuristic.")
+        return
+    solution = inv.export_solution_as_proto()
+    # [END solve]
+
+    # [START print_solution]
+    print(f"Total cost: {solution.cost}")  # == inv.cost()
+    print(f"Total number of selected subsets: {solution.num_subsets}")
+    print("Chosen subsets:")
+    for subset in solution.subset:
+        print(f"  {subset}")
+    # [END print_solution]
+
+
+if __name__ == "__main__":
+    main()
+# [END program]
diff --git a/ortools/algorithms/samples/simple_knapsack_program.cc b/ortools/algorithms/samples/simple_knapsack_program.cc
index 64a80d3a46..d9fe9e7091 100644
--- a/ortools/algorithms/samples/simple_knapsack_program.cc
+++ b/ortools/algorithms/samples/simple_knapsack_program.cc
@@ -15,12 +15,14 @@
 // [START import]
 #include <algorithm>
 #include <cstdint>
+#include <cstdlib>
 #include <iterator>
 #include <numeric>
 #include <sstream>
 #include <vector>
 
 #include "ortools/algorithms/knapsack_solver.h"
+#include "ortools/base/logging.h"
 // [END import]
 
 namespace operations_research {
diff --git a/ortools/algorithms/set_cover_heuristics.cc b/ortools/algorithms/set_cover_heuristics.cc
index 63c7fe3066..4208c90e17 100644
--- a/ortools/algorithms/set_cover_heuristics.cc
+++ b/ortools/algorithms/set_cover_heuristics.cc
@@ -544,23 +544,23 @@ bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus,
 }
 
 namespace {
-void SampleSubsets(std::vector<SubsetIndex>* list, std::size_t num_subsets) {
-  num_subsets = std::min(num_subsets, list->size());
+void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
+  num_subsets = std::min(num_subsets, static_cast<BaseInt>(list->size()));
   CHECK_GE(num_subsets, 0);
   std::shuffle(list->begin(), list->end(), absl::BitGen());
   list->resize(num_subsets);
 }
 }  // namespace
 
-std::vector<SubsetIndex> ClearRandomSubsets(std::size_t num_subsets,
+std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
                                             SetCoverInvariant* inv) {
   return ClearRandomSubsets(inv->model()->all_subsets(), num_subsets, inv);
 }
 
 std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
-                                            std::size_t num_subsets,
+                                            BaseInt num_subsets,
                                             SetCoverInvariant* inv) {
-  num_subsets = std::min(num_subsets, focus.size());
+  num_subsets = std::min(num_subsets, static_cast<BaseInt>(focus.size()));
   CHECK_GE(num_subsets, 0);
   std::vector<SubsetIndex> chosen_indices;
   for (const SubsetIndex subset : focus) {
@@ -569,7 +569,7 @@ std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
     }
   }
   SampleSubsets(&chosen_indices, num_subsets);
-  std::size_t num_deselected = 0;
+  BaseInt num_deselected = 0;
   for (const SubsetIndex subset : chosen_indices) {
     inv->Deselect(subset);
     ++num_deselected;
@@ -585,14 +585,14 @@ std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
   return chosen_indices;
 }
 
-std::vector<SubsetIndex> ClearMostCoveredElements(std::size_t max_num_subsets,
+std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt max_num_subsets,
                                                   SetCoverInvariant* inv) {
   return ClearMostCoveredElements(inv->model()->all_subsets(), max_num_subsets,
                                   inv);
 }
 
 std::vector<SubsetIndex> ClearMostCoveredElements(
-    absl::Span<const SubsetIndex> focus, std::size_t max_num_subsets,
+    absl::Span<const SubsetIndex> focus, BaseInt max_num_subsets,
     SetCoverInvariant* inv) {
   // This is the vector we will return.
   std::vector<SubsetIndex> sampled_subsets;
@@ -625,7 +625,8 @@ std::vector<SubsetIndex> ClearMostCoveredElements(
   // Actually *sample* sampled_subset.
   // TODO(user): find another algorithm if necessary.
   std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
-  sampled_subsets.resize(std::min(sampled_subsets.size(), max_num_subsets));
+  sampled_subsets.resize(
+      std::min(static_cast<BaseInt>(sampled_subsets.size()), max_num_subsets));
 
   // Testing has shown that sorting sampled_subsets is not necessary.
   // Now, un-select the subset in sampled_subsets.
diff --git a/ortools/algorithms/set_cover_heuristics.h b/ortools/algorithms/set_cover_heuristics.h
index 04d3b31b9f..154724cf61 100644
--- a/ortools/algorithms/set_cover_heuristics.h
+++ b/ortools/algorithms/set_cover_heuristics.h
@@ -477,12 +477,12 @@ class GuidedLocalSearch {
 // solution. There can be more than num_subsets variables cleared because the
 // intersecting subsets are also removed from the solution. Returns a list of
 // subset indices that can be reused as a focus.
-std::vector<SubsetIndex> ClearRandomSubsets(std::size_t num_subsets,
+std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
                                             SetCoverInvariant* inv);
 
 // Same as above, but clears the subset indices in focus.
 std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
-                                            std::size_t num_subsets,
+                                            BaseInt num_subsets,
                                             SetCoverInvariant* inv);
 
 // Clears the variables (subsets) that cover the most covered elements. This is
@@ -490,12 +490,12 @@ std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
 // randomly.
 // Returns the list of the chosen subset indices.
 // This indices can then be used ax a focus.
-std::vector<SubsetIndex> ClearMostCoveredElements(std::size_t num_subsets,
+std::vector<SubsetIndex> ClearMostCoveredElements(BaseInt num_subsets,
                                                   SetCoverInvariant* inv);
 
 // Same as above, but clears the subset indices in focus.
 std::vector<SubsetIndex> ClearMostCoveredElements(
-    absl::Span<const SubsetIndex> focus, std::size_t num_subsets,
+    absl::Span<const SubsetIndex> focus, BaseInt num_subsets,
     SetCoverInvariant* inv);
 }  // namespace operations_research
 
diff --git a/ortools/algorithms/set_cover_lagrangian.cc b/ortools/algorithms/set_cover_lagrangian.cc
index 18dea932df..f20a98a051 100644
--- a/ortools/algorithms/set_cover_lagrangian.cc
+++ b/ortools/algorithms/set_cover_lagrangian.cc
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "absl/log/check.h"
+#include "absl/synchronization/blocking_counter.h"
 #include "ortools/algorithms/adjustable_k_ary_heap.h"
 #include "ortools/algorithms/set_cover_invariant.h"
 #include "ortools/algorithms/set_cover_model.h"
@@ -74,7 +75,7 @@ namespace {
 // TODO(user): Investigate.
 Cost ScalarProduct(const SparseColumn& column, const ElementCostVector& dual) {
   Cost result = 0.0;
-  for (ColumnEntryIndex pos(0); pos.value() < column.size(); ++pos) {
+  for (const ColumnEntryIndex pos : column.index_range()) {
     result += dual[column[pos]];
   }
   return result;
@@ -82,52 +83,49 @@ Cost ScalarProduct(const SparseColumn& column, const ElementCostVector& dual) {
 
 // Computes the reduced costs for a subset of subsets.
 // This is a helper function for ParallelComputeReducedCosts().
-// It is called on a slice of subsets, defined by start and end.
+// It is called on a slice of subsets, defined by slice_start and slice_end.
 // The reduced costs are computed using the multipliers vector.
 // The columns of the subsets are given by the columns view.
 // The result is stored in reduced_costs.
-void FillReducedCostsSlice(SubsetIndex start, SubsetIndex end,
+void FillReducedCostsSlice(SubsetIndex slice_start, SubsetIndex slice_end,
                            const SubsetCostVector& costs,
                            const ElementCostVector& multipliers,
                            const SparseColumnView& columns,
                            SubsetCostVector* reduced_costs) {
-  for (SubsetIndex subset = start; subset < end; ++subset) {
+  for (SubsetIndex subset = slice_start; subset < slice_end; ++subset) {
     (*reduced_costs)[subset] =
         costs[subset] - ScalarProduct(columns[subset], multipliers);
   }
 }
+
+BaseInt BlockSize(BaseInt size, int num_threads) {
+  return 1 + (size - 1) / num_threads;
+}
 }  // namespace
 
 // Computes the reduced costs for all subsets in parallel using ThreadPool.
 SubsetCostVector SetCoverLagrangian::ParallelComputeReducedCosts(
     const SubsetCostVector& costs, const ElementCostVector& multipliers) const {
   const SubsetIndex num_subsets(model_.num_subsets());
-  // TODO(user): compute a close-to-optimal k-subset partitioning.
-  const SubsetIndex block_size =
-      SubsetIndex(1) + num_subsets / num_threads_;  // [***] Arbitrary choice.
   const SparseColumnView& columns = model_.columns();
   SubsetCostVector reduced_costs(num_subsets);
-  ThreadPool thread_pool("ParallelComputeReducedCosts", num_threads_);
-  thread_pool.StartWorkers();
-  {
-    // TODO(user): check how costly it is to create a new ThreadPool.
-    // TODO(user): use a queue of subsets to process? instead of a fixed range.
-
-    // This parallelization is not very efficient, because all the threads
-    // use the same costs vector. Maybe it should be local to the thread.
-    // It's unclear whether sharing columns and costs is better than having
-    // each thread use its own partial copy.
-    // Finally, it might be better to use a queue of subsets to process, instead
-    // of a fixed range.
-    for (SubsetIndex start(0); start < num_subsets; start += block_size) {
-      thread_pool.Schedule([start, block_size, num_subsets, &costs,
-                            &multipliers, &columns, &reduced_costs]() {
-        const SubsetIndex end = std::min(start + block_size, num_subsets);
-        FillReducedCostsSlice(start, end, costs, multipliers, columns,
-                              &reduced_costs);
-      });
-    }
-  }  // Synchronize all the threads. This is equivalent to a wait.
+  // TODO(user): compute a close-to-optimal k-subset partitioning of the columns
+  // based on their sizes. [***]
+  const SubsetIndex block_size(BlockSize(num_subsets.value(), num_threads_));
+  absl::BlockingCounter num_threads_running(num_threads_);
+  SubsetIndex slice_start(0);
+  for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
+    const SubsetIndex slice_end =
+        std::min(slice_start + block_size, num_subsets);
+    thread_pool_->Schedule([&num_threads_running, slice_start, slice_end,
+                            &costs, &multipliers, &columns, &reduced_costs]() {
+      FillReducedCostsSlice(slice_start, slice_end, costs, multipliers, columns,
+                            &reduced_costs);
+      num_threads_running.DecrementCount();
+    });
+    slice_start = slice_end;
+  }
+  num_threads_running.Wait();
   return reduced_costs;
 }
 
@@ -147,14 +145,14 @@ SubsetCostVector SetCoverLagrangian::ComputeReducedCosts(
 
 namespace {
 // Helper function to compute the subgradient.
-// It fills a slice of the subgradient vector from indices start to end.
-// This is a helper function for ParallelComputeSubgradient().
-// The subgradient is computed using the reduced costs vector.
-void FillSubgradientSlice(SubsetIndex start, SubsetIndex end,
+// It fills a slice of the subgradient vector from indices slice_start to
+// slice_end. This is a helper function for ParallelComputeSubgradient(). The
+// subgradient is computed using the reduced costs vector.
+void FillSubgradientSlice(SubsetIndex slice_start, SubsetIndex slice_end,
                           const SparseColumnView& columns,
                           const SubsetCostVector& reduced_costs,
                           ElementCostVector* subgradient) {
-  for (SubsetIndex subset(start); subset < end; ++subset) {
+  for (SubsetIndex subset(slice_start); subset < slice_end; ++subset) {
     if (reduced_costs[subset] < 0.0) {
       for (const ElementIndex element : columns[subset]) {
         (*subgradient)[element] -= 1.0;
@@ -181,8 +179,6 @@ ElementCostVector SetCoverLagrangian::ComputeSubgradient(
 ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient(
     const SubsetCostVector& reduced_costs) const {
   const SubsetIndex num_subsets(model_.num_subsets());
-  const SubsetIndex block_size =
-      SubsetIndex(1) + num_subsets / num_threads_;  // [***]
   const SparseColumnView& columns = model_.columns();
   ElementCostVector subgradient(model_.num_elements(), 1.0);
   // The subgradient has one component per element, each thread processes
@@ -191,20 +187,22 @@ ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient(
   // although this might be less well-balanced.
   std::vector<ElementCostVector> subgradients(
       num_threads_, ElementCostVector(model_.num_elements()));
-  ThreadPool thread_pool("ParallelComputeSubgradient", num_threads_);
-  thread_pool.StartWorkers();
-  {
-    int thread_index = 0;
-    for (SubsetIndex start(0); start < num_subsets;
-         start += block_size, ++thread_index) {
-      thread_pool.Schedule([start, block_size, num_subsets, &reduced_costs,
-                            &columns, &subgradients, thread_index]() {
-        const SubsetIndex end = std::min(start + block_size, num_subsets);
-        FillSubgradientSlice(start, end, columns, reduced_costs,
-                             &subgradients[thread_index]);
-      });
-    }
-  }  // Synchronize all the threads.
+  absl::BlockingCounter num_threads_running(num_threads_);
+  const SubsetIndex block_size(BlockSize(num_subsets.value(), num_threads_));
+  SubsetIndex slice_start(0);
+  for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
+    const SubsetIndex slice_end =
+        std::min(slice_start + block_size, num_subsets);
+    thread_pool_->Schedule([&num_threads_running, slice_start, slice_end,
+                            &reduced_costs, &columns, &subgradients,
+                            thread_index]() {
+      FillSubgradientSlice(slice_start, slice_end, columns, reduced_costs,
+                           &subgradients[thread_index]);
+      num_threads_running.DecrementCount();
+    });
+    slice_start = slice_end;
+  }
+  num_threads_running.Wait();
   for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
     for (const ElementIndex element : model_.ElementRange()) {
       subgradient[element] += subgradients[thread_index][element];
@@ -216,17 +214,17 @@ ElementCostVector SetCoverLagrangian::ParallelComputeSubgradient(
 namespace {
 // Helper function to compute the value of the Lagrangian.
 // This is a helper function for ParallelComputeLagrangianValue().
-// It is called on a slice of elements, defined by start and end.
+// It is called on a slice of elements, defined by slice_start and slice_end.
 // The value of the Lagrangian is computed using the reduced costs vector and
 // the multipliers vector.
 // The result is stored in lagrangian_value.
-void FillLagrangianValueSlice(SubsetIndex start, SubsetIndex end,
+void FillLagrangianValueSlice(SubsetIndex slice_start, SubsetIndex slice_end,
                               const SubsetCostVector& reduced_costs,
                               Cost* lagrangian_value) {
-  // This is min \sum_{j \in N} c_j(u) x_j. This captures the remark above (**),
-  // taking into account the possible values for x_j, and using them to minimize
-  // the terms.
-  for (SubsetIndex subset(start); subset < end; ++subset) {
+  // This is min \sum_{j \in N} c_j(u) x_j. This captures the remark above
+  // (**), taking into account the possible values for x_j, and using them to
+  // minimize the terms.
+  for (SubsetIndex subset(slice_start); subset < slice_end; ++subset) {
     if (reduced_costs[subset] < 0.0) {
       *lagrangian_value += reduced_costs[subset];
     }
@@ -258,30 +256,31 @@ Cost SetCoverLagrangian::ComputeLagrangianValue(
 Cost SetCoverLagrangian::ParallelComputeLagrangianValue(
     const SubsetCostVector& reduced_costs,
     const ElementCostVector& multipliers) const {
-  const SubsetIndex num_subsets(model_.num_subsets());
-  const SubsetIndex block_size =
-      SubsetIndex(1) + num_subsets / num_threads_;  // [***] Arbitrary.
   Cost lagrangian_value = 0.0;
   // This is \sum{i \in M} u_i.
-
   for (const Cost u : multipliers) {
     lagrangian_value += u;
   }
   std::vector<Cost> lagrangian_values(num_threads_, 0.0);
-  ThreadPool thread_pool("ParallelComputeLagrangianValue", num_threads_);
-  thread_pool.StartWorkers();
-  {
-    int thread_index = 0;
-    for (SubsetIndex start(0); start < num_subsets; start += block_size) {
-      thread_pool.Schedule([start, block_size, num_subsets, thread_index,
-                            &reduced_costs, &lagrangian_values]() {
-        const SubsetIndex end = std::min(start + block_size, num_subsets);
-        FillLagrangianValueSlice(start, end, reduced_costs,
-                                 &lagrangian_values[thread_index]);
-      });
-      ++thread_index;
-    }
-  }  // Synchronize all the threads.
+  absl::BlockingCounter num_threads_running(num_threads_);
+  const SubsetIndex block_size(BlockSize(model_.num_subsets(), num_threads_));
+  const SubsetIndex num_subsets(model_.num_subsets());
+  SubsetIndex slice_start(0);
+  for (int thread_index = 0; thread_index < num_threads_; ++thread_index) {
+    const SubsetIndex slice_end =
+        std::min(slice_start + block_size, num_subsets);
+    thread_pool_->Schedule([&num_threads_running, slice_start, block_size,
+                            num_subsets, thread_index, &reduced_costs,
+                            &lagrangian_values]() {
+      const SubsetIndex slice_end =
+          std::min(slice_start + block_size, num_subsets);
+      FillLagrangianValueSlice(slice_start, slice_end, reduced_costs,
+                               &lagrangian_values[thread_index]);
+      num_threads_running.DecrementCount();
+    });
+    slice_start = slice_end;
+  }
+  num_threads_running.Wait();
   for (const Cost l : lagrangian_values) {
     lagrangian_value += l;
   }
@@ -290,8 +289,8 @@ Cost SetCoverLagrangian::ParallelComputeLagrangianValue(
 
 // Perform a subgradient step.
 // In the general case, for an Integer Program A.x <=b, the Lagragian
-// multipliers vector at step k+1 is defined as: u^{k+1} = u^k + t_k (A x^k - b)
-// with term t_k = lambda_k * (UB -  L(u^k)) / |A x^k - b|^2.
+// multipliers vector at step k+1 is defined as: u^{k+1} = u^k + t_k (A x^k -
+// b) with term t_k = lambda_k * (UB -  L(u^k)) / |A x^k - b|^2.
 // |.| is the 2-norm (i.e. Euclidean)
 // In our case, the problem A x <= b is in the form A x >= 1. We need to
 // replace A x - b by s_i(u) = 1 - sum_{j \in J_i} x_j(u).
@@ -343,9 +342,9 @@ void SetCoverLagrangian::ParallelUpdateMultipliers(
       step_size * (upper_bound - lagrangian_value) / subgradient_square_norm;
   for (const ElementIndex element : model_.ElementRange()) {
     // Avoid multipliers to go negative and to go through the roof. 1e6 chosen
-    // arbitrarily. [***]
+    const Cost kRoof = 1e6;  // Arbitrary value, from [1].
     (*multipliers)[element] = std::clamp(
-        (*multipliers)[element] + factor * subgradient[element], 0.0, 1e6);
+        (*multipliers)[element] + factor * subgradient[element], 0.0, kRoof);
   }
 }
 
@@ -503,9 +502,9 @@ SetCoverLagrangian::ComputeLowerBound(const SubsetCostVector& costs,
   for (int iter = 0; iter < 1000; ++iter) {
     reduced_costs = ParallelComputeReducedCosts(costs, multipliers);
     const Cost lagrangian_value =
-        ComputeLagrangianValue(reduced_costs, multipliers);
-    UpdateMultipliers(step_size, lagrangian_value, upper_bound, reduced_costs,
-                      &multipliers);
+        ParallelComputeLagrangianValue(reduced_costs, multipliers);
+    ParallelUpdateMultipliers(step_size, lagrangian_value, upper_bound,
+                              reduced_costs, &multipliers);
     lower_bound = std::max(lower_bound, lagrangian_value);
     // step_size should be updated like this. For the time besing, we keep the
     // step size, because the implementation of the rest is not adequate yet
diff --git a/ortools/algorithms/set_cover_lagrangian.h b/ortools/algorithms/set_cover_lagrangian.h
index aa63627ad1..9e946b173a 100644
--- a/ortools/algorithms/set_cover_lagrangian.h
+++ b/ortools/algorithms/set_cover_lagrangian.h
@@ -15,6 +15,7 @@
 #define OR_TOOLS_ALGORITHMS_SET_COVER_LAGRANGIAN_H_
 
 #include <memory>
+#include <new>
 #include <tuple>
 #include <vector>
 
@@ -44,7 +45,12 @@ namespace operations_research {
 class SetCoverLagrangian {
  public:
   explicit SetCoverLagrangian(SetCoverInvariant* inv, int num_threads = 1)
-      : inv_(inv), model_(*inv->model()), num_threads_(num_threads) {}
+      : inv_(inv),
+        model_(*inv->model()),
+        num_threads_(num_threads),
+        thread_pool_(new ThreadPool(num_threads)) {
+    thread_pool_->StartWorkers();
+  }
 
   // Returns true if a solution was found.
   // TODO(user): Add time-outs and exit with a partial solution. This seems
@@ -137,6 +143,9 @@ class SetCoverLagrangian {
   // The number of threads to use for parallelization.
   int num_threads_;
 
+  // The thread pool used for parallelization.
+  std::unique_ptr<ThreadPool> thread_pool_;
+
   // Total (scalar) Lagrangian cost.
   Cost lagrangian_;
 
diff --git a/ortools/algorithms/set_cover_model.cc b/ortools/algorithms/set_cover_model.cc
index f7b5faf24c..e67718b197 100644
--- a/ortools/algorithms/set_cover_model.cc
+++ b/ortools/algorithms/set_cover_model.cc
@@ -17,17 +17,161 @@
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
+#include <limits>
 #include <numeric>
 #include <string>
+#include <tuple>
 #include <utility>
 #include <vector>
 
 #include "absl/log/check.h"
+#include "absl/random/discrete_distribution.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
 #include "ortools/algorithms/set_cover.pb.h"
 #include "ortools/base/logging.h"
 
 namespace operations_research {
 
+namespace {
+
+// Returns a value in [min, min + scaling_factor * (raw_value - min +
+// random_term)], where raw_value is drawn from a discrete distribution, and
+// random_term is a double drawn uniformly in [0, 1].
+BaseInt DiscreteAffine(absl::BitGen& bitgen,
+                       absl::discrete_distribution<BaseInt>& dist, BaseInt min,
+                       double scaling_factor) {
+  const BaseInt raw_value = dist(bitgen);
+  const double random_term = absl::Uniform<double>(bitgen, 0, 1.0);
+  const BaseInt affine_value =
+      static_cast<BaseInt>(
+          std::floor((raw_value - min + random_term) * scaling_factor)) +
+      min;
+  return affine_value;
+}
+
+// For a given view (SparseColumnView or SparseRowView), returns the
+// distribution of the sizes of the vectors in the view, which can be used in
+// an absl::discrete_distribution.
+template <typename View>
+std::tuple<BaseInt, std::vector<BaseInt>> ComputeSizeHistogram(
+    const View& view) {
+  BaseInt max_size = 0;
+  BaseInt min_size = std::numeric_limits<BaseInt>::max();
+  for (const auto& vec : view) {
+    const BaseInt size = vec.size();
+    min_size = std::min(min_size, size);
+    max_size = std::max(max_size, size);
+  }
+  std::vector<BaseInt> weights(max_size + 1, 0);
+  for (const auto& vec : view) {
+    const BaseInt size = vec.size();
+    ++weights[size];
+  }
+  return {min_size, weights};
+}
+
+template <typename View>
+std::tuple<BaseInt, absl::discrete_distribution<BaseInt>>
+ComputeSizeDistribution(const View& view) {
+  const auto [min_size, weights] = ComputeSizeHistogram(view);
+  absl::discrete_distribution<BaseInt> dist(weights.begin(), weights.end());
+  return {min_size, dist};
+}
+}  // namespace
+
+SetCoverModel SetCoverModel::GenerateRandomModelFrom(
+    const SetCoverModel& seed_model, BaseInt num_elements, BaseInt num_subsets,
+    double row_scale, double column_scale, double cost_scale) {
+  SetCoverModel model;
+  DCHECK_GT(row_scale, 0.0);
+  DCHECK_GT(column_scale, 0.0);
+  DCHECK_GT(cost_scale, 0.0);
+  model.num_elements_ = num_elements;
+  model.num_nonzeros_ = 0;
+  model.ReserveNumSubsets(num_subsets);
+  model.UpdateAllSubsetsList();
+  absl::BitGen bitgen;
+
+  // Create the distribution of the cardinalities of the subsets based on the
+  // histogram of column sizes in the seed model.
+  auto [min_column_size, column_dist] =
+      ComputeSizeDistribution(seed_model.columns());
+
+  // Create the distribution of the degrees of the elements based on the
+  // histogram of row sizes in the seed model.
+  auto [min_row_size, row_dist] = ComputeSizeDistribution(seed_model.rows());
+
+  // Prepare the degrees of the elements in the generated model, and use them
+  // in a distribution to generate the columns. This ponderates the columns
+  // towards the elements with higher degrees. ???
+  ElementToIntVector degrees(num_elements);
+  for (ElementIndex element(0); element.value() < num_elements; ++element) {
+    degrees[element] =
+        DiscreteAffine(bitgen, row_dist, min_row_size, row_scale);
+  }
+  absl::discrete_distribution<BaseInt> degree_dist(degrees.begin(),
+                                                   degrees.end());
+
+  // Vector indicating whether the generated model covers an element.
+  ElementBoolVector contains_element(num_elements, false);
+
+  // Number of elements in the generated model, using the above vector.
+  BaseInt num_elements_covered(0);
+
+  // Loop-local vector indicating whether the currently generated subset
+  // contains an element.
+  ElementBoolVector subset_contains_element(num_elements, false);
+
+  for (SubsetIndex subset(0); subset.value() < num_subsets; ++subset) {
+    const BaseInt cardinality =
+        DiscreteAffine(bitgen, column_dist, min_column_size, column_scale);
+    model.columns_[subset].reserve(cardinality);
+    for (BaseInt iter = 0; iter < cardinality; ++iter) {
+      int num_tries = 0;
+      ElementIndex element;
+      // Choose an element that is not yet in the subset at random with a
+      // distribution that is proportional to the degree of the element.
+      do {
+        element = ElementIndex(degree_dist(bitgen));
+        CHECK_LT(element.value(), num_elements);
+        ++num_tries;
+        if (num_tries > 10) {
+          return SetCoverModel();
+        }
+      } while (subset_contains_element[element]);
+      ++model.num_nonzeros_;
+      model.columns_[subset].push_back(element);
+      subset_contains_element[element] = true;
+      if (!contains_element[element]) {
+        contains_element[element] = true;
+        ++num_elements_covered;
+      }
+    }
+    for (const ElementIndex element : model.columns_[subset]) {
+      subset_contains_element[element] = false;
+    }
+  }
+  CHECK_EQ(num_elements_covered, num_elements);
+
+  // TODO(user): if necessary, use a better distribution for the costs.
+  // The generation of the costs is done in two steps. First, compute the
+  // minimum and maximum costs.
+  Cost min_cost = std::numeric_limits<Cost>::infinity();
+  Cost max_cost = -min_cost;
+  for (const Cost cost : seed_model.subset_costs()) {
+    min_cost = std::min(min_cost, cost);
+    max_cost = std::max(max_cost, cost);
+  }
+  // Then, generate random numbers in [min_cost, min_cost + cost_range], where
+  // cost_range is defined as:
+  const Cost cost_range = cost_scale * (max_cost - min_cost);
+  for (Cost& cost : model.subset_costs_) {
+    cost = min_cost + absl::Uniform<double>(bitgen, 0, cost_range);
+  }
+  return model;
+}
+
 void SetCoverModel::UpdateAllSubsetsList() {
   const BaseInt old_size = all_subsets_.size();
   DCHECK_LE(old_size, num_subsets());
@@ -92,8 +236,8 @@ void SetCoverModel::AddElementToSubset(ElementIndex element,
 }
 
 // Reserves num_subsets columns in the model.
-void SetCoverModel::ReserveNumSubsets(BaseInt number_of_subsets) {
-  num_subsets_ = std::max(num_subsets_, number_of_subsets);
+void SetCoverModel::ReserveNumSubsets(BaseInt num_subsets) {
+  num_subsets_ = std::max(num_subsets_, num_subsets);
   columns_.resize(num_subsets_, SparseColumn());
   subset_costs_.resize(num_subsets_, 0.0);
 }
@@ -121,8 +265,8 @@ void SetCoverModel::CreateSparseRowView() {
   rows_.resize(num_elements_, SparseRow());
   ElementToIntVector row_sizes(num_elements_, 0);
   for (const SubsetIndex subset : SubsetRange()) {
-    // Sort the columns. It's not super-critical to improve performance here as
-    // this needs to be done only once.
+    // Sort the columns. It's not super-critical to improve performance here
+    // as this needs to be done only once.
     std::sort(columns_[subset].begin(), columns_[subset].end());
     for (const ElementIndex element : columns_[subset]) {
       ++row_sizes[element];
@@ -256,7 +400,7 @@ SetCoverModel::Stats SetCoverModel::ComputeCostStats() {
 }
 
 SetCoverModel::Stats SetCoverModel::ComputeRowStats() {
-  std::vector<ssize_t> row_sizes(num_elements(), 0);
+  std::vector<BaseInt> row_sizes(num_elements(), 0);
   for (const SparseColumn& column : columns_) {
     for (const ElementIndex element : column) {
       ++row_sizes[element.value()];
@@ -266,15 +410,15 @@ SetCoverModel::Stats SetCoverModel::ComputeRowStats() {
 }
 
 SetCoverModel::Stats SetCoverModel::ComputeColumnStats() {
-  std::vector<ssize_t> column_sizes(columns_.size());
+  std::vector<BaseInt> column_sizes(columns_.size());
   for (const SubsetIndex subset : SubsetRange()) {
     column_sizes[subset.value()] = columns_[subset].size();
   }
   return ComputeStats(std::move(column_sizes));
 }
 
-std::vector<ssize_t> SetCoverModel::ComputeRowDeciles() const {
-  std::vector<ssize_t> row_sizes(num_elements(), 0);
+std::vector<BaseInt> SetCoverModel::ComputeRowDeciles() const {
+  std::vector<BaseInt> row_sizes(num_elements(), 0);
   for (const SparseColumn& column : columns_) {
     for (const ElementIndex element : column) {
       ++row_sizes[element.value()];
@@ -283,8 +427,8 @@ std::vector<ssize_t> SetCoverModel::ComputeRowDeciles() const {
   return ComputeDeciles(std::move(row_sizes));
 }
 
-std::vector<ssize_t> SetCoverModel::ComputeColumnDeciles() const {
-  std::vector<ssize_t> column_sizes(columns_.size());
+std::vector<BaseInt> SetCoverModel::ComputeColumnDeciles() const {
+  std::vector<BaseInt> column_sizes(columns_.size());
   for (const SubsetIndex subset : SubsetRange()) {
     column_sizes[subset.value()] = columns_[subset].size();
   }
diff --git a/ortools/algorithms/set_cover_model.h b/ortools/algorithms/set_cover_model.h
index fa3f55430b..ea21f82444 100644
--- a/ortools/algorithms/set_cover_model.h
+++ b/ortools/algorithms/set_cover_model.h
@@ -14,13 +14,7 @@
 #ifndef OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
 #define OR_TOOLS_ALGORITHMS_SET_COVER_MODEL_H_
 
-#if defined(_MSC_VER)
-#include <BaseTsd.h>
-typedef SSIZE_T ssize_t;
-#else
-#include <sys/types.h>
-#endif  // defined(_MSC_VER)
-
+#include <cstdint>
 #include <string>
 #include <vector>
 
@@ -29,7 +23,6 @@ typedef SSIZE_T ssize_t;
 #include "ortools/algorithms/set_cover.pb.h"
 #include "ortools/base/strong_int.h"
 #include "ortools/base/strong_vector.h"
-#include "ortools/util/aligned_memory.h"
 
 // Representation class for the weighted set-covering problem.
 //
@@ -65,7 +58,7 @@ using Cost = double;
 // (2e9) elements and subsets. If need arises one day, BaseInt can be split
 // into SubsetBaseInt and ElementBaseInt.
 // Quick testing has shown a slowdown of about 20-25% when using int64_t.
-using BaseInt = int;
+using BaseInt = int32_t;
 
 // We make heavy use of strong typing to avoid obvious mistakes.
 // Subset index.
@@ -84,32 +77,14 @@ using SubsetRange = util_intops::StrongIntRange<SubsetIndex>;
 using ElementRange = util_intops::StrongIntRange<ElementIndex>;
 using ColumnEntryRange = util_intops::StrongIntRange<ColumnEntryIndex>;
 
-// SIMD operations require vectors to be aligned at 64-bytes on x86-64
-// processors as of 2024-05-03.
-// TODO(user): improve the code to make it possible to use unaligned memory.
-constexpr int kSetCoverAlignmentInBytes = 64;
+using SubsetCostVector = util_intops::StrongVector<SubsetIndex, Cost>;
+using ElementCostVector = util_intops::StrongVector<ElementIndex, Cost>;
 
-using CostAllocator = AlignedAllocator<Cost, kSetCoverAlignmentInBytes>;
-using ElementAllocator =
-    AlignedAllocator<ElementIndex, kSetCoverAlignmentInBytes>;
-using SubsetAllocator =
-    AlignedAllocator<SubsetIndex, kSetCoverAlignmentInBytes>;
+using SparseColumn = util_intops::StrongVector<ColumnEntryIndex, ElementIndex>;
+using SparseRow = util_intops::StrongVector<RowEntryIndex, SubsetIndex>;
 
-using SubsetCostVector =
-    util_intops::StrongVector<SubsetIndex, Cost, CostAllocator>;
-using ElementCostVector =
-    util_intops::StrongVector<ElementIndex, Cost, CostAllocator>;
-
-using SparseColumn =
-    util_intops::StrongVector<ColumnEntryIndex, ElementIndex, ElementAllocator>;
-using SparseRow =
-    util_intops::StrongVector<RowEntryIndex, SubsetIndex, SubsetAllocator>;
-
-using IntAllocator = AlignedAllocator<BaseInt, kSetCoverAlignmentInBytes>;
-using ElementToIntVector =
-    util_intops::StrongVector<ElementIndex, BaseInt, IntAllocator>;
-using SubsetToIntVector =
-    util_intops::StrongVector<SubsetIndex, BaseInt, IntAllocator>;
+using ElementToIntVector = util_intops::StrongVector<ElementIndex, BaseInt>;
+using SubsetToIntVector = util_intops::StrongVector<SubsetIndex, BaseInt>;
 
 // Views of the sparse vectors. These need not be aligned as it's their contents
 // that need to be aligned.
@@ -117,6 +92,13 @@ using SparseColumnView = util_intops::StrongVector<SubsetIndex, SparseColumn>;
 using SparseRowView = util_intops::StrongVector<ElementIndex, SparseRow>;
 
 using SubsetBoolVector = util_intops::StrongVector<SubsetIndex, bool>;
+using ElementBoolVector = util_intops::StrongVector<ElementIndex, bool>;
+
+// Useful for representing permutations,
+using ElementToElementVector =
+    util_intops::StrongVector<ElementIndex, ElementIndex>;
+using SubsetToSubsetVector =
+    util_intops::StrongVector<SubsetIndex, SubsetIndex>;
 
 // Main class for describing a weighted set-covering problem.
 class SetCoverModel {
@@ -132,6 +114,34 @@ class SetCoverModel {
         rows_(),
         all_subsets_() {}
 
+  // Constructs a weighted set-covering problem from a seed model, with
+  // num_elements elements and num_subsets subsets.
+  // - The distributions of the degrees of the elements and the cardinalities of
+  // the subsets are based on those of the seed model. They are scaled
+  // affininely by row_scale and column_scale respectively.
+  // - By affine scaling, we mean that the minimum value of the distribution is
+  // not scaled, but the variation above this minimum value is.
+  // - For a given subset with a given cardinality in the generated model, its
+  // elements are sampled from the distribution of the degrees as computed
+  // above.
+  // - The costs of the subsets in the new model are sampled from the
+  // distribution of the costs of the subsets in the seed model, scaled by
+  // cost_scale.
+  // IMPORTANT NOTICE: The algorithm may not succeed in generating a model
+  // where all the elements can be covered. In that case, the model will be
+  // empty.
+
+  static SetCoverModel GenerateRandomModelFrom(const SetCoverModel& seed_model,
+                                               BaseInt num_elements,
+                                               BaseInt num_subsets,
+                                               double row_scale,
+                                               double column_scale,
+                                               double cost_scale);
+
+  // Returns true if the model is empty, i.e. has no elements, no subsets, and
+  // no nonzeros.
+  bool IsEmpty() const { return rows_.empty() || columns_.empty(); }
+
   // Current number of elements to be covered in the model, i.e. the number of
   // elements in S. In matrix terms, this is the number of rows.
   BaseInt num_elements() const { return num_elements_; }
@@ -141,7 +151,7 @@ class SetCoverModel {
   BaseInt num_subsets() const { return num_subsets_; }
 
   // Current number of nonzeros in the matrix.
-  ssize_t num_nonzeros() const { return num_nonzeros_; }
+  int64_t num_nonzeros() const { return num_nonzeros_; }
 
   double FillRate() const {
     return 1.0 * num_nonzeros() / (1.0 * num_elements() * num_subsets());
@@ -243,10 +253,10 @@ class SetCoverModel {
   Stats ComputeColumnStats();
 
   // Computes deciles on rows and returns a vector of deciles.
-  std::vector<ssize_t> ComputeRowDeciles() const;
+  std::vector<BaseInt> ComputeRowDeciles() const;
 
   // Computes deciles on columns and returns a vector of deciles.
-  std::vector<ssize_t> ComputeColumnDeciles() const;
+  std::vector<BaseInt> ComputeColumnDeciles() const;
 
  private:
   // Updates the all_subsets_ vector so that it always contains 0 to
@@ -259,8 +269,9 @@ class SetCoverModel {
   // Number of subsets. Maintained for ease of access.
   BaseInt num_subsets_;
 
-  // Number of nonzeros in the matrix.
-  ssize_t num_nonzeros_;
+  // Number of nonzeros in the matrix. The value is an int64_t because there can
+  // be more than 1 << 31 nonzeros even with BaseInt = int32_t.
+  int64_t num_nonzeros_;
 
   // True when the SparseRowView is up-to-date.
   bool row_view_is_valid_;
diff --git a/ortools/algorithms/set_cover_orlib_test.cc b/ortools/algorithms/set_cover_orlib_test.cc
index 8dd153e44b..849e2b76ad 100644
--- a/ortools/algorithms/set_cover_orlib_test.cc
+++ b/ortools/algorithms/set_cover_orlib_test.cc
@@ -149,7 +149,7 @@ void ComputeLagrangianLowerBound(std::string name, SetCoverInvariant* inv) {
   const SetCoverModel* model = inv->model();
   WallTimer timer;
   timer.Start();
-  SetCoverLagrangian lagrangian(inv, /*num_threads=*/4);
+  SetCoverLagrangian lagrangian(inv, /*num_threads=*/8);
   const auto [lower_bound, reduced_costs, multipliers] =
       lagrangian.ComputeLowerBound(model->subset_costs(), inv->cost());
   LogCostAndTiming(name, "LagrangianLowerBound", lower_bound,
@@ -197,11 +197,11 @@ double RunSolver(std::string name, SetCoverModel* model) {
   global_timer.Start();
   RunChvatalAndSteepest(name, model);
   // SetCoverInvariant inv = ComputeLPLowerBound(name, model);
-  RunMip(name, model);
+  // RunMip(name, model);
   RunChvatalAndGLS(name, model);
   SetCoverInvariant inv = RunElementDegreeGreedyAndSteepest(name, model);
   ComputeLagrangianLowerBound(name, &inv);
-  //  IterateClearAndMip(name, inv);
+  // IterateClearAndMip(name, inv);
   IterateClearElementDegreeAndSteepest(name, &inv);
   return inv.cost();
 }
@@ -407,4 +407,22 @@ RAIL_TEST("rail4872.txt", 1527, 1861, MANYSECONDS);  // [2]
 #undef SCP_TEST
 #undef RAIL_TEST
 
+TEST(SetCoverHugeTest, GenerateProblem) {
+  SetCoverModel seed_model =
+      ReadRailSetCoverProblem(file::JoinPathRespectAbsolute(
+          ::testing::SrcDir(), data_dir, "rail4284.txt"));
+  seed_model.CreateSparseRowView();
+  const BaseInt num_wanted_subsets(100'000'000);
+  const BaseInt num_wanted_elements(40'000);
+  const double row_scale = 1.1;
+  const double column_scale = 1.1;
+  const double cost_scale = 10.0;
+  SetCoverModel model = SetCoverModel::GenerateRandomModelFrom(
+      seed_model, num_wanted_elements, num_wanted_subsets, row_scale,
+      column_scale, cost_scale);
+  SetCoverInvariant inv =
+      RunElementDegreeGreedyAndSteepest("rail4284_huge.txt", &model);
+  LOG(INFO) << "Cost: " << inv.cost();
+}
+
 }  // namespace operations_research