Speed up Glop on large problems.

This commit is contained in:
Laurent Perron
2018-02-13 17:29:11 +01:00
parent a8358f5f6a
commit e0de4b61c5
18 changed files with 329 additions and 324 deletions

View File

@@ -302,21 +302,7 @@ Status BasisFactorization::Update(ColIndex entering_col,
return ForceRefactorization();
}
void BasisFactorization::LeftSolve(DenseRow* y) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(y);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
if (use_middle_product_form_update_) {
lu_factorization_.LeftSolveU(y);
rank_one_factorization_.LeftSolve(y);
lu_factorization_.LeftSolveL(y);
} else {
eta_factorization_.LeftSolve(y);
lu_factorization_.LeftSolve(y);
}
}
void BasisFactorization::LeftSolveWithNonZeros(ScatteredRow* y) const {
void BasisFactorization::LeftSolve(ScatteredRow* y) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(y);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
@@ -324,49 +310,31 @@ void BasisFactorization::LeftSolveWithNonZeros(ScatteredRow* y) const {
lu_factorization_.LeftSolveUWithNonZeros(y);
rank_one_factorization_.LeftSolveWithNonZeros(y);
lu_factorization_.LeftSolveLWithNonZeros(y, nullptr);
y->SortNonZerosIfNeeded();
} else {
y->non_zeros.clear();
eta_factorization_.LeftSolve(&y->values);
lu_factorization_.LeftSolve(&y->values);
ComputeNonZeros(y->values, &y->non_zeros);
}
}
void BasisFactorization::RightSolve(DenseColumn* d) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(d);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
if (use_middle_product_form_update_) {
lu_factorization_.RightSolveL(d);
rank_one_factorization_.RightSolve(d);
lu_factorization_.RightSolveU(d);
} else {
lu_factorization_.RightSolve(d);
eta_factorization_.RightSolve(d);
}
}
void BasisFactorization::RightSolveWithNonZeros(ScatteredColumn* d) const {
void BasisFactorization::RightSolve(ScatteredColumn* d) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(d);
BumpDeterministicTimeForSolve(d->non_zeros.size());
if (use_middle_product_form_update_) {
lu_factorization_.RightSolveL(&d->values);
rank_one_factorization_.RightSolve(&d->values);
// We need to clear non-zeros because at this point in the code, they don't
// correspond to the zeros of d. An empty non_zeros means that
// RightSolveWithNonZeros() will recompute them.
d->non_zeros.clear();
lu_factorization_.RightSolveLWithNonZeros(d);
rank_one_factorization_.RightSolveWithNonZeros(d);
lu_factorization_.RightSolveUWithNonZeros(d);
d->SortNonZerosIfNeeded();
} else {
d->non_zeros.clear();
lu_factorization_.RightSolve(&d->values);
eta_factorization_.RightSolve(&d->values);
ComputeNonZeros(d->values, &d->non_zeros);
return;
}
}
DenseColumn* BasisFactorization::RightSolveForTau(
const DenseColumn& BasisFactorization::RightSolveForTau(
const ScatteredColumn& a) const {
SCOPED_TIME_STAT(&stats_);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
@@ -378,26 +346,19 @@ DenseColumn* BasisFactorization::RightSolveForTau(
lu_factorization_.RightSolveLWithPermutedInput(a.values, &tau_.values);
tau_.non_zeros.clear();
} else {
// TODO(user): Extract function.
if (tau_.non_zeros.empty()) {
tau_.values.assign(matrix_.num_rows(), 0);
} else {
tau_.values.resize(matrix_.num_rows(), 0);
for (const RowIndex row : tau_.non_zeros) {
tau_[row] = 0.0;
}
}
ClearAndResizeVectorWithNonZeros(matrix_.num_rows(), &tau_);
lu_factorization_.RightSolveLForScatteredColumn(a, &tau_);
}
rank_one_factorization_.RightSolveWithNonZeros(&tau_);
lu_factorization_.RightSolveUWithNonZeros(&tau_);
} else {
tau_.non_zeros.clear();
tau_.values = a.values;
lu_factorization_.RightSolve(&tau_.values);
eta_factorization_.RightSolve(&tau_.values);
}
tau_is_computed_ = true;
return &tau_.values;
return tau_.values;
}
void BasisFactorization::LeftSolveForUnitRow(ColIndex j,
@@ -442,12 +403,13 @@ void BasisFactorization::LeftSolveForUnitRow(ColIndex j,
if (tau_is_computed_) {
tau_is_computed_ = false;
tau_computation_can_be_optimized_ =
lu_factorization_.LeftSolveLWithNonZeros(y, &tau_.values);
lu_factorization_.LeftSolveLWithNonZeros(y, &tau_);
tau_.non_zeros.clear();
} else {
tau_computation_can_be_optimized_ = false;
lu_factorization_.LeftSolveLWithNonZeros(y, nullptr);
}
y->SortNonZerosIfNeeded();
}
void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
@@ -456,10 +418,10 @@ void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
RETURN_IF_NULL(d);
BumpDeterministicTimeForSolve(matrix_.column(col).num_entries().value());
if (!use_middle_product_form_update_) {
d->non_zeros.clear();
lu_factorization_.SparseRightSolve(matrix_.column(col), matrix_.num_rows(),
&d->values);
eta_factorization_.RightSolve(&d->values);
ComputeNonZeros(d->values, &d->non_zeros);
return;
}
@@ -484,6 +446,7 @@ void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
right_storage_.AddDenseColumnWithNonZeros(d->values, d->non_zeros);
}
lu_factorization_.RightSolveUWithNonZeros(d);
d->SortNonZerosIfNeeded();
}
Fractional BasisFactorization::RightSolveSquaredNorm(const SparseColumn& a)
@@ -536,7 +499,8 @@ Fractional BasisFactorization::ComputeInverseOneNorm() const {
const ColIndex num_cols = RowToColIndex(num_rows);
Fractional norm = 0.0;
for (ColIndex col(0); col < num_cols; ++col) {
DenseColumn right_hand_side(num_rows, 0.0);
ScatteredColumn right_hand_side;
right_hand_side.values.AssignToZero(num_rows);
right_hand_side[ColToRowIndex(col)] = 1.0;
// Get a column of the matrix inverse.
RightSolve(&right_hand_side);
@@ -557,7 +521,8 @@ Fractional BasisFactorization::ComputeInverseInfinityNorm() const {
const ColIndex num_cols = RowToColIndex(num_rows);
DenseColumn row_sum(num_rows, 0.0);
for (ColIndex col(0); col < num_cols; ++col) {
DenseColumn right_hand_side(num_rows, 0.0);
ScatteredColumn right_hand_side;
right_hand_side.values.AssignToZero(num_rows);
right_hand_side[ColToRowIndex(col)] = 1.0;
// Get a column of the matrix inverse.
RightSolve(&right_hand_side);

View File

@@ -205,28 +205,25 @@ class BasisFactorization {
const ScatteredColumn& direction) MUST_USE_RESULT;
// Left solves the system y.B = rhs, where y initially contains rhs.
// The second version also computes the non-zero positions of the result.
void LeftSolve(DenseRow* y) const;
void LeftSolveWithNonZeros(ScatteredRow* y) const;
void LeftSolve(ScatteredRow* y) const;
// Left solves the system y.B = e_j, where e_j has only 1 non-zero
// coefficient of value 1.0 at position 'j'.
void LeftSolveForUnitRow(ColIndex j, ScatteredRow* y) const;
// Same as RightSolve() for matrix.column(col).
// This also exploits its sparsity.
void RightSolveForProblemColumn(ColIndex col, ScatteredColumn* d) const;
// Right solves the system B.d = a where the input is the initial value of d.
void RightSolve(DenseColumn* d) const;
void RightSolveWithNonZeros(ScatteredColumn* d) const;
void RightSolve(ScatteredColumn* d) const;
// Same as RightSolve() for matrix.column(col). This also exploits its
// sparsity.
void RightSolveForProblemColumn(ColIndex col, ScatteredColumn* d) const;
// Specialized version for ComputeTau() in DualEdgeNorms. This reuses an
// intermediate result of the last LeftSolveForUnitRow() in order to save a
// permutation if it is available. Note that the input 'a' should always be
// equal to the last result of LeftSolveForUnitRow() and will be used for a
// DCHECK() or if the intermediate result wasn't kept.
DenseColumn* RightSolveForTau(const ScatteredColumn& a) const;
const DenseColumn& RightSolveForTau(const ScatteredColumn& a) const;
// Returns the norm of B^{-1}.a, this is a specific function because
// it is a bit faster and it avoids polluting the stats of RightSolve().

View File

@@ -45,7 +45,7 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
const ScatteredRow& unit_row_left_inverse) {
// No need to update if we will recompute it from scratch later.
if (recompute_edge_squared_norms_) return;
DenseColumn* tau = ComputeTau(TransposedView(unit_row_left_inverse));
const DenseColumn& tau = ComputeTau(TransposedView(unit_row_left_inverse));
SCOPED_TIME_STAT(&stats_);
// ||unit_row_left_inverse||^2 is the same as
@@ -80,7 +80,7 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
// See Koberstein's PhD section 8.2.2.1.
edge_squared_norms_[row] +=
direction[row] *
(direction[row] * new_leaving_squared_norm - 2.0 / pivot * (*tau)[row]);
(direction[row] * new_leaving_squared_norm - 2.0 / pivot * tau[row]);
// Avoid 0.0 norms (The 1e-4 is the value used by Koberstein).
// TODO(user): use a more precise lower bound depending on the column norm?
@@ -111,12 +111,12 @@ void DualEdgeNorms::ComputeEdgeSquaredNorms() {
recompute_edge_squared_norms_ = false;
}
DenseColumn* DualEdgeNorms::ComputeTau(
const DenseColumn& DualEdgeNorms::ComputeTau(
const ScatteredColumn& unit_row_left_inverse) {
SCOPED_TIME_STAT(&stats_);
DenseColumn* result =
const DenseColumn& result =
basis_factorization_.RightSolveForTau(unit_row_left_inverse);
IF_STATS_ENABLED(stats_.tau_density.Add(Density(Transpose(*result))));
IF_STATS_ENABLED(stats_.tau_density.Add(Density(Transpose(result))));
return result;
}

View File

@@ -91,7 +91,7 @@ class DualEdgeNorms {
// Computes the vector tau needed to update the norms using a right solve:
// B.tau = (u_i)^T, u_i.B = e_i for i = leaving_row.
DenseColumn* ComputeTau(const ScatteredColumn& unit_row_left_inverse);
const DenseColumn& ComputeTau(const ScatteredColumn& unit_row_left_inverse);
// Statistics.
struct Stats : public StatsGroup {

View File

@@ -195,8 +195,7 @@ DenseColumn* DenseRowAsColumn(DenseRow* y) {
void LuFactorization::RightSolveL(DenseColumn* x) const {
SCOPED_TIME_STAT(&stats_);
if (!is_identity_factorization_) {
ApplyPermutationWhenInputIsProbablySparse(row_perm_,
&dense_zero_scratchpad_, x);
PermuteWithScratchpad(row_perm_, &dense_zero_scratchpad_, x);
lower_.LowerSolve(x);
}
}
@@ -295,6 +294,7 @@ void LuFactorization::RightSolveLForSparseColumn(const SparseColumn& b,
}
lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
lower_.LowerSolveStartingAt(first_column_to_consider, &x->values);
} else {
@@ -302,19 +302,37 @@ void LuFactorization::RightSolveLForSparseColumn(const SparseColumn& b,
}
}
void LuFactorization::RightSolveLWithNonZeros(ScatteredColumn* x) const {
if (is_identity_factorization_) return;
if (x->non_zeros.empty()) {
return RightSolveL(&x->values);
}
PermuteWithKnownNonZeros(row_perm_, &dense_zero_scratchpad_, &x->values,
&x->non_zeros);
lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
lower_.LowerSolve(&x->values);
} else {
lower_.HyperSparseSolve(&x->values, &x->non_zeros);
}
}
// TODO(user): This code is almost the same as RightSolveLForSparseColumn()
// except that the API to iterate on the input is different. Find a way to
// deduplicate the code.
void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
ScatteredColumn* x) const {
if (b.non_zeros.empty()) {
*x = b;
x->non_zeros.clear();
return RightSolveL(&x->values);
}
SCOPED_TIME_STAT(&stats_);
DCHECK(IsAllZero(x->values));
x->non_zeros.clear();
if (is_identity_factorization_) {
for (const RowIndex row : b.non_zeros) {
(*x)[row] = b[row];
x->non_zeros.push_back(row);
}
*x = b;
return;
}
@@ -340,6 +358,7 @@ void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
}
lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
lower_.LowerSolveStartingAt(first_column_to_consider, &x->values);
} else {
@@ -350,11 +369,18 @@ void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
void LuFactorization::LeftSolveUWithNonZeros(ScatteredRow* y) const {
SCOPED_TIME_STAT(&stats_);
if (is_identity_factorization_) return;
if (!col_perm_.empty()) {
// TODO(user): This is just used in the test, in a real solve we never
// have a column permutation because we absorb it in the basis.
y->non_zeros.clear();
LeftSolveU(&y->values);
return;
}
DCHECK(col_perm_.empty());
DenseColumn* const x = DenseRowAsColumn(&y->values);
RowIndexVector* const nz = reinterpret_cast<RowIndexVector*>(&y->non_zeros);
transpose_upper_.ComputeRowsToConsiderInSortedOrder(nz);
y->non_zeros_are_sorted = true;
if (nz->empty()) {
upper_.TransposeUpperSolve(x);
} else {
@@ -365,7 +391,6 @@ void LuFactorization::LeftSolveUWithNonZeros(ScatteredRow* y) const {
void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
SCOPED_TIME_STAT(&stats_);
if (is_identity_factorization_) {
if (x->non_zeros.empty()) ComputeNonZeros(x->values, &x->non_zeros);
return;
}
@@ -373,8 +398,9 @@ void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
// non_zeros starts to be too big, we clear it and thus switch back to a
// normal sparse solve.
upper_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
upper_.UpperSolveWithNonZeros(&x->values, &x->non_zeros);
upper_.UpperSolve(&x->values);
} else {
upper_.HyperSparseSolveWithReversedNonZeros(&x->values, &x->non_zeros);
}
@@ -383,16 +409,16 @@ void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
// Note(user): This is only needed for the test because in Glop the
// col_perm_ is always "absorbed" by permuting the basis. So it is
// fine to do it in a non hyper-sparse way.
PermuteAndComputeNonZeros(inverse_col_perm_, &dense_zero_scratchpad_,
&x->values, &x->non_zeros);
x->non_zeros.clear();
PermuteWithScratchpad(inverse_col_perm_, &dense_zero_scratchpad_,
&x->values);
}
}
bool LuFactorization::LeftSolveLWithNonZeros(
ScatteredRow* y, DenseColumn* result_before_permutation) const {
ScatteredRow* y, ScatteredColumn* result_before_permutation) const {
SCOPED_TIME_STAT(&stats_);
if (is_identity_factorization_) {
if (y->non_zeros.empty()) ComputeNonZeros(y->values, &y->non_zeros);
// It is not advantageous to fill result_before_permutation in this case.
return false;
}
@@ -402,6 +428,7 @@ bool LuFactorization::LeftSolveLWithNonZeros(
// Hypersparse?
RowIndex last_non_zero_row = ColToRowIndex(y->values.size() - 1);
transpose_lower_.ComputeRowsToConsiderInSortedOrder(nz);
y->non_zeros_are_sorted = true;
if (nz->empty()) {
lower_.TransposeLowerSolve(x, &last_non_zero_row);
} else {
@@ -414,8 +441,7 @@ bool LuFactorization::LeftSolveLWithNonZeros(
// should be the case because the hyper-sparse functions makes sure of that.
// We also DCHECK() this below.
if (nz->empty()) {
PermuteAndComputeNonZeros(inverse_row_perm_, &dense_zero_scratchpad_, x,
nz);
PermuteWithScratchpad(inverse_row_perm_, &dense_zero_scratchpad_, x);
} else {
PermuteWithKnownNonZeros(inverse_row_perm_, &dense_zero_scratchpad_, x,
nz);
@@ -430,7 +456,8 @@ bool LuFactorization::LeftSolveLWithNonZeros(
// This computes the same thing as in the other branch but also keeps the
// original x in result_before_permutation. Because of this, it is faster to
// use a different algorithm.
x->swap(*result_before_permutation);
result_before_permutation->non_zeros.clear();
x->swap(result_before_permutation->values);
x->AssignToZero(inverse_row_perm_.size());
y->non_zeros.clear();
for (RowIndex row(0); row <= last_non_zero_row; ++row) {
@@ -438,7 +465,6 @@ bool LuFactorization::LeftSolveLWithNonZeros(
if (value != 0.0) {
const RowIndex permuted_row = inverse_row_perm_[row];
(*x)[permuted_row] = value;
y->non_zeros.push_back(RowToColIndex(permuted_row));
}
}
return true;
@@ -468,6 +494,7 @@ ColIndex LuFactorization::LeftSolveUForUnitRow(ColIndex col,
RowIndexVector* const nz = reinterpret_cast<RowIndexVector*>(&y->non_zeros);
DenseColumn* const x = reinterpret_cast<DenseColumn*>(&y->values);
transpose_upper_.ComputeRowsToConsiderInSortedOrder(nz);
y->non_zeros_are_sorted = true;
if (y->non_zeros.empty()) {
transpose_upper_.LowerSolveStartingAt(permuted_col, x);
} else {

View File

@@ -124,19 +124,19 @@ class LuFactorization {
// Important: the output y must be of the correct size and all zero.
ColIndex LeftSolveUForUnitRow(ColIndex col, ScatteredRow* y) const;
// Specialized version of RightSolveU() and LeftSolveU() that may exploit the
// initial non-zeros if it is non-empty. In addition,
// RightSolveUWithNonZeros() always return the non-zeros of the output.
// Specialized version that may exploit the initial non-zeros if they are
// non-empty.
void RightSolveLWithNonZeros(ScatteredColumn* x) const;
void RightSolveUWithNonZeros(ScatteredColumn* x) const;
void LeftSolveUWithNonZeros(ScatteredRow* y) const;
// Specialized version of LeftSolveL() that also computes the non-zero
// pattern of the output. Moreover, if result_before_permutation is not NULL,
// it is filled with the result just before row_perm_ is applied to it and
// true is returned. If result_before_permutation is not filled, then false is
// returned.
// Specialized version of LeftSolveL() that may exploit the initial non_zeros
// of y if it is non-empty. Moreover, if result_before_permutation is not
// NULL, it might be filled with the result just before row_perm_ is applied
// to it and true is returned. If result_before_permutation is not filled,
// then false is returned.
bool LeftSolveLWithNonZeros(ScatteredRow* y,
DenseColumn* result_before_permutation) const;
ScatteredColumn* result_before_permutation) const;
// Returns the given column of U.
// It will only be valid until the next call to GetColumnOfU().

View File

@@ -142,13 +142,16 @@ void PrimalEdgeNorms::ComputeEdgeSquaredNorms() {
recompute_edge_squared_norms_ = false;
}
// TODO(user): It should be possible to reorganize the code and call this when
// the value of direction is no longer needed. This will simplify the code and
// avoid a copy here.
void PrimalEdgeNorms::ComputeDirectionLeftInverse(
ColIndex entering_col, const ScatteredColumn& direction) {
SCOPED_TIME_STAT(&stats_);
// Initialize direction_left_inverse_ to direction.values .
// Note the special case when the non-zero vector is empty which means we
// don't know and need to use the dense version.
// Initialize direction_left_inverse_ to direction. Note the special case when
// the non-zero vector is empty which means we don't know and need to use the
// dense version.
const ColIndex size = RowToColIndex(direction.values.size());
const double kThreshold = 0.05 * size.value();
if (!direction_left_inverse_.non_zeros.empty() &&
@@ -163,14 +166,10 @@ void PrimalEdgeNorms::ComputeDirectionLeftInverse(
direction_left_inverse_.non_zeros.clear();
}
// Depending on the sparsity of the input, we decide which version to use.
if (direction.non_zeros.size() < kThreshold) {
direction_left_inverse_.non_zeros =
*reinterpret_cast<ColIndexVector const*>(&direction.non_zeros);
basis_factorization_.LeftSolveWithNonZeros(&direction_left_inverse_);
} else {
basis_factorization_.LeftSolve(&direction_left_inverse_.values);
direction_left_inverse_.non_zeros = TransposedView(direction).non_zeros;
}
basis_factorization_.LeftSolve(&direction_left_inverse_);
// TODO(user): Refactorize if estimated accuracy above a threshold.
IF_STATS_ENABLED(stats_.direction_left_inverse_accuracy.Add(

View File

@@ -63,14 +63,13 @@ class RankOneUpdateElementaryMatrix {
-storage_->ColumnScalarProduct(v_index_, Transpose(*x)) / mu_;
storage_->ColumnAddMultipleToDenseColumn(u_index_, multiplier, x);
}
void RightSolveWithNonZeros(
ScatteredColumn* x, StrictITIVector<RowIndex, bool>* is_non_zero) const {
void RightSolveWithNonZeros(ScatteredColumn* x) const {
DCHECK(!IsSingular());
const Fractional multiplier =
-storage_->ColumnScalarProduct(v_index_, Transpose(x->values)) / mu_;
if (multiplier != 0.0) {
storage_->ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
u_index_, multiplier, &x->values, is_non_zero, &x->non_zeros);
storage_->ColumnAddMultipleToSparseScatteredColumn(u_index_, multiplier,
x);
}
}
@@ -83,16 +82,13 @@ class RankOneUpdateElementaryMatrix {
storage_->ColumnAddMultipleToDenseColumn(v_index_, multiplier,
reinterpret_cast<DenseColumn*>(y));
}
void LeftSolveWithNonZeros(
ScatteredRow* y, StrictITIVector<ColIndex, bool>* is_non_zero) const {
void LeftSolveWithNonZeros(ScatteredRow* y) const {
DCHECK(!IsSingular());
const Fractional multiplier =
-storage_->ColumnScalarProduct(u_index_, y->values) / mu_;
if (multiplier != 0.0) {
storage_->ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
v_index_, multiplier, reinterpret_cast<DenseColumn*>(&y->values),
reinterpret_cast<StrictITIVector<RowIndex, bool>*>(is_non_zero),
reinterpret_cast<std::vector<RowIndex>*>(&y->non_zeros));
storage_->ColumnAddMultipleToSparseScatteredColumn(
v_index_, multiplier, reinterpret_cast<ScatteredColumn*>(y));
}
}
@@ -172,20 +168,19 @@ class RankOneUpdateFactorization {
}
// tmp_row_is_non_zero_ is always all false before and after this code.
tmp_col_is_non_zero_.resize(y->values.size(), false);
DCHECK(std::all_of(tmp_col_is_non_zero_.begin(), tmp_col_is_non_zero_.end(),
[](bool v) { return !v; }));
for (const ColIndex col : y->non_zeros) tmp_col_is_non_zero_[col] = true;
y->is_non_zero.resize(y->values.size(), false);
DCHECK(IsAllFalse(y->is_non_zero));
for (const ColIndex col : y->non_zeros) y->is_non_zero[col] = true;
const int hypersparse_threshold = static_cast<int>(
hypersparse_ratio_ * static_cast<double>(y->values.size().value()));
for (int i = elementary_matrices_.size() - 1; i >= 0; --i) {
if (y->non_zeros.size() < hypersparse_threshold) {
elementary_matrices_[i].LeftSolveWithNonZeros(y, &tmp_col_is_non_zero_);
elementary_matrices_[i].LeftSolveWithNonZeros(y);
} else {
elementary_matrices_[i].LeftSolve(&y->values);
}
}
for (const ColIndex col : y->non_zeros) tmp_col_is_non_zero_[col] = false;
for (const ColIndex col : y->non_zeros) y->is_non_zero[col] = false;
if (y->non_zeros.size() >= hypersparse_threshold) y->non_zeros.clear();
}
@@ -207,23 +202,21 @@ class RankOneUpdateFactorization {
return;
}
// tmp_row_is_non_zero_ is always all false before and after this code.
tmp_row_is_non_zero_.resize(d->values.size(), false);
DCHECK(std::all_of(tmp_row_is_non_zero_.begin(), tmp_row_is_non_zero_.end(),
[](bool v) { return !v; }));
for (const RowIndex row : d->non_zeros) tmp_row_is_non_zero_[row] = true;
// d->is_non_zero is always all false before and after this code.
d->is_non_zero.resize(d->values.size(), false);
DCHECK(IsAllFalse(d->is_non_zero));
for (const RowIndex row : d->non_zeros) d->is_non_zero[row] = true;
const size_t end = elementary_matrices_.size();
const int hypersparse_threshold = static_cast<int>(
hypersparse_ratio_ * static_cast<double>(d->values.size().value()));
for (int i = 0; i < end; ++i) {
if (d->non_zeros.size() < hypersparse_threshold) {
elementary_matrices_[i].RightSolveWithNonZeros(d,
&tmp_row_is_non_zero_);
elementary_matrices_[i].RightSolveWithNonZeros(d);
} else {
elementary_matrices_[i].RightSolve(&d->values);
}
}
for (const RowIndex row : d->non_zeros) tmp_row_is_non_zero_[row] = false;
for (const RowIndex row : d->non_zeros) d->is_non_zero[row] = false;
if (d->non_zeros.size() >= hypersparse_threshold) d->non_zeros.clear();
}
@@ -233,8 +226,6 @@ class RankOneUpdateFactorization {
double hypersparse_ratio_;
EntryIndex num_entries_;
std::vector<RankOneUpdateElementaryMatrix> elementary_matrices_;
mutable StrictITIVector<RowIndex, bool> tmp_row_is_non_zero_;
mutable StrictITIVector<ColIndex, bool> tmp_col_is_non_zero_;
DISALLOW_COPY_AND_ASSIGN(RankOneUpdateFactorization);
};

View File

@@ -245,7 +245,7 @@ void ReducedCosts::PerturbCosts() {
std::max(max_cost_magnitude, std::abs(objective_[col]));
}
objective_perturbation_.assign(matrix_.num_cols(), 0.0);
objective_perturbation_.AssignToZero(matrix_.num_cols());
for (ColIndex col(0); col < structural_size; ++col) {
const Fractional objective = objective_[col];
const Fractional magnitude =
@@ -297,7 +297,7 @@ void ReducedCosts::ShiftCost(ColIndex col) {
void ReducedCosts::ClearAndRemoveCostShifts() {
SCOPED_TIME_STAT(&stats_);
objective_perturbation_.assign(matrix_.num_cols(), 0.0);
objective_perturbation_.AssignToZero(matrix_.num_cols());
recompute_basic_objective_ = true;
recompute_basic_objective_left_inverse_ = true;
recompute_reduced_costs_ = true;
@@ -320,7 +320,7 @@ const DenseRow& ReducedCosts::GetReducedCosts() {
const DenseColumn& ReducedCosts::GetDualValues() {
SCOPED_TIME_STAT(&stats_);
ComputeBasicObjectiveLeftInverse();
return Transpose(basic_objective_left_inverse_);
return Transpose(basic_objective_left_inverse_.values);
}
void ReducedCosts::RecomputeReducedCostsAndPrimalEnteringCandidatesIfNeeded() {
@@ -366,9 +366,9 @@ void ReducedCosts::ComputeReducedCosts() {
#endif
if (num_omp_threads == 1) {
for (ColIndex col(0); col < num_cols; ++col) {
reduced_costs_[col] =
objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(col, basic_objective_left_inverse_);
reduced_costs_[col] = objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(
col, basic_objective_left_inverse_.values);
// We also compute the dual residual error y.B - c_B.
if (is_basic.IsSet(col)) {
@@ -386,9 +386,9 @@ void ReducedCosts::ComputeReducedCosts() {
#pragma omp parallel for num_threads(num_omp_threads)
for (int i = 0; i < parallel_loop_size; i++) {
const ColIndex col(i);
reduced_costs_[col] =
objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(col, basic_objective_left_inverse_);
reduced_costs_[col] = objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(
col, basic_objective_left_inverse_.values);
if (is_basic.IsSet(col)) {
thread_local_dual_residual_error[omp_get_thread_num()] =
@@ -422,15 +422,15 @@ void ReducedCosts::ComputeReducedCosts() {
void ReducedCosts::ComputeBasicObjectiveLeftInverse() {
SCOPED_TIME_STAT(&stats_);
basic_objective_left_inverse_.resize(RowToColIndex(matrix_.num_rows()), 0.0);
if (recompute_basic_objective_) {
ComputeBasicObjective();
}
basic_objective_left_inverse_ = basic_objective_;
basic_objective_left_inverse_.values = basic_objective_;
basic_objective_left_inverse_.non_zeros.clear();
basis_factorization_.LeftSolve(&basic_objective_left_inverse_);
recompute_basic_objective_left_inverse_ = false;
IF_STATS_ENABLED(stats_.basic_objective_left_inverse_density.Add(
Density(basic_objective_left_inverse_)));
Density(basic_objective_left_inverse_.values)));
// TODO(user): Estimate its accuracy by a few scalar products, and refactorize
// if it is above a threshold?
@@ -487,10 +487,19 @@ void ReducedCosts::UpdateReducedCosts(ColIndex entering_col,
// we can use them in ComputeCurrentDualResidualError().
const ScatteredRow& unit_row_left_inverse =
update_row->GetUnitRowLeftInverse();
for (const ColIndex col : unit_row_left_inverse.non_zeros) {
const ColIndex slack_col = first_slack_col + col;
const Fractional coeff = unit_row_left_inverse[col];
reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
if (unit_row_left_inverse.non_zeros.empty()) {
const ColIndex num_cols = unit_row_left_inverse.values.size();
for (ColIndex col(0); col < num_cols; ++col) {
const ColIndex slack_col = first_slack_col + col;
const Fractional coeff = unit_row_left_inverse[col];
reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
}
} else {
for (const ColIndex col : unit_row_left_inverse.non_zeros) {
const ColIndex slack_col = first_slack_col + col;
const Fractional coeff = unit_row_left_inverse[col];
reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
}
}
reduced_costs_[leaving_col] = new_leaving_reduced_cost;

View File

@@ -273,7 +273,7 @@ class ReducedCosts {
// algorithm, but may give us a good idea of the current precision of our
// estimates. It is also faster to compute the unit_row_left_inverse_ because
// of sparsity.
DenseRow basic_objective_left_inverse_;
ScatteredRow basic_objective_left_inverse_;
// This is usually parameters_.dual_feasibility_tolerance() except when the
// dual residual error |y.B - c_B| is higher than it and we have to increase

View File

@@ -781,7 +781,7 @@ bool RevisedSimplex::InitializeBoundsAndTestIfUnchanged(
SCOPED_TIME_STAT(&function_stats_);
lower_bound_.resize(num_cols_, 0.0);
upper_bound_.resize(num_cols_, 0.0);
bound_perturbation_.assign(num_cols_, 0.0);
bound_perturbation_.AssignToZero(num_cols_);
// Variable bounds, for both non-slack and slack variables.
bool bounds_are_unchanged = true;
@@ -1325,7 +1325,7 @@ void RevisedSimplex::CorrectErrorsOnVariableValues() {
void RevisedSimplex::ComputeVariableValuesError() {
SCOPED_TIME_STAT(&function_stats_);
error_.assign(num_rows_, 0.0);
error_.AssignToZero(num_rows_);
const DenseRow& variable_values = variable_values_.GetDenseRow();
for (ColIndex col(0); col < num_cols_; ++col) {
const Fractional value = variable_values[col];
@@ -1338,9 +1338,21 @@ void RevisedSimplex::ComputeDirection(ColIndex col) {
DCHECK_COL_BOUNDS(col);
basis_factorization_.RightSolveForProblemColumn(col, &direction_);
direction_infinity_norm_ = 0.0;
for (const RowIndex row : direction_.non_zeros) {
direction_infinity_norm_ =
std::max(direction_infinity_norm_, std::abs(direction_[row]));
if (direction_.non_zeros.empty()) {
// We still compute the direction non-zeros because our code relies on it.
for (RowIndex row(0); row < num_rows_; ++row) {
const Fractional value = direction_[row];
if (value != 0.0) {
direction_.non_zeros.push_back(row);
direction_infinity_norm_ =
std::max(direction_infinity_norm_, std::abs(value));
}
}
} else {
for (const RowIndex row : direction_.non_zeros) {
direction_infinity_norm_ =
std::max(direction_infinity_norm_, std::abs(direction_[row]));
}
}
IF_STATS_ENABLED(ratio_test_stats_.direction_density.Add(
num_rows_ == 0 ? 0.0
@@ -1952,27 +1964,50 @@ void RevisedSimplex::DualPhaseIUpdatePriceOnReducedCostChange(
++num_dual_infeasible_positions_;
}
if (!something_to_do) {
initially_all_zero_scratchpad_.is_non_zero.resize(num_rows_, false);
initially_all_zero_scratchpad_.values.resize(num_rows_, 0.0);
something_to_do = true;
}
compact_matrix_.ColumnAddMultipleToDenseColumn(
compact_matrix_.ColumnAddMultipleToSparseScatteredColumn(
col, sign - dual_infeasibility_improvement_direction_[col],
&initially_all_zero_scratchpad_.values);
&initially_all_zero_scratchpad_);
dual_infeasibility_improvement_direction_[col] = sign;
}
}
if (something_to_do) {
// TODO(user): This code is duplicated with UpdateGivenNonBasicVariables()
// and more generally with the one in RankOneUpdateFactorization. Fix.
if (ShouldUseDenseIteration(initially_all_zero_scratchpad_)) {
initially_all_zero_scratchpad_.non_zeros.clear();
initially_all_zero_scratchpad_.is_non_zero.assign(num_rows_, false);
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
initially_all_zero_scratchpad_.is_non_zero[row] = false;
}
}
const VariableTypeRow& variable_type = variables_info_.GetTypeRow();
const Fractional threshold = parameters_.ratio_test_zero_threshold();
basis_factorization_.RightSolveWithNonZeros(
&initially_all_zero_scratchpad_);
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
is_dual_entering_candidate_.Set(
row,
IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
variable_type[basis_[row]], threshold));
basis_factorization_.RightSolve(&initially_all_zero_scratchpad_);
if (initially_all_zero_scratchpad_.non_zeros.empty()) {
for (RowIndex row(0); row < num_rows_; ++row) {
if (initially_all_zero_scratchpad_[row] == 0.0) continue;
dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
is_dual_entering_candidate_.Set(
row, IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
variable_type[basis_[row]],
threshold));
}
initially_all_zero_scratchpad_.values.AssignToZero(num_rows_);
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
is_dual_entering_candidate_.Set(
row, IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
variable_type[basis_[row]],
threshold));
}
}
initially_all_zero_scratchpad_.non_zeros.clear();
}
@@ -1999,9 +2034,9 @@ Status RevisedSimplex::DualPhaseIChooseLeavingVariableRow(
dual_pricing_vector_.empty()) {
// Recompute everything from scratch.
num_dual_infeasible_positions_ = 0;
dual_pricing_vector_.assign(num_rows_, 0.0);
dual_pricing_vector_.AssignToZero(num_rows_);
is_dual_entering_candidate_.ClearAndResize(num_rows_);
dual_infeasibility_improvement_direction_.assign(num_cols_, 0.0);
dual_infeasibility_improvement_direction_.AssignToZero(num_cols_);
DualPhaseIUpdatePriceOnReducedCostChange(
variables_info_.GetIsRelevantBitRow());
} else {
@@ -2247,7 +2282,7 @@ Status RevisedSimplex::Minimize(TimeLimit* time_limit) {
if (feasibility_phase_) {
// Initialize the primal phase-I objective.
// Note that this temporarily erases the problem objective.
objective_.assign(num_cols_, 0.0);
objective_.AssignToZero(num_cols_);
UpdatePrimalPhaseICosts(
util::IntegerRange<RowIndex>(RowIndex(0), num_rows_));
}
@@ -2368,7 +2403,7 @@ Status RevisedSimplex::Minimize(TimeLimit* time_limit) {
} else {
VLOG(1) << "Unbounded problem.";
problem_status_ = ProblemStatus::PRIMAL_UNBOUNDED;
solution_primal_ray_.assign(num_cols_, 0.0);
solution_primal_ray_.AssignToZero(num_cols_);
for (RowIndex row(0); row < num_rows_; ++row) {
const ColIndex col = basis_[row];
solution_primal_ray_[col] = -direction_[row];
@@ -2650,7 +2685,7 @@ Status RevisedSimplex::DualMinimize(TimeLimit* time_limit) {
solution_dual_ray_ =
Transpose(update_row_.GetUnitRowLeftInverse().values);
update_row_.RecomputeFullUpdateRow(leaving_row);
solution_dual_ray_row_combination_.assign(num_cols_, 0.0);
solution_dual_ray_row_combination_.AssignToZero(num_cols_);
for (const ColIndex col : update_row_.GetNonZeroPositions()) {
solution_dual_ray_row_combination_[col] =
update_row_.GetCoefficient(col);

View File

@@ -68,8 +68,7 @@ void UpdateRow::ComputeUnitRowLeftInverse(RowIndex leaving_row) {
unit_row_left_inverse_.values) -
1.0));
IF_STATS_ENABLED(stats_.unit_row_left_inverse_density.Add(
static_cast<double>(unit_row_left_inverse_.non_zeros.size()) /
static_cast<double>(matrix_.num_rows().value())));
Density(unit_row_left_inverse_.values())));
}
void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
@@ -78,7 +77,8 @@ void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
ComputeUnitRowLeftInverse(leaving_row);
SCOPED_TIME_STAT(&stats_);
if (parameters_.use_transposed_matrix()) {
if (parameters_.use_transposed_matrix() &&
!unit_row_left_inverse_.non_zeros.empty()) {
// Number of entries that ComputeUpdatesRowWise() will need to look at.
EntryIndex num_row_wise_entries(0);
for (const ColIndex col : unit_row_left_inverse_.non_zeros) {

View File

@@ -66,10 +66,11 @@ void VariableValues::RecomputeBasicVariableValues() {
SCOPED_TIME_STAT(&stats_);
DCHECK(basis_factorization_.IsRefactorized());
const RowIndex num_rows = matrix_.num_rows();
scratchpad_.assign(num_rows, 0.0);
scratchpad_.non_zeros.clear();
scratchpad_.values.AssignToZero(num_rows);
for (const ColIndex col : variables_info_.GetNotBasicBitRow()) {
const Fractional value = variable_values_[col];
matrix_.ColumnAddMultipleToDenseColumn(col, -value, &scratchpad_);
matrix_.ColumnAddMultipleToDenseColumn(col, -value, &scratchpad_.values);
}
basis_factorization_.RightSolve(&scratchpad_);
for (RowIndex row(0); row < num_rows; ++row) {
@@ -79,13 +80,14 @@ void VariableValues::RecomputeBasicVariableValues() {
// Computes the maximum absolute primal residual: accumulates the sum over all
// columns of variable_values_[col] * column into the dense part of the
// scratchpad and returns its infinity norm.
//
// Fix: the block contained both the old DenseColumn-based statements and their
// new ScatteredColumn-based replacements (double accumulation and an
// unreachable second return). Only the coherent new version is kept.
Fractional VariableValues::ComputeMaximumPrimalResidual() const {
  SCOPED_TIME_STAT(&stats_);
  // scratchpad_ is now a ScatteredColumn; clear its sparse bookkeeping and
  // zero the dense values before accumulating.
  scratchpad_.non_zeros.clear();
  scratchpad_.values.AssignToZero(matrix_.num_rows());
  const ColIndex num_cols = matrix_.num_cols();
  for (ColIndex col(0); col < num_cols; ++col) {
    const Fractional value = variable_values_[col];
    matrix_.ColumnAddMultipleToDenseColumn(col, value, &scratchpad_.values);
  }
  return InfinityNorm(scratchpad_.values);
}
Fractional VariableValues::ComputeMaximumPrimalInfeasibility() const {
@@ -144,24 +146,45 @@ void VariableValues::UpdateGivenNonBasicVariables(
const std::vector<ColIndex>& cols_to_update, bool update_basic_variables) {
SCOPED_TIME_STAT(&stats_);
if (update_basic_variables) {
initially_all_zero_scratchpad_.values.resize(matrix_.num_rows(), 0.0);
const RowIndex num_rows = matrix_.num_rows();
initially_all_zero_scratchpad_.values.resize(num_rows, 0.0);
initially_all_zero_scratchpad_.is_non_zero.resize(num_rows, false);
DCHECK(IsAllZero(initially_all_zero_scratchpad_.values));
DCHECK(IsAllFalse(initially_all_zero_scratchpad_.is_non_zero));
// TODO(user): Abort the non-zeros computation earlier if dense.
for (ColIndex col : cols_to_update) {
const Fractional old_value = variable_values_[col];
SetNonBasicVariableValueFromStatus(col);
matrix_.ColumnAddMultipleToDenseColumn(
matrix_.ColumnAddMultipleToSparseScatteredColumn(
col, variable_values_[col] - old_value,
&initially_all_zero_scratchpad_.values);
&initially_all_zero_scratchpad_);
}
basis_factorization_.RightSolveWithNonZeros(
&initially_all_zero_scratchpad_);
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
if (ShouldUseDenseIteration(initially_all_zero_scratchpad_)) {
initially_all_zero_scratchpad_.non_zeros.clear();
initially_all_zero_scratchpad_.is_non_zero.assign(num_rows, false);
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
initially_all_zero_scratchpad_.is_non_zero[row] = false;
}
}
basis_factorization_.RightSolve(&initially_all_zero_scratchpad_);
if (initially_all_zero_scratchpad_.non_zeros.empty()) {
for (RowIndex row(0); row < num_rows; ++row) {
variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
}
initially_all_zero_scratchpad_.values.AssignToZero(num_rows);
ResetPrimalInfeasibilityInformation();
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
}
UpdatePrimalInfeasibilityInformation(
initially_all_zero_scratchpad_.non_zeros);
initially_all_zero_scratchpad_.non_zeros.clear();
}
UpdatePrimalInfeasibilityInformation(
initially_all_zero_scratchpad_.non_zeros);
initially_all_zero_scratchpad_.non_zeros.clear();
} else {
for (ColIndex col : cols_to_update) {
SetNonBasicVariableValueFromStatus(col);
@@ -182,22 +205,31 @@ void VariableValues::ResetPrimalInfeasibilityInformation() {
const RowIndex num_rows = matrix_.num_rows();
primal_squared_infeasibilities_.resize(num_rows, 0.0);
primal_infeasible_positions_.ClearAndResize(num_rows);
UpdatePrimalInfeasibilities(
util::IntegerRange<RowIndex>(RowIndex(0), num_rows));
const DenseRow& lower_bounds = variables_info_.GetVariableLowerBounds();
const DenseRow& upper_bounds = variables_info_.GetVariableUpperBounds();
for (RowIndex row(0); row < num_rows; ++row) {
const ColIndex col = basis_[row];
const Fractional value = variable_values_[col];
const Fractional magnitude =
std::max(value - upper_bounds[col], lower_bounds[col] - value);
if (magnitude > tolerance_) {
primal_squared_infeasibilities_[row] = Square(magnitude);
primal_infeasible_positions_.Set(row);
}
}
}
void VariableValues::UpdatePrimalInfeasibilityInformation(
const std::vector<RowIndex>& rows) {
if (primal_squared_infeasibilities_.size() != matrix_.num_rows()) {
ResetPrimalInfeasibilityInformation();
} else {
SCOPED_TIME_STAT(&stats_);
UpdatePrimalInfeasibilities(rows);
return;
}
}
template <typename Rows>
void VariableValues::UpdatePrimalInfeasibilities(const Rows& rows) {
// Note(user): this is the same as the code in
// ResetPrimalInfeasibilityInformation(), but we do need the clear part.
SCOPED_TIME_STAT(&stats_);
const DenseRow& lower_bounds = variables_info_.GetVariableLowerBounds();
const DenseRow& upper_bounds = variables_info_.GetVariableUpperBounds();
for (const RowIndex row : rows) {

View File

@@ -104,10 +104,6 @@ class VariableValues {
std::string StatString() const { return stats_.StatString(); }
private:
// Internal version of UpdatePrimalInfeasibilityInformation().
template <typename Rows>
void UpdatePrimalInfeasibilities(const Rows& rows);
// Input problem data.
const CompactSparseMatrix& matrix_;
const RowToColMapping& basis_;
@@ -123,7 +119,7 @@ class VariableValues {
DenseBitColumn primal_infeasible_positions_;
mutable StatsGroup stats_;
mutable DenseColumn scratchpad_;
mutable ScatteredColumn scratchpad_;
// A temporary scattered column that is always reset to all zero after use.
ScatteredColumn initially_all_zero_scratchpad_;

View File

@@ -343,38 +343,6 @@ typedef StrictITIVector<RowIndex, ColIndex> RowToColMapping;
// Column of constraints (slack variables) statuses.
typedef StrictITIVector<RowIndex, ConstraintStatus> ConstraintStatusColumn;
// A simple struct that contains a DenseColumn and its non-zeros indices.
struct ScatteredColumn {
  // Dense representation of the column, indexed by RowIndex.
  DenseColumn values;

  // This can be left empty in which case we just have the dense representation
  // above. Otherwise, it should always be a subset of the actual non-zeros.
  //
  // Note that the non-zeros should always be sorted by increasing index. This
  // allows to have exactly the same iteration behavior if we iterate on the
  // non-zeros or directly in a dense-way on the values.
  RowIndexVector non_zeros;

  // Convenience element access, forwarding to the dense values.
  Fractional operator[](RowIndex row) const { return values[row]; }
  Fractional& operator[](RowIndex row) { return values[row]; }
};
// Same as ScatteredColumn for a row.
struct ScatteredRow {
  // Dense representation of the row, indexed by ColIndex.
  DenseRow values;

  // Optional positions of the non-zeros of values, sorted by increasing
  // index. When empty, only the dense representation above is meaningful.
  // Same contract as ScatteredColumn::non_zeros.
  ColIndexVector non_zeros;

  // Convenience element access, forwarding to the dense values.
  Fractional operator[](ColIndex col) const { return values[col]; }
  Fractional& operator[](ColIndex col) { return values[col]; }
};
// Reinterprets a ScatteredColumn as a ScatteredRow (and vice versa) without
// copying. NOTE(review): this reinterpret_cast is only valid if the two
// structs have identical memory layout — they differ only in the index types
// of their members, so any layout change to one must be mirrored in the other.
inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
  return reinterpret_cast<const ScatteredRow&>(c);
}
inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
  return reinterpret_cast<const ScatteredColumn&>(r);
}
// Returns true if it is more advantageous to use a dense iteration rather than
// using the non-zeros positions.
//
@@ -391,6 +359,46 @@ bool ShouldUseDenseIteration(const ScatteredRowOrCol& v) {
static_cast<double>(v.values.size().value());
}
// A simple struct that contains a DenseVector and its non-zeros indices.
template <typename Index>
struct ScatteredVector {
  // Dense representation of the vector, indexed by Index.
  StrictITIVector<Index, Fractional> values;

  // True when non_zeros is sorted by increasing index. Kept up to date by
  // SortNonZerosIfNeeded() below; operations that append to non_zeros out of
  // order must reset this to false.
  bool non_zeros_are_sorted = false;

  // This can be left empty in which case we just have the dense representation
  // above. Otherwise, it should always be a subset of the actual non-zeros.
  std::vector<Index> non_zeros;

  // Temporary vector used in some sparse computation on the ScatteredColumn.
  // True indicate a possible non-zero value. Note that its state is not always
  // consistent.
  StrictITIVector<Index, bool> is_non_zero;

  // Convenience element access, forwarding to the dense values.
  Fractional operator[](Index index) const { return values[index]; }
  Fractional& operator[](Index index) { return values[index]; }

  // Sorting the non-zeros is not always needed, but it allows us to have
  // exactly the same behavior while using a sparse iteration or a dense one. So
  // we always do it after a Solve().
  void SortNonZerosIfNeeded() {
    if (!non_zeros_are_sorted) {
      std::sort(non_zeros.begin(), non_zeros.end());
      non_zeros_are_sorted = true;
    }
  }
};
// Specialization used in the code.
// RowIndex-indexed (column) instantiation of ScatteredVector.
struct ScatteredColumn : public ScatteredVector<RowIndex> {};
// ColIndex-indexed (row) instantiation of ScatteredVector.
struct ScatteredRow : public ScatteredVector<ColIndex> {};
// Reinterprets a ScatteredColumn as a ScatteredRow (and vice versa) without
// copying. This relies on both structs being instantiations of the same
// ScatteredVector template (so they share the same layout and differ only in
// the index type used for access).
inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
  return reinterpret_cast<const ScatteredRow&>(c);
}
inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
  return reinterpret_cast<const ScatteredColumn&>(r);
}
// This is used during the deterministic time computation to convert a given
// number of floating-point operations to something in the same order of
// magnitude as a second (on a 2014 desktop).

View File

@@ -137,6 +137,9 @@ Fractional InfinityNorm(const DenseColumn& v);
Fractional InfinityNorm(const SparseColumn& v);
// Returns the fraction of non-zero entries of the given row.
//
// TODO(user): Take a Scattered row/col instead. This is only used to report
// stats, but we should still have a sparse version to do it faster.
double Density(const DenseRow& row);
// Sets to 0.0 all entries of the given row whose fabs() is lower than the given
@@ -205,35 +208,31 @@ inline bool IsAllZero(const Container& input) {
return true;
}
// Permutes the given dense vector and computes the positions of its non-zeros.
// It uses for this an all zero dense vector (zero_scratchpad).
// This operation is efficient for a sparse vector because:
// - It combines two iterations in one.
// - It avoids cache pollution by not looking at unecessary permuted locations.
// Note that the produced non_zeros is not ordered which may be a drawback.
// TODO(user): Investigate alternatives with an ordered non_zeros.
template <typename IndexType, typename PermutationIndexType,
typename NonZeroIndexType>
inline void PermuteAndComputeNonZeros(
// Returns true if the given vector of bool contains no true entry (an empty
// vector is considered all false).
template <typename BoolVector>
bool IsAllFalse(const BoolVector& v) {
  return std::none_of(v.begin(), v.end(),
                      [](const bool value) { return value; });
}
// Permutes the given dense vector. It uses for this an all zero scratchpad.
template <typename IndexType, typename PermutationIndexType>
inline void PermuteWithScratchpad(
const Permutation<PermutationIndexType>& permutation,
StrictITIVector<IndexType, Fractional>* zero_scratchpad,
StrictITIVector<IndexType, Fractional>* output,
std::vector<NonZeroIndexType>* non_zeros) {
non_zeros->clear();
StrictITIVector<IndexType, Fractional>* input_output) {
DCHECK(IsAllZero(*zero_scratchpad));
zero_scratchpad->swap(*output);
output->resize(zero_scratchpad->size(), 0.0);
const IndexType end = zero_scratchpad->size();
for (IndexType index(0); index < end; ++index) {
const IndexType size = input_output->size();
zero_scratchpad->swap(*input_output);
input_output->resize(size, 0.0);
for (IndexType index(0); index < size; ++index) {
const Fractional value = (*zero_scratchpad)[index];
if (value != 0.0) {
(*zero_scratchpad)[index] = 0.0;
const IndexType permuted_index(
permutation[PermutationIndexType(index.value())].value());
(*output)[permuted_index] = value;
non_zeros->push_back(NonZeroIndexType(permuted_index.value()));
(*input_output)[permuted_index] = value;
}
}
zero_scratchpad->assign(size, 0.0);
}
// Same as PermuteAndComputeNonZeros() except that we assume that the given
@@ -249,7 +248,6 @@ inline void PermuteWithKnownNonZeros(
output->resize(zero_scratchpad->size(), 0.0);
for (IndexType& index_ref : *non_zeros) {
const Fractional value = (*zero_scratchpad)[index_ref];
DCHECK_NE(value, 0.0);
(*zero_scratchpad)[index_ref] = 0.0;
const IndexType permuted_index(permutation[index_ref]);
(*output)[permuted_index] = value;
@@ -257,25 +255,6 @@ inline void PermuteWithKnownNonZeros(
}
}
// Same algorithm as PermuteAndComputeNonZeros() above when the non-zeros are
// not needed. This should be faster than a simple ApplyPermutation() if the
// input vector is relatively sparse. The input is the initial value of output.
inline void ApplyPermutationWhenInputIsProbablySparse(
const Permutation<RowIndex>& permutation, DenseColumn* zero_scratchpad,
DenseColumn* output) {
const RowIndex num_rows(permutation.size());
DCHECK(IsAllZero(*zero_scratchpad));
zero_scratchpad->swap(*output);
output->resize(num_rows, 0.0);
for (RowIndex row(0); row < num_rows; ++row) {
const Fractional value = (*zero_scratchpad)[row];
if (value != 0.0) {
(*zero_scratchpad)[row] = 0.0;
(*output)[permutation[row]] = value;
}
}
}
// Sets a dense vector for which the non zeros are known to be non_zeros.
template <typename IndexType, typename ScatteredRowOrCol>
inline void ClearAndResizeVectorWithNonZeros(IndexType size,
@@ -284,7 +263,8 @@ inline void ClearAndResizeVectorWithNonZeros(IndexType size,
// compared to the wanted size. Note that in most cases the vector will
// already be of the correct size.
const double kSparseThreshold = 0.05;
if (v->non_zeros.size() < kSparseThreshold * size.value()) {
if (!v->non_zeros.empty() &&
v->non_zeros.size() < kSparseThreshold * size.value()) {
for (const IndexType index : v->non_zeros) {
DCHECK_LT(index, v->values.size());
(*v)[index] = 0.0;

View File

@@ -756,36 +756,19 @@ void TriangularMatrix::LowerSolveStartingAtInternal(ColIndex start,
// Solves U.x = rhs in place, where U is this upper-triangular matrix.
//
// Fix: each branch contained both the old UpperSolveWithNonZerosInternal<...>
// call and its replacement, so the solve would have run twice. Only the
// UpperSolveInternal<> dispatch (matching its declaration) is kept.
void TriangularMatrix::UpperSolve(DenseColumn* rhs) const {
  // Compile-time dispatch on an all-ones diagonal lets the templated solve
  // skip the per-column division in that case.
  if (all_diagonal_coefficients_are_one_) {
    UpperSolveInternal<true>(rhs);
  } else {
    UpperSolveInternal<false>(rhs);
  }
}
// Same as UpperSolve() but also fills non_zero_rows with the positions of the
// non-zeros of the result.
void TriangularMatrix::UpperSolveWithNonZeros(
    DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
  // Compile-time dispatch on an all-ones diagonal lets the templated solve
  // skip the per-column division in that case.
  if (all_diagonal_coefficients_are_one_) {
    UpperSolveWithNonZerosInternal<true, true>(rhs, non_zero_rows);
  } else {
    UpperSolveWithNonZerosInternal<false, true>(rhs, non_zero_rows);
  }
}
template <bool diagonal_of_ones, bool with_non_zeros>
void TriangularMatrix::UpperSolveWithNonZerosInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
template <bool diagonal_of_ones>
void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
if (with_non_zeros) {
RETURN_IF_NULL(non_zero_rows);
non_zero_rows->clear();
}
const ColIndex end = first_non_identity_column_;
for (ColIndex col(diagonal_coefficients_.size() - 1); col >= end; --col) {
const Fractional value = (*rhs)[ColToRowIndex(col)];
if (value == 0.0) continue;
if (with_non_zeros) {
non_zero_rows->push_back(ColToRowIndex(col));
}
const Fractional coeff =
diagonal_of_ones ? value : value / diagonal_coefficients_[col];
if (!diagonal_of_ones) {
@@ -800,18 +783,6 @@ void TriangularMatrix::UpperSolveWithNonZerosInternal(
(*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
}
}
// Finish filling the non_zero_rows vector if needed.
if (with_non_zeros) {
for (RowIndex row(end.value() - 1); row >= 0; --row) {
if ((*rhs)[row] != 0.0) {
non_zero_rows->push_back(row);
}
}
// Sorting the non-zero positions gives better performance later.
std::reverse(non_zero_rows->begin(), non_zero_rows->end());
}
}
void TriangularMatrix::TransposeUpperSolve(DenseColumn* rhs) const {

View File

@@ -418,20 +418,20 @@ class CompactSparseMatrix {
// Same as ColumnAddMultipleToDenseColumn() but also adds the new non-zeros to
// the non_zeros vector. A non-zero is "new" if is_non_zero[row] was false,
// and we update dense_column[row]. This functions also updates is_non_zero.
void ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
ColIndex col, Fractional multiplier, DenseColumn* dense_column,
StrictITIVector<RowIndex, bool>* is_non_zero,
std::vector<RowIndex>* non_zeros) const {
void ColumnAddMultipleToSparseScatteredColumn(ColIndex col,
Fractional multiplier,
ScatteredColumn* column) const {
if (multiplier == 0.0) return;
RETURN_IF_NULL(dense_column);
RETURN_IF_NULL(column);
for (const EntryIndex i : Column(col)) {
const RowIndex row = EntryRow(i);
(*dense_column)[row] += multiplier * EntryCoefficient(i);
if (!(*is_non_zero)[row]) {
(*is_non_zero)[row] = true;
non_zeros->push_back(row);
(*column)[row] += multiplier * EntryCoefficient(i);
if (!column->is_non_zero[row]) {
column->is_non_zero[row] = true;
column->non_zeros.push_back(row);
}
}
column->non_zeros_are_sorted = false;
}
// Copies the given column of this matrix into the given dense_column.
@@ -588,10 +588,6 @@ class TriangularMatrix : private CompactSparseMatrix {
// This also computes the last non-zero row position (if not nullptr).
void TransposeLowerSolve(DenseColumn* rhs, RowIndex* last_non_zero_row) const;
// This also computes the non-zero row positions (if not nullptr).
void UpperSolveWithNonZeros(DenseColumn* rhs,
RowIndexVector* non_zero_rows) const;
// Hyper-sparse version of the triangular solve functions. The passed
// non_zero_rows should contain the positions of the symbolic non-zeros of the
// result in the order in which they need to be accessed (or in the reverse
@@ -714,9 +710,8 @@ class TriangularMatrix : private CompactSparseMatrix {
// Internal versions of some Solve() functions to avoid code duplication.
template <bool diagonal_of_ones>
void LowerSolveStartingAtInternal(ColIndex start, DenseColumn* rhs) const;
template <bool diagonal_of_ones, bool with_non_zeros>
void UpperSolveWithNonZerosInternal(DenseColumn* rhs,
RowIndexVector* non_zero_rows) const;
template <bool diagonal_of_ones>
void UpperSolveInternal(DenseColumn* rhs) const;
template <bool diagonal_of_ones>
void TransposeLowerSolveInternal(DenseColumn* rhs,
RowIndex* last_non_zero_row) const;