Speed up Glop on large problems.

This commit is contained in:
Laurent Perron
2018-02-13 17:29:11 +01:00
parent a8358f5f6a
commit e0de4b61c5
18 changed files with 329 additions and 324 deletions

View File

@@ -302,21 +302,7 @@ Status BasisFactorization::Update(ColIndex entering_col,
return ForceRefactorization();
}
void BasisFactorization::LeftSolve(DenseRow* y) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(y);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
if (use_middle_product_form_update_) {
lu_factorization_.LeftSolveU(y);
rank_one_factorization_.LeftSolve(y);
lu_factorization_.LeftSolveL(y);
} else {
eta_factorization_.LeftSolve(y);
lu_factorization_.LeftSolve(y);
}
}
void BasisFactorization::LeftSolveWithNonZeros(ScatteredRow* y) const {
void BasisFactorization::LeftSolve(ScatteredRow* y) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(y);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
@@ -324,49 +310,31 @@ void BasisFactorization::LeftSolveWithNonZeros(ScatteredRow* y) const {
lu_factorization_.LeftSolveUWithNonZeros(y);
rank_one_factorization_.LeftSolveWithNonZeros(y);
lu_factorization_.LeftSolveLWithNonZeros(y, nullptr);
y->SortNonZerosIfNeeded();
} else {
y->non_zeros.clear();
eta_factorization_.LeftSolve(&y->values);
lu_factorization_.LeftSolve(&y->values);
ComputeNonZeros(y->values, &y->non_zeros);
}
}
void BasisFactorization::RightSolve(DenseColumn* d) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(d);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
if (use_middle_product_form_update_) {
lu_factorization_.RightSolveL(d);
rank_one_factorization_.RightSolve(d);
lu_factorization_.RightSolveU(d);
} else {
lu_factorization_.RightSolve(d);
eta_factorization_.RightSolve(d);
}
}
void BasisFactorization::RightSolveWithNonZeros(ScatteredColumn* d) const {
void BasisFactorization::RightSolve(ScatteredColumn* d) const {
SCOPED_TIME_STAT(&stats_);
RETURN_IF_NULL(d);
BumpDeterministicTimeForSolve(d->non_zeros.size());
if (use_middle_product_form_update_) {
lu_factorization_.RightSolveL(&d->values);
rank_one_factorization_.RightSolve(&d->values);
// We need to clear non-zeros because at this point in the code, they don't
// correspond to the zeros of d. An empty non_zeros means that
// RightSolveWithNonZeros() will recompute them.
d->non_zeros.clear();
lu_factorization_.RightSolveLWithNonZeros(d);
rank_one_factorization_.RightSolveWithNonZeros(d);
lu_factorization_.RightSolveUWithNonZeros(d);
d->SortNonZerosIfNeeded();
} else {
d->non_zeros.clear();
lu_factorization_.RightSolve(&d->values);
eta_factorization_.RightSolve(&d->values);
ComputeNonZeros(d->values, &d->non_zeros);
return;
}
}
DenseColumn* BasisFactorization::RightSolveForTau(
const DenseColumn& BasisFactorization::RightSolveForTau(
const ScatteredColumn& a) const {
SCOPED_TIME_STAT(&stats_);
BumpDeterministicTimeForSolve(matrix_.num_rows().value());
@@ -378,26 +346,19 @@ DenseColumn* BasisFactorization::RightSolveForTau(
lu_factorization_.RightSolveLWithPermutedInput(a.values, &tau_.values);
tau_.non_zeros.clear();
} else {
// TODO(user): Extract function.
if (tau_.non_zeros.empty()) {
tau_.values.assign(matrix_.num_rows(), 0);
} else {
tau_.values.resize(matrix_.num_rows(), 0);
for (const RowIndex row : tau_.non_zeros) {
tau_[row] = 0.0;
}
}
ClearAndResizeVectorWithNonZeros(matrix_.num_rows(), &tau_);
lu_factorization_.RightSolveLForScatteredColumn(a, &tau_);
}
rank_one_factorization_.RightSolveWithNonZeros(&tau_);
lu_factorization_.RightSolveUWithNonZeros(&tau_);
} else {
tau_.non_zeros.clear();
tau_.values = a.values;
lu_factorization_.RightSolve(&tau_.values);
eta_factorization_.RightSolve(&tau_.values);
}
tau_is_computed_ = true;
return &tau_.values;
return tau_.values;
}
void BasisFactorization::LeftSolveForUnitRow(ColIndex j,
@@ -442,12 +403,13 @@ void BasisFactorization::LeftSolveForUnitRow(ColIndex j,
if (tau_is_computed_) {
tau_is_computed_ = false;
tau_computation_can_be_optimized_ =
lu_factorization_.LeftSolveLWithNonZeros(y, &tau_.values);
lu_factorization_.LeftSolveLWithNonZeros(y, &tau_);
tau_.non_zeros.clear();
} else {
tau_computation_can_be_optimized_ = false;
lu_factorization_.LeftSolveLWithNonZeros(y, nullptr);
}
y->SortNonZerosIfNeeded();
}
void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
@@ -456,10 +418,10 @@ void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
RETURN_IF_NULL(d);
BumpDeterministicTimeForSolve(matrix_.column(col).num_entries().value());
if (!use_middle_product_form_update_) {
d->non_zeros.clear();
lu_factorization_.SparseRightSolve(matrix_.column(col), matrix_.num_rows(),
&d->values);
eta_factorization_.RightSolve(&d->values);
ComputeNonZeros(d->values, &d->non_zeros);
return;
}
@@ -484,6 +446,7 @@ void BasisFactorization::RightSolveForProblemColumn(ColIndex col,
right_storage_.AddDenseColumnWithNonZeros(d->values, d->non_zeros);
}
lu_factorization_.RightSolveUWithNonZeros(d);
d->SortNonZerosIfNeeded();
}
Fractional BasisFactorization::RightSolveSquaredNorm(const SparseColumn& a)
@@ -536,7 +499,8 @@ Fractional BasisFactorization::ComputeInverseOneNorm() const {
const ColIndex num_cols = RowToColIndex(num_rows);
Fractional norm = 0.0;
for (ColIndex col(0); col < num_cols; ++col) {
DenseColumn right_hand_side(num_rows, 0.0);
ScatteredColumn right_hand_side;
right_hand_side.values.AssignToZero(num_rows);
right_hand_side[ColToRowIndex(col)] = 1.0;
// Get a column of the matrix inverse.
RightSolve(&right_hand_side);
@@ -557,7 +521,8 @@ Fractional BasisFactorization::ComputeInverseInfinityNorm() const {
const ColIndex num_cols = RowToColIndex(num_rows);
DenseColumn row_sum(num_rows, 0.0);
for (ColIndex col(0); col < num_cols; ++col) {
DenseColumn right_hand_side(num_rows, 0.0);
ScatteredColumn right_hand_side;
right_hand_side.values.AssignToZero(num_rows);
right_hand_side[ColToRowIndex(col)] = 1.0;
// Get a column of the matrix inverse.
RightSolve(&right_hand_side);

View File

@@ -205,28 +205,25 @@ class BasisFactorization {
const ScatteredColumn& direction) MUST_USE_RESULT;
// Left solves the system y.B = rhs, where y initially contains rhs.
// The second version also computes the non-zero positions of the result.
void LeftSolve(DenseRow* y) const;
void LeftSolveWithNonZeros(ScatteredRow* y) const;
void LeftSolve(ScatteredRow* y) const;
// Left solves the system y.B = e_j, where e_j has only 1 non-zero
// coefficient of value 1.0 at position 'j'.
void LeftSolveForUnitRow(ColIndex j, ScatteredRow* y) const;
// Same as RightSolve() for matrix.column(col).
// This also exploits its sparsity.
void RightSolveForProblemColumn(ColIndex col, ScatteredColumn* d) const;
// Right solves the system B.d = a where the input is the initial value of d.
void RightSolve(DenseColumn* d) const;
void RightSolveWithNonZeros(ScatteredColumn* d) const;
void RightSolve(ScatteredColumn* d) const;
// Same as RightSolve() for matrix.column(col). This also exploits its
// sparsity.
void RightSolveForProblemColumn(ColIndex col, ScatteredColumn* d) const;
// Specialized version for ComputeTau() in DualEdgeNorms. This reuses an
// intermediate result of the last LeftSolveForUnitRow() in order to save a
// permutation if it is available. Note that the input 'a' should always be
// equal to the last result of LeftSolveForUnitRow() and will be used for a
// DCHECK() or if the intermediate result wasn't kept.
DenseColumn* RightSolveForTau(const ScatteredColumn& a) const;
const DenseColumn& RightSolveForTau(const ScatteredColumn& a) const;
// Returns the norm of B^{-1}.a, this is a specific function because
// it is a bit faster and it avoids polluting the stats of RightSolve().

View File

@@ -45,7 +45,7 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
const ScatteredRow& unit_row_left_inverse) {
// No need to update if we will recompute it from scratch later.
if (recompute_edge_squared_norms_) return;
DenseColumn* tau = ComputeTau(TransposedView(unit_row_left_inverse));
const DenseColumn& tau = ComputeTau(TransposedView(unit_row_left_inverse));
SCOPED_TIME_STAT(&stats_);
// ||unit_row_left_inverse||^2 is the same as
@@ -80,7 +80,7 @@ void DualEdgeNorms::UpdateBeforeBasisPivot(
// See Koberstein's PhD section 8.2.2.1.
edge_squared_norms_[row] +=
direction[row] *
(direction[row] * new_leaving_squared_norm - 2.0 / pivot * (*tau)[row]);
(direction[row] * new_leaving_squared_norm - 2.0 / pivot * tau[row]);
// Avoid 0.0 norms (The 1e-4 is the value used by Koberstein).
// TODO(user): use a more precise lower bound depending on the column norm?
@@ -111,12 +111,12 @@ void DualEdgeNorms::ComputeEdgeSquaredNorms() {
recompute_edge_squared_norms_ = false;
}
DenseColumn* DualEdgeNorms::ComputeTau(
const DenseColumn& DualEdgeNorms::ComputeTau(
const ScatteredColumn& unit_row_left_inverse) {
SCOPED_TIME_STAT(&stats_);
DenseColumn* result =
const DenseColumn& result =
basis_factorization_.RightSolveForTau(unit_row_left_inverse);
IF_STATS_ENABLED(stats_.tau_density.Add(Density(Transpose(*result))));
IF_STATS_ENABLED(stats_.tau_density.Add(Density(Transpose(result))));
return result;
}

View File

@@ -91,7 +91,7 @@ class DualEdgeNorms {
// Computes the vector tau needed to update the norms using a right solve:
// B.tau = (u_i)^T, u_i.B = e_i for i = leaving_row.
DenseColumn* ComputeTau(const ScatteredColumn& unit_row_left_inverse);
const DenseColumn& ComputeTau(const ScatteredColumn& unit_row_left_inverse);
// Statistics.
struct Stats : public StatsGroup {

View File

@@ -195,8 +195,7 @@ DenseColumn* DenseRowAsColumn(DenseRow* y) {
void LuFactorization::RightSolveL(DenseColumn* x) const {
SCOPED_TIME_STAT(&stats_);
if (!is_identity_factorization_) {
ApplyPermutationWhenInputIsProbablySparse(row_perm_,
&dense_zero_scratchpad_, x);
PermuteWithScratchpad(row_perm_, &dense_zero_scratchpad_, x);
lower_.LowerSolve(x);
}
}
@@ -295,6 +294,7 @@ void LuFactorization::RightSolveLForSparseColumn(const SparseColumn& b,
}
lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
lower_.LowerSolveStartingAt(first_column_to_consider, &x->values);
} else {
@@ -302,19 +302,37 @@ void LuFactorization::RightSolveLForSparseColumn(const SparseColumn& b,
}
}
void LuFactorization::RightSolveLWithNonZeros(ScatteredColumn* x) const {
if (is_identity_factorization_) return;
if (x->non_zeros.empty()) {
return RightSolveL(&x->values);
}
PermuteWithKnownNonZeros(row_perm_, &dense_zero_scratchpad_, &x->values,
&x->non_zeros);
lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
lower_.LowerSolve(&x->values);
} else {
lower_.HyperSparseSolve(&x->values, &x->non_zeros);
}
}
// TODO(user): This code is almost the same as RightSolveLForSparseColumn()
// except that the API to iterate on the input is different. Find a way to
// deduplicate the code.
void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
ScatteredColumn* x) const {
if (b.non_zeros.empty()) {
*x = b;
x->non_zeros.clear();
return RightSolveL(&x->values);
}
SCOPED_TIME_STAT(&stats_);
DCHECK(IsAllZero(x->values));
x->non_zeros.clear();
if (is_identity_factorization_) {
for (const RowIndex row : b.non_zeros) {
(*x)[row] = b[row];
x->non_zeros.push_back(row);
}
*x = b;
return;
}
@@ -340,6 +358,7 @@ void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
}
lower_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
lower_.LowerSolveStartingAt(first_column_to_consider, &x->values);
} else {
@@ -350,11 +369,18 @@ void LuFactorization::RightSolveLForScatteredColumn(const ScatteredColumn& b,
void LuFactorization::LeftSolveUWithNonZeros(ScatteredRow* y) const {
SCOPED_TIME_STAT(&stats_);
if (is_identity_factorization_) return;
if (!col_perm_.empty()) {
// TODO(user): This is just used in the test, in a real solve we never
// have a column permutation because we absorb it in the basis.
y->non_zeros.clear();
LeftSolveU(&y->values);
return;
}
DCHECK(col_perm_.empty());
DenseColumn* const x = DenseRowAsColumn(&y->values);
RowIndexVector* const nz = reinterpret_cast<RowIndexVector*>(&y->non_zeros);
transpose_upper_.ComputeRowsToConsiderInSortedOrder(nz);
y->non_zeros_are_sorted = true;
if (nz->empty()) {
upper_.TransposeUpperSolve(x);
} else {
@@ -365,7 +391,6 @@ void LuFactorization::LeftSolveUWithNonZeros(ScatteredRow* y) const {
void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
SCOPED_TIME_STAT(&stats_);
if (is_identity_factorization_) {
if (x->non_zeros.empty()) ComputeNonZeros(x->values, &x->non_zeros);
return;
}
@@ -373,8 +398,9 @@ void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
// non_zeros starts to be too big, we clear it and thus switch back to a
// normal sparse solve.
upper_.ComputeRowsToConsiderInSortedOrder(&x->non_zeros);
x->non_zeros_are_sorted = true;
if (x->non_zeros.empty()) {
upper_.UpperSolveWithNonZeros(&x->values, &x->non_zeros);
upper_.UpperSolve(&x->values);
} else {
upper_.HyperSparseSolveWithReversedNonZeros(&x->values, &x->non_zeros);
}
@@ -383,16 +409,16 @@ void LuFactorization::RightSolveUWithNonZeros(ScatteredColumn* x) const {
// Note(user): This is only needed for the test because in Glop the
// col_perm_ is always "absorbed" by permuting the basis. So it is
// fine to do it in a non hyper-sparse way.
PermuteAndComputeNonZeros(inverse_col_perm_, &dense_zero_scratchpad_,
&x->values, &x->non_zeros);
x->non_zeros.clear();
PermuteWithScratchpad(inverse_col_perm_, &dense_zero_scratchpad_,
&x->values);
}
}
bool LuFactorization::LeftSolveLWithNonZeros(
ScatteredRow* y, DenseColumn* result_before_permutation) const {
ScatteredRow* y, ScatteredColumn* result_before_permutation) const {
SCOPED_TIME_STAT(&stats_);
if (is_identity_factorization_) {
if (y->non_zeros.empty()) ComputeNonZeros(y->values, &y->non_zeros);
// It is not advantageous to fill result_before_permutation in this case.
return false;
}
@@ -402,6 +428,7 @@ bool LuFactorization::LeftSolveLWithNonZeros(
// Hypersparse?
RowIndex last_non_zero_row = ColToRowIndex(y->values.size() - 1);
transpose_lower_.ComputeRowsToConsiderInSortedOrder(nz);
y->non_zeros_are_sorted = true;
if (nz->empty()) {
lower_.TransposeLowerSolve(x, &last_non_zero_row);
} else {
@@ -414,8 +441,7 @@ bool LuFactorization::LeftSolveLWithNonZeros(
// should be the case because the hyper-sparse functions makes sure of that.
// We also DCHECK() this below.
if (nz->empty()) {
PermuteAndComputeNonZeros(inverse_row_perm_, &dense_zero_scratchpad_, x,
nz);
PermuteWithScratchpad(inverse_row_perm_, &dense_zero_scratchpad_, x);
} else {
PermuteWithKnownNonZeros(inverse_row_perm_, &dense_zero_scratchpad_, x,
nz);
@@ -430,7 +456,8 @@ bool LuFactorization::LeftSolveLWithNonZeros(
// This computes the same thing as in the other branch but also keeps the
// original x in result_before_permutation. Because of this, it is faster to
// use a different algorithm.
x->swap(*result_before_permutation);
result_before_permutation->non_zeros.clear();
x->swap(result_before_permutation->values);
x->AssignToZero(inverse_row_perm_.size());
y->non_zeros.clear();
for (RowIndex row(0); row <= last_non_zero_row; ++row) {
@@ -438,7 +465,6 @@ bool LuFactorization::LeftSolveLWithNonZeros(
if (value != 0.0) {
const RowIndex permuted_row = inverse_row_perm_[row];
(*x)[permuted_row] = value;
y->non_zeros.push_back(RowToColIndex(permuted_row));
}
}
return true;
@@ -468,6 +494,7 @@ ColIndex LuFactorization::LeftSolveUForUnitRow(ColIndex col,
RowIndexVector* const nz = reinterpret_cast<RowIndexVector*>(&y->non_zeros);
DenseColumn* const x = reinterpret_cast<DenseColumn*>(&y->values);
transpose_upper_.ComputeRowsToConsiderInSortedOrder(nz);
y->non_zeros_are_sorted = true;
if (y->non_zeros.empty()) {
transpose_upper_.LowerSolveStartingAt(permuted_col, x);
} else {

View File

@@ -124,19 +124,19 @@ class LuFactorization {
// Important: the output y must be of the correct size and all zero.
ColIndex LeftSolveUForUnitRow(ColIndex col, ScatteredRow* y) const;
// Specialized version of RightSolveU() and LeftSolveU() that may exploit the
// initial non-zeros if it is non-empty. In addition,
// RightSolveUWithNonZeros() always return the non-zeros of the output.
// Specialized version that may exploit the initial non-zeros if they are
// non-empty.
void RightSolveLWithNonZeros(ScatteredColumn* x) const;
void RightSolveUWithNonZeros(ScatteredColumn* x) const;
void LeftSolveUWithNonZeros(ScatteredRow* y) const;
// Specialized version of LeftSolveL() that also computes the non-zero
// pattern of the output. Moreover, if result_before_permutation is not NULL,
// it is filled with the result just before row_perm_ is applied to it and
// true is returned. If result_before_permutation is not filled, then false is
// returned.
// Specialized version of LeftSolveL() that may exploit the initial non_zeros
// of y if it is non-empty. Moreover, if result_before_permutation is not
// NULL, it might be filled with the result just before row_perm_ is applied
// to it and true is returned. If result_before_permutation is not filled,
// then false is returned.
bool LeftSolveLWithNonZeros(ScatteredRow* y,
DenseColumn* result_before_permutation) const;
ScatteredColumn* result_before_permutation) const;
// Returns the given column of U.
// It will only be valid until the next call to GetColumnOfU().

View File

@@ -142,13 +142,16 @@ void PrimalEdgeNorms::ComputeEdgeSquaredNorms() {
recompute_edge_squared_norms_ = false;
}
// TODO(user): It should be possible to reorganize the code and call this when
// the value of direction is no longer needed. This will simplify the code and
// avoid a copy here.
void PrimalEdgeNorms::ComputeDirectionLeftInverse(
ColIndex entering_col, const ScatteredColumn& direction) {
SCOPED_TIME_STAT(&stats_);
// Initialize direction_left_inverse_ to direction.values .
// Note the special case when the non-zero vector is empty which means we
// don't know and need to use the dense version.
// Initialize direction_left_inverse_ to direction. Note the special case when
// the non-zero vector is empty which means we don't know and need to use the
// dense version.
const ColIndex size = RowToColIndex(direction.values.size());
const double kThreshold = 0.05 * size.value();
if (!direction_left_inverse_.non_zeros.empty() &&
@@ -163,14 +166,10 @@ void PrimalEdgeNorms::ComputeDirectionLeftInverse(
direction_left_inverse_.non_zeros.clear();
}
// Depending on the sparsity of the input, we decide which version to use.
if (direction.non_zeros.size() < kThreshold) {
direction_left_inverse_.non_zeros =
*reinterpret_cast<ColIndexVector const*>(&direction.non_zeros);
basis_factorization_.LeftSolveWithNonZeros(&direction_left_inverse_);
} else {
basis_factorization_.LeftSolve(&direction_left_inverse_.values);
direction_left_inverse_.non_zeros = TransposedView(direction).non_zeros;
}
basis_factorization_.LeftSolve(&direction_left_inverse_);
// TODO(user): Refactorize if estimated accuracy above a threshold.
IF_STATS_ENABLED(stats_.direction_left_inverse_accuracy.Add(

View File

@@ -63,14 +63,13 @@ class RankOneUpdateElementaryMatrix {
-storage_->ColumnScalarProduct(v_index_, Transpose(*x)) / mu_;
storage_->ColumnAddMultipleToDenseColumn(u_index_, multiplier, x);
}
void RightSolveWithNonZeros(
ScatteredColumn* x, StrictITIVector<RowIndex, bool>* is_non_zero) const {
void RightSolveWithNonZeros(ScatteredColumn* x) const {
DCHECK(!IsSingular());
const Fractional multiplier =
-storage_->ColumnScalarProduct(v_index_, Transpose(x->values)) / mu_;
if (multiplier != 0.0) {
storage_->ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
u_index_, multiplier, &x->values, is_non_zero, &x->non_zeros);
storage_->ColumnAddMultipleToSparseScatteredColumn(u_index_, multiplier,
x);
}
}
@@ -83,16 +82,13 @@ class RankOneUpdateElementaryMatrix {
storage_->ColumnAddMultipleToDenseColumn(v_index_, multiplier,
reinterpret_cast<DenseColumn*>(y));
}
void LeftSolveWithNonZeros(
ScatteredRow* y, StrictITIVector<ColIndex, bool>* is_non_zero) const {
void LeftSolveWithNonZeros(ScatteredRow* y) const {
DCHECK(!IsSingular());
const Fractional multiplier =
-storage_->ColumnScalarProduct(u_index_, y->values) / mu_;
if (multiplier != 0.0) {
storage_->ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
v_index_, multiplier, reinterpret_cast<DenseColumn*>(&y->values),
reinterpret_cast<StrictITIVector<RowIndex, bool>*>(is_non_zero),
reinterpret_cast<std::vector<RowIndex>*>(&y->non_zeros));
storage_->ColumnAddMultipleToSparseScatteredColumn(
v_index_, multiplier, reinterpret_cast<ScatteredColumn*>(y));
}
}
@@ -172,20 +168,19 @@ class RankOneUpdateFactorization {
}
// tmp_row_is_non_zero_ is always all false before and after this code.
tmp_col_is_non_zero_.resize(y->values.size(), false);
DCHECK(std::all_of(tmp_col_is_non_zero_.begin(), tmp_col_is_non_zero_.end(),
[](bool v) { return !v; }));
for (const ColIndex col : y->non_zeros) tmp_col_is_non_zero_[col] = true;
y->is_non_zero.resize(y->values.size(), false);
DCHECK(IsAllFalse(y->is_non_zero));
for (const ColIndex col : y->non_zeros) y->is_non_zero[col] = true;
const int hypersparse_threshold = static_cast<int>(
hypersparse_ratio_ * static_cast<double>(y->values.size().value()));
for (int i = elementary_matrices_.size() - 1; i >= 0; --i) {
if (y->non_zeros.size() < hypersparse_threshold) {
elementary_matrices_[i].LeftSolveWithNonZeros(y, &tmp_col_is_non_zero_);
elementary_matrices_[i].LeftSolveWithNonZeros(y);
} else {
elementary_matrices_[i].LeftSolve(&y->values);
}
}
for (const ColIndex col : y->non_zeros) tmp_col_is_non_zero_[col] = false;
for (const ColIndex col : y->non_zeros) y->is_non_zero[col] = false;
if (y->non_zeros.size() >= hypersparse_threshold) y->non_zeros.clear();
}
@@ -207,23 +202,21 @@ class RankOneUpdateFactorization {
return;
}
// tmp_row_is_non_zero_ is always all false before and after this code.
tmp_row_is_non_zero_.resize(d->values.size(), false);
DCHECK(std::all_of(tmp_row_is_non_zero_.begin(), tmp_row_is_non_zero_.end(),
[](bool v) { return !v; }));
for (const RowIndex row : d->non_zeros) tmp_row_is_non_zero_[row] = true;
// d->is_non_zero is always all false before and after this code.
d->is_non_zero.resize(d->values.size(), false);
DCHECK(IsAllFalse(d->is_non_zero));
for (const RowIndex row : d->non_zeros) d->is_non_zero[row] = true;
const size_t end = elementary_matrices_.size();
const int hypersparse_threshold = static_cast<int>(
hypersparse_ratio_ * static_cast<double>(d->values.size().value()));
for (int i = 0; i < end; ++i) {
if (d->non_zeros.size() < hypersparse_threshold) {
elementary_matrices_[i].RightSolveWithNonZeros(d,
&tmp_row_is_non_zero_);
elementary_matrices_[i].RightSolveWithNonZeros(d);
} else {
elementary_matrices_[i].RightSolve(&d->values);
}
}
for (const RowIndex row : d->non_zeros) tmp_row_is_non_zero_[row] = false;
for (const RowIndex row : d->non_zeros) d->is_non_zero[row] = false;
if (d->non_zeros.size() >= hypersparse_threshold) d->non_zeros.clear();
}
@@ -233,8 +226,6 @@ class RankOneUpdateFactorization {
double hypersparse_ratio_;
EntryIndex num_entries_;
std::vector<RankOneUpdateElementaryMatrix> elementary_matrices_;
mutable StrictITIVector<RowIndex, bool> tmp_row_is_non_zero_;
mutable StrictITIVector<ColIndex, bool> tmp_col_is_non_zero_;
DISALLOW_COPY_AND_ASSIGN(RankOneUpdateFactorization);
};

View File

@@ -245,7 +245,7 @@ void ReducedCosts::PerturbCosts() {
std::max(max_cost_magnitude, std::abs(objective_[col]));
}
objective_perturbation_.assign(matrix_.num_cols(), 0.0);
objective_perturbation_.AssignToZero(matrix_.num_cols());
for (ColIndex col(0); col < structural_size; ++col) {
const Fractional objective = objective_[col];
const Fractional magnitude =
@@ -297,7 +297,7 @@ void ReducedCosts::ShiftCost(ColIndex col) {
void ReducedCosts::ClearAndRemoveCostShifts() {
SCOPED_TIME_STAT(&stats_);
objective_perturbation_.assign(matrix_.num_cols(), 0.0);
objective_perturbation_.AssignToZero(matrix_.num_cols());
recompute_basic_objective_ = true;
recompute_basic_objective_left_inverse_ = true;
recompute_reduced_costs_ = true;
@@ -320,7 +320,7 @@ const DenseRow& ReducedCosts::GetReducedCosts() {
const DenseColumn& ReducedCosts::GetDualValues() {
SCOPED_TIME_STAT(&stats_);
ComputeBasicObjectiveLeftInverse();
return Transpose(basic_objective_left_inverse_);
return Transpose(basic_objective_left_inverse_.values);
}
void ReducedCosts::RecomputeReducedCostsAndPrimalEnteringCandidatesIfNeeded() {
@@ -366,9 +366,9 @@ void ReducedCosts::ComputeReducedCosts() {
#endif
if (num_omp_threads == 1) {
for (ColIndex col(0); col < num_cols; ++col) {
reduced_costs_[col] =
objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(col, basic_objective_left_inverse_);
reduced_costs_[col] = objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(
col, basic_objective_left_inverse_.values);
// We also compute the dual residual error y.B - c_B.
if (is_basic.IsSet(col)) {
@@ -386,9 +386,9 @@ void ReducedCosts::ComputeReducedCosts() {
#pragma omp parallel for num_threads(num_omp_threads)
for (int i = 0; i < parallel_loop_size; i++) {
const ColIndex col(i);
reduced_costs_[col] =
objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(col, basic_objective_left_inverse_);
reduced_costs_[col] = objective_[col] + objective_perturbation_[col] -
matrix_.ColumnScalarProduct(
col, basic_objective_left_inverse_.values);
if (is_basic.IsSet(col)) {
thread_local_dual_residual_error[omp_get_thread_num()] =
@@ -422,15 +422,15 @@ void ReducedCosts::ComputeReducedCosts() {
void ReducedCosts::ComputeBasicObjectiveLeftInverse() {
SCOPED_TIME_STAT(&stats_);
basic_objective_left_inverse_.resize(RowToColIndex(matrix_.num_rows()), 0.0);
if (recompute_basic_objective_) {
ComputeBasicObjective();
}
basic_objective_left_inverse_ = basic_objective_;
basic_objective_left_inverse_.values = basic_objective_;
basic_objective_left_inverse_.non_zeros.clear();
basis_factorization_.LeftSolve(&basic_objective_left_inverse_);
recompute_basic_objective_left_inverse_ = false;
IF_STATS_ENABLED(stats_.basic_objective_left_inverse_density.Add(
Density(basic_objective_left_inverse_)));
Density(basic_objective_left_inverse_.values)));
// TODO(user): Estimate its accuracy by a few scalar products, and refactorize
// if it is above a threshold?
@@ -487,10 +487,19 @@ void ReducedCosts::UpdateReducedCosts(ColIndex entering_col,
// we can use them in ComputeCurrentDualResidualError().
const ScatteredRow& unit_row_left_inverse =
update_row->GetUnitRowLeftInverse();
for (const ColIndex col : unit_row_left_inverse.non_zeros) {
const ColIndex slack_col = first_slack_col + col;
const Fractional coeff = unit_row_left_inverse[col];
reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
if (unit_row_left_inverse.non_zeros.empty()) {
const ColIndex num_cols = unit_row_left_inverse.values.size();
for (ColIndex col(0); col < num_cols; ++col) {
const ColIndex slack_col = first_slack_col + col;
const Fractional coeff = unit_row_left_inverse[col];
reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
}
} else {
for (const ColIndex col : unit_row_left_inverse.non_zeros) {
const ColIndex slack_col = first_slack_col + col;
const Fractional coeff = unit_row_left_inverse[col];
reduced_costs_[slack_col] += new_leaving_reduced_cost * coeff;
}
}
reduced_costs_[leaving_col] = new_leaving_reduced_cost;

View File

@@ -273,7 +273,7 @@ class ReducedCosts {
// algorithm, but may give us a good idea of the current precision of our
// estimates. It is also faster to compute the unit_row_left_inverse_ because
// of sparsity.
DenseRow basic_objective_left_inverse_;
ScatteredRow basic_objective_left_inverse_;
// This is usually parameters_.dual_feasibility_tolerance() except when the
// dual residual error |y.B - c_B| is higher than it and we have to increase

View File

@@ -781,7 +781,7 @@ bool RevisedSimplex::InitializeBoundsAndTestIfUnchanged(
SCOPED_TIME_STAT(&function_stats_);
lower_bound_.resize(num_cols_, 0.0);
upper_bound_.resize(num_cols_, 0.0);
bound_perturbation_.assign(num_cols_, 0.0);
bound_perturbation_.AssignToZero(num_cols_);
// Variable bounds, for both non-slack and slack variables.
bool bounds_are_unchanged = true;
@@ -1325,7 +1325,7 @@ void RevisedSimplex::CorrectErrorsOnVariableValues() {
void RevisedSimplex::ComputeVariableValuesError() {
SCOPED_TIME_STAT(&function_stats_);
error_.assign(num_rows_, 0.0);
error_.AssignToZero(num_rows_);
const DenseRow& variable_values = variable_values_.GetDenseRow();
for (ColIndex col(0); col < num_cols_; ++col) {
const Fractional value = variable_values[col];
@@ -1338,9 +1338,21 @@ void RevisedSimplex::ComputeDirection(ColIndex col) {
DCHECK_COL_BOUNDS(col);
basis_factorization_.RightSolveForProblemColumn(col, &direction_);
direction_infinity_norm_ = 0.0;
for (const RowIndex row : direction_.non_zeros) {
direction_infinity_norm_ =
std::max(direction_infinity_norm_, std::abs(direction_[row]));
if (direction_.non_zeros.empty()) {
// We still compute the direction non-zeros because our code relies on it.
for (RowIndex row(0); row < num_rows_; ++row) {
const Fractional value = direction_[row];
if (value != 0.0) {
direction_.non_zeros.push_back(row);
direction_infinity_norm_ =
std::max(direction_infinity_norm_, std::abs(value));
}
}
} else {
for (const RowIndex row : direction_.non_zeros) {
direction_infinity_norm_ =
std::max(direction_infinity_norm_, std::abs(direction_[row]));
}
}
IF_STATS_ENABLED(ratio_test_stats_.direction_density.Add(
num_rows_ == 0 ? 0.0
@@ -1952,27 +1964,50 @@ void RevisedSimplex::DualPhaseIUpdatePriceOnReducedCostChange(
++num_dual_infeasible_positions_;
}
if (!something_to_do) {
initially_all_zero_scratchpad_.is_non_zero.resize(num_rows_, false);
initially_all_zero_scratchpad_.values.resize(num_rows_, 0.0);
something_to_do = true;
}
compact_matrix_.ColumnAddMultipleToDenseColumn(
compact_matrix_.ColumnAddMultipleToSparseScatteredColumn(
col, sign - dual_infeasibility_improvement_direction_[col],
&initially_all_zero_scratchpad_.values);
&initially_all_zero_scratchpad_);
dual_infeasibility_improvement_direction_[col] = sign;
}
}
if (something_to_do) {
// TODO(user): This code is duplicated with UpdateGivenNonBasicVariables()
// and more generally with the one in RankOneUpdateFactorization. Fix.
if (ShouldUseDenseIteration(initially_all_zero_scratchpad_)) {
initially_all_zero_scratchpad_.non_zeros.clear();
initially_all_zero_scratchpad_.is_non_zero.assign(num_rows_, false);
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
initially_all_zero_scratchpad_.is_non_zero[row] = false;
}
}
const VariableTypeRow& variable_type = variables_info_.GetTypeRow();
const Fractional threshold = parameters_.ratio_test_zero_threshold();
basis_factorization_.RightSolveWithNonZeros(
&initially_all_zero_scratchpad_);
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
is_dual_entering_candidate_.Set(
row,
IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
variable_type[basis_[row]], threshold));
basis_factorization_.RightSolve(&initially_all_zero_scratchpad_);
if (initially_all_zero_scratchpad_.non_zeros.empty()) {
for (RowIndex row(0); row < num_rows_; ++row) {
if (initially_all_zero_scratchpad_[row] == 0.0) continue;
dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
is_dual_entering_candidate_.Set(
row, IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
variable_type[basis_[row]],
threshold));
}
initially_all_zero_scratchpad_.values.AssignToZero(num_rows_);
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
dual_pricing_vector_[row] += initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
is_dual_entering_candidate_.Set(
row, IsDualPhaseILeavingCandidate(dual_pricing_vector_[row],
variable_type[basis_[row]],
threshold));
}
}
initially_all_zero_scratchpad_.non_zeros.clear();
}
@@ -1999,9 +2034,9 @@ Status RevisedSimplex::DualPhaseIChooseLeavingVariableRow(
dual_pricing_vector_.empty()) {
// Recompute everything from scratch.
num_dual_infeasible_positions_ = 0;
dual_pricing_vector_.assign(num_rows_, 0.0);
dual_pricing_vector_.AssignToZero(num_rows_);
is_dual_entering_candidate_.ClearAndResize(num_rows_);
dual_infeasibility_improvement_direction_.assign(num_cols_, 0.0);
dual_infeasibility_improvement_direction_.AssignToZero(num_cols_);
DualPhaseIUpdatePriceOnReducedCostChange(
variables_info_.GetIsRelevantBitRow());
} else {
@@ -2247,7 +2282,7 @@ Status RevisedSimplex::Minimize(TimeLimit* time_limit) {
if (feasibility_phase_) {
// Initialize the primal phase-I objective.
// Note that this temporarily erases the problem objective.
objective_.assign(num_cols_, 0.0);
objective_.AssignToZero(num_cols_);
UpdatePrimalPhaseICosts(
util::IntegerRange<RowIndex>(RowIndex(0), num_rows_));
}
@@ -2368,7 +2403,7 @@ Status RevisedSimplex::Minimize(TimeLimit* time_limit) {
} else {
VLOG(1) << "Unbounded problem.";
problem_status_ = ProblemStatus::PRIMAL_UNBOUNDED;
solution_primal_ray_.assign(num_cols_, 0.0);
solution_primal_ray_.AssignToZero(num_cols_);
for (RowIndex row(0); row < num_rows_; ++row) {
const ColIndex col = basis_[row];
solution_primal_ray_[col] = -direction_[row];
@@ -2650,7 +2685,7 @@ Status RevisedSimplex::DualMinimize(TimeLimit* time_limit) {
solution_dual_ray_ =
Transpose(update_row_.GetUnitRowLeftInverse().values);
update_row_.RecomputeFullUpdateRow(leaving_row);
solution_dual_ray_row_combination_.assign(num_cols_, 0.0);
solution_dual_ray_row_combination_.AssignToZero(num_cols_);
for (const ColIndex col : update_row_.GetNonZeroPositions()) {
solution_dual_ray_row_combination_[col] =
update_row_.GetCoefficient(col);

View File

@@ -68,8 +68,7 @@ void UpdateRow::ComputeUnitRowLeftInverse(RowIndex leaving_row) {
unit_row_left_inverse_.values) -
1.0));
IF_STATS_ENABLED(stats_.unit_row_left_inverse_density.Add(
static_cast<double>(unit_row_left_inverse_.non_zeros.size()) /
static_cast<double>(matrix_.num_rows().value())));
Density(unit_row_left_inverse_.values())));
}
void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
@@ -78,7 +77,8 @@ void UpdateRow::ComputeUpdateRow(RowIndex leaving_row) {
ComputeUnitRowLeftInverse(leaving_row);
SCOPED_TIME_STAT(&stats_);
if (parameters_.use_transposed_matrix()) {
if (parameters_.use_transposed_matrix() &&
!unit_row_left_inverse_.non_zeros.empty()) {
// Number of entries that ComputeUpdatesRowWise() will need to look at.
EntryIndex num_row_wise_entries(0);
for (const ColIndex col : unit_row_left_inverse_.non_zeros) {

View File

@@ -66,10 +66,11 @@ void VariableValues::RecomputeBasicVariableValues() {
SCOPED_TIME_STAT(&stats_);
DCHECK(basis_factorization_.IsRefactorized());
const RowIndex num_rows = matrix_.num_rows();
scratchpad_.assign(num_rows, 0.0);
scratchpad_.non_zeros.clear();
scratchpad_.values.AssignToZero(num_rows);
for (const ColIndex col : variables_info_.GetNotBasicBitRow()) {
const Fractional value = variable_values_[col];
matrix_.ColumnAddMultipleToDenseColumn(col, -value, &scratchpad_);
matrix_.ColumnAddMultipleToDenseColumn(col, -value, &scratchpad_.values);
}
basis_factorization_.RightSolve(&scratchpad_);
for (RowIndex row(0); row < num_rows; ++row) {
@@ -79,13 +80,14 @@ void VariableValues::RecomputeBasicVariableValues() {
// Computes the maximum absolute primal residual: accumulates the sum over all
// columns of variable_values_[col] * column into the dense part of the
// scratchpad and returns its infinity norm.
//
// Fix: the block contained both the old DenseColumn-based statements and their
// new ScatteredColumn-based replacements (double accumulation and an
// unreachable second return). Only the coherent new version is kept.
Fractional VariableValues::ComputeMaximumPrimalResidual() const {
  SCOPED_TIME_STAT(&stats_);
  // scratchpad_ is now a ScatteredColumn; clear its sparse bookkeeping and
  // zero the dense values before accumulating.
  scratchpad_.non_zeros.clear();
  scratchpad_.values.AssignToZero(matrix_.num_rows());
  const ColIndex num_cols = matrix_.num_cols();
  for (ColIndex col(0); col < num_cols; ++col) {
    const Fractional value = variable_values_[col];
    matrix_.ColumnAddMultipleToDenseColumn(col, value, &scratchpad_.values);
  }
  return InfinityNorm(scratchpad_.values);
}
Fractional VariableValues::ComputeMaximumPrimalInfeasibility() const {
@@ -144,24 +146,45 @@ void VariableValues::UpdateGivenNonBasicVariables(
const std::vector<ColIndex>& cols_to_update, bool update_basic_variables) {
SCOPED_TIME_STAT(&stats_);
if (update_basic_variables) {
initially_all_zero_scratchpad_.values.resize(matrix_.num_rows(), 0.0);
const RowIndex num_rows = matrix_.num_rows();
initially_all_zero_scratchpad_.values.resize(num_rows, 0.0);
initially_all_zero_scratchpad_.is_non_zero.resize(num_rows, false);
DCHECK(IsAllZero(initially_all_zero_scratchpad_.values));
DCHECK(IsAllFalse(initially_all_zero_scratchpad_.is_non_zero));
// TODO(user): Abort the non-zeros computation earlier if dense.
for (ColIndex col : cols_to_update) {
const Fractional old_value = variable_values_[col];
SetNonBasicVariableValueFromStatus(col);
matrix_.ColumnAddMultipleToDenseColumn(
matrix_.ColumnAddMultipleToSparseScatteredColumn(
col, variable_values_[col] - old_value,
&initially_all_zero_scratchpad_.values);
&initially_all_zero_scratchpad_);
}
basis_factorization_.RightSolveWithNonZeros(
&initially_all_zero_scratchpad_);
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
if (ShouldUseDenseIteration(initially_all_zero_scratchpad_)) {
initially_all_zero_scratchpad_.non_zeros.clear();
initially_all_zero_scratchpad_.is_non_zero.assign(num_rows, false);
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
initially_all_zero_scratchpad_.is_non_zero[row] = false;
}
}
basis_factorization_.RightSolve(&initially_all_zero_scratchpad_);
if (initially_all_zero_scratchpad_.non_zeros.empty()) {
for (RowIndex row(0); row < num_rows; ++row) {
variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
}
initially_all_zero_scratchpad_.values.AssignToZero(num_rows);
ResetPrimalInfeasibilityInformation();
} else {
for (const RowIndex row : initially_all_zero_scratchpad_.non_zeros) {
variable_values_[basis_[row]] -= initially_all_zero_scratchpad_[row];
initially_all_zero_scratchpad_[row] = 0.0;
}
UpdatePrimalInfeasibilityInformation(
initially_all_zero_scratchpad_.non_zeros);
initially_all_zero_scratchpad_.non_zeros.clear();
}
UpdatePrimalInfeasibilityInformation(
initially_all_zero_scratchpad_.non_zeros);
initially_all_zero_scratchpad_.non_zeros.clear();
} else {
for (ColIndex col : cols_to_update) {
SetNonBasicVariableValueFromStatus(col);
@@ -182,22 +205,31 @@ void VariableValues::ResetPrimalInfeasibilityInformation() {
const RowIndex num_rows = matrix_.num_rows();
primal_squared_infeasibilities_.resize(num_rows, 0.0);
primal_infeasible_positions_.ClearAndResize(num_rows);
UpdatePrimalInfeasibilities(
util::IntegerRange<RowIndex>(RowIndex(0), num_rows));
const DenseRow& lower_bounds = variables_info_.GetVariableLowerBounds();
const DenseRow& upper_bounds = variables_info_.GetVariableUpperBounds();
for (RowIndex row(0); row < num_rows; ++row) {
const ColIndex col = basis_[row];
const Fractional value = variable_values_[col];
const Fractional magnitude =
std::max(value - upper_bounds[col], lower_bounds[col] - value);
if (magnitude > tolerance_) {
primal_squared_infeasibilities_[row] = Square(magnitude);
primal_infeasible_positions_.Set(row);
}
}
}
void VariableValues::UpdatePrimalInfeasibilityInformation(
const std::vector<RowIndex>& rows) {
if (primal_squared_infeasibilities_.size() != matrix_.num_rows()) {
ResetPrimalInfeasibilityInformation();
} else {
SCOPED_TIME_STAT(&stats_);
UpdatePrimalInfeasibilities(rows);
return;
}
}
template <typename Rows>
void VariableValues::UpdatePrimalInfeasibilities(const Rows& rows) {
// Note(user): this is the same as the code in
// ResetPrimalInfeasibilityInformation(), but we do need the clear part.
SCOPED_TIME_STAT(&stats_);
const DenseRow& lower_bounds = variables_info_.GetVariableLowerBounds();
const DenseRow& upper_bounds = variables_info_.GetVariableUpperBounds();
for (const RowIndex row : rows) {

View File

@@ -104,10 +104,6 @@ class VariableValues {
std::string StatString() const { return stats_.StatString(); }
private:
// Internal version of UpdatePrimalInfeasibilityInformation().
template <typename Rows>
void UpdatePrimalInfeasibilities(const Rows& rows);
// Input problem data.
const CompactSparseMatrix& matrix_;
const RowToColMapping& basis_;
@@ -123,7 +119,7 @@ class VariableValues {
DenseBitColumn primal_infeasible_positions_;
mutable StatsGroup stats_;
mutable DenseColumn scratchpad_;
mutable ScatteredColumn scratchpad_;
// A temporary scattered column that is always reset to all zero after use.
ScatteredColumn initially_all_zero_scratchpad_;

View File

@@ -343,38 +343,6 @@ typedef StrictITIVector<RowIndex, ColIndex> RowToColMapping;
// Column of constraints (slack variables) statuses.
typedef StrictITIVector<RowIndex, ConstraintStatus> ConstraintStatusColumn;
// A simple struct that contains a DenseColumn and its non-zeros indices.
struct ScatteredColumn {
  // Dense representation of the column, indexed by RowIndex.
  DenseColumn values;

  // This can be left empty in which case we just have the dense representation
  // above. Otherwise, it should always be a subset of the actual non-zeros.
  //
  // Note that the non-zeros should always be sorted by increasing index. This
  // allows to have exactly the same iteration behavior if we iterate on the
  // non-zeros or directly in a dense-way on the values.
  RowIndexVector non_zeros;

  // Convenience element access, forwarding to the dense values.
  Fractional operator[](RowIndex row) const { return values[row]; }
  Fractional& operator[](RowIndex row) { return values[row]; }
};
// Same as ScatteredColumn for a row.
struct ScatteredRow {
  // Dense representation of the row, indexed by ColIndex.
  DenseRow values;

  // Optional positions of the non-zeros of values, sorted by increasing
  // index. When empty, only the dense representation above is meaningful.
  // Same contract as ScatteredColumn::non_zeros.
  ColIndexVector non_zeros;

  // Convenience element access, forwarding to the dense values.
  Fractional operator[](ColIndex col) const { return values[col]; }
  Fractional& operator[](ColIndex col) { return values[col]; }
};
// Reinterprets a ScatteredColumn as a ScatteredRow (and vice versa) without
// copying. NOTE(review): this reinterpret_cast is only valid if the two
// structs have identical memory layout — they differ only in the index types
// of their members, so any layout change to one must be mirrored in the other.
inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
  return reinterpret_cast<const ScatteredRow&>(c);
}
inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
  return reinterpret_cast<const ScatteredColumn&>(r);
}
// Returns true if it is more advantageous to use a dense iteration rather than
// using the non-zeros positions.
//
@@ -391,6 +359,46 @@ bool ShouldUseDenseIteration(const ScatteredRowOrCol& v) {
static_cast<double>(v.values.size().value());
}
// A simple struct that contains a DenseVector and its non-zeros indices.
template <typename Index>
struct ScatteredVector {
  // Dense representation of the vector, indexed by Index.
  StrictITIVector<Index, Fractional> values;

  // True when non_zeros is sorted by increasing index. Kept up to date by
  // SortNonZerosIfNeeded() below; operations that append to non_zeros out of
  // order must reset this to false.
  bool non_zeros_are_sorted = false;

  // This can be left empty in which case we just have the dense representation
  // above. Otherwise, it should always be a subset of the actual non-zeros.
  std::vector<Index> non_zeros;

  // Temporary vector used in some sparse computation on the ScatteredColumn.
  // True indicate a possible non-zero value. Note that its state is not always
  // consistent.
  StrictITIVector<Index, bool> is_non_zero;

  // Convenience element access, forwarding to the dense values.
  Fractional operator[](Index index) const { return values[index]; }
  Fractional& operator[](Index index) { return values[index]; }

  // Sorting the non-zeros is not always needed, but it allows us to have
  // exactly the same behavior while using a sparse iteration or a dense one. So
  // we always do it after a Solve().
  void SortNonZerosIfNeeded() {
    if (!non_zeros_are_sorted) {
      std::sort(non_zeros.begin(), non_zeros.end());
      non_zeros_are_sorted = true;
    }
  }
};
// Specialization used in the code.
// RowIndex-indexed (column) instantiation of ScatteredVector.
struct ScatteredColumn : public ScatteredVector<RowIndex> {};
// ColIndex-indexed (row) instantiation of ScatteredVector.
struct ScatteredRow : public ScatteredVector<ColIndex> {};
// Reinterprets a ScatteredColumn as a ScatteredRow (and vice versa) without
// copying. This relies on both structs being instantiations of the same
// ScatteredVector template (so they share the same layout and differ only in
// the index type used for access).
inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
  return reinterpret_cast<const ScatteredRow&>(c);
}
inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
  return reinterpret_cast<const ScatteredColumn&>(r);
}
// This is used during the deterministic time computation to convert a given
// number of floating-point operations to something in the same order of
// magnitude as a second (on a 2014 desktop).

View File

@@ -137,6 +137,9 @@ Fractional InfinityNorm(const DenseColumn& v);
Fractional InfinityNorm(const SparseColumn& v);
// Returns the fraction of non-zero entries of the given row.
//
// TODO(user): Take a Scattered row/col instead. This is only used to report
// stats, but we should still have a sparse version to do it faster.
double Density(const DenseRow& row);
// Sets to 0.0 all entries of the given row whose fabs() is lower than the given
@@ -205,35 +208,31 @@ inline bool IsAllZero(const Container& input) {
return true;
}
// Permutes the given dense vector and computes the positions of its non-zeros.
// It uses for this an all zero dense vector (zero_scratchpad).
// This operation is efficient for a sparse vector because:
// - It combines two iterations in one.
// - It avoids cache pollution by not looking at unecessary permuted locations.
// Note that the produced non_zeros is not ordered which may be a drawback.
// TODO(user): Investigate alternatives with an ordered non_zeros.
template <typename IndexType, typename PermutationIndexType,
typename NonZeroIndexType>
inline void PermuteAndComputeNonZeros(
// Returns true if the given vector of bool contains no true entry (an empty
// vector is considered all false).
template <typename BoolVector>
bool IsAllFalse(const BoolVector& v) {
  return std::none_of(v.begin(), v.end(),
                      [](const bool value) { return value; });
}
// Permutes the given dense vector. It uses for this an all zero scratchpad.
template <typename IndexType, typename PermutationIndexType>
inline void PermuteWithScratchpad(
const Permutation<PermutationIndexType>& permutation,
StrictITIVector<IndexType, Fractional>* zero_scratchpad,
StrictITIVector<IndexType, Fractional>* output,
std::vector<NonZeroIndexType>* non_zeros) {
non_zeros->clear();
StrictITIVector<IndexType, Fractional>* input_output) {
DCHECK(IsAllZero(*zero_scratchpad));
zero_scratchpad->swap(*output);
output->resize(zero_scratchpad->size(), 0.0);
const IndexType end = zero_scratchpad->size();
for (IndexType index(0); index < end; ++index) {
const IndexType size = input_output->size();
zero_scratchpad->swap(*input_output);
input_output->resize(size, 0.0);
for (IndexType index(0); index < size; ++index) {
const Fractional value = (*zero_scratchpad)[index];
if (value != 0.0) {
(*zero_scratchpad)[index] = 0.0;
const IndexType permuted_index(
permutation[PermutationIndexType(index.value())].value());
(*output)[permuted_index] = value;
non_zeros->push_back(NonZeroIndexType(permuted_index.value()));
(*input_output)[permuted_index] = value;
}
}
zero_scratchpad->assign(size, 0.0);
}
// Same as PermuteAndComputeNonZeros() except that we assume that the given
@@ -249,7 +248,6 @@ inline void PermuteWithKnownNonZeros(
output->resize(zero_scratchpad->size(), 0.0);
for (IndexType& index_ref : *non_zeros) {
const Fractional value = (*zero_scratchpad)[index_ref];
DCHECK_NE(value, 0.0);
(*zero_scratchpad)[index_ref] = 0.0;
const IndexType permuted_index(permutation[index_ref]);
(*output)[permuted_index] = value;
@@ -257,25 +255,6 @@ inline void PermuteWithKnownNonZeros(
}
}
// Same algorithm as PermuteAndComputeNonZeros() above when the non-zeros are
// not needed. This should be faster than a simple ApplyPermutation() if the
// input vector is relatively sparse. The input is the initial value of output.
inline void ApplyPermutationWhenInputIsProbablySparse(
const Permutation<RowIndex>& permutation, DenseColumn* zero_scratchpad,
DenseColumn* output) {
const RowIndex num_rows(permutation.size());
DCHECK(IsAllZero(*zero_scratchpad));
zero_scratchpad->swap(*output);
output->resize(num_rows, 0.0);
for (RowIndex row(0); row < num_rows; ++row) {
const Fractional value = (*zero_scratchpad)[row];
if (value != 0.0) {
(*zero_scratchpad)[row] = 0.0;
(*output)[permutation[row]] = value;
}
}
}
// Sets a dense vector for which the non zeros are known to be non_zeros.
template <typename IndexType, typename ScatteredRowOrCol>
inline void ClearAndResizeVectorWithNonZeros(IndexType size,
@@ -284,7 +263,8 @@ inline void ClearAndResizeVectorWithNonZeros(IndexType size,
// compared to the wanted size. Note that in most cases the vector will
// already be of the correct size.
const double kSparseThreshold = 0.05;
if (v->non_zeros.size() < kSparseThreshold * size.value()) {
if (!v->non_zeros.empty() &&
v->non_zeros.size() < kSparseThreshold * size.value()) {
for (const IndexType index : v->non_zeros) {
DCHECK_LT(index, v->values.size());
(*v)[index] = 0.0;

View File

@@ -756,36 +756,19 @@ void TriangularMatrix::LowerSolveStartingAtInternal(ColIndex start,
// Solves U.x = rhs in place, where U is this upper-triangular matrix.
//
// Fix: each branch contained both the old UpperSolveWithNonZerosInternal<...>
// call and its replacement, so the solve would have run twice. Only the
// UpperSolveInternal<> dispatch (matching its declaration) is kept.
void TriangularMatrix::UpperSolve(DenseColumn* rhs) const {
  // Compile-time dispatch on an all-ones diagonal lets the templated solve
  // skip the per-column division in that case.
  if (all_diagonal_coefficients_are_one_) {
    UpperSolveInternal<true>(rhs);
  } else {
    UpperSolveInternal<false>(rhs);
  }
}
// Same as UpperSolve() but also fills non_zero_rows with the positions of the
// non-zeros of the result.
void TriangularMatrix::UpperSolveWithNonZeros(
    DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
  // Compile-time dispatch on an all-ones diagonal lets the templated solve
  // skip the per-column division in that case.
  if (all_diagonal_coefficients_are_one_) {
    UpperSolveWithNonZerosInternal<true, true>(rhs, non_zero_rows);
  } else {
    UpperSolveWithNonZerosInternal<false, true>(rhs, non_zero_rows);
  }
}
template <bool diagonal_of_ones, bool with_non_zeros>
void TriangularMatrix::UpperSolveWithNonZerosInternal(
DenseColumn* rhs, RowIndexVector* non_zero_rows) const {
template <bool diagonal_of_ones>
void TriangularMatrix::UpperSolveInternal(DenseColumn* rhs) const {
RETURN_IF_NULL(rhs);
if (with_non_zeros) {
RETURN_IF_NULL(non_zero_rows);
non_zero_rows->clear();
}
const ColIndex end = first_non_identity_column_;
for (ColIndex col(diagonal_coefficients_.size() - 1); col >= end; --col) {
const Fractional value = (*rhs)[ColToRowIndex(col)];
if (value == 0.0) continue;
if (with_non_zeros) {
non_zero_rows->push_back(ColToRowIndex(col));
}
const Fractional coeff =
diagonal_of_ones ? value : value / diagonal_coefficients_[col];
if (!diagonal_of_ones) {
@@ -800,18 +783,6 @@ void TriangularMatrix::UpperSolveWithNonZerosInternal(
(*rhs)[EntryRow(i)] -= coeff * EntryCoefficient(i);
}
}
// Finish filling the non_zero_rows vector if needed.
if (with_non_zeros) {
for (RowIndex row(end.value() - 1); row >= 0; --row) {
if ((*rhs)[row] != 0.0) {
non_zero_rows->push_back(row);
}
}
// Sorting the non-zero positions gives better performance later.
std::reverse(non_zero_rows->begin(), non_zero_rows->end());
}
}
void TriangularMatrix::TransposeUpperSolve(DenseColumn* rhs) const {

View File

@@ -418,20 +418,20 @@ class CompactSparseMatrix {
// Same as ColumnAddMultipleToDenseColumn() but also adds the new non-zeros to
// the non_zeros vector. A non-zero is "new" if is_non_zero[row] was false,
// and we update dense_column[row]. This functions also updates is_non_zero.
void ColumnAddMultipleToDenseColumnAndUpdateNonZeros(
ColIndex col, Fractional multiplier, DenseColumn* dense_column,
StrictITIVector<RowIndex, bool>* is_non_zero,
std::vector<RowIndex>* non_zeros) const {
void ColumnAddMultipleToSparseScatteredColumn(ColIndex col,
Fractional multiplier,
ScatteredColumn* column) const {
if (multiplier == 0.0) return;
RETURN_IF_NULL(dense_column);
RETURN_IF_NULL(column);
for (const EntryIndex i : Column(col)) {
const RowIndex row = EntryRow(i);
(*dense_column)[row] += multiplier * EntryCoefficient(i);
if (!(*is_non_zero)[row]) {
(*is_non_zero)[row] = true;
non_zeros->push_back(row);
(*column)[row] += multiplier * EntryCoefficient(i);
if (!column->is_non_zero[row]) {
column->is_non_zero[row] = true;
column->non_zeros.push_back(row);
}
}
column->non_zeros_are_sorted = false;
}
// Copies the given column of this matrix into the given dense_column.
@@ -588,10 +588,6 @@ class TriangularMatrix : private CompactSparseMatrix {
// This also computes the last non-zero row position (if not nullptr).
void TransposeLowerSolve(DenseColumn* rhs, RowIndex* last_non_zero_row) const;
// This also computes the non-zero row positions (if not nullptr).
void UpperSolveWithNonZeros(DenseColumn* rhs,
RowIndexVector* non_zero_rows) const;
// Hyper-sparse version of the triangular solve functions. The passed
// non_zero_rows should contain the positions of the symbolic non-zeros of the
// result in the order in which they need to be accessed (or in the reverse
@@ -714,9 +710,8 @@ class TriangularMatrix : private CompactSparseMatrix {
// Internal versions of some Solve() functions to avoid code duplication.
template <bool diagonal_of_ones>
void LowerSolveStartingAtInternal(ColIndex start, DenseColumn* rhs) const;
template <bool diagonal_of_ones, bool with_non_zeros>
void UpperSolveWithNonZerosInternal(DenseColumn* rhs,
RowIndexVector* non_zero_rows) const;
template <bool diagonal_of_ones>
void UpperSolveInternal(DenseColumn* rhs) const;
template <bool diagonal_of_ones>
void TransposeLowerSolveInternal(DenseColumn* rhs,
RowIndex* last_non_zero_row) const;