Minor optimization to GLOP: unroll the squared-norm loops and operate on spans instead of DenseColumn.

This commit is contained in:
Laurent Perron
2024-04-25 16:29:03 +02:00
parent 7e839d666c
commit c8c3b5f6ce
3 changed files with 82 additions and 22 deletions

View File

@@ -19,6 +19,7 @@
#include <vector>
#include "absl/log/check.h"
#include "absl/types/span.h"
#include "ortools/lp_data/lp_types.h"
#include "ortools/lp_data/lp_utils.h"
@@ -132,15 +133,14 @@ namespace {
// norm of the given column, otherwise do the same with a sparse version. In
// both cases column is cleared.
// Computes the squared L2 norm of `column` and resets the column to zero.
// If `non_zeros` is empty, the whole span is scanned densely; otherwise only
// the rows listed in `non_zeros` are assumed to hold non-zero values, so only
// those entries are read and cleared.
Fractional ComputeSquaredNormAndResetToZero(
    const std::vector<RowIndex>& non_zeros, absl::Span<Fractional> column) {
  Fractional sum = 0.0;
  if (non_zeros.empty()) {
    // Dense path: one pass that both accumulates the squares and zeroes out
    // the data.
    sum = SquaredNormAndResetToZero(column);
  } else {
    // Sparse path: touch only the known non-zero positions.
    for (const RowIndex row : non_zeros) {
      sum += Square(column[row.value()]);
      column[row.value()] = 0.0;
    }
  }
  return sum;
}
@@ -152,7 +152,8 @@ Fractional LuFactorization::RightSolveSquaredNorm(const ColumnView& a) const {
if (is_identity_factorization_) return SquaredNorm(a);
non_zero_rows_.clear();
dense_zero_scratchpad_.resize(lower_.num_rows(), 0.0);
const RowIndex num_rows = lower_.num_rows();
dense_zero_scratchpad_.resize(num_rows, 0.0);
DCHECK(IsAllZero(dense_zero_scratchpad_));
for (const SparseColumn::Entry e : a) {
@@ -174,8 +175,9 @@ Fractional LuFactorization::RightSolveSquaredNorm(const ColumnView& a) const {
upper_.HyperSparseSolveWithReversedNonZeros(&dense_zero_scratchpad_,
&non_zero_rows_);
}
return ComputeSquaredNormAndResetToZero(non_zero_rows_,
&dense_zero_scratchpad_);
return ComputeSquaredNormAndResetToZero(
non_zero_rows_,
absl::MakeSpan(dense_zero_scratchpad_.data(), num_rows.value()));
}
Fractional LuFactorization::DualEdgeSquaredNorm(RowIndex row) const {
@@ -185,7 +187,8 @@ Fractional LuFactorization::DualEdgeSquaredNorm(RowIndex row) const {
col_perm_.empty() ? row : ColToRowIndex(col_perm_[RowToColIndex(row)]);
non_zero_rows_.clear();
dense_zero_scratchpad_.resize(lower_.num_rows(), 0.0);
const RowIndex num_rows = lower_.num_rows();
dense_zero_scratchpad_.resize(num_rows, 0.0);
DCHECK(IsAllZero(dense_zero_scratchpad_));
dense_zero_scratchpad_[permuted_row] = 1.0;
non_zero_rows_.push_back(permuted_row);
@@ -204,8 +207,9 @@ Fractional LuFactorization::DualEdgeSquaredNorm(RowIndex row) const {
transpose_lower_.HyperSparseSolveWithReversedNonZeros(
&dense_zero_scratchpad_, &non_zero_rows_);
}
return ComputeSquaredNormAndResetToZero(non_zero_rows_,
&dense_zero_scratchpad_);
return ComputeSquaredNormAndResetToZero(
non_zero_rows_,
absl::MakeSpan(dense_zero_scratchpad_.data(), num_rows.value()));
}
namespace {

View File

@@ -16,6 +16,7 @@
#include <algorithm>
#include "absl/log/check.h"
#include "absl/types/span.h"
#include "ortools/lp_data/lp_types.h"
#include "ortools/lp_data/scattered_vector.h"
#include "ortools/lp_data/sparse_column.h"
@@ -70,22 +71,75 @@ Fractional PreciseSquaredNorm(const ScatteredColumn& v) {
return sum.Value();
}
// Returns the sum of the squares of the entries of `data`.
//
// The loop is manually unrolled with four independent accumulators: we do not
// care about the floating-point order of operations here, and this is faster
// than a single running sum because the four additions can proceed in
// parallel in the CPU pipeline.
Fractional SquaredNorm(absl::Span<const Fractional> data) {
  const size_t end = data.size();
  // Largest multiple of 4 not exceeding `end`. Using size_t throughout avoids
  // the implicit size_t -> int narrowing of the previous version (which would
  // misbehave on spans with more than INT_MAX entries), and `end - (end % 4)`
  // cannot underflow even when data.size() < 4.
  const size_t main_end = end - (end % 4);
  Fractional sum1 = 0.0;
  Fractional sum2 = 0.0;
  Fractional sum3 = 0.0;
  Fractional sum4 = 0.0;
  size_t i = 0;
  for (; i < main_end; i += 4) {
    sum1 += data[i] * data[i];
    sum2 += data[i + 1] * data[i + 1];
    sum3 += data[i + 2] * data[i + 2];
    sum4 += data[i + 3] * data[i + 3];
  }
  Fractional sum = sum1 + sum2 + sum3 + sum4;
  // At most 3 trailing entries remain.
  for (; i < end; ++i) {
    sum += data[i] * data[i];
  }
  return sum;
}
// Returns the sum of the squares of the entries of `data` and sets every
// entry to zero in the same pass.
//
// The loop is manually unrolled with four independent accumulators: we do not
// care about the floating-point order of operations here, and this is faster
// than a single running sum because the four additions can proceed in
// parallel in the CPU pipeline.
Fractional SquaredNormAndResetToZero(absl::Span<Fractional> data) {
  const size_t end = data.size();
  // Largest multiple of 4 not exceeding `end`. Using size_t throughout avoids
  // the implicit size_t -> int narrowing of the previous version (which would
  // misbehave on spans with more than INT_MAX entries), and `end - (end % 4)`
  // cannot underflow even when data.size() < 4.
  const size_t main_end = end - (end % 4);
  Fractional sum1 = 0.0;
  Fractional sum2 = 0.0;
  Fractional sum3 = 0.0;
  Fractional sum4 = 0.0;
  size_t i = 0;
  for (; i < main_end; i += 4) {
    sum1 += data[i] * data[i];
    sum2 += data[i + 1] * data[i + 1];
    sum3 += data[i + 2] * data[i + 2];
    sum4 += data[i + 3] * data[i + 3];
    data[i] = 0.0;
    data[i + 1] = 0.0;
    data[i + 2] = 0.0;
    data[i + 3] = 0.0;
  }
  Fractional sum = sum1 + sum2 + sum3 + sum4;
  // At most 3 trailing entries remain.
  for (; i < end; ++i) {
    sum += data[i] * data[i];
    data[i] = 0.0;
  }
  return sum;
}
// DenseColumn overload: view the column's contiguous storage as a read-only
// span and forward to the span-based implementation.
Fractional SquaredNorm(const DenseColumn& column) {
  const absl::Span<const Fractional> view =
      absl::MakeConstSpan(column.data(), column.size().value());
  return SquaredNorm(view);
}
Fractional PreciseSquaredNorm(const DenseColumn& column) {
KahanSum sum;
for (RowIndex row(0); row < column.size(); ++row) {

View File

@@ -147,6 +147,8 @@ Fractional PartialScalarProduct(const DenseRowOrColumn& u,
// Returns the norm^2 (sum of the square of the entries) of the given column.
// The precise versions use KahanSum and are about two times slower.
Fractional SquaredNorm(const SparseColumn& v);
Fractional SquaredNorm(absl::Span<const Fractional> data);
Fractional SquaredNormAndResetToZero(absl::Span<Fractional> data);
Fractional SquaredNorm(const DenseColumn& column);
Fractional SquaredNorm(const ColumnView& v);
Fractional SquaredNorm(const ScatteredColumn& v);