fix mps reader

2018-02-12 11:37:45 +01:00
parent 8ea92e372e
commit 377571c954
6 changed files with 126 additions and 68 deletions
--- a/ortools/lp_data/lp_types.cc
+++ b/ortools/lp_data/lp_types.cc
@@ -16,9 +16,6 @@
 namespace operations_research {
 namespace glop {

-// static
-const double ScatteredColumnReference::kDenseThresholdForPreciseSum = 0.8;
-
 std::string GetProblemStatusString(ProblemStatus problem_status) {
  switch (problem_status) {
    case ProblemStatus::OPTIMAL:
--- a/ortools/lp_data/lp_types.h
+++ b/ortools/lp_data/lp_types.h
@@ -149,7 +149,7 @@ enum class ProblemStatus : int8 {
  // solution.
  DUAL_FEASIBLE,

-  // An error occured during the solving process.
+  // An error occurred during the solving process.
  ABNORMAL,

  // The input problem was invalid (see LinearProgram.IsValid()).
@@ -343,44 +343,60 @@ typedef StrictITIVector<RowIndex, ColIndex> RowToColMapping;
 // Column of constraints (slack variables) statuses.
 typedef StrictITIVector<RowIndex, ConstraintStatus> ConstraintStatusColumn;

-// A simple wrapper that contains references to a DenseColumn and its non-zeros
-// indices. Passing such reference by value when calling a function is
-// equivalent to passing two const references (one to the DenseColumn and one to
-// the RowIndexVector).
-//
-// TODO(user): Create a ScatteredColumn class and use a reference to it instead
-// of this. It will require more work since this class allows one to combine the
-// dense vector and its non-zero positions even if they come from two different
-// places.
-struct ScatteredColumnReference {
-  ScatteredColumnReference(const DenseColumn& _dense_column,
-                           const RowIndexVector& _non_zero_rows)
-      : dense_column(_dense_column), non_zero_rows(_non_zero_rows) {}
+// A simple struct that contains a DenseColumn and its non-zeros indices.
+struct ScatteredColumn {
+  DenseColumn values;

-  // For convenience, this constructor takes a column transpose.
-  // TODO(user): Move the Transpose() function in this file and use it here?
-  // Also introduce a function to transpose a RowIndexVector into a
-  // ColIndexVector.
-  ScatteredColumnReference(const DenseRow& dense_row,
-                           const ColIndexVector& non_zero_cols)
-      : dense_column(reinterpret_cast<const DenseColumn&>(dense_row)),
-        non_zero_rows(reinterpret_cast<const RowIndexVector&>(non_zero_cols)) {}
+  // This can be left empty in which case we just have the dense representation
+  // above. Otherwise, it should always be a subset of the actual non-zeros.
+  //
+  // Note that the non-zeros should always be sorted by increasing index. This
+  // allows to have exactly the same iteration behavior if we iterate on the
+  // non-zeros or directly in a dense-way on the values.
+  RowIndexVector non_zeros;

-  Fractional operator[](RowIndex row) const { return dense_column[row]; }
-  const DenseColumn& dense_column;
-  const RowIndexVector& non_zero_rows;
-
-  // Density threshold past which a dense sum is used rather than a loop over
-  // the non-zero positions of a vector during a precise sum.
-  static const double kDenseThresholdForPreciseSum;
+  Fractional operator[](RowIndex row) const { return values[row]; }
+  Fractional& operator[](RowIndex row) { return values[row]; }
 };

+// Same as ScatteredColumn for a row.
+struct ScatteredRow {
+  DenseRow values;
+  ColIndexVector non_zeros;
+
+  Fractional operator[](ColIndex col) const { return values[col]; }
+  Fractional& operator[](ColIndex col) { return values[col]; }
+};
+
+inline const ScatteredRow& TransposedView(const ScatteredColumn& c) {
+  return reinterpret_cast<const ScatteredRow&>(c);
+}
+inline const ScatteredColumn& TransposedView(const ScatteredRow& r) {
+  return reinterpret_cast<const ScatteredColumn&>(r);
+}
+
+// Returns true if it is more advantageous to use a dense iteration rather than
+// using the non-zeros positions.
+//
+// TODO(user): The constant should depend on what algorithm is used. Clearing a
+// dense vector is a lot more efficient than doing more complex stuff. Clean
+// this up by extracting all the currently used constants in one place with
+// meaningful names.
+template <typename ScatteredRowOrCol>
+bool ShouldUseDenseIteration(const ScatteredRowOrCol& v) {
+  if (v.non_zeros.empty()) return true;
+  const double kThresholdForUsingDenseRepresentation = 0.8;
+  return static_cast<double>(v.non_zeros.size()) >
+         kThresholdForUsingDenseRepresentation *
+             static_cast<double>(v.values.size().value());
+}
+
 // This is used during the deterministic time computation to convert a given
 // number of floating-point operations to something in the same order of
 // magnitude as a second (on a 2014 desktop).
 static inline double DeterministicTimeForFpOperations(int64 n) {
-  const double kConvertionFactor = 2e-9;
-  return kConvertionFactor * static_cast<double>(n);
+  const double kConversionFactor = 2e-9;
+  return kConversionFactor * static_cast<double>(n);
 }

 }  // namespace glop
--- a/ortools/lp_data/lp_utils.cc
+++ b/ortools/lp_data/lp_utils.cc
@@ -33,14 +33,12 @@ Fractional PreciseSquaredNorm(const SparseColumn& v) {
  return sum.Value();
 }

-Fractional PreciseSquaredNorm(ScatteredColumnReference v) {
-  if (v.non_zero_rows.size() >
-      ScatteredColumnReference::kDenseThresholdForPreciseSum *
-          v.dense_column.size().value()) {
-    return PreciseSquaredNorm(v.dense_column);
+Fractional PreciseSquaredNorm(const ScatteredColumn& v) {
+  if (ShouldUseDenseIteration(v)) {
+    return PreciseSquaredNorm(v.values);
  }
  KahanSum sum;
-  for (const RowIndex row : v.non_zero_rows) {
+  for (const RowIndex row : v.non_zeros) {
    sum.Add(Square(v[row]));
  }
  return sum.Value();
@@ -48,8 +46,16 @@ Fractional PreciseSquaredNorm(ScatteredColumnReference v) {

 Fractional SquaredNorm(const DenseColumn& column) {
  Fractional sum(0.0);
-  for (RowIndex row(0); row < column.size(); ++row) {
-    sum += Square(column[row]);
+  RowIndex row(0);
+  const size_t num_blocks = column.size().value() / 4;
+  for (size_t i = 0; i < num_blocks; ++i) {
+    // See the comment in ScalarProduct in the header for some notes about the
+    // effect of adding up several squares at a time.
+    sum += Square(column[row++]) + Square(column[row++]) +
+           Square(column[row++]) + Square(column[row++]);
+  }
+  while (row < column.size()) {
+    sum += Square(column[row++]);
  }
  return sum;
 }
--- a/ortools/lp_data/lp_utils.h
+++ b/ortools/lp_data/lp_utils.h
@@ -43,13 +43,33 @@ static inline Fractional Fractionality(Fractional f) {

 // Returns the scalar product between u and v.
 // The precise versions use KahanSum and are about two times slower.
-template <class DenseRowOrColumn, class DenseRowOrColumn2>
-Fractional ScalarProduct(const DenseRowOrColumn& u,
+template <class DenseRowOrColumn1, class DenseRowOrColumn2>
+Fractional ScalarProduct(const DenseRowOrColumn1& u,
                         const DenseRowOrColumn2& v) {
  DCHECK_EQ(u.size().value(), v.size().value());
  Fractional sum(0.0);
-  for (typename DenseRowOrColumn::IndexType i(0); i < u.size(); ++i) {
-    sum += u[i] * v[typename DenseRowOrColumn2::IndexType(i.value())];
+  typename DenseRowOrColumn1::IndexType i(0);
+  typename DenseRowOrColumn2::IndexType j(0);
+  const size_t num_blocks = u.size().value() / 4;
+  for (size_t block = 0; block < num_blocks; ++block) {
+    // Computing the sum of 4 elements at once may allow the compiler to
+    // generate more efficient code, e.g. using SIMD and checking the loop
+    // condition much less frequently.
+    //
+    // This produces different results from the case where each multiplication
+    // is added to sum separately. An extreme example of this can be derived
+    // using the fact that 1e11 + 2e-6 == 1e11, but 1e11 + 8e-6 > 1e11.
+    //
+    // While the results are different, they aren't necessarily better or worse.
+    // Typically, sum will be of larger magnitude than any individual
+    // multiplication, so one might expect, in practice, this method to yield
+    // more accurate results. However, if accuracy is vital, use the precise
+    // version.
+    sum += (u[i++] * v[j++]) + (u[i++] * v[j++]) + (u[i++] * v[j++]) +
+           (u[i++] * v[j++]);
+  }
+  while (i < u.size()) {
+    sum += u[i++] * v[j++];
  }
  return sum;
 }
@@ -92,15 +112,13 @@ Fractional PreciseScalarProduct(const DenseRowOrColumn& u,

 template <class DenseRowOrColumn>
 Fractional PreciseScalarProduct(const DenseRowOrColumn& u,
-                                ScatteredColumnReference v) {
-  DCHECK_EQ(u.size().value(), v.dense_column.size().value());
-  if (v.non_zero_rows.size() >
-      ScatteredColumnReference::kDenseThresholdForPreciseSum *
-          v.dense_column.size().value()) {
-    return PreciseScalarProduct(u, v.dense_column);
+                                const ScatteredColumn& v) {
+  DCHECK_EQ(u.size().value(), v.values.size().value());
+  if (ShouldUseDenseIteration(v)) {
+    return PreciseScalarProduct(u, v.values);
  }
  KahanSum sum;
-  for (const RowIndex row : v.non_zero_rows) {
+  for (const RowIndex row : v.non_zeros) {
    sum.Add(u[typename DenseRowOrColumn::IndexType(row.value())] * v[row]);
  }
  return sum.Value();
@@ -112,7 +130,7 @@ Fractional SquaredNorm(const SparseColumn& v);
 Fractional SquaredNorm(const DenseColumn& v);
 Fractional PreciseSquaredNorm(const SparseColumn& v);
 Fractional PreciseSquaredNorm(const DenseColumn& v);
-Fractional PreciseSquaredNorm(ScatteredColumnReference v);
+Fractional PreciseSquaredNorm(const ScatteredColumn& v);

 // Returns the maximum of the |coefficients| of 'v'.
 Fractional InfinityNorm(const DenseColumn& v);
@@ -259,24 +277,24 @@ inline void ApplyPermutationWhenInputIsProbablySparse(
 }

 // Sets a dense vector for which the non zeros are known to be non_zeros.
-template <typename Vector, typename IndexType>
-inline void ClearAndResizeVectorWithNonZeros(
-    IndexType size, Vector* v, std::vector<IndexType>* non_zeros) {
+template <typename IndexType, typename ScatteredRowOrCol>
+inline void ClearAndResizeVectorWithNonZeros(IndexType size,
+                                             ScatteredRowOrCol* v) {
  // Only use the sparse version if there is less than 5% non-zeros positions
  // compared to the wanted size. Note that in most cases the vector will
  // already be of the correct size.
  const double kSparseThreshold = 0.05;
-  if (non_zeros->size() < kSparseThreshold * size.value()) {
-    for (const IndexType index : *non_zeros) {
-      DCHECK_LT(index, v->size());
+  if (v->non_zeros.size() < kSparseThreshold * size.value()) {
+    for (const IndexType index : v->non_zeros) {
+      DCHECK_LT(index, v->values.size());
      (*v)[index] = 0.0;
    }
-    v->resize(size, 0.0);
-    DCHECK(IsAllZero(*v));
+    v->values.resize(size, 0.0);
+    DCHECK(IsAllZero(v->values));
  } else {
-    v->AssignToZero(size);
+    v->values.AssignToZero(size);
  }
-  non_zeros->clear();
+  v->non_zeros.clear();
 }

 // Changes the sign of all the entries in the given vector.
--- a/ortools/lp_data/mps_reader.cc
+++ b/ortools/lp_data/mps_reader.cc
@@ -34,7 +34,8 @@
 #include "ortools/base/status.h"

 DEFINE_bool(mps_free_form, false, "Read MPS files in free form.");
-DEFINE_bool(mps_stop_after_first_error, true, "Stop after the first error.");
+DEFINE_bool(mps_stop_after_first_error, true,
+            "Stop after the first error.");

 namespace operations_research {
 namespace glop {
@@ -108,7 +109,7 @@ void MPSReader::DisplaySummary() {

 void MPSReader::SplitLineIntoFields() {
  if (free_form_) {
-    fields_ = absl::StrSplit(line_, ' ', absl::SkipEmpty());
+    fields_ = absl::StrSplit(line_, absl::delimiter::AnyOf(" \t"), absl::SkipEmpty());
    CHECK_GE(kNumFields, fields_.size());
  } else {
    int length = line_.length();
@@ -193,11 +194,19 @@ bool MPSReader::IsCommentOrBlank() const {

 void MPSReader::ProcessLine(const std::string& line) {
  ++line_num_;
-  if (!parse_success_ && FLAGS_mps_stop_after_first_error) return;
+  if (!parse_success_ && FLAGS_mps_stop_after_first_error)
+    return;
  line_ = line;
  if (IsCommentOrBlank()) {
    return;  // Skip blank lines and comments.
  }
+  if (!free_form_ && line_.find('\t') != std::string::npos) {
+    if (log_errors_) {
+      LOG(ERROR) << "Line " << line_num_ << ": contains tab "
+                 << "(Line contents: " << line_ << ").";
+    }
+    parse_success_ = false;
+  }
  std::string section;
  if (line[0] != '\0' && line[0] != ' ') {
    section = GetFirstWord();
@@ -347,8 +356,12 @@ void MPSReader::ProcessColumnsSection() {
  // Take into account the INTORG and INTEND markers.
  if (line_.find("'MARKER'") != std::string::npos) {
    if (line_.find("'INTORG'") != std::string::npos) {
+      VLOG(2) << "Entering integer marker.\n" << line_;
+      CHECK(!in_integer_section_);
      in_integer_section_ = true;
    } else if (line_.find("'INTEND'") != std::string::npos) {
+      VLOG(2) << "Leaving integer marker.\n" << line_;
+      CHECK(in_integer_section_);
      in_integer_section_ = false;
    }
    return;
@@ -518,6 +531,10 @@ void MPSReader::StoreBound(const std::string& bound_type_mnemonic,
  switch (bound_type_id) {
    case LOWER_BOUND:
      lower_bound = Fractional(GetDoubleFromString(bound_value));
+      // LI with the value 0.0 specifies general integers with no upper bound.
+      if (bound_type_mnemonic == "LI" && lower_bound == 0.0) {
+        upper_bound = kInfinity;
+      }
      break;
    case UPPER_BOUND:
      upper_bound = Fractional(GetDoubleFromString(bound_value));
--- a/ortools/lp_data/mps_reader.h
+++ b/ortools/lp_data/mps_reader.h
@@ -30,6 +30,7 @@
 #include <string>  // for std::string
 #include <vector>  // for vector

+#include "ortools/base/commandlineflags.h"
 #include "ortools/base/macros.h"  // for DISALLOW_COPY_AND_ASSIGN, NULL
 #include "ortools/base/stringprintf.h"
 #include "ortools/base/int_type.h"
@@ -39,6 +40,9 @@
 #include "ortools/lp_data/lp_data.h"
 #include "ortools/lp_data/lp_types.h"

+DECLARE_bool(mps_free_form);
+DECLARE_bool(mps_stop_after_first_error);
+
 namespace operations_research {
 namespace glop {

@@ -101,7 +105,7 @@ class MPSReader {
  std::string GetFirstWord() const;

  // Returns true if the line contains a comment (starting with '*') or
-  // if it it is a blank line.
+  // if it is a blank line.
  bool IsCommentOrBlank() const;

  // Helper function that returns fields_[offset + index].