Files
ortools-clone/ortools/util/dense_set.h
Corentin Le Molgat b4b226801b update include guards
2025-11-05 11:54:02 +01:00

152 lines
5.0 KiB
C++

// Copyright 2010-2025 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef ORTOOLS_UTIL_DENSE_SET_H_
#define ORTOOLS_UTIL_DENSE_SET_H_
#include <cstddef>
#include <utility>
#include <vector>
#include "absl/log/check.h"
#include "absl/types/span.h"
namespace operations_research {
// A set of dense non-negative integer values stored in a dense vector.
//
// This is useful when we want to iterate over a small subset of the possible
// values and reuse the memory, or if we want to randomly sample from the set.
//
// If the set is usually small but occasionally very large, iterating over a
// regular hash_set would be less efficient as you would (internal to the hash
// table iterator) have have to iterate over all the buckets in the hash
// table even if empty. If you clear the set frequently to avoid this, you would
// grow and rehash when you have a larger set.
//
// If resize=false, users *must* call reserve(K) where K > any key before
// calling any other method.
template <typename T, bool auto_resize = true>
class DenseSet {
public:
using iterator = typename std::vector<T>::const_iterator;
using const_iterator = typename std::vector<T>::const_iterator;
using value_type = T;
static constexpr bool kAutoResize = auto_resize;
const_iterator begin() const { return values_.begin(); }
const_iterator end() const { return values_.end(); }
size_t size() const { return values_.size(); }
bool empty() const { return values_.empty(); }
void reserve(size_t size) {
values_.reserve(size);
if (size >= positions_.size()) positions_.resize(size, -1);
}
size_t capacity() const { return positions_.size(); }
std::pair<iterator, bool> insert(T value) {
const int pos = Position(value);
if (pos == -1) {
DCHECK_GT(positions_.size(), ToInt(value));
positions_[ToInt(value)] = values_.size();
values_.push_back(value);
return {values_.begin() + positions_[ToInt(value)], true};
}
return {values_.begin() + pos, false};
}
iterator find(T value) {
const int pos = Position(value);
DCHECK_GT(positions_.size(), ToInt(value));
if (pos < 0) return values_.end();
return values_.begin() + pos;
}
bool contains(T value) const {
if (kAutoResize && ToInt(value) >= positions_.size()) return false;
return positions_[ToInt(value)] >= 0;
}
void erase(iterator it) {
const T value = *it;
DCHECK_GT(positions_.size(), ToInt(value));
positions_[ToInt(values_.back())] = it - values_.begin();
positions_[ToInt(value)] = -1;
// This is a hack to allow erase to work with a const iterator.
values_[it - begin()] = values_.back();
values_.pop_back();
}
int erase(T value) {
const int pos = Position(value);
if (pos < 0) return 0;
DCHECK_GT(positions_.size(), ToInt(value));
positions_[ToInt(values_.back())] = pos;
values_[pos] = values_.back();
values_.pop_back();
positions_[ToInt(value)] = -1;
return 1;
}
// The ordering is deterministic given the same sequence of inserts and
// erases but is arbitrary and should not be relied upon.
absl::Span<const T> values() const { return values_; }
void clear() {
// We expect values_ to be much smaller than the total number of possible
// values, so just clear entries in the set.
for (const T value : values_) {
DCHECK_GT(positions_.size(), ToInt(value));
positions_[ToInt(value)] = -1;
}
values_.clear();
}
private:
static int ToInt(T);
inline int Position(T value) {
int int_value = ToInt(value);
DCHECK_GE(int_value, 0);
// Automatic Resize increases the CPU time of microbenchmarks by ~30%, but
// even with kAutoResize=true, DenseSet is still 25x faster than a
// flat_hash_set<int>.
if (kAutoResize && int_value >= positions_.size()) {
positions_.resize(ToInt(value) + 1, -1);
}
DCHECK_GT(positions_.size(), int_value);
return positions_[int_value];
}
std::vector<int> positions_;
std::vector<T> values_;
};
// Like DenseSet, but does not automatically resize the internal position
// vector, which is ~30% faster.
template <typename T>
using UnsafeDenseSet = DenseSet<T, false>;
template <typename T, bool resize>
inline int DenseSet<T, resize>::ToInt(T value) {
return value.value();
}
template <>
inline int DenseSet<int, true>::ToInt(int value) {
return value;
}
template <>
inline int DenseSet<int, false>::ToInt(int value) {
return value;
}
} // namespace operations_research
#endif // ORTOOLS_UTIL_DENSE_SET_H_