-
Notifications
You must be signed in to change notification settings - Fork 52
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add concepts and concrete types where possible for BlockZipperJoinImpl
#1625
Open
RobinTF
wants to merge
2
commits into
ad-freiburg:master
Choose a base branch
from
RobinTF:join-algorithm-polishing
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+63
−37
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
#include "util/JoinAlgorithms/FindUndefRanges.h" | ||
#include "util/JoinAlgorithms/JoinColumnMapping.h" | ||
#include "util/TransparentFunctors.h" | ||
#include "util/TypeTraits.h" | ||
|
||
namespace ad_utility { | ||
|
||
|
@@ -770,9 +771,10 @@ struct BlockZipperJoinImpl { | |
// Recompute the `currentEl`. It is the minimum of the last element in the | ||
// first block of either of the join sides. | ||
ProjectedEl getCurrentEl() { | ||
auto getFirst = [](const auto& side) { | ||
return side.projection_(side.currentBlocks_.front().back()); | ||
}; | ||
auto getFirst = | ||
[](const ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
return side.projection_(side.currentBlocks_.front().back()); | ||
}; | ||
return std::min(getFirst(leftSide_), getFirst(rightSide_), lessThan_); | ||
} | ||
|
||
|
@@ -784,7 +786,9 @@ struct BlockZipperJoinImpl { | |
// blocks that contain elements <= `currentEl` have been added, and `false` if | ||
// the function returned because 3 blocks were added without fulfilling the | ||
// condition. | ||
bool fillEqualToCurrentEl(auto& side, const auto& currentEl) { | ||
bool fillEqualToCurrentEl( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side, | ||
const ProjectedEl& currentEl) { | ||
auto& it = side.it_; | ||
auto& end = side.end_; | ||
for (size_t numBlocksRead = 0; it != end && numBlocksRead < 3; | ||
|
@@ -808,7 +812,7 @@ struct BlockZipperJoinImpl { | |
// sides contain all the relevant blocks. Only filling one side is used for | ||
// the optimization for the Cartesian product described in the documentation. | ||
enum struct BlockStatus { leftMissing, rightMissing, allFilled }; | ||
BlockStatus fillEqualToCurrentElBothSides(const auto& currentEl) { | ||
BlockStatus fillEqualToCurrentElBothSides(const ProjectedEl& currentEl) { | ||
bool allBlocksFromLeft = false; | ||
bool allBlocksFromRight = false; | ||
while (!(allBlocksFromLeft || allBlocksFromRight)) { | ||
|
@@ -828,14 +832,15 @@ struct BlockZipperJoinImpl { | |
// `rightSide_.currentBlocks`) s.t. only elements `> lastProcessedElement` | ||
// remain. This effectively removes all blocks completely, except maybe the | ||
// last one. | ||
template <typename Blocks, typename ProjectedEl> | ||
void removeEqualToCurrentEl(Blocks& blocks, | ||
ProjectedEl lastProcessedElement) { | ||
void removeEqualToCurrentEl( | ||
ad_utility::SameAsAny<typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& blocks, | ||
const ProjectedEl& lastProcessedElement) { | ||
// Erase all but the last block. | ||
AD_CORRECTNESS_CHECK(!blocks.empty()); | ||
if (blocks.size() > 1 && !blocks.front().empty()) { | ||
AD_CORRECTNESS_CHECK(!lessThan_(lastProcessedElement, | ||
std::as_const(blocks.front()).back())); | ||
AD_CORRECTNESS_CHECK( | ||
!lessThan_(lastProcessedElement, blocks.front().back())); | ||
} | ||
blocks.erase(blocks.begin(), blocks.end() - 1); | ||
|
||
|
@@ -854,15 +859,19 @@ struct BlockZipperJoinImpl { | |
// * A reference to the first full block | ||
// * The currently active subrange of that block | ||
// * An iterator pointing to the first element ` >= currentEl` in the block. | ||
auto getFirstBlock(auto& currentBlocks, const auto& currentEl) { | ||
auto getFirstBlock(ad_utility::SameAsAny< | ||
typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& currentBlocks, | ||
const ProjectedEl& currentEl) { | ||
AD_CORRECTNESS_CHECK(!currentBlocks.empty()); | ||
const auto& first = currentBlocks.at(0); | ||
auto it = std::ranges::lower_bound(first.subrange(), currentEl, lessThan_); | ||
return std::tuple{std::ref(first.fullBlock()), first.subrange(), it}; | ||
} | ||
|
||
// Check if a side contains undefined values. | ||
static bool hasUndef(const auto& side) { | ||
static bool hasUndef( | ||
const ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
if constexpr (potentiallyHasUndef) { | ||
return !side.undefBlocks_.empty(); | ||
} | ||
|
@@ -871,7 +880,8 @@ struct BlockZipperJoinImpl { | |
|
||
// Combine all elements from all blocks on the left with all elements from all | ||
// blocks on the right and add them to the result. | ||
void addCartesianProduct(const auto& blocksLeft, const auto& blocksRight) { | ||
void addCartesianProduct(const LeftSide::CurrentBlocks& blocksLeft, | ||
const RightSide::CurrentBlocks& blocksRight) { | ||
// TODO<C++23> use `std::views::cartesian_product`. | ||
for (const auto& lBlock : blocksLeft) { | ||
for (const auto& rBlock : blocksRight) { | ||
|
@@ -888,8 +898,9 @@ struct BlockZipperJoinImpl { | |
// Handle non-matching rows from the left side for an optional join or a minus | ||
// join. | ||
template <bool DoOptionalJoin> | ||
void addNonMatchingRowsFromLeftForOptionalJoin(const auto& blocksLeft, | ||
const auto& blocksRight) { | ||
void addNonMatchingRowsFromLeftForOptionalJoin( | ||
const LeftSide::CurrentBlocks& blocksLeft, | ||
const RightSide::CurrentBlocks& blocksRight) { | ||
if constexpr (DoOptionalJoin) { | ||
if (!hasUndef(rightSide_) && | ||
std::ranges::all_of( | ||
|
@@ -910,7 +921,8 @@ struct BlockZipperJoinImpl { | |
// Call `compatibleRowAction` for all pairs of elements in the Cartesian | ||
// product of the blocks in `blocksLeft` and `blocksRight`. | ||
template <bool DoOptionalJoin> | ||
void addAll(const auto& blocksLeft, const auto& blocksRight) { | ||
void addAll(const LeftSide::CurrentBlocks& blocksLeft, | ||
const RightSide::CurrentBlocks& blocksRight) { | ||
addNonMatchingRowsFromLeftForOptionalJoin<DoOptionalJoin>(blocksLeft, | ||
blocksRight); | ||
addCartesianProduct(blocksLeft, blocksRight); | ||
|
@@ -921,7 +933,10 @@ struct BlockZipperJoinImpl { | |
// `currentEl`. Effectively, these subranges cover all the blocks completely | ||
// except maybe the last one, which might contain elements `> currentEl` at | ||
// the end. | ||
auto getEqualToCurrentEl(const auto& blocks, const auto& currentEl) { | ||
auto getEqualToCurrentEl(const ad_utility::SameAsAny< | ||
typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& blocks, | ||
const ProjectedEl& currentEl) { | ||
auto result = blocks; | ||
if (result.empty()) { | ||
return result; | ||
|
@@ -990,9 +1005,10 @@ struct BlockZipperJoinImpl { | |
// The fully joined parts of the block are then removed from | ||
// `currentBlocksLeft/Right`, as they are not needed anymore. | ||
template <bool DoOptionalJoin> | ||
void joinAndRemoveLessThanCurrentEl(auto& currentBlocksLeft, | ||
auto& currentBlocksRight, | ||
const auto& currentEl) { | ||
void joinAndRemoveLessThanCurrentEl( | ||
LeftSide::CurrentBlocks& currentBlocksLeft, | ||
RightSide::CurrentBlocks& currentBlocksRight, | ||
const ProjectedEl& currentEl) { | ||
// Get the first blocks. | ||
auto [fullBlockLeft, subrangeLeft, currentElItL] = | ||
getFirstBlock(currentBlocksLeft, currentEl); | ||
|
@@ -1045,7 +1061,8 @@ struct BlockZipperJoinImpl { | |
|
||
// If the `targetBuffer` is empty, read the next nonempty block from `[it, | ||
// end)` if there is one. | ||
void fillWithAtLeastOne(auto& side) { | ||
void fillWithAtLeastOne( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
auto& targetBuffer = side.currentBlocks_; | ||
auto& it = side.it_; | ||
const auto& end = side.end_; | ||
|
@@ -1086,8 +1103,9 @@ struct BlockZipperJoinImpl { | |
// Based on `blockStatus` add the Cartesian product of the blocks in | ||
// `leftBlocks` and/or `rightBlocks` with their respective counterpart in | ||
// `undefBlocks_`. | ||
void joinWithUndefBlocks(BlockStatus blockStatus, const auto& leftBlocks, | ||
const auto& rightBlocks) { | ||
void joinWithUndefBlocks(BlockStatus blockStatus, | ||
const LeftSide::CurrentBlocks& leftBlocks, | ||
const RightSide::CurrentBlocks& rightBlocks) { | ||
if (blockStatus == BlockStatus::allFilled || | ||
blockStatus == BlockStatus::leftMissing) { | ||
addCartesianProduct(leftBlocks, rightSide_.undefBlocks_); | ||
|
@@ -1117,18 +1135,23 @@ struct BlockZipperJoinImpl { | |
auto equalToCurrentElRight = | ||
getEqualToCurrentEl(currentBlocksRight, currentEl); | ||
|
||
auto getNextBlocks = [&currentEl, self = this, &blockStatus](auto& target, | ||
auto& side) { | ||
self->removeEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
bool allBlocksWereFilled = self->fillEqualToCurrentEl(side, currentEl); | ||
if (side.currentBlocks_.empty()) { | ||
AD_CORRECTNESS_CHECK(allBlocksWereFilled); | ||
} | ||
target = self->getEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
if (allBlocksWereFilled) { | ||
blockStatus = BlockStatus::allFilled; | ||
} | ||
}; | ||
auto getNextBlocks = | ||
[this, &currentEl, &blockStatus]( | ||
ad_utility::SameAsAny<typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& | ||
target, | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
// Explicit this to avoid false positive warning in clang. | ||
this->removeEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
bool allBlocksWereFilled = fillEqualToCurrentEl(side, currentEl); | ||
if (side.currentBlocks_.empty()) { | ||
AD_CORRECTNESS_CHECK(allBlocksWereFilled); | ||
} | ||
target = getEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
if (allBlocksWereFilled) { | ||
blockStatus = BlockStatus::allFilled; | ||
} | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, |
||
|
||
// We are only guaranteed to have all relevant blocks from one side, so we | ||
// also need to pass through the remaining blocks from the other side. | ||
|
@@ -1185,7 +1208,9 @@ struct BlockZipperJoinImpl { | |
// those blocks with the undef blocks from the other side. | ||
// `reversed` is used to determine if the left or right side is consumed. | ||
template <bool reversed> | ||
void consumeRemainingBlocks(auto& side, const auto& undefBlocks) { | ||
void consumeRemainingBlocks( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side, | ||
const auto& undefBlocks) { | ||
while (side.it_ != side.end_) { | ||
const auto& lBlock = *side.it_; | ||
for (const auto& rBlock : undefBlocks) { | ||
|
@@ -1228,7 +1253,8 @@ struct BlockZipperJoinImpl { | |
// `side.undefBlocks_` and skipped for subsequent processing. The first block | ||
// containing defined values is split and the defined part is stored in | ||
// `side.currentBlocks_`. | ||
void findFirstBlockWithoutUndef(auto& side) { | ||
void findFirstBlockWithoutUndef( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
// The reference of `it` is there on purpose. | ||
for (auto& it = side.it_; it != side.end_; ++it) { | ||
auto& el = *it; | ||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here
as_const
is dropped. This presumably works because of recent fixes to the IdTable
class.