Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Fast isValidCell #496

Draft
wants to merge 24 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ set(OTHER_SOURCE_FILES
src/apps/benchmarks/benchmarkGridPathCells.c
src/apps/benchmarks/benchmarkDirectedEdge.c
src/apps/benchmarks/benchmarkVertex.c
src/apps/benchmarks/benchmarkIsValidCell.c
src/apps/benchmarks/benchmarkH3Api.c)

set(ALL_SOURCE_FILES
Expand Down Expand Up @@ -654,6 +655,7 @@ if(BUILD_BENCHMARKS)
add_h3_benchmark(benchmarkGridPathCells src/apps/benchmarks/benchmarkGridPathCells.c)
add_h3_benchmark(benchmarkDirectedEdge src/apps/benchmarks/benchmarkDirectedEdge.c)
add_h3_benchmark(benchmarkVertex src/apps/benchmarks/benchmarkVertex.c)
add_h3_benchmark(benchmarkIsValidCell src/apps/benchmarks/benchmarkIsValidCell.c)
add_h3_benchmark(benchmarkH3SetToLinkedGeo src/apps/benchmarks/benchmarkH3SetToLinkedGeo.c)
add_h3_benchmark(benchmarkPolygonToCells src/apps/benchmarks/benchmarkPolygonToCells.c)
if(ENABLE_REQUIRES_ALL_SYMBOLS)
Expand Down
61 changes: 61 additions & 0 deletions src/apps/benchmarks/benchmarkIsValidCell.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright 2021 Uber Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "benchmark.h"
#include "h3api.h"

H3Index p = 0x80c3fffffffffff; // res 0 pentagon

int parentRes;
int childRes;

int64_t N;
H3Index *cells;

BEGIN_BENCHMARKS();

// pentagon 8->14
parentRes = 8;
childRes = 14;
p = H3_EXPORT(cellToCenterChild)(p, parentRes);
N = H3_EXPORT(cellToChildrenSize)(p, childRes);

cells = calloc(N, sizeof(H3Index));
H3_EXPORT(cellToChildren)(p, childRes, cells);

BENCHMARK(pentagonChildren_8_14, 1000, {
for (int64_t i = 0; i < N; i++) {
H3_EXPORT(isValidCell)(cells[i]);
}
});
free(cells);

// pentagon 2->8
parentRes = 2;
childRes = 8;
p = H3_EXPORT(cellToCenterChild)(p, parentRes);
N = H3_EXPORT(cellToChildrenSize)(p, childRes);

cells = calloc(N, sizeof(H3Index));
H3_EXPORT(cellToChildren)(p, childRes, cells);

BENCHMARK(pentagonChildren_2_8, 1000, {
for (int64_t i = 0; i < N; i++) {
H3_EXPORT(isValidCell)(cells[i]);
}
});
free(cells);

END_BENCHMARKS();
125 changes: 92 additions & 33 deletions src/h3lib/lib/h3Index.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,49 +78,108 @@ void H3_EXPORT(h3ToString)(H3Index h, char *str, size_t sz) {
sprintf(str, "%" PRIx64, h);
}

// Get Top t bits from h
#define GT(h, t) ((h) >> (64 - (t)))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: This looks like "greater than" to me - GET_BITS?


static const bool isBaseCellPentagonArr[128] = {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this an improvement over baseCellData[baseCellNum].isPentagon?

[4] = 1, [14] = 1, [24] = 1, [38] = 1, [49] = 1, [58] = 1,
[63] = 1, [72] = 1, [83] = 1, [97] = 1, [107] = 1, [117] = 1};

/**
* Returns whether or not an H3 index is a valid cell (hexagon or pentagon).
* @param h The H3 index to validate.
* @return 1 if the H3 index if valid, and 0 if it is not.
* Determines whether an H3 index is a valid cell (hexagon or pentagon).
*
* @param h H3 index to test.
*
* @return 1 if the H3 index if valid, and 0 if it is not.
*/
int H3_EXPORT(isValidCell)(H3Index h) {
if (H3_GET_HIGH_BIT(h) != 0) return 0;

if (H3_GET_MODE(h) != H3_HEXAGON_MODE) return 0;

if (H3_GET_RESERVED_BITS(h) != 0) return 0;

int baseCell = H3_GET_BASE_CELL(h);
if (baseCell < 0 || baseCell >= NUM_BASE_CELLS) { // LCOV_EXCL_BR_LINE
// Base cells less than zero can not be represented in an index
return 0;
}

int res = H3_GET_RESOLUTION(h);
if (res < 0 || res > MAX_H3_RES) { // LCOV_EXCL_BR_LINE
// Resolutions less than zero can not be represented in an index
return 0;
}

bool foundFirstNonZeroDigit = false;
for (int r = 1; r <= res; r++) {
Direction digit = H3_GET_INDEX_DIGIT(h, r);

if (!foundFirstNonZeroDigit && digit != CENTER_DIGIT) {
foundFirstNonZeroDigit = true;
if (_isBaseCellPentagon(baseCell) && digit == K_AXES_DIGIT) {
// Implementation strategy:
//
// Walk from high to low bits, checking validity of
// groups of bits as we go. After each check, shift bits off
// to the left, so that the next relevant group is at the
// highest bit location in the H3 Index, which we can
// easily read off with the `GT` macro. This strategy helps
// us avoid re-computing shifts and masks for each group.
//
// | Region | # bits |
// |------------|--------|
// | High | 1 |
// | Mode | 4 |
// | Reserved | 3 |
// | Resolution | 4 |
// | Base Cell | 7 |
// | Digit 1 | 3 |
// | Digit 2 | 3 |
// | ... | ... |
// | Digit 15 | 3 |
//
// Additionally, we try to group operations and void loops when possible.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: avoid loops?


// The 1 high bit should be 0b0
// The 4 mode bits should be 0b0001 (H3_CELL_MODE)
// The 3 reserved bits should be 0b000
// In total, the top 8 bits should be 0b00001000
if (GT(h, 8) != 0b00001000) return 0;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Binary 0b literals might not be portable, in which case we can always switch to hex. Binary is a bit easier for prototyping, tho :)

Copy link
Contributor Author

@ajfriend ajfriend Jul 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternative implementation:
(also, could use #defines for the bit lengths here)

if (GT(h, 1) != 0) return 0;
h <<= 1;
if (GT(h, 4) != 1) return 0;
h <<= 4;
if (GT(h, 3) != 0) return 0;
h <<= 3;

This alternative might be clearer, and Clang is even able to compile each version to the same instructions. However, GCC doesn't seem to produce the same for the longer version: https://godbolt.org/z/sjKed8jn1

Screen Shot 2021-07-05 at 3 42 46 PM

(Even though the last shift in each example is optimized out to a no-op, still gives you the general idea.)

h <<= 8;

// Number of bits in each of the resolution, base cell, and digit regions.
const int nBitsRes = 4;
const int nBitsBaseCell = 7;
const int nBitsDigit = H3_PER_DIGIT_OFFSET;

// No need to check resolution; any 4-bit number is a valid resolution.
const int res = GT(h, nBitsRes);
h <<= nBitsRes;

// Check that base cell number is valid.
const int bc = GT(h, nBitsBaseCell);
if (bc >= NUM_BASE_CELLS) return 0;
h <<= nBitsBaseCell;

// Now check that each resolution digit is valid.
// Let `r` denote the resolution we're currently checking.
int r = 1;

// Pentagon cells start with a sequence of 0's (CENTER_DIGIT's).
// The first nonzero digit can't be a 1 (i.e., "deleted subsequence",
// PENTAGON_SKIPPED_DIGIT, or K_AXES_DIGIT).
// Test for pentagon base cell first to avoid this loop if possible.
if (isBaseCellPentagonArr[bc]) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using this lookup table seems to be a bit faster than calling _isBaseCellPentagon(bc).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternative:

int _isBaseCellPentagon(int baseCell) {
    switch(baseCell) {
        case 4:
        case 14:
        case 24:
        case 38:
        case 49:
        case 58:
        case 63:
        case 72:
        case 83:
        case 97:
        case 107:
        case 117:
            return 1;
        default:
            return 0;
    }
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you can use baseCellData[bc].isPentagon that should be equivalent? Not sure of the linking cost of going outside the file.

for (; r <= res; r++) {
int d = GT(h, nBitsDigit);
if (d == 0) {
h <<= nBitsDigit;
} else if (d == 1) {
return 0;
} else {
break;
// But don't increment `r`, since we still need to
// check that it isn't a 7.
}
}

if (digit < CENTER_DIGIT || digit >= NUM_DIGITS) return 0;
}

for (int r = res + 1; r <= MAX_H3_RES; r++) {
Direction digit = H3_GET_INDEX_DIGIT(h, r);
if (digit != INVALID_DIGIT) return 0;
// After (possibly) taking care of pentagon logic, check that
// the remaining digits up to `res` are not 7 (INVALID_DIGIT).
// Don't see a way to avoid this loop :(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could unroll it into groups of 4 or 8, possibly with a macro to simplify the code. This would only work for hex base cells though, or for pentagon base cells if you re-check all the digits you just checked. Might be worth trying.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if this PR is still alive, but I've just stumbled on it.

Last week I've implemented my own quick isValidCell (a simpler one though, because for my use case I don't need to have any specific check for pentagons) and I think this loop can be avoided.

The search for the pattern 0b111 may not be easy to implement quickly, but on the other hand we can apply a bitwise NOT and look for 0b000 which is easier 🙂.
Indeed, looking for a null triplet is similar to looking for a nul-byte, something we do a lot in C (think strlen).

By digging into the annals of the Old Gods, a.k.a. comp.lang.c, we can extract this little gem: Alan Mycroft's null-byte detection algorithm, posted in 1987.

Tweaking the bitmasks to works on 3-bit instead of 8-bit does the trick and we end up with a code that would rougly look like

#include<stdint.h>
#include<stdio.h>
#include<assert.h>

#define MAX_RESOLUTION 15
#define CELL_BITSIZE   3


int has_invalid_digit(uint64_t index) {
    static const uint64_t LO_MAGIC = 0x49249249249ULL;  // ...001 001 001...
    static const uint64_t HI_MAGIC = 0x124924924924ULL; // ...100 100 100...

    const uint64_t resolution = (index >> 52 & 0xF);
    const uint64_t unused_count = MAX_RESOLUTION - resolution;
    const uint64_t unused_bitsize = unused_count * CELL_BITSIZE;
    const uint64_t digits_mask = (1ULL << (resolution * CELL_BITSIZE)) - 1;
    const uint64_t digits = (index >> unused_bitsize) & digits_mask;

    return ((~digits - LO_MAGIC) & (digits & HI_MAGIC)) != 0ULL;
}

int main(void) {
    // Valid H3 index
    // 0-0001-000-1100-0010101-110-101-110-001-100-000-101-001-100-110-110-101-111-111-111
    assert(!has_invalid_digit(0x08c2bae305336bffULL));
    // First digit invalid.
    // 0-0001-000-1100-0010101-111-101-110-001-100-000-101-001-100-110-110-101-111-111-111
    assert(has_invalid_digit(0x08c2bee305336bffULL));
    // Digit in the middle invalid.
    // 0-0001-000-1100-0010101-110-101-110-001-100-111-101-001-100-110-110-101-111-111-111
    assert(has_invalid_digit(0x08c2bae33d336bffULL));
    // Last digit invalid.
    // 0-0001-000-1100-0010101-110-101-110-001-100-000-101-001-100-110-110-111-111-111-111
    assert(has_invalid_digit(0x08c2bae305336fffULL));

    return 0;
}

So, in the end we can replace the loop by ((~digits - LO_MAGIC) & (digits & HI_MAGIC)) != 0ULL.

For sure, this would deserve some nice comments, because it's not obvious on a first read (I went heavy on the comments for my implementation xD).

If something is not clear I can give a more detailed explanation (but that basically boils down on how Alan Mycroft's null-byte detection works).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, this is awesome! What a great find!

I spent a little time reworking this PR to add your logic. It definitely helps on the benchmarks.

It looks like you also came up with some improvements for the pentagon branch here: nmandery/h3ron#34

I'll have to parse that as well. Also happy if you want to make a pull against this PR!

for (; r <= res; r++) {
if (GT(h, nBitsDigit) == 7) return 0;
h <<= nBitsDigit;
Copy link
Collaborator

@dfellis dfellis Jul 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming bitshifts are as expensive as they were when I was in college, it may make sense to just have an array of 16 masks and do if (h & masks[r] == masks[r]) return 0; as the only part of the inner loop.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran some toy timing tests and didn't see much difference between masking vs shifting: https://gist.github.com/ajfriend/32571f0af1f3ea3133b6836fc861c730

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't be surprised if both code generate the same (or almost) assembly behind.

Compiler are quite clever for this kind of simple expression nowadays, like you don't have to write n >> 1 instead of n / 2 and so on.
This is basic peephole optimisation.

}

// Now check that all the unused digits after `res` are
// set to 7 (INVALID_DIGIT).
// Bit shift operations allow us to avoid looping through digits;
// this saves time in benchmarks.
int shift = (15 - res) * 3;
uint64_t m = 0;
m = ~m;
m >>= shift;
m = ~m;
if (h != m) return 0;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatives, but the one above using bit shifting seemed to be the fastest:

Loop

(slowest of all 3)

for (; r <= 15; r++) {
    if (GT(h, 3) != 7) return 0;
    h <<= 3;
}

Lookup table

static const uint64_t m7s[16] = {
    0b1111111111111111111111111111111111111111111110000000000000000000,
    0b1111111111111111111111111111111111111111110000000000000000000000,
    0b1111111111111111111111111111111111111110000000000000000000000000,
    0b1111111111111111111111111111111111110000000000000000000000000000,
    0b1111111111111111111111111111111110000000000000000000000000000000,
    0b1111111111111111111111111111110000000000000000000000000000000000,
    0b1111111111111111111111111110000000000000000000000000000000000000,
    0b1111111111111111111111110000000000000000000000000000000000000000,
    0b1111111111111111111110000000000000000000000000000000000000000000,
    0b1111111111111111110000000000000000000000000000000000000000000000,
    0b1111111111111110000000000000000000000000000000000000000000000000,
    0b1111111111110000000000000000000000000000000000000000000000000000,
    0b1111111110000000000000000000000000000000000000000000000000000000,
    0b1111110000000000000000000000000000000000000000000000000000000000,
    0b1110000000000000000000000000000000000000000000000000000000000000,
    0b0};

if (h != m7s[res]) return 0;


// If no flaws were identified above, then the index is a valid H3 cell.
return 1;
}

Expand Down