Skip to content

Commit 73c561b

Browse files
committed
Adding 2 fastest sorts to benchmark
1 parent 0975d9c commit 73c561b

32 files changed

+5022
-13
lines changed

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,18 @@ cmake_minimum_required(VERSION 3.5)
22
project(UltraSort)
33

44
set( CMAKE_EXPORT_COMPILE_COMMANDS ON )
5-
set(CMAKE_CXX_STANDARD 11)
5+
set(CMAKE_CXX_STANDARD 14)
66

77
add_subdirectory(third_party/googletest)
88

99
include_directories(${CMAKE_SOURCE_DIR}/src/include)
1010
include_directories(${CMAKE_SOURCE_DIR}/test/include)
1111
include_directories(third_party/googletest/googletest/include)
1212
include_directories(third_party/googletest/googlemock/include)
13+
include_directories(third_party/ips4o)
14+
include_directories(third_party/pdqsort)
1315

14-
set(CMAKE_CXX_FLAGS "-g -O0 -Wall -march=native -DDEBUG=0")
16+
set(CMAKE_CXX_FLAGS "-g -O3 -Wall -march=native -DDEBUG=0")
1517

1618
file(GLOB_RECURSE SOURCE_FILES
1719
"src/*.cpp" "test/*.cpp")

test/avx256/simd_sort_test.cpp

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,23 @@
44
#include "avx256/simd_sort.h"
55
#include <algorithm>
66
#include <iterator>
7+
#include "ips4o.hpp"
8+
#include "pdqsort.h"
9+
10+
// TODO: Separate tests from benchmarks
711

812
TEST(SIMDSortTests, SIMDSort32BitIntegerTest) {
913
int N = 65536;
10-
int lo = -1000;
11-
int hi = 1000;
14+
int lo = -10000;
15+
int hi = 10000;
1216
int *rand_arr;
1317
int *soln_arr;
1418
double start, end;
1519

1620
// Initialization
1721
TestUtil::RandGen(rand_arr, N, lo, hi);
1822

19-
// C++11 std::stable_sort
23+
// C++ std::stable_sort
2024
aligned_init<int>(soln_arr, N);
2125
std::copy(rand_arr, rand_arr + N, soln_arr);
2226
start = currentSeconds();
@@ -25,7 +29,7 @@ TEST(SIMDSortTests, SIMDSort32BitIntegerTest) {
2529
printf("[std::stable_sort] %d elements: %.8f seconds\n", N, end - start);
2630
delete soln_arr;
2731

28-
// C++11 std::sort
32+
// C++ std::sort
2933
aligned_init<int>(soln_arr, N);
3034
std::copy(rand_arr, rand_arr + N, soln_arr);
3135
start = currentSeconds();
@@ -34,13 +38,33 @@ TEST(SIMDSortTests, SIMDSort32BitIntegerTest) {
3438
printf("[std::sort] %d elements: %.8f seconds\n", N, end - start);
3539
delete soln_arr;
3640

41+
// C++ ips4o::sort
42+
aligned_init<int>(soln_arr, N);
43+
std::copy(rand_arr, rand_arr + N, soln_arr);
44+
start = currentSeconds();
45+
ips4o::sort(soln_arr, soln_arr + N);
46+
end = currentSeconds();
47+
printf("[ips4o::sort] %d elements: %.8f seconds\n", N, end - start);
48+
delete soln_arr;
49+
50+
// C++ pdqsort
51+
aligned_init<int>(soln_arr, N);
52+
std::copy(rand_arr, rand_arr + N, soln_arr);
53+
start = currentSeconds();
54+
pdqsort(soln_arr, soln_arr + N);
55+
end = currentSeconds();
56+
printf("[pdqsort] %d elements: %.8f seconds\n", N, end - start);
57+
delete soln_arr;
58+
59+
// AVX256 Sort
3760
aligned_init<int>(soln_arr, N);
3861
std::copy(rand_arr, rand_arr + N, soln_arr);
3962
std::vector<int> check_arr(rand_arr, rand_arr + N);
4063
start = currentSeconds();
4164
SIMDSorter::SIMDSort32(N, soln_arr);
4265
end = currentSeconds();
4366
std::sort(check_arr.begin(), check_arr.end());
67+
// First perform a correctness check
4468
for(int i = 0; i < N; i++) {
4569
EXPECT_EQ(check_arr[i], soln_arr[i]);
4670
}
@@ -60,7 +84,7 @@ TEST(SIMDSortTests, SIMDSort64BitIntegerTest) {
6084
// Initialization
6185
TestUtil::RandGen<int64_t>(rand_arr, N, lo, hi);
6286

63-
// C++11 std::stable_sort
87+
// C++ std::stable_sort
6488
aligned_init<int64_t>(soln_arr, N);
6589
std::copy(rand_arr, rand_arr + N, soln_arr);
6690
start = currentSeconds();
@@ -69,7 +93,7 @@ TEST(SIMDSortTests, SIMDSort64BitIntegerTest) {
6993
printf("[std::stable_sort] %d elements: %.8f seconds\n", N, end - start);
7094
delete soln_arr;
7195

72-
// C++11 std::sort
96+
// C++ std::sort
7397
aligned_init<int64_t>(soln_arr, N);
7498
std::copy(rand_arr, rand_arr + N, soln_arr);
7599
start = currentSeconds();
@@ -78,6 +102,25 @@ TEST(SIMDSortTests, SIMDSort64BitIntegerTest) {
78102
printf("[std::sort] %d elements: %.8f seconds\n", N, end - start);
79103
delete soln_arr;
80104

105+
// C++ ips4o::sort
106+
aligned_init<int64_t>(soln_arr, N);
107+
std::copy(rand_arr, rand_arr + N, soln_arr);
108+
start = currentSeconds();
109+
ips4o::sort(soln_arr, soln_arr + N);
110+
end = currentSeconds();
111+
printf("[ips4o::sort] %d elements: %.8f seconds\n", N, end - start);
112+
delete soln_arr;
113+
114+
// C++ pdqsort
115+
aligned_init<int64_t>(soln_arr, N);
116+
std::copy(rand_arr, rand_arr + N, soln_arr);
117+
start = currentSeconds();
118+
pdqsort(soln_arr, soln_arr + N);
119+
end = currentSeconds();
120+
printf("[pdqsort] %d elements: %.8f seconds\n", N, end - start);
121+
delete soln_arr;
122+
123+
// AVX256 sort
81124
aligned_init<int64_t>(soln_arr, N);
82125
std::copy(rand_arr, rand_arr + N, soln_arr);
83126
std::vector<int64_t> check_arr(rand_arr, rand_arr + N);
@@ -94,17 +137,17 @@ TEST(SIMDSortTests, SIMDSort64BitIntegerTest) {
94137
}
95138

96139
TEST(SIMDSortTests, SIMDSort32BitKeyValueIntegerTest) {
97-
int N = 65536;
98-
int lo = -10000;
99-
int hi = 10000;
140+
int N = 64;
141+
int lo = -10;
142+
int hi = 10;
100143
std::pair<int,int> *rand_arr;
101144
std::pair<int,int> *soln_arr;
102145
double start, end;
103146

104147
// Initialization
105148
TestUtil::RandPairGen(rand_arr, N, lo, hi);
106149

107-
// C++11 std::stable_sort
150+
// C++ std::stable_sort
108151
aligned_init<std::pair<int,int>>(soln_arr, N);
109152
std::copy(rand_arr, rand_arr + N, soln_arr);
110153
start = currentSeconds();
@@ -115,7 +158,7 @@ TEST(SIMDSortTests, SIMDSort32BitKeyValueIntegerTest) {
115158
printf("[std::stable_sort] %d elements: %.8f seconds\n", N, end - start);
116159
delete soln_arr;
117160

118-
// C++11 std::sort
161+
// C++ std::sort
119162
aligned_init<std::pair<int,int>>(soln_arr, N);
120163
std::copy(rand_arr, rand_arr + N, soln_arr);
121164
start = currentSeconds();
@@ -126,6 +169,8 @@ TEST(SIMDSortTests, SIMDSort32BitKeyValueIntegerTest) {
126169
printf("[std::sort] %d elements: %.8f seconds\n", N, end - start);
127170
delete soln_arr;
128171

172+
// TODO: Add ips4o and pdqsort benchmarks
173+
// AVX256 sort
129174
aligned_init<std::pair<int,int>>(soln_arr, N);
130175
std::copy(rand_arr, rand_arr + N, soln_arr);
131176
std::vector<std::pair<int,int>> check_arr(rand_arr, rand_arr + N);

third_party/ips4o/LICENSE

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
BSD 2-Clause License
2+
3+
Copyright © 2017, Michael Axtmann <[email protected]>
4+
Copyright © 2017, Daniel Ferizovic <[email protected]>
5+
Copyright © 2017, Sascha Witt <[email protected]>
6+
All rights reserved.
7+
8+
Redistribution and use in source and binary forms, with or without
9+
modification, are permitted provided that the following conditions are met:
10+
11+
* Redistributions of source code must retain the above copyright notice, this
12+
list of conditions and the following disclaimer.
13+
14+
* Redistributions in binary form must reproduce the above copyright notice,
15+
this list of conditions and the following disclaimer in the documentation
16+
and/or other materials provided with the distribution.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

third_party/ips4o/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# In-place Parallel Super Scalar Samplesort (IPS⁴o)
2+
3+
This is the implementation of the algorithm presented in the [eponymous paper](https://arxiv.org/abs/1705.02257),
4+
which contains an in-depth description of its inner workings, as well as an extensive experimental performance evaluation.
5+
Here's the abstract:
6+
7+
> We present a sorting algorithm that works in-place, executes in parallel, is
8+
> cache-efficient, avoids branch-mispredictions, and performs work O(n log n) for
9+
> arbitrary inputs with high probability. The main algorithmic contributions are
10+
> new ways to make distribution-based algorithms in-place: On the practical side,
11+
> by using coarse-grained block-based permutations, and on the theoretical side,
12+
> we show how to eliminate the recursion stack. Extensive experiments show that
13+
> our algorithm IPS⁴o scales well on a variety of multi-core machines. We
14+
> outperform our closest in-place competitor by a factor of up to 3. Even as
15+
> a sequential algorithm, we are up to 1.5 times faster than the closest
16+
> sequential competitor, BlockQuicksort.
17+
18+
## Usage
19+
20+
```C++
21+
#include "ips4o.hpp"
22+
23+
// sort sequentially
24+
ips4o::sort(begin, end[, comparator])
25+
26+
// sort in parallel (uses OpenMP if available, std::thread otherwise)
27+
ips4o::parallel::sort(begin, end[, comparator])
28+
```
29+
30+
Make sure to compile with C++14 support. Currently, the code does not compile on Windows.
31+
32+
For the parallel algorithm, you need to enable either OpenMP (`-fopenmp`) or C++ threads (e.g., `-pthread`).
33+
You also need a CPU that supports 16-byte compare-and-exchange instructions.
34+
If you get undefined references to `__atomic_fetch_add_16`, either set your CPU correctly (e.g., `-march=native`),
35+
enable the instructions explicitly (`-mcx16`), or try linking against GCC's libatomic (`-latomic`).

third_party/ips4o/ips4o.hpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/******************************************************************************
2+
* ips4o.hpp
3+
*
4+
* In-place Parallel Super Scalar Samplesort (IPS⁴o)
5+
*
6+
******************************************************************************
7+
* BSD 2-Clause License
8+
*
9+
* Copyright © 2017, Michael Axtmann <[email protected]>
10+
* Copyright © 2017, Daniel Ferizovic <[email protected]>
11+
* Copyright © 2017, Sascha Witt <[email protected]>
12+
* All rights reserved.
13+
*
14+
* Redistribution and use in source and binary forms, with or without
15+
* modification, are permitted provided that the following conditions are met:
16+
*
17+
* * Redistributions of source code must retain the above copyright notice, this
18+
* list of conditions and the following disclaimer.
19+
*
20+
* * Redistributions in binary form must reproduce the above copyright notice,
21+
* this list of conditions and the following disclaimer in the documentation
22+
* and/or other materials provided with the distribution.
23+
*
24+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34+
*****************************************************************************/
35+
36+
#pragma once
37+
38+
#include "ips4o/ips4o.hpp"

third_party/ips4o/ips4o/base_case.hpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/******************************************************************************
2+
* ips4o/base_case.hpp
3+
*
4+
* In-place Parallel Super Scalar Samplesort (IPS⁴o)
5+
*
6+
******************************************************************************
7+
* BSD 2-Clause License
8+
*
9+
* Copyright © 2017, Michael Axtmann <[email protected]>
10+
* Copyright © 2017, Daniel Ferizovic <[email protected]>
11+
* Copyright © 2017, Sascha Witt <[email protected]>
12+
* All rights reserved.
13+
*
14+
* Redistribution and use in source and binary forms, with or without
15+
* modification, are permitted provided that the following conditions are met:
16+
*
17+
* * Redistributions of source code must retain the above copyright notice, this
18+
* list of conditions and the following disclaimer.
19+
*
20+
* * Redistributions in binary form must reproduce the above copyright notice,
21+
* this list of conditions and the following disclaimer in the documentation
22+
* and/or other materials provided with the distribution.
23+
*
24+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34+
*****************************************************************************/
35+
36+
#pragma once
37+
38+
#include <algorithm>
39+
#include <cstddef>
40+
#include <utility>
41+
42+
#include "ips4o_fwd.hpp"
43+
#include "utils.hpp"
44+
45+
namespace ips4o {
namespace detail {

/**
 * In-place insertion sort of the range [begin, end).
 *
 * The iterators are used with `+ 1`, `- 1`, `<` and `>=`, so `It` has to
 * support that arithmetic and those comparisons.
 *
 * @param begin  First element of the range.
 * @param end    One past the last element of the range.
 * @param comp   Ordering predicate, invoked as comp(a, b).
 */
template <class It, class Comp>
void insertionSort(const It begin, const It end, Comp comp) {
    // NOTE(review): IPS4O_ASSUME_NOT is defined outside this view; it
    // presumably states that callers never pass an empty range -- confirm.
    IPS4O_ASSUME_NOT(begin >= end);

    for (It pos = begin + 1; pos < end; ++pos) {
        // Lift the element out; its slot becomes the hole to be filled.
        auto pending = std::move(*pos);

        if (comp(pending, *begin)) {
            // Orders before the current front: shift the whole prefix
            // [begin, pos) one slot to the right and drop it in at the front.
            std::move_backward(begin, pos, pos + 1);
            *begin = std::move(pending);
            continue;
        }

        // comp(pending, *begin) was false just above, so the leftward scan
        // below stops at begin at the latest without a bounds check.
        auto hole = pos;
        auto left = pos - 1;
        while (comp(pending, *left)) {
            *hole = std::move(*left);
            hole = left;
            --left;
        }
        *hole = std::move(pending);
    }
}

/**
 * Single entry point for sorting base cases, so the underlying sorter can be
 * swapped in one place.
 *
 * Returns immediately for an empty range; otherwise forwards everything to
 * insertionSort().
 *
 * @param begin  First element of the range.
 * @param end    One past the last element of the range.
 * @param comp   Ordering predicate, perfectly forwarded to the sorter.
 */
template <class It, class Comp>
inline void baseCaseSort(It begin, It end, Comp&& comp) {
    const bool nothingToSort = (begin == end);
    if (!nothingToSort) {
        detail::insertionSort(std::move(begin), std::move(end), std::forward<Comp>(comp));
    }
}


}  // namespace detail
}  // namespace ips4o

0 commit comments

Comments
 (0)