implements AutoCloseable {
/**
- * Bit set for distinct values.
- * If the value at some index is not equal to the previous value,
- * its bit is set to 1, otherwise its bit is set to 0.
+ * Bit set for distinct values. If the value at some index is not equal to the previous value, its
+ * bit is set to 1, otherwise its bit is set to 0.
*/
private ArrowBuf distinctValueBuffer;
- /**
- * The vector to deduplicate.
- */
+ /** The vector to deduplicate. */
private final V vector;
private final BufferAllocator allocator;
/**
* Constructs a vector run deduplicator for a given vector.
- * @param vector the vector to deduplicate. Ownership is NOT taken.
+ *
+ * @param vector the vector to deduplicate. Ownership is NOT taken.
* @param allocator the allocator used for allocating buffers for start indices.
*/
public VectorRunDeduplicator(V vector, BufferAllocator allocator) {
@@ -65,17 +63,20 @@ private void createDistinctValueBuffer() {
/**
* Gets the number of values which are different from their predecessor.
+ *
* @return the run count.
*/
public int getRunCount() {
if (distinctValueBuffer == null) {
createDistinctValueBuffer();
}
- return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
+ return vector.getValueCount()
+ - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
}
/**
* Gets the vector with deduplicated adjacent values removed.
+ *
* @param outVector the output vector.
*/
public void populateDeduplicatedValues(V outVector) {
@@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) {
/**
* Gets the length of each distinct value.
+ *
* @param lengthVector the vector for holding length values.
*/
public void populateRunLengths(IntVector lengthVector) {
@@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) {
createDistinctValueBuffer();
}
- DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount());
+ DeduplicationUtils.populateRunLengths(
+ distinctValueBuffer, lengthVector, vector.getValueCount());
}
@Override
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
index 398368d1fc612..88c4e4dc65450 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
@@ -14,33 +14,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.ValueVector;
/**
- * A dictionary builder is intended for the scenario frequently encountered in practice:
- * the dictionary is not known a priori, so it is generated dynamically.
- * In particular, when a new value arrives, it is tested to check if it is already
- * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary.
- *
- * The dictionary builder is intended to build a single dictionary.
- * So it cannot be used for different dictionaries.
- *
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply ignored;
+ * otherwise, it is added to the dictionary.
+ *
+ * The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
*
Below gives the sample code for using the dictionary builder
+ *
*
{@code
* DictionaryBuilder dictionaryBuilder = ...
* ...
- * dictionaryBuild.addValue(newValue);
+ * dictionaryBuilder.addValue(newValue);
* ...
* }
- *
- *
- * With the above code, the dictionary vector will be populated,
- * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- * After that, dictionary encoding can proceed with the populated dictionary..
- *
+ *
+ * With the above code, the dictionary vector will be populated, and it can be retrieved by the
+ * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed
+ * with the populated dictionary.
*
* @param <V> the dictionary vector type.
*/
@@ -58,7 +56,7 @@ public interface DictionaryBuilder {
* Try to add an element from the target vector to the dictionary.
*
* @param targetVector the target vector containing new element.
- * @param targetIndex the index of the new element in the target vector.
+ * @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
*/
int addValue(V targetVector, int targetIndex);
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
index cda7b3bf9540e..16e27c3a23e72 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.BaseIntVector;
@@ -22,8 +21,9 @@
/**
* A dictionary encoder translates one vector into another one based on a dictionary vector.
- * According to Arrow specification, the encoded vector must be an integer based vector, which
- * is the index of the original vector element in the dictionary.
+ * According to Arrow specification, the encoded vector must be an integer based vector, which is
+ * the index of the original vector element in the dictionary.
+ *
* @param <E> type of the encoded vector.
* @param <D> type of the vector to encode. It is also the type of the dictionary vector.
*/
@@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type.
*/
-public class HashTableBasedDictionaryBuilder implements DictionaryBuilder {
+public class HashTableBasedDictionaryBuilder
+ implements DictionaryBuilder {
- /**
- * The dictionary to be built.
- */
+ /** The dictionary to be built. */
private final V dictionary;
- /**
- * If null should be encoded.
- */
+ /** If null should be encoded. */
private final boolean encodeNull;
/**
- * The hash map for distinct dictionary entries.
- * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+ * whereas the value is the index in the dictionary.
*/
private HashMap hashMap = new HashMap<>();
- /**
- * The hasher used for calculating the hash code.
- */
+ /** The hasher used for calculating the hash code. */
private final ArrowBufHasher hasher;
- /**
- * Next pointer to try to add to the hash table.
- */
+ /** Next pointer to try to add to the hash table. */
private ArrowBufPointer nextPointer;
/**
@@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) {
*
* @param dictionary the dictionary to populate.
* @param encodeNull if null values should be added to the dictionary.
- * @param hasher the hasher used to compute the hash code.
+ * @param hasher the hasher used to compute the hash code.
*/
public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) {
this.dictionary = dictionary;
@@ -125,7 +115,7 @@ public int addValues(V targetVector) {
* Try to add an element from the target vector to the dictionary.
*
* @param targetVector the target vector containing new element.
- * @param targetIndex the index of the new element in the target vector.
+ * @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
*/
@Override
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
index bea1a784c3d6a..ac7a7d32bf597 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
@@ -14,11 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import java.util.HashMap;
-
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.memory.util.hash.SimpleHasher;
@@ -27,43 +25,35 @@
/**
* Dictionary encoder based on hash table.
+ *
* @param <E> encoded vector type.
* @param <D> decoded vector type, which is also the dictionary type.
*/
public class HashTableDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding/decoding.
- * It must be sorted.
- */
+ /** The dictionary for encoding/decoding. It must be sorted. */
private final D dictionary;
- /**
- * The hasher used to compute the hash code.
- */
+ /** The hasher used to compute the hash code. */
private final ArrowBufHasher hasher;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
/**
- * The hash map for distinct dictionary entries.
- * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+ * whereas the value is the index in the dictionary.
*/
private HashMap hashMap = new HashMap<>();
- /**
- * The pointer used to probe each element to encode.
- */
+ /** The pointer used to probe each element to encode. */
private ArrowBufPointer reusablePointer;
/**
* Constructs a dictionary encoder.
- * @param dictionary the dictionary.
*
+ * @param dictionary the dictionary.
*/
public HashTableDictionaryEncoder(D dictionary) {
this(dictionary, false);
@@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) {
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding/decoding.
- *
- * For encoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
- *
- *
- * For decoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the decoder should never expect a null in the input.
- * 2) If set to false, the decoder simply produces a null in the output.
- *
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding/decoding.
+ * For encoding, when a null is encountered in the input, 1) If the flag is set to true,
+ * the encoder searches for the value in the dictionary, and outputs the index in the
+ * dictionary. 2) If the flag is set to false, the encoder simply produces a null in the
+ * output.
+ * For decoding, when a null is encountered in the input, 1) If the flag is set to true,
+ * the decoder should never expect a null in the input. 2) If set to false, the decoder
+ * simply produces a null in the output.
*/
public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
this(dictionary, encodeNull, SimpleHasher.INSTANCE);
@@ -92,13 +79,13 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding.
- * When a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding. When a null is encountered in the
+ * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply
+ * produces a null in the output.
* @param hasher the hasher used to calculate the hash code.
*/
public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) {
@@ -120,12 +107,12 @@ private void buildHashMap() {
}
/**
- * Encodes an input vector by a hash table.
- * So the algorithm takes O(n) time, where n is the length of the input vector.
+ * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the
+ * length of the input vector.
*
- * @param input the input vector.
+ * @param input the input vector.
* @param output the output vector.
- **/
+ */
@Override
public void encode(D input, E output) {
for (int i = 0; i < input.getValueCount(); i++) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
index 84a3a96af8ef1..9aeff22005751 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.BaseIntVector;
@@ -24,20 +23,17 @@
/**
* Dictionary encoder based on linear search.
+ *
* @param <E> encoded vector type.
* @param <D> decoded vector type, which is also the dictionary type.
*/
public class LinearDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding.
- */
+ /** The dictionary for encoding. */
private final D dictionary;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
private RangeEqualsVisitor equalizer;
@@ -46,8 +42,10 @@ public class LinearDictionaryEncoder encoded vector type.
* @param <D> decoded vector type, which is also the dictionary type.
*/
public class SearchDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding/decoding.
- * It must be sorted.
- */
+ /** The dictionary for encoding/decoding. It must be sorted. */
private final D dictionary;
- /**
- * The criteria by which the dictionary is sorted.
- */
+ /** The criteria by which the dictionary is sorted. */
private final VectorValueComparator comparator;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary. It must be in sorted order.
* @param comparator the criteria for sorting.
*/
@@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary. It must be in sorted order.
* @param comparator the criteria for sorting.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding.
- * When a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding. When a null is encountered in the
+ * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply
+ * produces a null in the output.
*/
- public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) {
+ public SearchDictionaryEncoder(
+ D dictionary, VectorValueComparator comparator, boolean encodeNull) {
this.dictionary = dictionary;
this.comparator = comparator;
this.encodeNull = encodeNull;
}
/**
- * Encodes an input vector by binary search.
- * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector,
- * and m is the length of the dictionary.
+ * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is
+ * the length of the input vector, and m is the length of the dictionary.
+ *
* @param input the input vector.
- * @param output the output vector. Note that it must be in a fresh state. At least,
- * all its validity bits should be clear.
+ * @param output the output vector. Note that it must be in a fresh state. At least, all its
+ * validity bits should be clear.
*/
@Override
public void encode(D input, E output) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
index f9cd77daa2e76..fca7df067dcff 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
@@ -14,45 +14,36 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import java.util.TreeSet;
-
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
/**
- * This class builds the dictionary based on a binary search tree.
- * Each add operation can be finished in O(log(n)) time,
- * where n is the current dictionary size.
+ * This class builds the dictionary based on a binary search tree. Each add operation can be
+ * finished in O(log(n)) time, where n is the current dictionary size.
*
* @param <V> the dictionary vector type.
*/
-public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder {
+public class SearchTreeBasedDictionaryBuilder
+ implements DictionaryBuilder {
- /**
- * The dictionary to be built.
- */
+ /** The dictionary to be built. */
private final V dictionary;
- /**
- * The criteria for sorting in the search tree.
- */
+ /** The criteria for sorting in the search tree. */
protected final VectorValueComparator comparator;
- /**
- * If null should be encoded.
- */
+ /** If null should be encoded. */
private final boolean encodeNull;
- /**
- * The search tree for storing the value index.
- */
+ /** The search tree for storing the value index. */
private TreeSet searchTree;
/**
* Construct a search tree-based dictionary builder.
+ *
* @param dictionary the dictionary vector.
* @param comparator the criteria for value equality.
*/
@@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c
/**
* Construct a search tree-based dictionary builder.
+ *
* @param dictionary the dictionary vector.
* @param comparator the criteria for value equality.
* @param encodeNull if null values should be added to the dictionary.
*/
- public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) {
+ public SearchTreeBasedDictionaryBuilder(
+ V dictionary, VectorValueComparator comparator, boolean encodeNull) {
this.dictionary = dictionary;
this.comparator = comparator;
this.encodeNull = encodeNull;
@@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c
}
/**
- * Gets the dictionary built.
- * Please note that the dictionary is not in sorted order.
- * Instead, its order is determined by the order of element insertion.
- * To get the dictionary in sorted order, please use
- * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its
+ * order is determined by the order of element insertion. To get the dictionary in sorted order,
+ * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ *
* @return the dictionary.
*/
@Override
@@ -90,6 +82,7 @@ public V getDictionary() {
/**
* Try to add all values from the target vector to the dictionary.
+ *
* @param targetVector the target vector containing values to probe.
* @return the number of values actually added to the dictionary.
*/
@@ -107,6 +100,7 @@ public int addValues(V targetVector) {
/**
* Try to add an element from the target vector to the dictionary.
+ *
* @param targetVector the target vector containing new element.
* @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
@@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) {
}
/**
- * Gets the sorted dictionary.
- * Note that given the binary search tree, the sort can finish in O(n).
+ * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in
+ * O(n).
*/
public void populateSortedDictionary(V sortedDictionary) {
int idx = 0;
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
index f5e95cf1033f5..5492676af1a2e 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
@@ -14,26 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.misc;
import org.apache.arrow.vector.BaseIntVector;
-/**
- * Partial sum related utilities.
- */
+/** Partial sum related utilities. */
public class PartialSumUtils {
/**
- * Converts an input vector to a partial sum vector.
- * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}.
- * Suppose we have input vector a and output vector b.
- * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ * Converts an input vector to a partial sum vector. This is an inverse operation of {@link
+ * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a
+ * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ *
* @param deltaVector the input vector.
* @param partialSumVector the output vector.
* @param sumBase the base of the partial sums.
*/
- public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
+ public static void toPartialSumVector(
+ BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
long sum = sumBase;
partialSumVector.setWithPossibleTruncate(0, sumBase);
@@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p
}
/**
- * Converts an input vector to the delta vector.
- * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}.
- * Suppose we have input vector a and output vector b.
- * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ * Converts an input vector to the delta vector. This is an inverse operation of {@link
+ * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input
+ * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ *
* @param partialSumVector the input vector.
* @param deltaVector the output vector.
*/
@@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d
}
/**
- * Given a value and a partial sum vector, finds its position in the partial sum vector.
- * In particular, given an integer value a and partial sum vector v, we try to find a
- * position i, so that v(i) <= a < v(i + 1).
- * The algorithm is based on binary search, so it takes O(log(n)) time, where n is
- * the length of the partial sum vector.
+ * Given a value and a partial sum vector, finds its position in the partial sum vector. In
+ * particular, given an integer value a and partial sum vector v, we try to find a position i, so
+ * that v(i) <= a < v(i + 1). The algorithm is based on binary search, so it takes O(log(n)) time,
+ * where n is the length of the partial sum vector.
+ *
* @param partialSumVector the input partial sum vector.
* @param value the value to search.
* @return the position in the partial sum vector, if any, or -1, if none is found.
*/
public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) {
- if (value < partialSumVector.getValueAsLong(0) ||
- value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
+ if (value < partialSumVector.getValueAsLong(0)
+ || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
return -1;
}
@@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector,
throw new IllegalStateException("Should never get here");
}
- private PartialSumUtils() {
- }
+ private PartialSumUtils() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
index 43c9a5b010e8c..baa2058ffc51f 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
@@ -14,11 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.rank;
import java.util.stream.IntStream;
-
import org.apache.arrow.algorithm.sort.IndexSorter;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -28,21 +26,21 @@
/**
* Utility for calculating ranks of vector elements.
+ *
* @param the vector type
*/
public class VectorRank {
private VectorValueComparator comparator;
- /**
- * Vector indices.
- */
+ /** Vector indices. */
private IntVector indices;
private final BufferAllocator allocator;
/**
* Constructs a vector rank utility.
+ *
* @param allocator the allocator to use.
*/
public VectorRank(BufferAllocator allocator) {
@@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) {
}
/**
- * Given a rank r, gets the index of the element that is the rth smallest in the vector.
- * The operation is performed without changing the vector, and takes O(n) time,
- * where n is the length of the vector.
+ * Given a rank r, gets the index of the element that is the rth smallest in the vector. The
+ * operation is performed without changing the vector, and takes O(n) time, where n is the length
+ * of the vector.
+ *
* @param vector the vector from which to get the element index.
* @param comparator the criteria for vector element comparison.
* @param rank the rank to determine.
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
index 6226921b22ed6..6a48019edc3eb 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
@@ -14,49 +14,40 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
-
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.compare.Range;
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
/**
- * Search for a value in the vector by multiple threads.
- * This is often used in scenarios where the vector is large or
- * low response time is required.
+ * Search for a value in the vector by multiple threads. This is often used in scenarios where the
+ * vector is large or low response time is required.
+ *
* @param <V> the vector type.
*/
public class ParallelSearcher {
- /**
- * The target vector to search.
- */
+ /** The target vector to search. */
private final V vector;
- /**
- * The thread pool.
- */
+ /** The thread pool. */
private final ExecutorService threadPool;
- /**
- * The number of threads to use.
- */
+ /** The number of threads to use. */
private final int numThreads;
- /**
- * The position of the key in the target vector, if any.
- */
+ /** The position of the key in the target vector, if any. */
private volatile int keyPosition = -1;
/**
* Constructs a parallel searcher.
+ *
* @param vector the vector to search.
* @param threadPool the thread pool to use.
* @param numThreads the number of threads to use.
@@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() {
}
/**
- * Search for the key in the target vector. The element-wise comparison is based on
- * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise
- * comparison: equal and un-equal.
+ * Search for the key in the target vector. The element-wise comparison is based on {@link
+ * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal
+ * and unequal.
+ *
* @param keyVector the vector containing the search key.
* @param keyIndex the index of the search key in the key vector.
- * @return the position of a matched value in the target vector,
- * or -1 if none is found. Please note that if there are multiple
- * matches of the key in the target vector, this method makes no
- * guarantees about which instance is returned.
- * For an alternative search implementation that always finds the first match of the key,
- * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @return the position of a matched value in the target vector, or -1 if none is found. Please
+ * note that if there are multiple matches of the key in the target vector, this method makes
+ * no guarantees about which instance is returned. For an alternative search implementation
+ * that always finds the first match of the key, see {@link
+ * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
* @throws ExecutionException if an exception occurs in a thread.
* @throws InterruptedException if a thread is interrupted.
*/
@@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
final int valueCount = vector.getValueCount();
for (int i = 0; i < numThreads; i++) {
final int tid = i;
- Future> unused = threadPool.submit(() -> {
- // convert to long to avoid overflow
- int start = (int) (((long) valueCount) * tid / numThreads);
- int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
-
- if (start >= end) {
- // no data assigned to this task.
- futures[tid].complete(false);
- return;
- }
-
- RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
- Range range = new Range(0, 0, 1);
- for (int pos = start; pos < end; pos++) {
- if (keyPosition != -1) {
- // the key has been found by another task
- futures[tid].complete(false);
- return;
- }
- range.setLeftStart(pos).setRightStart(keyIndex);
- if (visitor.rangeEquals(range)) {
- keyPosition = pos;
- futures[tid].complete(true);
- return;
- }
- }
-
- // no match value is found.
- futures[tid].complete(false);
- });
+ Future<?> unused =
+ threadPool.submit(
+ () -> {
+ // slice [start, end) of this task; multiply in long so valueCount * tid cannot overflow
+ int start = (int) (((long) valueCount) * tid / numThreads);
+ int end = (int) (((long) valueCount) * (tid + 1) / numThreads);
+
+ if (start >= end) {
+ // empty slice (e.g. fewer values than tasks): nothing to scan.
+ futures[tid].complete(false);
+ return;
+ }
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
+ Range range = new Range(0, 0, 1);
+ for (int pos = start; pos < end; pos++) {
+ if (keyPosition != -1) {
+ // another task has already published a match; stop scanning early
+ futures[tid].complete(false);
+ return;
+ }
+ range.setLeftStart(pos).setRightStart(keyIndex);
+ if (visitor.rangeEquals(range)) {
+ keyPosition = pos;
+ futures[tid].complete(true);
+ return;
+ }
+ }
+
+ // the whole slice was scanned without finding a match.
+ futures[tid].complete(false);
+ });
}
CompletableFuture.allOf(futures).get();
@@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
}
/**
- * Search for the key in the target vector. The element-wise comparison is based on
- * {@link VectorValueComparator}, so there are three possible results for each element-wise
- * comparison: less than, equal to and greater than.
+ * Search for the key in the target vector. The element-wise comparison is based on {@link
+ * VectorValueComparator}, so there are three possible results for each element-wise comparison:
+ * less than, equal to and greater than.
+ *
* @param keyVector the vector containing the search key.
* @param keyIndex the index of the search key in the key vector.
* @param comparator the comparator for comparing the key against vector elements.
- * @return the position of a matched value in the target vector,
- * or -1 if none is found. Please note that if there are multiple
- * matches of the key in the target vector, this method makes no
- * guarantees about which instance is returned.
- * For an alternative search implementation that always finds the first match of the key,
- * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @return the position of a matched value in the target vector, or -1 if none is found. Please
+ * note that if there are multiple matches of the key in the target vector, this method makes
+ * no guarantees about which instance is returned. For an alternative search implementation
+ * that always finds the first match of the key, see {@link
+ * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
* @throws ExecutionException if an exception occurs in a thread.
* @throws InterruptedException if a thread is interrupted.
*/
- public int search(
- V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException {
+ public int search(V keyVector, int keyIndex, VectorValueComparator comparator)
+ throws ExecutionException, InterruptedException {
final CompletableFuture[] futures = initSearch();
final int valueCount = vector.getValueCount();
for (int i = 0; i < numThreads; i++) {
final int tid = i;
- Future> unused = threadPool.submit(() -> {
- // convert to long to avoid overflow
- int start = (int) (((long) valueCount) * tid / numThreads);
- int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
-
- if (start >= end) {
- // no data assigned to this task.
- futures[tid].complete(false);
- return;
- }
-
- VectorValueComparator localComparator = comparator.createNew();
- localComparator.attachVectors(vector, keyVector);
- for (int pos = start; pos < end; pos++) {
- if (keyPosition != -1) {
- // the key has been found by another task
- futures[tid].complete(false);
- return;
- }
- if (localComparator.compare(pos, keyIndex) == 0) {
- keyPosition = pos;
- futures[tid].complete(true);
- return;
- }
- }
-
- // no match value is found.
- futures[tid].complete(false);
- });
+ Future<?> unused =
+ threadPool.submit(
+ () -> {
+ // slice [start, end) of this task; multiply in long so valueCount * tid cannot overflow
+ int start = (int) (((long) valueCount) * tid / numThreads);
+ int end = (int) (((long) valueCount) * (tid + 1) / numThreads);
+
+ if (start >= end) {
+ // empty slice (e.g. fewer values than tasks): nothing to scan.
+ futures[tid].complete(false);
+ return;
+ }
+
+ VectorValueComparator localComparator = comparator.createNew();
+ localComparator.attachVectors(vector, keyVector);
+ for (int pos = start; pos < end; pos++) {
+ if (keyPosition != -1) {
+ // another task has already published a match; stop scanning early
+ futures[tid].complete(false);
+ return;
+ }
+ if (localComparator.compare(pos, keyIndex) == 0) {
+ keyPosition = pos;
+ futures[tid].complete(true);
+ return;
+ }
+ }
+
+ // the whole slice was scanned without finding a match.
+ futures[tid].complete(false);
+ });
}
CompletableFuture.allOf(futures).get();
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
index 249194843f101..c7905dd8956c8 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
@@ -1,108 +1,105 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.arrow.algorithm.search;
-
-import org.apache.arrow.algorithm.sort.VectorValueComparator;
-import org.apache.arrow.vector.ValueVector;
-
-/**
- * Search for the range of a particular element in the target vector.
- */
-public class VectorRangeSearcher {
-
- /**
- * Result returned when a search fails.
- */
- public static final int SEARCH_FAIL_RESULT = -1;
-
- /**
- * Search for the first occurrence of an element.
- * The search is based on the binary search algorithm. So the target vector must be sorted.
- * @param targetVector the vector from which to perform the search.
- * @param comparator the criterion for the comparison.
- * @param keyVector the vector containing the element to search.
- * @param keyIndex the index of the search key in the key vector.
- * @param the vector type.
- * @return the index of the first matched element if any, and -1 otherwise.
- */
- public static int getFirstMatch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
- comparator.attachVectors(keyVector, targetVector);
-
- int ret = SEARCH_FAIL_RESULT;
-
- int low = 0;
- int high = targetVector.getValueCount() - 1;
-
- while (low <= high) {
- int mid = low + (high - low) / 2;
- int result = comparator.compare(keyIndex, mid);
- if (result < 0) {
- // the key is smaller
- high = mid - 1;
- } else if (result > 0) {
- // the key is larger
- low = mid + 1;
- } else {
- // an equal element is found
- // continue to go left-ward
- ret = mid;
- high = mid - 1;
- }
- }
- return ret;
- }
-
- /**
- * Search for the last occurrence of an element.
- * The search is based on the binary search algorithm. So the target vector must be sorted.
- * @param targetVector the vector from which to perform the search.
- * @param comparator the criterion for the comparison.
- * @param keyVector the vector containing the element to search.
- * @param keyIndex the index of the search key in the key vector.
- * @param the vector type.
- * @return the index of the last matched element if any, and -1 otherwise.
- */
- public static int getLastMatch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
- comparator.attachVectors(keyVector, targetVector);
-
- int ret = SEARCH_FAIL_RESULT;
-
- int low = 0;
- int high = targetVector.getValueCount() - 1;
-
- while (low <= high) {
- int mid = low + (high - low) / 2;
- int result = comparator.compare(keyIndex, mid);
- if (result < 0) {
- // the key is smaller
- high = mid - 1;
- } else if (result > 0) {
- // the key is larger
- low = mid + 1;
- } else {
- // an equal element is found,
- // continue to go right-ward
- ret = mid;
- low = mid + 1;
- }
- }
- return ret;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.algorithm.search;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+
+/** Binary-search helpers that locate the first or last match of a key in a sorted vector. */
+public class VectorRangeSearcher {
+
+ /** Sentinel index returned when no element matches the key. */
+ public static final int SEARCH_FAIL_RESULT = -1;
+
+ /**
+ * Finds the first occurrence of the key in the target vector using binary search. The target
+ * vector must already be sorted.
+ *
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator compares the key with target elements; this call re-attaches its vectors.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of the first matching element, or {@link #SEARCH_FAIL_RESULT} if none.
+ */
+ public static int getFirstMatch(
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ int ret = SEARCH_FAIL_RESULT;
+
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int result = comparator.compare(keyIndex, mid);
+ if (result < 0) {
+ // the key compares below the element at mid: continue in the left half
+ high = mid - 1;
+ } else if (result > 0) {
+ // the key compares above the element at mid: continue in the right half
+ low = mid + 1;
+ } else {
+ // a match is found: remember it, then
+ // keep searching to the left for an earlier match
+ ret = mid;
+ high = mid - 1;
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Finds the last occurrence of the key in the target vector using binary search. The target
+ * vector must already be sorted.
+ *
+ * @param targetVector the vector from which to perform the search.
+ * @param comparator compares the key with target elements; this call re-attaches its vectors.
+ * @param keyVector the vector containing the element to search.
+ * @param keyIndex the index of the search key in the key vector.
+ * @param <V> the vector type.
+ * @return the index of the last matching element, or {@link #SEARCH_FAIL_RESULT} if none.
+ */
+ public static int getLastMatch(
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ comparator.attachVectors(keyVector, targetVector);
+
+ int ret = SEARCH_FAIL_RESULT;
+
+ int low = 0;
+ int high = targetVector.getValueCount() - 1;
+
+ while (low <= high) {
+ int mid = low + (high - low) / 2;
+ int result = comparator.compare(keyIndex, mid);
+ if (result < 0) {
+ // the key compares below the element at mid: continue in the left half
+ high = mid - 1;
+ } else if (result > 0) {
+ // the key compares above the element at mid: continue in the right half
+ low = mid + 1;
+ } else {
+ // a match is found: remember it, then
+ // keep searching to the right for a later match
+ ret = mid;
+ low = mid + 1;
+ }
+ }
+ return ret;
+ }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
index 646bca01bb81d..dd0b4de5d8677 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
@@ -14,25 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
-/**
- * Search for a particular element in the vector.
- */
+/** Search for a particular element in the vector. */
public final class VectorSearcher {
- /**
- * Result returned when a search fails.
- */
+ /** Result returned when a search fails. */
public static final int SEARCH_FAIL_RESULT = -1;
/**
- * Search for a particular element from the key vector in the target vector by binary search.
- * The target vector must be sorted.
+ * Search for a particular element from the key vector in the target vector by binary search. The
+ * target vector must be sorted.
+ *
* @param targetVector the vector from which to perform the sort.
* @param comparator the criterion for the sort.
* @param keyVector the vector containing the element to search.
@@ -41,7 +37,7 @@ public final class VectorSearcher {
* @return the index of a matched element if any, and -1 otherwise.
*/
public static int binarySearch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
comparator.attachVectors(keyVector, targetVector);
// perform binary search
@@ -63,7 +59,9 @@ public static int binarySearch(
}
/**
- * Search for a particular element from the key vector in the target vector by traversing the vector in sequence.
+ * Search for a particular element from the key vector in the target vector by traversing the
+ * vector in sequence.
+ *
* @param targetVector the vector from which to perform the search.
* @param comparator the criterion for element equality.
* @param keyVector the vector containing the element to search.
@@ -72,7 +70,7 @@ public static int binarySearch(
* @return the index of a matched element if any, and -1 otherwise.
*/
public static int linearSearch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
comparator.attachVectors(keyVector, targetVector);
for (int i = 0; i < targetVector.getValueCount(); i++) {
if (comparator.compare(keyIndex, i) == 0) {
@@ -82,7 +80,5 @@ public static int linearSearch(
return SEARCH_FAIL_RESULT;
}
- private VectorSearcher() {
-
- }
+ private VectorSearcher() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
index ec74598e0eebf..77093d87bc489 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
@@ -14,20 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * A composite vector comparator compares a number of vectors
- * by a number of inner comparators.
- *
- * It works by first using the first comparator, if a non-zero value
- * is returned, it simply returns it. Otherwise, it uses the second comparator,
- * and so on, until a non-zero value is produced, or all inner comparators have
- * been used.
- *
+ * A composite vector comparator compares a number of vectors by a number of inner comparators.
+ *
+ * It works by first using the first comparator; if a non-zero value is returned, it simply
+ * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is
+ * produced, or all inner comparators have been used.
*/
public class CompositeVectorComparator extends VectorValueComparator {
@@ -62,7 +58,8 @@ public int compare(int index1, int index2) {
@Override
public VectorValueComparator createNew() {
- VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length];
+ VectorValueComparator[] newInnerComparators =
+ new VectorValueComparator[innerComparators.length];
for (int i = 0; i < innerComparators.length; i++) {
newInnerComparators[i] = innerComparators[i].createNew();
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 588876aa99059..ec650cd9dc88b 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
import java.math.BigDecimal;
import java.time.Duration;
-
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -56,13 +54,12 @@
import org.apache.arrow.vector.complex.RepeatedValueVector;
import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
-/**
- * Default comparator implementations for different types of vectors.
- */
+/** Default comparator implementations for different types of vectors. */
public class DefaultVectorComparators {
/**
* Create the default comparator for the vector.
+ *
* @param vector the vector.
* @param the vector type.
* @return the default comparator.
@@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp
} else if (vector instanceof IntervalDayVector) {
return (VectorValueComparator) new IntervalDayComparator();
} else if (vector instanceof IntervalMonthDayNanoVector) {
- throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ throw new IllegalArgumentException(
+ "No default comparator for " + vector.getClass().getCanonicalName());
} else if (vector instanceof TimeMicroVector) {
return (VectorValueComparator) new TimeMicroComparator();
} else if (vector instanceof TimeMilliVector) {
@@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp
return (VectorValueComparator) new VariableWidthComparator();
} else if (vector instanceof RepeatedValueVector) {
VectorValueComparator> innerComparator =
- createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
+ createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
return new RepeatedValueComparator(innerComparator);
} else if (vector instanceof FixedSizeListVector) {
VectorValueComparator> innerComparator =
@@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp
return (VectorValueComparator) new NullComparator();
}
- throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ throw new IllegalArgumentException(
+ "No default comparator for " + vector.getClass().getCanonicalName());
}
- /**
- * Default comparator for bytes.
- * The comparison is based on values, with null comes first.
- */
+ /** Default comparator for bytes. The comparison is based on values, with null comes first. */
public static class ByteComparator extends VectorValueComparator {
public ByteComparator() {
@@ -159,8 +155,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for short integers.
- * The comparison is based on values, with null comes first.
+ * Default comparator for short integers. The comparison is based on values, with null comes
+ * first.
*/
public static class ShortComparator extends VectorValueComparator {
@@ -182,8 +178,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for 32-bit integers.
- * The comparison is based on int values, with null comes first.
+ * Default comparator for 32-bit integers. The comparison is based on int values, with null comes
+ * first.
*/
public static class IntComparator extends VectorValueComparator {
@@ -205,8 +201,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for long integers.
- * The comparison is based on values, with null comes first.
+ * Default comparator for long integers. The comparison is based on values, with null comes first.
*/
public static class LongComparator extends VectorValueComparator {
@@ -229,8 +224,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned bytes.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned bytes. The comparison is based on values, with null comes
+ * first.
*/
public static class UInt1Comparator extends VectorValueComparator {
@@ -253,8 +248,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned short integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned short integer. The comparison is based on values, with null
+ * comes first.
*/
public static class UInt2Comparator extends VectorValueComparator {
@@ -280,8 +275,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned integer. The comparison is based on values, with null comes
+ * first.
*/
public static class UInt4Comparator extends VectorValueComparator {
@@ -303,8 +298,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned long integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned long integer. The comparison is based on values, with null
+ * comes first.
*/
public static class UInt8Comparator extends VectorValueComparator {
@@ -326,8 +321,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for float type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for float type. The comparison is based on values, with null comes first.
*/
public static class Float4Comparator extends VectorValueComparator {
@@ -363,8 +357,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for double type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for double type. The comparison is based on values, with null comes first.
*/
public static class Float8Comparator extends VectorValueComparator {
@@ -399,10 +392,7 @@ public VectorValueComparator createNew() {
}
}
- /**
- * Default comparator for bit type.
- * The comparison is based on values, with null comes first.
- */
+ /** Default comparator for bit type. The comparison is based on values, with null comes first. */
public static class BitComparator extends VectorValueComparator {
public BitComparator() {
@@ -424,8 +414,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for DateDay type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for DateDay type. The comparison is based on values, with null comes first.
*/
public static class DateDayComparator extends VectorValueComparator {
@@ -447,8 +436,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for DateMilli type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for DateMilli type. The comparison is based on values, with null comes
+ * first.
*/
public static class DateMilliComparator extends VectorValueComparator {
@@ -471,8 +460,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Decimal256 type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Decimal256 type. The comparison is based on values, with null comes
+ * first.
*/
public static class Decimal256Comparator extends VectorValueComparator {
@@ -495,8 +484,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Decimal type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Decimal type. The comparison is based on values, with null comes first.
*/
public static class DecimalComparator extends VectorValueComparator {
@@ -519,8 +507,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Duration type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Duration type. The comparison is based on values, with null comes first.
*/
public static class DurationComparator extends VectorValueComparator {
@@ -543,8 +530,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for IntervalDay type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for IntervalDay type. The comparison is based on values, with null comes
+ * first.
*/
public static class IntervalDayComparator extends VectorValueComparator {
@@ -567,8 +554,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeMicro type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeMicro type. The comparison is based on values, with null comes
+ * first.
*/
public static class TimeMicroComparator extends VectorValueComparator {
@@ -591,8 +578,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeMilli type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeMilli type. The comparison is based on values, with null comes
+ * first.
*/
public static class TimeMilliComparator extends VectorValueComparator {
@@ -615,8 +602,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeNano type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeNano type. The comparison is based on values, with null comes first.
*/
public static class TimeNanoComparator extends VectorValueComparator {
@@ -639,8 +625,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeSec type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeSec type. The comparison is based on values, with null comes first.
*/
public static class TimeSecComparator extends VectorValueComparator {
@@ -663,8 +648,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeSec type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeStamp type. The comparison is based on values, with nulls first.
*/
public static class TimeStampComparator extends VectorValueComparator {
@@ -687,10 +671,11 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}.
- * The comparison is in lexicographic order, with null comes first.
+ * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is
+ * in lexicographic order, with null comes first.
*/
- public static class FixedSizeBinaryComparator extends VectorValueComparator {
+ public static class FixedSizeBinaryComparator
+ extends VectorValueComparator {
@Override
public int compare(int index1, int index2) {
@@ -720,9 +705,7 @@ public VectorValueComparator createNew() {
}
}
- /**
- * Default comparator for {@link org.apache.arrow.vector.NullVector}.
- */
+ /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */
public static class NullComparator extends VectorValueComparator {
@Override
public int compare(int index1, int index2) {
@@ -742,8 +725,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}.
- * The comparison is in lexicographic order, with null comes first.
+ * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is
+ * in lexicographic order, with null comes first.
*/
public static class VariableWidthComparator extends VectorValueComparator {
@@ -772,12 +755,13 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link RepeatedValueVector}.
- * It works by comparing the underlying vector in a lexicographic order.
+ * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector
+ * in a lexicographic order.
+ *
* @param inner vector type.
*/
public static class RepeatedValueComparator
- extends VectorValueComparator {
+ extends VectorValueComparator {
private final VectorValueComparator innerComparator;
@@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto
}
/**
- * Default comparator for {@link RepeatedValueVector}.
- * It works by comparing the underlying vector in a lexicographic order.
+ * Default comparator for {@link FixedSizeListVector}. It works by comparing the underlying vector
+ * in a lexicographic order.
+ *
* @param inner vector type.
*/
public static class FixedSizeListComparator
@@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto
}
}
- private DefaultVectorComparators() {
- }
+ private DefaultVectorComparators() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
index aaa7ba117c3ba..ea2b344a1eabb 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
@@ -14,20 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.BaseFixedWidthVector;
/**
- * Default in-place sorter for fixed-width vectors.
- * It is based on quick-sort, with average time complexity O(n*log(n)).
+ * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time
+ * complexity O(n*log(n)).
+ *
* @param