From 41810749e086278aea541240a16e9cf1e32eab80 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 6 Jun 2024 16:15:52 +0900
Subject: [PATCH 01/59] MINOR: [Java] Bump org.apache.calcite.avatica:avatica from 1.24.0 to 1.25.0 in /java (#41212)

Bumps [org.apache.calcite.avatica:avatica](https://github.com/apache/calcite-avatica) from 1.24.0 to 1.25.0.

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.calcite.avatica:avatica&package-manager=maven&previous-version=1.24.0&new-version=1.25.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
> **Note**
> Automatic rebases have been disabled on this pull request as it has been open for over 30 days.

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li
---
 java/flight/flight-sql-jdbc-core/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml
index fbab69df3b305..459412e0f8d8b 100644
--- a/java/flight/flight-sql-jdbc-core/pom.xml
+++ b/java/flight/flight-sql-jdbc-core/pom.xml
@@ -125,7 +125,7 @@
       org.apache.calcite.avatica
       avatica
-      1.24.0
+      1.25.0

From 8b2c7e2893fec41c0b6ed51f5e05a7510b427f64 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 6 Jun 2024 16:16:42 +0900
Subject: [PATCH 02/59] MINOR: [Java] Bump com.github.luben:zstd-jni from 1.5.5-11 to 1.5.6-3 in /java (#41521)

Bumps [com.github.luben:zstd-jni](https://github.com/luben/zstd-jni) from 1.5.5-11 to 1.5.6-3.
Commits
  • c77a765 v1.5.6-3
  • 2d33a1e Fix missing Native free of Dict: Even when using a by-reference buffer, the d...
  • 1ff8933 Use cross-compile for i386
  • b833326 Adding a getByReferenceBuffer() method.
  • 2a262bf Add new constructor to ZstdDictCompress and ZstdDictDecompress that
  • a516a43 Add back some inspection on MacOS, bump version
  • 2e00ab1 Enable tests on M1 MacOS
  • c76455c Add debugging in MacOS CI
  • ec1ddeb Use the M1 MacOS runner to compile the aarch64 binary
  • fb6a35d Update also checkout and setup-qemu actions
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.github.luben:zstd-jni&package-manager=maven&previous-version=1.5.5-11&new-version=1.5.6-3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
> **Note**
> Automatic rebases have been disabled on this pull request as it has been open for over 30 days.

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li
---
 java/compression/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index 6ed0be6815ca3..a3d6c0ac558dd 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -47,7 +47,7 @@
       com.github.luben
       zstd-jni
-      1.5.5-11
+      1.5.6-3

From cea7323772beb5be6e5002624be36e6575a36418 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Thu, 6 Jun 2024 13:07:11 +0530
Subject: [PATCH 03/59] GH-41968: [Java] Implement TransferPair functionality for BinaryView (#41980)

### Rationale for this change

This PR adds TransferPair functionality for BinaryView vectors.

### What changes are included in this PR?

This includes the addition of TransferPair functionality in `ViewVarBinaryVector` and corresponding test cases.

### Are these changes tested?

Yes

### Are there any user-facing changes?

No

* GitHub Issue: #41968

Authored-by: Vibhatha Abeykoon
Signed-off-by: David Li
---
 .../arrow/vector/ViewVarBinaryVector.java   |  45 +-
 .../arrow/vector/TestSplitAndTransfer.java  | 392 +++++----
 .../arrow/vector/TestVarCharViewVector.java | 814 ++++++++++--------
 3 files changed, 756 insertions(+), 495 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
index 393df96b2969e..0a043b51067ef 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
@@ -205,14 +205,12 @@ public void setSafe(int index, NullableViewVarBinaryHolder holder) {
    */
   @Override
   public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
-    // TODO: https://github.com/apache/arrow/issues/40932
-    throw new UnsupportedOperationException("Unsupported operation");
+    return new TransferImpl(ref, allocator);
   }

   @Override
   public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
-    // TODO: https://github.com/apache/arrow/issues/40932
-    throw new UnsupportedOperationException("Unsupported operation");
+    return new TransferImpl(field, allocator);
   }

   /**
@@ -223,7 +221,42 @@ public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
    */
   @Override
   public TransferPair makeTransferPair(ValueVector to) {
-    // TODO: https://github.com/apache/arrow/issues/40932
-    throw new UnsupportedOperationException("Unsupported operation");
+    return new TransferImpl((ViewVarBinaryVector) to);
+  }
+
+  private class TransferImpl implements TransferPair {
+    ViewVarBinaryVector to;
+
+    public TransferImpl(String ref, BufferAllocator allocator) {
+      to = new ViewVarBinaryVector(ref, field.getFieldType(), allocator);
+    }
+
+    public TransferImpl(Field field, BufferAllocator allocator) {
+      to = new ViewVarBinaryVector(field, allocator);
+    }
+
+    public TransferImpl(ViewVarBinaryVector to) {
+      this.to = to;
+    }
+
+    @Override
+    public ViewVarBinaryVector getTo() {
+      return to;
+    }
+
+    @Override
+    public void transfer() {
+      transferTo(to);
+    }
+
+    @Override
+    public void splitAndTransfer(int startIndex, int length) {
+      splitAndTransferTo(startIndex, length, to);
+    }
+
+    @Override
+    public void copyValueSafe(int fromIndex, int toIndex) {
+      to.copyFromSafe(fromIndex, toIndex, 
ViewVarBinaryVector.this); + } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index d2c03930ca37a..fece93de9bf14 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -67,7 +67,8 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S vector.setValueCount(valueCount); } - private void populateViewVarcharVector(final ViewVarCharVector vector, int valueCount, String[] compareArray) { + private void populateBaseVariableWidthViewVector(final BaseVariableWidthViewVector vector, int valueCount, + String[] compareArray) { for (int i = 0; i < valueCount; i += 3) { final String s = String.format("%010d", i); vector.set(i, s.getBytes(StandardCharsets.UTF_8)); @@ -120,11 +121,16 @@ public void testWithEmptyVector() { transferPair = varCharVector.getTransferPair(allocator); transferPair.splitAndTransfer(0, 0); assertEquals(0, transferPair.getTo().getValueCount()); - // BaseVariableWidthViewVector + // BaseVariableWidthViewVector: ViewVarCharVector ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator); transferPair = viewVarCharVector.getTransferPair(allocator); transferPair.splitAndTransfer(0, 0); assertEquals(0, transferPair.getTo().getValueCount()); + // BaseVariableWidthVector: ViewVarBinaryVector + ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("", allocator); + transferPair = viewVarBinaryVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); // BaseLargeVariableWidthVector LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator); transferPair = largeVarCharVector.getTransferPair(allocator); @@ -225,36 +231,46 @@ public void test() throws Exception { } } - @Test - public void testView() throws Exception { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; + private void testView(BaseVariableWidthViewVector vector) { + vector.allocateNew(10000, 1000); + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; - populateViewVarcharVector(viewVarCharVector, valueCount, compareArray); + populateBaseVariableWidthViewVector(vector, valueCount, compareArray); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); - final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo();; + final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - for (int i = 0; i < length; i++) { - final boolean expectedSet = ((start + i) % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); - assertFalse(newViewVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newViewVarCharVector.get(i)); - } else { - 
assertTrue(newViewVarCharVector.isNull(i)); - } + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + for (int i = 0; i < length; i++) { + final boolean expectedSet = ((start + i) % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); + assertFalse(newVector.isNull(i)); + assertArrayEquals(expectedValue, newVector.get(i)); + } else { + assertTrue(newVector.isNull(i)); } - newViewVarCharVector.clear(); } + newVector.clear(); + } + } + + @Test + public void testUtf8View() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testView(viewVarCharVector); + } + } + + @Test + public void testBinaryView() throws Exception { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testView(viewVarBinaryVector); } } @@ -282,35 +298,47 @@ public void testMemoryConstrainedTransfer() { } } - @Test - public void testMemoryConstrainedTransferInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - // Here we have the target vector being transferred with a long string - // hence, the data buffer will be allocated. - // The default data buffer allocation takes - // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE - // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - // BaseVariableWidthViewVector.ELEMENT_SIZE - final int setLimit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - BaseVariableWidthViewVector.ELEMENT_SIZE; - allocator.setLimit(setLimit); - - viewVarCharVector.allocateNew(16000, 1000); + private void testMemoryConstrainedTransferInViews(BaseVariableWidthViewVector vector) { + // Here we have the target vector being transferred with a long string + // hence, the data buffer will be allocated. 
+ // The default data buffer allocation takes + // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + // BaseVariableWidthViewVector.ELEMENT_SIZE + final int setLimit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + BaseVariableWidthViewVector.ELEMENT_SIZE; + allocator.setLimit(setLimit); - final int valueCount = 1000; + vector.allocateNew(16000, 1000); - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final int valueCount = 1000; - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); - final int[][] startLengths = {{0, 700}, {700, 299}}; + populateBaseVariableWidthViewVector(vector, valueCount, null); - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - newViewVarCharVector.clear(); - } + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + + final int[][] startLengths = {{0, 700}, {700, 299}}; + + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + newVector.clear(); + } + } + + @Test + public void testMemoryConstrainedTransferInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testMemoryConstrainedTransferInViews(viewVarCharVector); + } + } + + @Test + public void testMemoryConstrainedTransferInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testMemoryConstrainedTransferInViews(viewVarBinaryVector); } } @@ -345,34 +373,45 @@ public void testTransfer() { } } - @Test - public void testTransferInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(16000, 1000); + private void testTransferInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - populateViewVarcharVector(viewVarCharVector, valueCount, compareArray); + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; + populateBaseVariableWidthViewVector(vector, valueCount, compareArray); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); - tp.transfer(); + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + tp.transfer(); - assertEquals(0, viewVarCharVector.valueCount); - assertEquals(valueCount, newViewVarCharVector.valueCount); + assertEquals(0, vector.valueCount); + assertEquals(valueCount, newVector.valueCount); - for (int i = 0; i < valueCount; i++) { - final boolean expectedSet = (i % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); - assertFalse(newViewVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newViewVarCharVector.get(i)); - } else { - assertTrue(newViewVarCharVector.isNull(i)); - } + for (int i = 0; i < valueCount; i++) { + 
final boolean expectedSet = (i % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); + assertFalse(newVector.isNull(i)); + assertArrayEquals(expectedValue, newVector.get(i)); + } else { + assertTrue(newVector.isNull(i)); } + } + + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testTransferInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testTransferInViews(viewVarCharVector); + } + } + + @Test + public void testTransferInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testTransferInViews(viewVarBinaryVector); } } @@ -424,21 +463,31 @@ public void testSplitAndTransferNon() { } } - @Test - public void testSplitAndTransferNonInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + private void testSplitAndTransferNonInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.getTransferPair(allocator); + BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + tp.splitAndTransfer(0, 0); + assertEquals(0, newVector.getValueCount()); - tp.splitAndTransfer(0, 0); - assertEquals(0, newViewVarCharVector.getValueCount()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testSplitAndTransferNonInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testSplitAndTransferNonInViews(viewVarCharVector); + } + } + + @Test + public void testSplitAndTransferNonInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testSplitAndTransferNonInViews(viewVarBinaryVector); } } @@ -460,21 +509,31 @@ public void testSplitAndTransferAll() { } } - @Test - public void testSplitAndTransferAllInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + private void testSplitAndTransferAllInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.getTransferPair(allocator); + BaseVariableWidthViewVector newViewVarCharVector = (BaseVariableWidthViewVector) tp.getTo(); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + tp.splitAndTransfer(0, valueCount); + assertEquals(valueCount, newViewVarCharVector.getValueCount()); - tp.splitAndTransfer(0, valueCount); - assertEquals(valueCount, newViewVarCharVector.getValueCount()); + newViewVarCharVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testSplitAndTransferAllInUtf8Views() { + try (final ViewVarCharVector 
viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testSplitAndTransferAllInViews(viewVarCharVector); + } + } + + @Test + public void testSplitAndTransferAllInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testSplitAndTransferAllInViews(viewVarBinaryVector); } } @@ -499,24 +558,35 @@ public void testInvalidStartIndex() { } } - @Test - public void testInvalidStartIndexInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testInvalidStartIndexInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(valueCount, 10)); - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> tp.splitAndTransfer(valueCount, 10)); + assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); - assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testInvalidStartIndexInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testInvalidStartIndexInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testInvalidStartIndexInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testInvalidStartIndexInViews(viewVarBinaryVector, newViewVarBinaryVector); } } @@ -541,24 +611,35 @@ public void testInvalidLength() { } } - @Test - public void testInvalidLengthInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testInvalidLengthInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(0, valueCount * 2)); - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> tp.splitAndTransfer(0, valueCount * 2)); + assertEquals("Invalid parameters startIndex: 0, 
length: 1000 for valueCount: 500", e.getMessage()); - assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testInvalidLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testInvalidLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testInvalidLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testInvalidLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); } } @@ -580,21 +661,33 @@ public void testZeroStartIndexAndLength() { } } - @Test - public void testZeroStartIndexAndLengthInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testZeroStartIndexAndLengthInViews(BaseVariableWidthViewVector vector, + BaseVariableWidthViewVector newVector) { + vector.allocateNew(0, 0); + final int valueCount = 0; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(0, 0); - final int valueCount = 0; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + tp.splitAndTransfer(0, 0); + assertEquals(valueCount, newVector.getValueCount()); - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newViewVarCharVector.getValueCount()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testZeroStartIndexAndLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testZeroStartIndexAndLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testZeroStartIndexAndLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testZeroStartIndexAndLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); } } @@ -616,21 +709,32 @@ public void testZeroLength() { } } - @Test - public void testZeroLengthInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testZeroLengthInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + tp.splitAndTransfer(500, 0); + 
assertEquals(0, newVector.getValueCount()); - tp.splitAndTransfer(500, 0); - assertEquals(0, newViewVarCharVector.getValueCount()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testZeroLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testZeroLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testZeroLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testZeroLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index 817941ecb46d6..ebf9b58da7b40 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -37,7 +37,9 @@ import java.util.List; import java.util.Objects; import java.util.Random; +import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.stream.IntStream; import java.util.stream.Stream; import org.apache.arrow.memory.ArrowBuf; @@ -86,9 +88,12 @@ public class TestVarCharViewVector { private BufferAllocator allocator; + private Random random; + @BeforeEach public void prepare() { allocator = new RootAllocator(Integer.MAX_VALUE); + random = new Random(); } @AfterEach @@ -1717,6 +1722,44 @@ public void testCopyFromSafeWithNulls(Function validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR4); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 10); + + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); } } @@ -1760,36 +1803,58 @@ public void testSplitAndTransfer1() { * With a long string included. */ @Test - public void testSplitAndTransfer2() { + public void testSplitAndTransferWithLongStringsOnSlicedBuffer() { + final byte [][] data = new byte[][]{STR2, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR2); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR2, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR2, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 18); + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); + } + } + + private void testSplitAndTransferOnSlicedVectorHelper(BaseVariableWidthViewVector sourceVector, + BaseVariableWidthViewVector targetVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. + // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. + assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = startIndex; i < length ; i++) { + assertArrayEquals(data[i], targetVector.get(i - startIndex)); } } @@ -1800,35 +1865,31 @@ public void testSplitAndTransfer2() { * With short strings. */ @Test - public void testSplitAndTransfer3() { + public void testSplitAndTransferWithShortStringsOnSlicedVector() { + byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (sourceVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], sourceVector.get(i))); + }; + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR4); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. 
- // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(sourceVector, data); + } + + byte [][] binaryData = generateBinaryDataArray(3, 10); + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, binaryData); } - assertArrayEquals(STR4, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); + validateVector.accept(sourceVector, binaryData); } } @@ -1839,35 +1900,77 @@ public void testSplitAndTransfer3() { * With a long string included. */ @Test - public void testSplitAndTransfer4() { + public void testSplitAndTransferWithLongStringsOnSlicedVector() { + final byte [][] data = new byte[][]{STR2, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (sourceVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], sourceVector.get(i))); + }; + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR2); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR2, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(sourceVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 20); + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, binaryData); + } + validateVector.accept(sourceVector, binaryData); + } + } + + private void testSplitAndTransferOnValiditySplitHelper( + BaseVariableWidthViewVector targetVector, BaseVariableWidthViewVector sourceVector, + int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + sourceVector.set(0, new byte[0]); + sourceVector.setNull(1); + for (int i = 0; i < data.length; i++) { + if (data[i] == null) { + sourceVector.setNull(i); + } else { + sourceVector.set(i, data[i]); + } + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // the allocation only consists in the size needed for the validity buffer + final long validitySize = + DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( + BaseValueVector.getValidityBufferSizeFromCount(2)); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); + // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. + // Since values up to the startIndex are empty/null validity refcnt should not change. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = startIndex; i < startIndex + length; i++) { + assertArrayEquals(data[i], targetVector.get(i - startIndex)); + } + + for (int i = 0; i < data.length; i++) { + if (data[i] == null) { + assertTrue(sourceVector.isNull(i)); + } else { + assertArrayEquals(data[i], sourceVector.get(i)); } - assertArrayEquals(STR2, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); } } @@ -1878,43 +1981,24 @@ public void testSplitAndTransfer4() { * With short strings. 
*/ @Test - public void testSplitAndTransfer5() { + public void testSplitAndTransferWithShortStringsOnValiditySplit() { + final byte [][] data = new byte[][]{new byte[0], null, STR4, STR5, STR6}; + final int startIndex = 2; + final int length = 2; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, new byte[0]); - sourceVector.setNull(1); - sourceVector.set(2, STR4); - sourceVector.set(3, STR5); - sourceVector.set(4, STR6); - sourceVector.setValueCount(5); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(2, 2, targetVector); - // the allocation only consists in the size needed for the validity buffer - final long validitySize = - DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( - BaseValueVector.getValidityBufferSizeFromCount(2)); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); - // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Since values up to the startIndex are empty/null validity refcnt should not change. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); - - assertArrayEquals(new byte[0], sourceVector.get(0)); - assertTrue(sourceVector.isNull(1)); - assertArrayEquals(STR4, sourceVector.get(2)); - assertArrayEquals(STR5, sourceVector.get(3)); - assertArrayEquals(STR6, sourceVector.get(4)); + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + + final byte [][] binaryData = generateBinaryDataArray(5, 10); + binaryData[0] = new byte[0]; + binaryData[1] = null; + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, binaryData); } } @@ -1925,44 +2009,59 @@ public void testSplitAndTransfer5() { * With long strings. 
*/ @Test - public void testSplitAndTransfer6() { + public void testSplitAndTransferWithLongStringsOnValiditySplit() { + final byte [][] data = new byte[][]{new byte[0], null, STR1, STR2, STR3}; + final int startIndex = 2; + final int length = 2; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, new byte[0]); - sourceVector.setNull(1); - sourceVector.set(2, STR1); - sourceVector.set(3, STR2); - sourceVector.set(4, STR3); - sourceVector.setValueCount(5); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(2, 2, targetVector); - // the allocation consists in the size needed for the validity buffer and the long string - // allocation - final long validitySize = - DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( - BaseValueVector.getValidityBufferSizeFromCount(2)); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); - // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Since values up to the startIndex are empty/null validity refcnt should not change. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); - - assertArrayEquals(new byte[0], sourceVector.get(0)); - assertTrue(sourceVector.isNull(1)); - assertArrayEquals(STR1, sourceVector.get(2)); - assertArrayEquals(STR2, sourceVector.get(3)); - assertArrayEquals(STR3, sourceVector.get(4)); + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + + final byte [][] binaryData = generateBinaryDataArray(5, 18); + binaryData[0] = new byte[0]; + binaryData[1] = null; + + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + } + + private void testSplitAndTransferOnAllocatorToAllocator(BaseVariableWidthViewVector targetVector, + BaseVariableWidthViewVector sourceVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(50, data.length); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + + if (sourceVector.getDataBuffers().isEmpty()) { + // no extra allocation as strings are all inline + assertEquals(allocatedMem, allocator.getAllocatedMemory()); + } else { + // extra allocation as some strings are not inline + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + } + + // the refcnts of each buffer for 
this test should be the same as what + // the source allocator ended up with. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = 0; i < data.length; i++) { + assertArrayEquals(data[i], sourceVector.get(i)); } } @@ -1973,39 +2072,36 @@ public void testSplitAndTransfer6() { * With short strings. */ @Test - public void testSplitAndTransfer7() { + public void testSplitAndTransferWithShortStringsOnAllocatorToAllocator() { final int maxAllocation = 512; + final byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, maxAllocation); final ViewVarCharVector targetVector = newViewVarCharVector("split-target", targetAllocator)) { try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, maxAllocation); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - sourceVector.allocateNew(50, 3); - - sourceVector.set(0, STR4); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // no extra allocation as strings are all inline - assertEquals(allocatedMem, allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR4, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 10); + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, maxAllocation); + final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, maxAllocation); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); } } @@ -2016,12 +2112,21 @@ public void testSplitAndTransfer7() { * With long strings. 
*/ @Test - public void testSplitAndTransfer8() { + public void testSplitAndTransferWithLongStringsOnAllocatorToAllocator() { final int initialReservation = 1024; // Here we have the target vector being transferred with a long string // hence, the data buffer will be allocated. // The default data buffer allocation takes // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + final byte [][] data = new byte[][]{STR1, STR2, STR3}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + final int maxAllocation = initialReservation + BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE; try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", @@ -2030,136 +2135,169 @@ public void testSplitAndTransfer8() { try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", initialReservation, maxAllocation); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - sourceVector.allocateNew(48, 3); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, sourceVector.get(0)); - assertArrayEquals(STR2, sourceVector.get(1)); - assertArrayEquals(STR3, sourceVector.get(2)); + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); + validateVector.accept(targetVector, data); } - } - @Test - public void testReallocAfterVectorTransfer1() { - try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - /* 4096 values with 16 bytes per record */ - final int bytesPerRecord = 32; - vector.allocateNew(4096 * bytesPerRecord, 4096); - int valueCapacity = vector.getValueCapacity(); - assertTrue(valueCapacity >= 4096); - - /* populate the vector */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } + final byte [][] binaryData = generateBinaryDataArray(3, 18); + + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", + initialReservation, maxAllocation); + final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", + initialReservation, maxAllocation); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, binaryData); } + validateVector.accept(targetVector, binaryData); + } + } - /* Check the vector output */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } + private void testReallocAfterVectorTransferHelper(BaseVariableWidthViewVector vector, + byte[] str1, byte[] str2) { + /* 4096 values with 16 bytes per record */ + final int bytesPerRecord = 32; + vector.allocateNew(4096 * bytesPerRecord, 4096); + int valueCapacity = vector.getValueCapacity(); + assertTrue(valueCapacity >= 4096); + + /* populate the vector */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); } + } - /* trigger first realloc */ - vector.setSafe(valueCapacity, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { - vector.reallocViewBuffer(); - vector.reallocViewDataBuffer(); + /* Check the vector output */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); } + } - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } + /* trigger first realloc */ + vector.setSafe(valueCapacity, str2, 0, str2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } + + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); } + } - /* 
Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); } + } + + /* trigger second realloc */ + vector.setSafe(valueCapacity + bytesPerRecord, str2, 0, str2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } - /* trigger second realloc */ - vector.setSafe(valueCapacity + bytesPerRecord, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { - vector.reallocViewBuffer(); - vector.reallocViewDataBuffer(); + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); } + } - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); } + } - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } + /* We are potentially working with 4x the size of vector buffer + * that we initially started with. + * Now let's transfer the vector. + */ + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.transfer(); + BaseVariableWidthViewVector toVector = (BaseVariableWidthViewVector) transferPair.getTo(); + valueCapacity = toVector.getValueCapacity(); + + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, toVector.get(i)); + } else { + assertArrayEquals(str2, toVector.get(i)); } + } + toVector.close(); + } - /* We are potentially working with 4x the size of vector buffer - * that we initially started with. - * Now let's transfer the vector. 
- */ + @Test + public void testReallocAfterVectorTransfer() { + try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testReallocAfterVectorTransferHelper(vector, STR1, STR2); + } - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - ViewVarCharVector toVector = (ViewVarCharVector) transferPair.getTo(); - valueCapacity = toVector.getValueCapacity(); + try (final ViewVarBinaryVector vector = new ViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testReallocAfterVectorTransferHelper(vector, generateRandomBinaryData(12), + generateRandomBinaryData(13)); + } + } - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, toVector.get(i)); - } else { - assertArrayEquals(STR2, toVector.get(i)); - } - } + private void testSplitAndTransferWithMultipleDataBuffersHelper(BaseVariableWidthViewVector sourceVector, + BaseVariableWidthViewVector targetVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(48, 4); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + // we should have multiple data buffers + assertTrue(sourceVector.getDataBuffers().size() > 1); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - toVector.close(); + // split and transfer with slice starting at the beginning: + // this should not allocate anything new + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + + // the refcnts of each buffer for this test should be the same as what + // the source allocator ended up with. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. 
+ assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = 0; i < data.length; i++) { + assertArrayEquals(data[i], sourceVector.get(i)); } } @@ -2171,45 +2309,31 @@ public void testReallocAfterVectorTransfer1() { * Check multi-data buffer source copying */ @Test - public void testSplitAndTransfer9() { + public void testSplitAndTransferWithMultipleDataBuffers() { + final String str4 = generateRandomString(35); + final byte[][] data = new byte[][]{STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)}; + final int startIndex = 1; + final int length = 3; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final ViewVarCharVector targetVector = new ViewVarCharVector("target", allocator)) { - String str4 = generateRandomString(35); try (final ViewVarCharVector sourceVector = new ViewVarCharVector("source", allocator)) { - sourceVector.allocateNew(48, 4); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); - sourceVector.setValueCount(4); - - // we should have multiple data buffers - assertTrue(sourceVector.getDataBuffers().size() > 1); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - // split and transfer with slice starting at the beginning: - // this should not allocate anything new - sourceVector.splitAndTransferTo(1, 3, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, sourceVector.get(0)); - assertArrayEquals(STR2, sourceVector.get(1)); - assertArrayEquals(STR3, sourceVector.get(2)); - assertArrayEquals(str4.getBytes(StandardCharsets.UTF_8), sourceVector.get(3)); + testSplitAndTransferWithMultipleDataBuffersHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + try (final ViewVarBinaryVector targetVector = new ViewVarBinaryVector("target", allocator)) { + try (final ViewVarBinaryVector sourceVector = new ViewVarBinaryVector("source", allocator)) { + testSplitAndTransferWithMultipleDataBuffersHelper(sourceVector, targetVector, + startIndex, length, data); } - assertArrayEquals(STR2, targetVector.get(0)); - assertArrayEquals(STR3, targetVector.get(1)); - assertArrayEquals(str4.getBytes(StandardCharsets.UTF_8), targetVector.get(2)); + validateVector.accept(targetVector, data); } } From 67c6df1f2addd39b77e0e66e999164acff3d2ae3 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 6 Jun 2024 17:40:49 +0900 Subject: [PATCH 04/59] GH-41964: [CI][C++] Clear cache for mamba on AppVeyor (#41977) ### Rationale for this change It seems that mamba may use invalid download URL when there are invalid caches. ### What changes are included in this PR? Clear caches explicitly. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #41964 Authored-by: Sutou Kouhei Signed-off-by: Antoine Pitrou --- ci/appveyor-cpp-setup.bat | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat index 5c4a11832d5ee..5a9dffa166fb7 100644 --- a/ci/appveyor-cpp-setup.bat +++ b/ci/appveyor-cpp-setup.bat @@ -66,6 +66,9 @@ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt @rem Force conda to use conda-forge conda config --add channels conda-forge conda config --remove channels defaults +@rem Ensure using the latest information. If there are invalid caches, +@rem mamba may use invalid download URL. +mamba clean --all -y @rem Arrow conda environment mamba create -n arrow -y -c conda-forge ^ --file=ci\conda_env_python.txt ^ From 374b8f6ddec3b7614408ea874ffb29981c2a295d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 6 Jun 2024 18:40:30 +0900 Subject: [PATCH 05/59] GH-41903: [CI][GLib] Use the latest Ruby to use OpenSSL 3 (#42001) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Old Ruby ships OpenSSL 1 but googld-cloud-cpp requires OpenSSL 3. We need to use Ruby that ships OpenSSL 3. ### What changes are included in this PR? Use the latest Ruby. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #41903 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- .github/workflows/ruby.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 35c4460d47bc6..eb00bc5f92a8d 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -197,9 +197,7 @@ jobs: mingw-n-bits: - 64 ruby-version: - # TODO: Use the latest Ruby again when we fix GH-39130. - # - ruby - - "3.1" + - ruby env: ARROW_BUILD_STATIC: OFF ARROW_BUILD_TESTS: OFF From cbb0e4cb06fe2cc93295a6b6a9d81dad047feb61 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 11:18:46 -0400 Subject: [PATCH 06/59] MINOR: [JS] Bump @typescript-eslint/eslint-plugin from 7.11.0 to 7.12.0 in /js (#41950) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 7.11.0 to 7.12.0.
Release notes

Sourced from @​typescript-eslint/eslint-plugin's releases.

v7.12.0

7.12.0 (2024-06-03)

🚀 Features

  • eslint-plugin: [no-useless-template-literals] rename to no-useless-template-expression (deprecate no-useless-template-literals) (#8821)
  • eslint-plugin: [no-floating-promises] add option 'allowForKnownSafePromises' (#9186)
  • rule-tester: check for parsing errors in suggestion fixes (#9052)
  • rule-tester: port checkDuplicateTestCases from ESLint (#9026)

🩹 Fixes

  • no-useless-template-expression -> no-unnecessary-template-expression (#9174)
  • eslint-plugin: [no-unnecessary-type-assertion] combine template literal check with const variable check (#8820)
  • eslint-plugin: [dot-notation] fix false positive when accessing private/protected property with optional chaining (#8851)
  • eslint-plugin: [explicit-member-accessibility] refine report locations (#8869)
  • eslint-plugin: [no-unnecessary-type-assertion] declares are always defined, so always check declares (#8901)
  • eslint-plugin: [prefer-literal-enum-member] allow using member itself on allowBitwiseExpressions (#9114)
  • eslint-plugin: [return-await] clean up in-try-catch detection and make autofixes safe (#9031)
  • eslint-plugin: [member-ordering] also TSMethodSignature can be get/set (#9193)
  • types: correct typing ParserOptions (#9202)

❤️ Thank You

You can read about our versioning strategy and releases on our website.

Changelog

Sourced from @​typescript-eslint/eslint-plugin's changelog.

7.12.0 (2024-06-03)

🚀 Features

  • eslint-plugin: [no-useless-template-literals] rename to no-useless-template-expression (deprecate no-useless-template-literals)

  • rule-tester: check for parsing errors in suggestion fixes

  • rule-tester: port checkDuplicateTestCases from ESLint

  • eslint-plugin: [no-floating-promises] add option 'allowForKnownSafePromises'

🩹 Fixes

  • no-useless-template-expression -> no-unnecessary-template-expression

  • eslint-plugin: [no-unnecessary-type-assertion] combine template literal check with const variable check

  • eslint-plugin: [dot-notation] fix false positive when accessing private/protected property with optional chaining

  • eslint-plugin: [explicit-member-accessibility] refine report locations

  • eslint-plugin: [no-unnecessary-type-assertion] declares are always defined, so always check declares

  • eslint-plugin: [prefer-literal-enum-member] allow using member itself on allowBitwiseExpressions

  • eslint-plugin: [return-await] clean up in-try-catch detection and make autofixes safe

  • eslint-plugin: [member-ordering] also TSMethodSignature can be get/set

❤️ Thank You

  • Abraham Guo
  • Han Yeong-woo
  • Joshua Chen
  • Kim Sang Du
  • Kirk Waiblinger
  • YeonJuan

You can read about our versioning strategy and releases on our website.

Commits
  • 7e93b28 chore(release): publish 7.12.0
  • d0adcf1 docs: clarify what require-await does (#9200)
  • 04990d5 feat(eslint-plugin): [no-floating-promises] add option 'allowForKnownSafeProm...
  • ad85249 docs: mention related ESLint rules in no-unused-vars page (#9198)
  • e80a8d6 docs: improve description for no-dynamic-delete (#9195)
  • 9f92b30 docs: explicitly mention unbound-method limitation with thisArg (#9197)
  • 08a9448 docs: add example with PascalCase function components (#9196)
  • 5ca7f6e feat(rule-tester): port checkDuplicateTestCases from ESLint (#9026)
  • a9dd526 fix(eslint-plugin): [member-ordering] also TSMethodSignature can be get/set (...
  • 2619c3b fix(eslint-plugin): [return-await] clean up in-try-catch detection and make a...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@typescript-eslint/eslint-plugin&package-manager=npm_and_yarn&previous-version=7.11.0&new-version=7.12.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- js/package.json | 2 +- js/yarn.lock | 77 ++++++++++++++----------------------------------- 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/js/package.json b/js/package.json index e7be3c80d82c9..ecb6d3a366f7e 100644 --- a/js/package.json +++ b/js/package.json @@ -72,7 +72,7 @@ "@types/glob": "8.1.0", "@types/jest": "29.5.12", "@types/multistream": "4.1.3", - "@typescript-eslint/eslint-plugin": "7.11.0", + "@typescript-eslint/eslint-plugin": "7.12.0", "@typescript-eslint/parser": "7.12.0", "async-done": "2.0.0", "benny": "3.7.1", diff --git a/js/yarn.lock b/js/yarn.lock index 3cf3284a9f306..5ab52beaf8f15 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1434,16 +1434,16 @@ dependencies: "@types/yargs-parser" "*" -"@typescript-eslint/eslint-plugin@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.11.0.tgz#f90f0914657ead08e1c75f66939c926edeab42dd" - integrity sha512-P+qEahbgeHW4JQ/87FuItjBj8O3MYv5gELDzr8QaQ7fsll1gSMTYb6j87MYyxwf3DtD7uGFB9ShwgmCJB5KmaQ== +"@typescript-eslint/eslint-plugin@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.12.0.tgz#f87a32e8972b8a60024f2f8f12205e7c8108bc41" + integrity sha512-7F91fcbuDf/d3S8o21+r3ZncGIke/+eWk0EpO21LXhDfLahriZF9CGj4fbAetEjlaBdjdSm9a6VeXbpbT6Z40Q== dependencies: "@eslint-community/regexpp" "^4.10.0" - "@typescript-eslint/scope-manager" "7.11.0" - "@typescript-eslint/type-utils" "7.11.0" - "@typescript-eslint/utils" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/type-utils" "7.12.0" + "@typescript-eslint/utils" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" graphemer "^1.4.0" ignore "^5.3.1" natural-compare "^1.4.0" @@ -1460,14 +1460,6 @@ "@typescript-eslint/visitor-keys" "7.12.0" debug "^4.3.4" -"@typescript-eslint/scope-manager@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.11.0.tgz#cf5619b01de62a226a59add15a02bde457335d1d" - integrity sha512-27tGdVEiutD4POirLZX4YzT180vevUURJl4wJGmm6TrQoiYwuxTIY98PBp6L2oN+JQxzE0URvYlzJaBHIekXAw== - dependencies: - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" - "@typescript-eslint/scope-manager@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.12.0.tgz#259c014362de72dd34f995efe6bd8dda486adf58" @@ -1476,40 +1468,21 @@ "@typescript-eslint/types" "7.12.0" "@typescript-eslint/visitor-keys" "7.12.0" -"@typescript-eslint/type-utils@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.11.0.tgz#ac216697d649084fedf4a910347b9642bd0ff099" - integrity sha512-WmppUEgYy+y1NTseNMJ6mCFxt03/7jTOy08bcg7bxJJdsM4nuhnchyBbE8vryveaJUf62noH7LodPSo5Z0WUCg== +"@typescript-eslint/type-utils@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.12.0.tgz#9dfaaa1972952f395ec5be4f5bbfc4d3cdc63908" + integrity sha512-lib96tyRtMhLxwauDWUp/uW3FMhLA6D0rJ8T7HmH7x23Gk1Gwwu8UZ94NMXBvOELn6flSPiBrCKlehkiXyaqwA== dependencies: - "@typescript-eslint/typescript-estree" "7.11.0" - "@typescript-eslint/utils" "7.11.0" + "@typescript-eslint/typescript-estree" "7.12.0" + "@typescript-eslint/utils" "7.12.0" debug "^4.3.4" 
ts-api-utils "^1.3.0" -"@typescript-eslint/types@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.11.0.tgz#5e9702a5e8b424b7fc690e338d359939257d6722" - integrity sha512-MPEsDRZTyCiXkD4vd3zywDCifi7tatc4K37KqTprCvaXptP7Xlpdw0NR2hRJTetG5TxbWDB79Ys4kLmHliEo/w== - "@typescript-eslint/types@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.12.0.tgz#bf208f971a8da1e7524a5d9ae2b5f15192a37981" integrity sha512-o+0Te6eWp2ppKY3mLCU+YA9pVJxhUJE15FV7kxuD9jgwIAa+w/ycGJBMrYDTpVGUM/tgpa9SeMOugSabWFq7bg== -"@typescript-eslint/typescript-estree@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.11.0.tgz#7cbc569bc7336c3a494ceaf8204fdee5d5dbb7fa" - integrity sha512-cxkhZ2C/iyi3/6U9EPc5y+a6csqHItndvN/CzbNXTNrsC3/ASoYQZEt9uMaEp+xFNjasqQyszp5TumAVKKvJeQ== - dependencies: - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" - debug "^4.3.4" - globby "^11.1.0" - is-glob "^4.0.3" - minimatch "^9.0.4" - semver "^7.6.0" - ts-api-utils "^1.3.0" - "@typescript-eslint/typescript-estree@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.12.0.tgz#e6c1074f248b3db6573ab6a7c47a39c4cd498ff9" @@ -1524,23 +1497,15 @@ semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/utils@7.11.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.11.0.tgz#524f047f2209959424c3ef689b0d83b3bc09919c" - integrity sha512-xlAWwPleNRHwF37AhrZurOxA1wyXowW4PqVXZVUNCLjB48CqdPJoJWkrpH2nij9Q3Lb7rtWindtoXwxjxlKKCA== +"@typescript-eslint/utils@7.12.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.12.0.tgz#c6e58fd7f724cdccc848f71e388ad80cbdb95dd0" + integrity sha512-Y6hhwxwDx41HNpjuYswYp6gDbkiZ8Hin9Bf5aJQn1bpTs3afYY4GX+MPYxma8jtoIV2GRwTM/UJm/2uGCVv+DQ== dependencies: "@eslint-community/eslint-utils" "^4.4.0" - "@typescript-eslint/scope-manager" "7.11.0" - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/typescript-estree" "7.11.0" - -"@typescript-eslint/visitor-keys@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.11.0.tgz#2c50cd292e67645eec05ac0830757071b4a4d597" - integrity sha512-7syYk4MzjxTEk0g/w3iqtgxnFQspDJfn6QKD36xMuuhTzjcxY7F8EmBLnALjVyaOF1/bVocu3bS/2/F7rXrveQ== - dependencies: - "@typescript-eslint/types" "7.11.0" - eslint-visitor-keys "^3.4.3" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/typescript-estree" "7.12.0" "@typescript-eslint/visitor-keys@7.12.0": version "7.12.0" From 93712bfc71a5013231b950b2b655d77b14f83fa7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 6 Jun 2024 17:38:48 +0200 Subject: [PATCH 07/59] GH-41691: [Doc] Remove notion of "logical type" (#41958) In several places in the Arrow specification and documentation we use the term "logical types", but we don't use it consistently and we don't actually have physical types (only physical layouts) to contrast it with. This creates confusion for readers as it is not immediately clear whether all data types are "logical" and if there is a meaningful distinction behind our usage of this term. 
Also address GH-14752 by adding a table of data types with their respective parameters and the corresponding layouts. * GitHub Issue: #41691 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- docs/source/format/Columnar.rst | 160 ++++++++++++++++++++----- docs/source/format/Versioning.rst | 2 +- docs/source/python/data.rst | 14 +-- docs/source/python/extending_types.rst | 2 +- 4 files changed, 138 insertions(+), 40 deletions(-) diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index 7c853de7829be..c4dc772808af4 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -70,21 +70,131 @@ concepts, here is a small glossary to help disambiguate. without taking into account any value semantics. For example, a 32-bit signed integer array and 32-bit floating point array have the same layout. -* **Parent** and **child arrays**: names to express relationships - between physical value arrays in a nested type structure. For - example, a ``List``-type parent array has a T-type array as its - child (see more on lists below). +* **Data type**: An application-facing semantic value type that is + implemented using some physical layout. For example, Decimal128 + values are stored as 16 bytes in a fixed-size binary + layout. A timestamp may be stored as 64-bit fixed-size layout. * **Primitive type**: a data type having no child types. This includes such types as fixed bit-width, variable-size binary, and null types. * **Nested type**: a data type whose full structure depends on one or more other child types. Two fully-specified nested types are equal if and only if their child types are equal. For example, ``List`` is distinct from ``List`` iff U and V are different types. -* **Logical type**: An application-facing semantic value type that is - implemented using some physical layout. For example, Decimal - values are stored as 16 bytes in a fixed-size binary - layout. Similarly, strings can be stored as ``List<1-byte>``. A - timestamp may be stored as 64-bit fixed-size layout. +* **Parent** and **child arrays**: names to express relationships + between physical value arrays in a nested type structure. For + example, a ``List``-type parent array has a T-type array as its + child (see more on lists below). +* **Parametric type**: a type which requires additional parameters + for full determination of its semantics. For example, all nested types + are parametric by construction. A timestamp is also parametric as it needs + a unit (such as microseconds) and a timezone. + +Data Types +========== + +The file `Schema.fbs`_ defines built-in data types supported by the +Arrow columnar format. Each data type uses a well-defined physical layout. + +`Schema.fbs`_ is the authoritative source for the description of the +standard Arrow data types. 
However, we also provide the below table for +convenience: + ++--------------------+------------------------------+------------------------------------------------------------+ +| Type | Type Parameters *(1)* | Physical Memory Layout | ++====================+==============================+============================================================+ +| Null | | Null | ++--------------------+------------------------------+------------------------------------------------------------+ +| Boolean | | Fixed-size Primitive | ++--------------------+------------------------------+------------------------------------------------------------+ +| Int | * bit width | *" (same as above)* | +| | * signedness | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Floating Point | * precision | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Decimal | * bit width | *"* | +| | * scale | | +| | * precision | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Date | * unit | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Time | * bit width *(2)* | *"* | +| | * unit | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Timestamp | * unit | *"* | +| | * timezone | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Interval | * unit | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Duration | * unit | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Fixed-Size Binary | * byte width | Fixed-size Binary | ++--------------------+------------------------------+------------------------------------------------------------+ +| Binary | | Variable-size Binary with 32-bit offsets | ++--------------------+------------------------------+------------------------------------------------------------+ +| Utf8 | | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large Binary | | Variable-size Binary with 64-bit offsets | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large Utf8 | | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Binary View | | Variable-size Binary View | ++--------------------+------------------------------+------------------------------------------------------------+ +| Utf8 View | | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Fixed-Size List | * *value type* | Fixed-size List | +| | * list size | | ++--------------------+------------------------------+------------------------------------------------------------+ +| List | * *value type* | Variable-size List with 32-bit offsets | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large List | * *value type* | Variable-size List with 64-bit offsets | 
++--------------------+------------------------------+------------------------------------------------------------+ +| List View | * *value type* | Variable-size List View with 32-bit offsets and sizes | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large List View | * *value type* | Variable-size List View with 64-bit offsets and sizes | ++--------------------+------------------------------+------------------------------------------------------------+ +| Struct | * *children* | Struct | ++--------------------+------------------------------+------------------------------------------------------------+ +| Map | * *children* | Variable-size List of Structs | +| | * keys sortedness | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Union | * *children* | Dense or Sparse Union *(3)* | +| | * mode | | +| | * type ids | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Dictionary | * *index type* *(4)* | Dictionary Encoded | +| | * *value type* | | +| | * orderedness | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Run-End Encoded | * *run end type* *(5)* | Run-End Encoded | +| | * *value type* | | ++--------------------+------------------------------+------------------------------------------------------------+ + +* \(1) Type parameters listed in *italics* denote a data type's child types. + +* \(2) The *bit width* parameter of a Time type is technically redundant as + each *unit* mandates a single bit width. + +* \(3) Whether a Union type uses the Sparse or Dense layout is denoted by its + *mode* parameter. + +* \(4) The *index type* of a Dictionary type can only be an integer type, + preferably signed, with width 8 to 64 bits. + +* \(5) The *run end type* of a Run-End Encoded type can only be a signed integer type + with width 16 to 64 bits. + +.. note:: + Sometimes the term "logical type" is used to denote the Arrow data types + and distinguish them from their respective physical layouts. However, + unlike other type systems such as `Apache Parquet `__'s, + the Arrow type system doesn't have separate notions of physical types and + logical types. + + The Arrow type system separately provides + :ref:`extension types `, which allow + annotating standard Arrow data types with richer application-facing semantics + (for example defining a "JSON" type laid upon the standard String data type). + .. _format_layout: @@ -93,7 +203,7 @@ Physical Memory Layout Arrays are defined by a few pieces of metadata and data: -* A logical data type. +* A data type. * A sequence of buffers. * A length as a 64-bit signed integer. Implementations are permitted to be limited to 32-bit lengths, see more on this below. @@ -103,8 +213,8 @@ Arrays are defined by a few pieces of metadata and data: Nested arrays additionally have a sequence of one or more sets of these items, called the **child arrays**. -Each logical data type has a well-defined physical layout. Here are -the different physical layouts defined by Arrow: +Each data type has a well-defined physical layout. 
Here are the different +physical layouts defined by Arrow: * **Primitive (fixed-size)**: a sequence of values each having the same byte or bit width @@ -138,7 +248,7 @@ the different physical layouts defined by Arrow: * **Run-End Encoded (REE)**: a nested layout consisting of two child arrays, one representing values, and one representing the logical index where the run of a corresponding value ends. -* **Null**: a sequence of all null values, having null logical type +* **Null**: a sequence of all null values. The Arrow columnar memory layout only applies to *data* and not *metadata*. Implementations are free to represent metadata in-memory @@ -313,7 +423,7 @@ arrays have a single values buffer, variable-size binary have an **offsets** buffer and **data** buffer. The offsets buffer contains ``length + 1`` signed integers (either -32-bit or 64-bit, depending on the logical type), which encode the +32-bit or 64-bit, depending on the data type), which encode the start position of each slot in the data buffer. The length of the value in each slot is computed using the difference between the offset at that slot's index and the subsequent offset. For example, the @@ -1070,17 +1180,6 @@ of memory buffers for each layout. "Dictionary-encoded",validity,data (indices),, "Run-end encoded",,,, -Logical Types -============= - -The `Schema.fbs`_ defines built-in logical types supported by the -Arrow columnar format. Each logical type uses one of the above -physical layouts. Nested logical types may have different physical -layouts depending on the particular realization of the type. - -We do not go into detail about the logical types definitions in this -document as we consider `Schema.fbs`_ to be authoritative. - .. _format-ipc: Serialization and Interprocess Communication (IPC) @@ -1160,17 +1259,16 @@ Schema message -------------- The Flatbuffers files `Schema.fbs`_ contains the definitions for all -built-in logical data types and the ``Schema`` metadata type which -represents the schema of a given record batch. A schema consists of -an ordered sequence of fields, each having a name and type. A -serialized ``Schema`` does not contain any data buffers, only type -metadata. +built-in data types and the ``Schema`` metadata type which represents +the schema of a given record batch. A schema consists of an ordered +sequence of fields, each having a name and type. A serialized ``Schema`` +does not contain any data buffers, only type metadata. The ``Field`` Flatbuffers type contains the metadata for a single array. This includes: * The field's name -* The field's logical type +* The field's data type * Whether the field is semantically nullable. While this has no bearing on the array's physical layout, many systems distinguish nullable and non-nullable fields and we want to allow them to diff --git a/docs/source/format/Versioning.rst b/docs/source/format/Versioning.rst index 7ba01107074d0..8fcf11b21f0cc 100644 --- a/docs/source/format/Versioning.rst +++ b/docs/source/format/Versioning.rst @@ -51,7 +51,7 @@ data. An increase in the **minor** version of the format version, such as 1.0.0 to 1.1.0, indicates that 1.1.0 contains new features not available in 1.0.0. So long as these features are not used (such as a -new logical data type), forward compatibility is preserved. +new data type), forward compatibility is preserved. 
Long-Term Stability =================== diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 598c8c125fb83..4a0f2af6d4868 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -26,8 +26,8 @@ with memory buffers, like the ones explained in the documentation on :ref:`Memory and IO `. These data structures are exposed in Python through a series of interrelated classes: -* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe a logical - array type +* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe the + type of an array and govern how its values are interpreted * **Schemas**: Instances of ``pyarrow.Schema``, which describe a named collection of types. These can be thought of as the column types in a table-like object. @@ -55,8 +55,8 @@ array data. These include: * **Nested types**: list, map, struct, and union * **Dictionary type**: An encoded categorical type (more on this later) -Each logical data type in Arrow has a corresponding factory function for -creating an instance of that type object in Python: +Each data type in Arrow has a corresponding factory function for creating +an instance of that type object in Python: .. ipython:: python @@ -72,9 +72,9 @@ creating an instance of that type object in Python: print(t4) print(t5) -We use the name **logical type** because the **physical** storage may be the -same for one or more types. For example, ``int64``, ``float64``, and -``timestamp[ms]`` all occupy 64 bits per value. +.. note:: + Different data types might use a given physical storage. For example, + ``int64``, ``float64``, and ``timestamp[ms]`` all occupy 64 bits per value. These objects are ``metadata``; they are used for describing the data in arrays, schemas, and record batches. In Python, they can be used in functions where the diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index 83fce84f47c08..d746505348157 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -118,7 +118,7 @@ Defining extension types ("user-defined types") Arrow has the notion of extension types in the metadata specification as a possibility to extend the built-in types. This is done by annotating any of the -built-in Arrow logical types (the "storage type") with a custom type name and +built-in Arrow data types (the "storage type") with a custom type name and optional serialized representation ("ARROW:extension:name" and "ARROW:extension:metadata" keys in the Field’s custom_metadata of an IPC message). From 164be4882176c5f84eb7cde52b98d69a72fe7ea8 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 6 Jun 2024 14:24:47 -0300 Subject: [PATCH 08/59] GH-41994 [C++]: kernel.cc: Remove defaults on switch so that compiler can check full enum coverage for us (#41995) ### Rationale for this change To let the compiler warn us about missing cases and make the non-handled cases more obvious. ### What changes are included in this PR? Removal of `default` in the switches and improving some dchecks with a message. ### Are these changes tested? By existing tests. 
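For readers unfamiliar with the idiom, the following sketch (a hypothetical enum and function, not code from this patch) illustrates why dropping `default:` helps: when every enumerator is listed explicitly and there is no `default:` label, GCC and Clang with `-Wswitch` (and MSVC with its comparable opt-in warnings) can flag any enumerator that is later added to the enum but not handled in the switch, while a return placed after the switch keeps all control paths well-defined.

```cpp
// Minimal sketch of the exhaustive-switch idiom; Kind and ToString are
// made-up names used only for illustration.
#include <iostream>

enum class Kind { kFixed, kComputed };

const char* ToString(Kind kind) {
  switch (kind) {  // no default: a newly added Kind enumerator triggers -Wswitch here
    case Kind::kFixed:
      return "fixed";
    case Kind::kComputed:
      break;  // fall through to the shared return below
  }
  return "computed";
}

int main() {
  std::cout << ToString(Kind::kFixed) << " / " << ToString(Kind::kComputed) << "\n";
  return 0;
}
```

Where a value could legitimately fall outside the handled cases (as in `InputType::Matches` in the diff below), the patch adds an explicit case group with a `DCHECK` message instead of a `default:`, so the compiler's exhaustiveness check stays intact.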
* GitHub Issue: #41994 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/kernel.cc | 42 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index 9cc5cc10917ee..5c87ef2cd0561 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -361,7 +361,8 @@ size_t InputType::Hash() const { case InputType::EXACT_TYPE: hash_combine(result, type_->Hash()); break; - default: + case InputType::ANY_TYPE: + case InputType::USE_TYPE_MATCHER: break; } return result; @@ -378,10 +379,8 @@ std::string InputType::ToString() const { break; case InputType::USE_TYPE_MATCHER: { ss << type_matcher_->ToString(); - } break; - default: - DCHECK(false); break; + } } return ss.str(); } @@ -400,9 +399,8 @@ bool InputType::Equals(const InputType& other) const { return type_->Equals(*other.type_); case InputType::USE_TYPE_MATCHER: return type_matcher_->Equals(*other.type_matcher_); - default: - return false; } + return false; } bool InputType::Matches(const DataType& type) const { @@ -411,21 +409,23 @@ bool InputType::Matches(const DataType& type) const { return type_->Equals(type); case InputType::USE_TYPE_MATCHER: return type_matcher_->Matches(type); - default: - // ANY_TYPE + case InputType::ANY_TYPE: return true; } + return false; } bool InputType::Matches(const Datum& value) const { switch (value.kind()) { + case Datum::NONE: + case Datum::RECORD_BATCH: + case Datum::TABLE: + DCHECK(false) << "Matches expects ARRAY, CHUNKED_ARRAY or SCALAR"; + return false; case Datum::ARRAY: case Datum::CHUNKED_ARRAY: case Datum::SCALAR: break; - default: - DCHECK(false); - return false; } return Matches(*value.type()); } @@ -445,11 +445,13 @@ const TypeMatcher& InputType::type_matcher() const { Result OutputType::Resolve(KernelContext* ctx, const std::vector& types) const { - if (kind_ == OutputType::FIXED) { - return type_.get(); - } else { - return resolver_(ctx, types); + switch (kind_) { + case OutputType::FIXED: + return type_; + case OutputType::COMPUTED: + break; } + return resolver_(ctx, types); } const std::shared_ptr& OutputType::type() const { @@ -463,11 +465,13 @@ const OutputType::Resolver& OutputType::resolver() const { } std::string OutputType::ToString() const { - if (kind_ == OutputType::FIXED) { - return type_->ToString(); - } else { - return "computed"; + switch (kind_) { + case OutputType::FIXED: + return type_->ToString(); + case OutputType::COMPUTED: + break; } + return "computed"; } // ---------------------------------------------------------------------- From 41ae29ebd98e66d1502d6a830f88d6da056c670e Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Fri, 7 Jun 2024 10:15:50 +0900 Subject: [PATCH 09/59] GH-42005: [Java][Integration][CI] Fix ARROW_BUILD_ROOT Path to find pom.xml (#42008) ### Rationale for this change This PR aims to fix the issue where the integration tests are failing due to the missing `/java/pom.xml` file. It appears that the current code incorrectly determines the path to `ARROW_BUILD_ROOT`, leading to the failure in locating the `pom.xml` file. ### What changes are included in this PR? - Updating the `ARROW_BUILD_ROOT` path determination logic in `tester_java.py` to correctly reference the project root. ### Are these changes tested? Maybe, Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #42005 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- dev/archery/archery/integration/tester_java.py | 2 +- dev/archery/archery/integration/tester_js.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index ccc807410a848..9b14c6939cde8 100644 --- a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -28,7 +28,7 @@ ARROW_BUILD_ROOT = os.environ.get( 'ARROW_BUILD_ROOT', - Path(__file__).resolve().parents[5] + Path(__file__).resolve().parents[4] ) diff --git a/dev/archery/archery/integration/tester_js.py b/dev/archery/archery/integration/tester_js.py index 3d1a229931cde..dcf56f9a5ab6b 100644 --- a/dev/archery/archery/integration/tester_js.py +++ b/dev/archery/archery/integration/tester_js.py @@ -24,7 +24,7 @@ ARROW_BUILD_ROOT = os.environ.get( 'ARROW_BUILD_ROOT', - Path(__file__).resolve().parents[5] + Path(__file__).resolve().parents[4] ) ARROW_JS_ROOT = os.path.join(ARROW_BUILD_ROOT, 'js') _EXE_PATH = os.path.join(ARROW_JS_ROOT, 'bin') From a708fabfe6f90a890978d8f026c70cdf18caf251 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 6 Jun 2024 19:25:36 -0700 Subject: [PATCH 10/59] MINOR: [C++] Include `` before using `std::string` (#42004) ### Rationale for this change I work on MSVC's STL, and we regularly build popular open-source projects, including yours, with development builds of the MSVC toolset. This allows us to find and fix toolset regressions before they affect users, and also allows us to provide advance notice of breaking changes, which is the case here. We recently merged https://github.com/microsoft/STL/pull/4633 which will ship in VS 2022 17.11 Preview 3. This improved build throughput by refactoring `` so that it no longer drags in `std::string`. It's also a source-breaking change for code that wasn't properly including ``. Your `cpp/src/arrow/json/object_writer.h` declares `std::string Serialize();` without including ``. When built with our updated STL, this will emit a compiler error: ``` C:\gitP\apache\arrow\cpp\src\arrow/json/object_writer.h(39): error C2039: 'string': is not a member of 'std' ``` ### What changes are included in this PR? The fix is simple and portable: include the necessary header. ### Are these changes tested? Nope, I'm totally YOLOing it. If it builds, it's good. (This will be tested in MSVC's internal "Real World Code" test infrastructure. Also, after VS 2022 17.11 ships, your existing build/test coverage will ensure that this keeps compiling.) ### Are there any user-facing changes? No. Authored-by: Stephan T. Lavavej Signed-off-by: Sutou Kouhei --- cpp/src/arrow/json/object_writer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/json/object_writer.h b/cpp/src/arrow/json/object_writer.h index b15b09dbdacfc..cf1ce62194fb8 100644 --- a/cpp/src/arrow/json/object_writer.h +++ b/cpp/src/arrow/json/object_writer.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include "arrow/util/visibility.h" From 290e606c4dd937cd34dbccd6f6801ff1ac1d8b9b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 7 Jun 2024 11:34:22 +0900 Subject: [PATCH 11/59] GH-41652: [C++][CMake][Windows] Don't build needless object libraries (#41658) ### Rationale for this change * We don't need an object library for a shared library with `ARROW_BUILD_SHARED=OFF`. * We don't need an object library for a static library with `ARROW_BUILD_STATIC=OFF`. 
### What changes are included in this PR? Don't build needless object libraries based on `ARROW_BUILD_SHARED`/`ARROW_BUILD_STATIC`. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #41652 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/CMakeLists.txt | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 150a304975cad..5bcd4625b3b67 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -200,22 +200,29 @@ function(arrow_add_object_library PREFIX) set(SOURCES ${ARGN}) string(TOLOWER "${PREFIX}" prefix) if(WIN32) - add_library(${prefix}_shared OBJECT ${SOURCES}) - add_library(${prefix}_static OBJECT ${SOURCES}) - set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) - set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) - target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) - target_compile_features(${prefix}_shared PRIVATE cxx_std_17) - target_compile_features(${prefix}_static PRIVATE cxx_std_17) - set(${PREFIX}_TARGET_SHARED - ${prefix}_shared - PARENT_SCOPE) - set(${PREFIX}_TARGET_STATIC - ${prefix}_static - PARENT_SCOPE) + set(targets) + if(ARROW_BUILD_SHARED) + add_library(${prefix}_shared OBJECT ${SOURCES}) + set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) + target_compile_features(${prefix}_shared PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_SHARED + ${prefix}_shared + PARENT_SCOPE) + list(APPEND targets ${prefix}_shared) + endif() + if(ARROW_BUILD_STATIC) + add_library(${prefix}_static OBJECT ${SOURCES}) + set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) + target_compile_features(${prefix}_static PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_STATIC + ${prefix}_static + PARENT_SCOPE) + list(APPEND targets ${prefix}_static) + endif() set(${PREFIX}_TARGETS - ${prefix}_shared ${prefix}_static + ${targets} PARENT_SCOPE) else() add_library(${prefix} OBJECT ${SOURCES}) From 01d2fa0d461869a07b2ffeee517beb8116bd0ce2 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 6 Jun 2024 23:19:06 -0700 Subject: [PATCH 12/59] GH-41307: [Java] Use org.apache:apache parent pom version 31 (#41772) Use/update Maven modules to `org.apache:apache:31` and clean up Maven modules to remove unnecessary configuration or outdated workarounds * Add `org.apache:apache:31` to `org.apache.arrow:arrow-bom` and `org.apache.arrow.maven.plugins:arrow-maven-plugins` to make them conformant with ASF standards * Update `org.apache.arrow:arrow-java-root` parent to `org.apache:parent:31` * Use `version.*` and other properties to override plugin versions defined by `org.apache:parent` * Move standalone plugin versions under pluginManagement at the top level * Cleanup redundant plugin version or configuration declaration * Update `maven-dependency-plugin` to 3.6.1 and add the required overrides when necessary * Update `maven-shade-plugin` to 3.5.1 (via `org.apache:parent`) - disable reduced dependency pom creation for non-terminal modules * Remove enforcer check for java and maven version (handled by `org.apache:parent`) * Remove unnecessary `mvnrepository` link comments * Remove 
`m2e.version` property check in profiles (only needed for errorprone plugin configuration which is incompatible with M2E) * Cleanup `argLine` overrides for surefire/failsafe plugins * Remove unnecessary `../pom.xml` `` directives * Remove source/target/encoding configuration properties for `maven-compiler-plugin`, `maven-javadoc-plugin` and `maven-resources-plugin` as it is handled by `org.apache:parent` and plugin themselves * Remove unnecessary copy of codegen templates in `arrow-vector` module * Remove unnecessary junit jupiter engine dependencies for surefire/failsafe plugins. * GitHub Issue: #41307 Lead-authored-by: Laurent Goujon Co-authored-by: Laurent Goujon Signed-off-by: David Li --- ci/scripts/java_full_build.sh | 10 +- dev/tasks/tasks.yml | 2 - java/adapter/avro/pom.xml | 9 - java/adapter/jdbc/pom.xml | 7 - java/adapter/orc/pom.xml | 17 ++ java/bom/pom.xml | 44 +++- java/c/pom.xml | 1 - java/flight/flight-core/pom.xml | 29 +-- java/flight/flight-integration-tests/pom.xml | 2 - java/flight/flight-sql-jdbc-core/pom.xml | 10 - java/flight/flight-sql-jdbc-driver/pom.xml | 1 - java/flight/flight-sql/pom.xml | 5 - java/format/pom.xml | 2 - java/gandiva/pom.xml | 19 +- .../module-info-compiler-maven-plugin/pom.xml | 28 +-- java/maven/pom.xml | 120 +++++----- java/memory/memory-core/pom.xml | 22 +- java/performance/pom.xml | 41 +--- java/pom.xml | 207 +++++++----------- java/tools/pom.xml | 22 +- java/vector/pom.xml | 94 +------- 21 files changed, 226 insertions(+), 466 deletions(-) diff --git a/ci/scripts/java_full_build.sh b/ci/scripts/java_full_build.sh index 2734f3e9dbec2..d914aa2d8472e 100755 --- a/ci/scripts/java_full_build.sh +++ b/ci/scripts/java_full_build.sh @@ -49,21 +49,13 @@ fi # build the entire project mvn clean \ install \ - assembly:single \ - source:jar \ - javadoc:jar \ -Papache-release \ -Parrow-c-data \ -Parrow-jni \ -Darrow.cpp.build.dir=$dist_dir \ - -Darrow.c.jni.dist.dir=$dist_dir \ - -DdescriptorId=source-release + -Darrow.c.jni.dist.dir=$dist_dir # copy all jar, zip and pom files to the distribution folder -find . 
\ - "(" -name "*-javadoc.jar" -o -name "*-sources.jar" ")" \ - -exec echo {} ";" \ - -exec cp {} $dist_dir ";" find ~/.m2/repository/org/apache/arrow \ "(" \ -name "*.jar" -o \ diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index d8e09ec2070bb..2d84751d0f363 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -747,7 +747,6 @@ tasks: - arrow-jdbc-{no_rc_snapshot_version}.pom - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.json - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-maven-plugins-{no_rc_snapshot_version}-src.zip - arrow-maven-plugins-{no_rc_snapshot_version}.pom - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml @@ -851,7 +850,6 @@ tasks: - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.xml - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-javadoc.jar - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-sources.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-src.zip - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.jar - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.pom diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 0046fcac62a22..f9bf29596796f 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -25,36 +25,27 @@ http://maven.apache.org - - org.apache.arrow arrow-memory-core - - org.apache.arrow arrow-memory-netty runtime - - org.apache.arrow arrow-vector - org.immutables value-annotations - org.apache.avro avro ${dep.avro.version} - diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 17681538ac97e..2f2911dd9da95 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -26,20 +26,17 @@ - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -51,7 +48,6 @@ value-annotations - com.h2database h2 @@ -94,9 +90,6 @@ jdk11+ [11,] - - !m2e.version - diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index ca817510bf3e3..bc89c4698eecf 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -134,5 +134,22 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + + + + org.apache.arrow:arrow-format + + + + + + diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 12b9950ad80fc..77aed2d0f6a37 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -27,6 +27,19 @@ + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + 3.2.2 + 3.6.3 + 3.5.0 @@ -138,11 +151,9 @@ ${project.version} - - @@ -156,12 +167,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -188,13 +197,34 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 + + + + apache-release + + + + org.apache.maven.plugins + maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + diff --git a/java/c/pom.xml b/java/c/pom.xml index bfb233315a839..afb6e0cd8b890 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -83,5 +83,4 @@ - diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index b565572b383ab..f2070d4ff7cba 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - 
../pom.xml flight-core @@ -151,13 +150,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 shade-main @@ -166,6 +158,7 @@ package + false true shaded @@ -192,6 +185,7 @@ package + false true shaded-ext @@ -244,7 +238,6 @@ org.apache.maven.plugins maven-dependency-plugin - 3.3.0 analyze @@ -264,7 +257,6 @@ org.codehaus.mojo build-helper-maven-plugin - 1.9.1 add-generated-sources-to-classpath @@ -282,7 +274,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -299,13 +290,6 @@ - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -313,18 +297,14 @@ jdk11+ [11,] - - !m2e.version - org.apache.maven.plugins maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - false + + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED ${project.basedir}/../../../testing/data @@ -334,5 +314,4 @@ - diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 74016d81e91e5..cd2c28ba8959f 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-integration-tests @@ -63,7 +62,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 459412e0f8d8b..50d7b2617a5a9 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-core @@ -47,20 +46,17 @@
- org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -136,11 +132,6 @@ - - - src/main/resources - - maven-surefire-plugin @@ -154,7 +145,6 @@ org.codehaus.mojo properties-maven-plugin - 1.2.1 write-project-properties-to-file diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index b3afbe1defdba..4456270e7b347 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index e6d703c673ad5..14fde34c3b4f3 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql @@ -119,9 +118,6 @@ jdk11+ [11,] - - !m2e.version - @@ -136,5 +132,4 @@ - diff --git a/java/format/pom.xml b/java/format/pom.xml index e9eded79de660..4483047e20960 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -31,7 +31,6 @@ - @@ -42,6 +41,5 @@ - diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index a87f26028ba86..1c17023e5c8ad 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,13 +22,12 @@ jar Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. + - 1.8 - 1.8 - 3.25.1 true ../../../cpp/release-build + org.apache.arrow @@ -51,7 +50,6 @@ com.google.protobuf protobuf-java - ${protobuf.version} com.google.guava @@ -62,6 +60,7 @@ slf4j-api + @@ -88,14 +87,6 @@ - - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -105,7 +96,6 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 attach-sources @@ -118,7 +108,6 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 attach-javadocs @@ -131,7 +120,6 @@ org.apache.maven.plugins maven-gpg-plugin - 3.2.4 sign-artifacts @@ -146,5 +134,4 @@ - diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 57ba7933ea1c6..9f0cd7b1039dd 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -64,39 +64,14 @@ org.apache.maven.plugin-tools maven-plugin-annotations - 3.11.0 + ${maven.plugin.tools.version} provided - - - maven-clean-plugin - 3.3.2 - - - maven-plugin-plugin - 3.12.0 - - - maven-jar-plugin - 3.3.0 - - - maven-install-plugin - 3.1.2 - - - maven-deploy-plugin - 3.1.1 - - - maven-invoker-plugin - 3.1.0 - com.gradle develocity-maven-extension @@ -118,7 +93,6 @@ org.apache.maven.plugins maven-plugin-plugin - 3.12.0 true diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 470e198caebc1..72140dd6570d0 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -15,6 +15,13 @@ Note: Do not inherit from the Arrow parent POM as plugins can be referenced during the parent POM, introducing circular dependencies. 
--> + + org.apache + apache + 31 + + + org.apache.arrow.maven.plugins arrow-maven-plugins 17.0.0-SNAPSHOT @@ -27,25 +34,38 @@ true + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + 3.2.2 + 3.6.3 + 3.5.0 - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 - org.apache.maven.plugins - maven-site-plugin - 3.12.1 + pl.project13.maven + git-commit-id-plugin + 4.0.5 - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 @@ -119,11 +139,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -143,43 +158,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - UTF-8 - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -205,8 +194,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -248,7 +235,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 ../dev/checkstyle/checkstyle.xml ../dev/checkstyle/checkstyle.license @@ -288,7 +274,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -298,28 +283,6 @@ - - - org.apache.maven.plugins - maven-assembly-plugin - - - src - - - - - - single - - package - - - - org.apache.maven.plugins maven-project-info-reports-plugin @@ -353,13 +316,34 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 + + + + apache-release + + + + org.apache.maven.plugins + maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 1e29ccf8ab9db..783a13a6fb0ad 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -61,9 +61,6 @@ jdk11+ [11,] - - !m2e.version - @@ -92,7 +89,6 @@ org.apache.maven.plugins maven-surefire-plugin - opens-tests @@ -101,12 +97,9 @@ test - - -Dfoo=bar - - - **/TestArrowBuf.java - + + + **/TestOpens.java @@ -129,9 +122,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -Xmaxerrs @@ -150,12 +140,6 @@ ${checker.framework.version} - - - org.immutables.value.internal.$processor$.$Processor - - org.checkerframework.checker.nullness.NullnessChecker - diff --git a/java/performance/pom.xml b/java/performance/pom.xml index f01e8d9a4e0e4..07ca8d1e61d48 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,9 +22,7 @@ JMH Performance benchmarks for other Arrow libraries. 
- UTF-8 1.37 - 1.8 benchmarks true .* @@ -83,42 +81,6 @@ - - - - maven-clean-plugin - 3.3.2 - - - maven-deploy-plugin - 3.1.1 - - - maven-install-plugin - 3.1.2 - - - maven-jar-plugin - 3.3.0 - - - maven-javadoc-plugin - 3.6.3 - - - maven-resources-plugin - 3.3.1 - - - maven-source-plugin - 2.2.1 - - - maven-surefire-plugin - 3.2.5 - - - org.apache.maven.plugins @@ -144,6 +106,7 @@ package ${uberjar.name} + false org.openjdk.jmh.Main @@ -166,7 +129,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 ${skip.perf.benchmarks} test @@ -203,5 +165,4 @@ - diff --git a/java/pom.xml b/java/pom.xml index 0e9b7f0e25a34..9624444cf422d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -85,7 +85,7 @@ 33.2.1-jre 4.1.108.Final 1.63.0 - 3.23.1 + 3.25.1 2.17.0 3.4.0 23.5.26 @@ -95,10 +95,28 @@ true 9+181-r4173-1 2.28.0 - 3.12.1 5.11.0 5.2.0 3.43.0 + none + -Xdoclint:none + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + + 3.2.2 + 3.6.3 + 3.5.0 @@ -269,40 +287,16 @@ 8.3.0 test - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - 3.1.2 - - - org.apache.rat - apache-rat-plugin - 0.16.1 - - - org.apache.maven.plugins - maven-resources-plugin - 3.3.1 - org.apache.maven.plugins maven-compiler-plugin - ${maven-compiler-plugin.version} + true **/module-info.java **/module-info.java false @@ -315,18 +309,8 @@ - - maven-enforcer-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.1 - maven-surefire-plugin - 3.2.5 true true @@ -341,22 +325,9 @@ 1048576 - - - org.junit.jupiter - junit-jupiter-engine - ${dep.junit.jupiter.version} - - - org.apache.maven.surefire - surefire-junit-platform - 3.2.5 - - maven-failsafe-plugin - 3.2.5 ${project.build.directory} @@ -445,6 +416,22 @@ + + + org.apache.drill.tools + drill-fmpp-maven-plugin + [1.0,) + + generate + + + + + false + true + + + @@ -452,9 +439,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 - 8 **/module-info.java @@ -465,16 +450,6 @@ module-info-compiler-maven-plugin ${project.version} - - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 - - - org.apache.maven.plugins - maven-site-plugin - 3.12.1 - com.gradle develocity-maven-extension @@ -522,6 +497,36 @@ spotless-maven-plugin 2.30.0 + + org.codehaus.mojo + build-helper-maven-plugin + 1.9.1 + + + org.codehaus.mojo + properties-maven-plugin + 1.2.1 + + + org.codehaus.mojo + exec-maven-plugin + 3.2.0 + + + pl.project13.maven + git-commit-id-plugin + 4.0.5 + + + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 + + + org.apache.drill.tools + drill-fmpp-maven-plugin + 1.21.1 + @@ -595,11 +600,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -619,42 +619,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -683,8 +658,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -726,7 +699,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 **/module-info.java dev/checkstyle/checkstyle.xml @@ -789,7 +761,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -820,12 +791,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -860,7 +829,6 @@ org.apache.maven.plugins 
maven-javadoc-plugin - 3.6.3 **/module-info.java @@ -888,28 +856,15 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 - - java-nodoclint - - [1.8,) - - - none - -Xdoclint:none - - - arrow-c-data @@ -960,7 +915,6 @@ org.apache.maven.plugins maven-compiler-plugin - true -XDcompilePolicy=simple -Xplugin:ErrorProne @@ -1000,9 +954,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -XDcompilePolicy=simple -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-source|format/src/main/java/org/apache/arrow/flatbuf)/.* @@ -1026,6 +977,16 @@ + + + + + jdk11+ + + [11,] + + + org.apache.maven.plugins maven-surefire-plugin @@ -1033,6 +994,13 @@ --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + org.apache.maven.plugins + maven-failsafe-plugin + + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + @@ -1073,7 +1041,6 @@ org.jacoco jacoco-maven-plugin - 0.8.11 @@ -1119,7 +1086,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 cdata-cmake @@ -1176,7 +1142,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1283,7 +1248,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1373,5 +1337,4 @@ - diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 5d9db75e525bd..53dcd51771054 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -54,6 +54,11 @@ 1.3.14 test + com.fasterxml.jackson.core jackson-core @@ -85,7 +90,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -101,7 +105,21 @@ + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + verify + + + com.fasterxml.jackson.core:* + + + + + - diff --git a/java/vector/pom.xml b/java/vector/pom.xml index c39504df2b207..6ff869ee21aff 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -76,64 +76,7 @@ - - - - true - - - false - - apache - apache - https://repo.maven.apache.org/maven2/ - - - - - - - codegen - - ${basedir}/src/main/codegen - - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - - - - - - - - org.apache.maven.plugins @@ -163,33 +106,10 @@ - - maven-resources-plugin - - - - copy-fmpp-resources - - copy-resources - - initialize - - ${project.build.directory}/codegen - - - src/main/codegen - false - - - - - - org.apache.drill.tools drill-fmpp-maven-plugin - 1.21.1 generate-fmpp @@ -200,7 +120,7 @@ src/main/codegen/config.fmpp ${project.build.directory}/generated-sources/fmpp - ${project.build.directory}/codegen/templates + src/main/codegen/templates @@ -208,13 +128,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 @@ -228,10 +141,9 @@ com.google.flatbuffers:* + false true shade-format-flatbuffers - true - true com.google.flatbuffers @@ -243,7 +155,6 @@ - @@ -276,5 +187,4 @@ - From b51e997df7dcde843befffed2d63d6a8e741beef Mon Sep 17 00:00:00 2001 From: Haocheng Liu <30446009+HaochengLIU@users.noreply.github.com> Date: Fri, 7 Jun 2024 04:03:15 -0400 Subject: [PATCH 13/59] GH-41960: Expose new S3 option check_directory_existence_before_creation (#41972) ### Rationale for this change Expose new S3 option `check_directory_existence_before_creation` from GH-41493 ### What changes are included in this PR? Expose new S3 option `check_directory_existence_before_creation` from GH-41493 ### Are these changes tested? yes ### Are there any user-facing changes? Yes. Python function documentation is updated. 
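For reference, a minimal usage sketch of the option this PR exposes (the region, bucket and path below are placeholder values, not taken from the patch):

```python
# Hedged sketch: constructing an S3FileSystem with the new option enabled.
# Region, bucket and path are placeholders for illustration only.
from pyarrow.fs import S3FileSystem

fs = S3FileSystem(
    region="us-east-1",
    allow_bucket_creation=True,
    # Only create a directory when it does not already exist, at the cost of
    # an extra existence check per create_dir() call. Useful for object stores
    # with strict rate limits on object-mutation operations.
    check_directory_existence_before_creation=True,
)
fs.create_dir("example-bucket/nested/path")
```

As the added docstring notes, enabling the check trades extra I/O calls for fewer object-mutation operations.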
* GitHub Issue: #41960 Lead-authored-by: Haocheng Liu Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/_s3fs.pyx | 20 ++++++++++++++++---- python/pyarrow/includes/libarrow_fs.pxd | 1 + python/pyarrow/tests/test_fs.py | 5 +++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx index f5bab99a49f7a..ba6603322838d 100644 --- a/python/pyarrow/_s3fs.pyx +++ b/python/pyarrow/_s3fs.pyx @@ -185,7 +185,7 @@ cdef class S3FileSystem(FileSystem): session_token : str, default None AWS Session Token. An optional session token, required if access_key and secret_key are temporary credentials from STS. - anonymous : boolean, default False + anonymous : bool, default False Whether to connect anonymously if access_key and secret_key are None. If true, will not attempt to look up credentials using standard AWS configuration methods. @@ -217,7 +217,7 @@ cdef class S3FileSystem(FileSystem): S3 connection transport scheme. endpoint_override : str, default None Override region with a connect string such as "localhost:9000" - background_writes : boolean, default True + background_writes : bool, default True Whether file writes will be issued in the background, without blocking. default_metadata : mapping or pyarrow.KeyValueMetadata, default None @@ -237,11 +237,20 @@ cdef class S3FileSystem(FileSystem): 'port': 8020, 'username': 'username', 'password': 'password'}) allow_bucket_creation : bool, default False - Whether to allow CreateDir at the bucket-level. This option may also be + Whether to allow directory creation at the bucket-level. This option may also be passed in a URI query parameter. allow_bucket_deletion : bool, default False - Whether to allow DeleteDir at the bucket-level. This option may also be + Whether to allow directory deletion at the bucket-level. This option may also be passed in a URI query parameter. + check_directory_existence_before_creation : bool, default false + Whether to check the directory existence before creating it. + If false, when creating a directory the code will not check if it already + exists or not. It's an optimization to try directory creation and catch the error, + rather than issue two dependent I/O calls. + If true, when creating a directory the code will only create the directory when necessary + at the cost of extra I/O calls. This can be used for key/value cloud storage which has + a hard rate limit to number of object mutation operations or scenerios such as + the directories already exist and you do not have creation access. retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3) The retry strategy to use with S3; fail after max_attempts. Available strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy. 
@@ -273,6 +282,7 @@ cdef class S3FileSystem(FileSystem): role_arn=None, session_name=None, external_id=None, load_frequency=900, proxy_options=None, allow_bucket_creation=False, allow_bucket_deletion=False, + check_directory_existence_before_creation=False, retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy( max_attempts=3), force_virtual_addressing=False): @@ -387,6 +397,7 @@ cdef class S3FileSystem(FileSystem): options.value().allow_bucket_creation = allow_bucket_creation options.value().allow_bucket_deletion = allow_bucket_deletion + options.value().check_directory_existence_before_creation = check_directory_existence_before_creation options.value().force_virtual_addressing = force_virtual_addressing if isinstance(retry_strategy, AwsStandardS3RetryStrategy): @@ -447,6 +458,7 @@ cdef class S3FileSystem(FileSystem): background_writes=opts.background_writes, allow_bucket_creation=opts.allow_bucket_creation, allow_bucket_deletion=opts.allow_bucket_deletion, + check_directory_existence_before_creation=opts.check_directory_existence_before_creation, default_metadata=pyarrow_wrap_metadata(opts.default_metadata), proxy_options={'scheme': frombytes(opts.proxy_options.scheme), 'host': frombytes(opts.proxy_options.host), diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index f1f2985f65394..cc260b80c7779 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -157,6 +157,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: c_bool background_writes c_bool allow_bucket_creation c_bool allow_bucket_deletion + c_bool check_directory_existence_before_creation c_bool force_virtual_addressing shared_ptr[const CKeyValueMetadata] default_metadata c_string role_arn diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 845f1eccecc72..58380f1652558 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1226,6 +1226,11 @@ def test_s3_options(pickle_module): assert isinstance(fs, S3FileSystem) assert pickle_module.loads(pickle_module.dumps(fs)) == fs + fs = S3FileSystem(allow_bucket_creation=True, allow_bucket_deletion=True, + check_directory_existence_before_creation=True) + assert isinstance(fs, S3FileSystem) + assert pickle_module.loads(pickle_module.dumps(fs)) == fs + fs = S3FileSystem(request_timeout=0.5, connect_timeout=0.25) assert isinstance(fs, S3FileSystem) assert pickle_module.loads(pickle_module.dumps(fs)) == fs From 1dde3995238d4a771c9525e1e5189c1db4a8a95a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 7 Jun 2024 17:12:53 +0900 Subject: [PATCH 14/59] GH-42017: [CI][Python][C++] Fix utf8proc detection for wheel on Windows (#42022) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change utf8proc in vcpkg provides CMake package. If we use it, we don't need to care about static library name (`utf8proc.lib` or `utf8proc_static.lib`). ### What changes are included in this PR? Use `unofficial-utf8proc` CMake package with vcpkg. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
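For context, a rough consumer-side sketch of relying on the vcpkg-provided CMake package. The package name `unofficial-utf8proc` comes from this PR's description; the exported target name (`utf8proc`) and the project layout are assumptions based on the vcpkg port's usage notes, not part of this patch:

```cmake
# Minimal consumer sketch, assuming the vcpkg toolchain file is in use.
# The exported target name below is an assumption, not taken from this patch.
find_package(unofficial-utf8proc CONFIG REQUIRED)

add_executable(demo main.cc)
# Linking against the imported target avoids hard-coding utf8proc.lib vs
# utf8proc_static.lib, which is the point of this change.
target_link_libraries(demo PRIVATE utf8proc)
```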
* GitHub Issue: #42017 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- cpp/cmake_modules/Findutf8proc.cmake | 2 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 12 +++++++----- cpp/cmake_modules/Usevcpkg.cmake | 3 --- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake index e347414090549..9721f76f0631b 100644 --- a/cpp/cmake_modules/Findutf8proc.cmake +++ b/cpp/cmake_modules/Findutf8proc.cmake @@ -19,7 +19,7 @@ if(utf8proc_FOUND) return() endif() -if(ARROW_PACKAGE_KIND STREQUAL "vcpkg") +if(ARROW_PACKAGE_KIND STREQUAL "vcpkg" OR VCPKG_TOOLCHAIN) set(find_package_args "") if(utf8proc_FIND_VERSION) list(APPEND find_package_args ${utf8proc_FIND_VERSION}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index f102c7bb81683..3c58ba649c4dd 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2819,11 +2819,13 @@ macro(build_utf8proc) endmacro() if(ARROW_WITH_UTF8PROC) - resolve_dependency(utf8proc - PC_PACKAGE_NAMES - libutf8proc - REQUIRED_VERSION - "2.2.0") + set(utf8proc_resolve_dependency_args utf8proc PC_PACKAGE_NAMES libutf8proc) + if(NOT VCPKG_TOOLCHAIN) + # utf8proc in vcpkg doesn't provide version information: + # https://github.com/microsoft/vcpkg/issues/39176 + list(APPEND utf8proc_resolve_dependency_args REQUIRED_VERSION "2.2.0") + endif() + resolve_dependency(${utf8proc_resolve_dependency_args}) endif() macro(build_cares) diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake index 37a732f4b85a0..b6192468da342 100644 --- a/cpp/cmake_modules/Usevcpkg.cmake +++ b/cpp/cmake_modules/Usevcpkg.cmake @@ -237,9 +237,6 @@ set(LZ4_ROOT CACHE STRING "") if(CMAKE_HOST_WIN32) - set(utf8proc_MSVC_STATIC_LIB_SUFFIX - "" - CACHE STRING "") set(LZ4_MSVC_LIB_PREFIX "" CACHE STRING "") From a045770b94972bb4063bde13cb95f1c5b5c8bbe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 7 Jun 2024 10:16:50 +0200 Subject: [PATCH 15/59] GH-42006: [CI][Python] Use pip install -e instead of setup.py build_ext --inplace for installing pyarrow on verification script (#42007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Due to https://github.com/apache/arrow/issues/37929 we require a higher version of setuptools and setuptools_scm to be installed otherwise the job fails with setuptools_scm failing with ` TypeError: Configuration.__init__() got an unexpected keyword argument 'version_file'` ### What changes are included in this PR? Remove the dependencies for the environment and let installation handle those using pip install -e instead of setup.py build_ext --inplace for installing pyarrow on verification script ### Are these changes tested? Via Archery ### Are there any user-facing changes? 
No * GitHub Issue: #42006 Lead-authored-by: Raúl Cumplido Co-authored-by: Joris Van den Bossche Signed-off-by: Raúl Cumplido --- dev/release/verify-release-candidate.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 3ed871bd5305b..fcaaa423a4c75 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -756,7 +756,7 @@ test_python() { show_header "Build and test Python libraries" # Build and test Python - maybe_setup_virtualenv "cython>=0.29.31" numpy "setuptools_scm<8.0.0" setuptools + maybe_setup_virtualenv maybe_setup_conda --file ci/conda_env_python.txt if [ "${USE_CONDA}" -gt 0 ]; then @@ -788,7 +788,7 @@ test_python() { pushd python # Build pyarrow - python setup.py build_ext --inplace + python -m pip install -e . # Check mandatory and optional imports python -c " From fe4d04f081e55ca2de7b1b67b10ad7dca96cfd9e Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Fri, 7 Jun 2024 22:26:34 +0900 Subject: [PATCH 16/59] GH-42002: [Java] Update Unit Tests for Vector Module (#42019) ### Rationale for this change Update package from JUnit 4(`org.junit`) to JUnit 5(`org.junit.jupiter`). ### What changes are included in this PR? - [x] Replacing `org.junit` with `org.junit.jupiter.api`. - [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports. - [x] Updating annotations such as `@ Before`, `@ BeforeClass`, `@ After`, `@ AfterClass`. - `@ Before` -> `@ BeforeEach` - `@ BeforeClass` -> `@ BeforeAll` - `@ After` -> `@ AfterEach` - `@ AfterClass` -> `@ AfterAll` - `@ Test` -> `@ Test` with `org.junit.jupiter` - [x] Removing unused `@ Rule` Annotation - [x] Updating `Parameterized` test - [x] Doing self review ### Are these changes tested? Yes, existing tests have passed. ### Are there any user-facing changes? No. 
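To illustrate the migration pattern applied across these test classes, here is a hedged sketch; `ExampleVectorTest` is a hypothetical class, not one of the files touched by this PR:

```java
// Illustrative JUnit 4 -> JUnit 5 pattern used in this PR; this class is a
// stand-in, not one of the migrated test files.
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class ExampleVectorTest {
  private BufferAllocator allocator;

  @BeforeEach            // was @ Before (org.junit)
  void init() {
    allocator = new RootAllocator(Long.MAX_VALUE);
  }

  @AfterEach             // was @ After (org.junit)
  void terminate() {
    allocator.close();
  }

  @Test                  // now org.junit.jupiter.api.Test
  void setAndGet() {
    try (IntVector vector = new IntVector("v", allocator)) {
      vector.allocateNew(1);
      vector.set(0, 42);
      vector.setValueCount(1);
      // JUnit 5 takes the failure message as the last argument, not the first.
      assertEquals(42, vector.get(0), "unexpected value at index 0");
    }
  }
}
```

The main call-site difference visible throughout this diff is that JUnit 5's `assertEquals`/`assertTrue` place the failure message last rather than first, which is why so many assertion arguments are reordered.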
* GitHub Issue: #42002 Authored-by: Hyunseok Seo Signed-off-by: David Li --- .../arrow/vector/ITTestLargeVector.java | 8 +- .../apache/arrow/vector/TestBitVector.java | 73 +++--- .../arrow/vector/TestBitVectorHelper.java | 6 +- .../vector/TestBufferOwnershipTransfer.java | 8 +- .../org/apache/arrow/vector/TestCopyFrom.java | 85 +++--- .../arrow/vector/TestDecimal256Vector.java | 18 +- .../arrow/vector/TestDecimalVector.java | 20 +- .../arrow/vector/TestDenseUnionVector.java | 25 +- .../arrow/vector/TestDictionaryVector.java | 28 +- .../arrow/vector/TestDurationVector.java | 16 +- .../vector/TestFixedSizeBinaryVector.java | 16 +- .../arrow/vector/TestFixedSizeListVector.java | 120 ++++----- .../TestIntervalMonthDayNanoVector.java | 13 +- .../arrow/vector/TestIntervalYearVector.java | 14 +- .../arrow/vector/TestLargeListVector.java | 35 ++- .../vector/TestLargeVarBinaryVector.java | 18 +- .../arrow/vector/TestLargeVarCharVector.java | 92 +++---- .../apache/arrow/vector/TestListVector.java | 35 ++- .../arrow/vector/TestNullCheckingForGet.java | 10 +- .../vector/TestOutOfMemoryForValueVector.java | 52 ++-- ...TestOversizedAllocationForValueVector.java | 155 +++++------ .../arrow/vector/TestPeriodDuration.java | 6 +- .../apache/arrow/vector/TestStructVector.java | 23 +- .../apache/arrow/vector/TestUnionVector.java | 25 +- .../arrow/vector/TestVarCharListVector.java | 17 +- .../apache/arrow/vector/TestVectorAlloc.java | 14 +- .../arrow/vector/TestVectorReAlloc.java | 70 ++--- .../apache/arrow/vector/TestVectorReset.java | 14 +- .../arrow/vector/TestVectorSchemaRoot.java | 43 +-- .../arrow/vector/TestVectorUnloadLoad.java | 35 ++- .../vector/compare/TestTypeEqualsVisitor.java | 14 +- .../complex/TestDenseUnionBufferSize.java | 3 +- .../complex/impl/TestComplexCopier.java | 13 +- .../complex/impl/TestPromotableWriter.java | 42 +-- .../complex/writer/TestComplexWriter.java | 244 +++++++++--------- .../complex/writer/TestSimpleWriter.java | 38 +-- .../apache/arrow/vector/ipc/BaseFileTest.java | 209 +++++++-------- .../ipc/ITTestIPCWithLargeArrowBuffers.java | 10 +- .../vector/ipc/MessageSerializerTest.java | 14 +- .../arrow/vector/ipc/TestArrowFile.java | 6 +- .../arrow/vector/ipc/TestArrowFooter.java | 4 +- .../vector/ipc/TestArrowReaderWriter.java | 37 ++- .../arrow/vector/ipc/TestArrowStream.java | 15 +- .../arrow/vector/ipc/TestArrowStreamPipe.java | 20 +- .../apache/arrow/vector/ipc/TestJSONFile.java | 11 +- .../arrow/vector/ipc/TestRoundTrip.java | 158 +++++++----- .../ipc/TestUIntDictionaryRoundTrip.java | 88 +++---- .../message/TestMessageMetadataResult.java | 4 +- .../apache/arrow/vector/pojo/TestConvert.java | 6 +- .../testing/TestValueVectorPopulator.java | 12 +- .../testing/ValueVectorDataPopulator.java | 2 +- .../vector/types/pojo/TestExtensionType.java | 67 +++-- .../arrow/vector/types/pojo/TestField.java | 8 +- .../arrow/vector/types/pojo/TestSchema.java | 10 +- .../arrow/vector/util/DecimalUtilityTest.java | 19 +- .../vector/util/TestDataSizeRoundingUtil.java | 4 +- .../TestElementAddressableVectorIterator.java | 14 +- .../arrow/vector/util/TestMapWithOrdinal.java | 12 +- .../vector/util/TestMultiMapWithOrdinal.java | 43 +-- .../vector/util/TestReusableByteArray.java | 20 +- .../arrow/vector/util/TestSchemaUtil.java | 4 +- .../arrow/vector/util/TestValidator.java | 6 +- .../arrow/vector/util/TestVectorAppender.java | 14 +- .../vector/util/TestVectorBatchAppender.java | 12 +- .../util/TestVectorSchemaRootAppender.java | 12 +- .../vector/validate/TestValidateVector.java | 12 +- 
.../validate/TestValidateVectorFull.java | 12 +- .../TestValidateVectorSchemaRoot.java | 12 +- .../TestValidateVectorTypeVisitor.java | 10 +- 69 files changed, 1191 insertions(+), 1144 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java index 8596399e7e08c..b65e6fd36c158 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; @@ -28,7 +28,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java index 075a05c04b641..cebd70fcc5a71 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.stream.IntStream; @@ -29,22 +29,21 @@ import org.apache.arrow.memory.util.hash.MurmurHasher; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestBitVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -124,8 +123,8 @@ public void testSplitAndTransfer() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -167,8 +166,8 @@ public void testSplitAndTransfer1() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values 
not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -218,8 +217,8 @@ public void testSplitAndTransfer2() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -241,9 +240,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -259,9 +258,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity * 2; i++) { if (((i & 1) == 1) || (i == valueCapacity)) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -277,9 +276,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity * 4; i++) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -297,12 +296,12 @@ public void testReallocAfterVectorTransfer1() { if (i <= valueCapacity * 4) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2) || (i == valueCapacity * 4)) { - assertEquals("unexpected cleared bit at index: " + i, 1, toVector.get(i)); + assertEquals(1, toVector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } @@ -325,9 +324,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -343,9 +342,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity * 2; i++) { if (((i & 1) == 1) || (i == valueCapacity)) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, 
vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -361,9 +360,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity * 4; i++) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -381,12 +380,12 @@ public void testReallocAfterVectorTransfer2() { if (i <= valueCapacity * 4) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2) || (i == valueCapacity * 4)) { - assertFalse("unexpected cleared bit at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } @@ -500,13 +499,13 @@ private void validateRange(int length, int start, int count) { bitVector.allocateNew(length); bitVector.setRangeToOne(start, count); for (int i = 0; i < start; i++) { - Assert.assertTrue(desc + i, bitVector.isNull(i)); + assertTrue(bitVector.isNull(i), desc + i); } for (int i = start; i < start + count; i++) { - Assert.assertEquals(desc + i, 1, bitVector.get(i)); + assertEquals(1, bitVector.get(i), desc + i); } for (int i = start + count; i < length; i++) { - Assert.assertTrue(desc + i, bitVector.isNull(i)); + assertTrue(bitVector.isNull(i), desc + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java index 1da4a4c4914b9..b1ef45c918b72 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java @@ -17,16 +17,16 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBitVectorHelper { @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java index 056b6bdd2b787..b38e046659669 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -30,7 +30,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBufferOwnershipTransfer { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java index 97de27bec8237..7d4d08636d740 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java @@ -18,9 +18,10 @@ package org.apache.arrow.vector; import static org.apache.arrow.vector.TestUtils.newVector; -import static org.junit.Assert.*; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; @@ -31,9 +32,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.Types.MinorType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /* * Tested field types: @@ -60,12 +61,12 @@ public class TestCopyFrom { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -99,10 +100,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -116,10 +114,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -133,10 +128,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -171,10 +163,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -192,10 +181,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + 
assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -209,10 +195,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -247,7 +230,7 @@ public void testCopyFromWithNulls2() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 1000 + i, vector1.get(i)); + assertEquals(1000 + i, vector1.get(i), "unexpected value at index: " + i); } } @@ -274,7 +257,7 @@ public void testCopyFromWithNulls2() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 1000 + i, vector2.get(i)); + assertEquals(1000 + i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -309,7 +292,7 @@ public void testCopyFromWithNulls3() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i)); + assertEquals(10000000000L + (long) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -336,7 +319,7 @@ public void testCopyFromWithNulls3() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i)); + assertEquals(10000000000L + (long) i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -450,7 +433,7 @@ public void testCopyFromWithNulls5() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0); + assertEquals(100.25f + (float) i, vector1.get(i), 0, "unexpected value at index: " + i); } } @@ -477,7 +460,7 @@ public void testCopyFromWithNulls5() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0); + assertEquals(100.25f + i * 1.0f, vector2.get(i), 0, "unexpected value at index: " + i); } } } @@ -512,8 +495,7 @@ public void testCopyFromWithNulls6() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0); + assertEquals(123456.7865 + (double) i, vector1.get(i), 0, "unexpected value at index: " + i); } } @@ -540,8 +522,7 @@ public void testCopyFromWithNulls6() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0); + assertEquals(123456.7865 + (double) i, vector2.get(i), 0, "unexpected value at index: " + i); } } } @@ -715,7 +696,7 @@ public void testCopyFromWithNulls9() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i)); + assertEquals(val + (short) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -742,7 +723,7 @@ public void testCopyFromWithNulls9() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i)); + assertEquals(val 
+ (short) i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -778,7 +759,7 @@ public void testCopyFromWithNulls10() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); + assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -805,7 +786,7 @@ public void testCopyFromWithNulls10() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); + assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -841,7 +822,7 @@ public void testCopyFromWithNulls11() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + i, vector1.get(i)); + assertEquals(val + i, vector1.get(i), "unexpected value at index: " + i); } } @@ -868,7 +849,7 @@ public void testCopyFromWithNulls11() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + i, vector2.get(i)); + assertEquals(val + i, vector2.get(i), "unexpected value at index: " + i); } } } @@ -906,7 +887,7 @@ public void testCopyFromWithNulls12() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val, vector1.get(i)); + assertEquals(val, vector1.get(i), "unexpected value at index: " + i); val++; } } @@ -934,7 +915,7 @@ public void testCopyFromWithNulls12() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val, vector2.get(i)); + assertEquals(val, vector2.get(i), "unexpected value at index: " + i); val++; } } @@ -1039,7 +1020,7 @@ public void testCopyFromWithNulls14() { if ((i & 1) == 0) { assertNull(vector1.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i)); + assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i); } } @@ -1066,7 +1047,7 @@ public void testCopyFromWithNulls14() { if (((i & 1) == 0) || (i >= initialCapacity)) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i)); + assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java index fc5dfc38587a4..6886abcc63cdf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.math.BigInteger; @@ -29,9 +29,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import 
org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDecimal256Vector { @@ -49,12 +49,12 @@ public class TestDecimal256Vector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -80,7 +80,7 @@ public void testValuesWriteRead() { for (int i = 0; i < intValues.length; i++) { BigDecimal value = decimalVector.getObject(i); - assertEquals("unexpected data at index: " + i, values[i], value); + assertEquals(values[i], value, "unexpected data at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java index 572f13fea1ed1..c7a12fd6ac87c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.math.BigDecimal; import java.math.BigInteger; @@ -29,9 +29,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDecimalVector { @@ -49,12 +49,12 @@ public class TestDecimalVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -80,7 +80,7 @@ public void testValuesWriteRead() { for (int i = 0; i < intValues.length; i++) { BigDecimal value = decimalVector.getObject(i); - assertEquals("unexpected data at index: " + i, values[i], value); + assertEquals(values[i], value, "unexpected data at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java index 0621fd4527520..0b74f760d2941 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.HashMap; @@ -47,21 +47,21 @@ import 
org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDenseUnionVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -262,8 +262,8 @@ public void testSplitAndTransfer() throws Exception { /* check the toVector output after doing the splitAndTransfer */ for (int i = 0; i < length; i++) { - assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i), - toVector.getObject(i)); + assertEquals(sourceVector.getObject(start + i), toVector.getObject(i), + "Different data at indexes: " + (start + i) + "and " + i); } } } @@ -356,7 +356,8 @@ public void testSplitAndTransferWithMixedVectors() throws Exception { /* check the toVector output after doing the splitAndTransfer */ for (int i = 0; i < length; i++) { - assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i)); + assertEquals(sourceVector.getObject(start + i), toVector.getObject(i), + "Different values at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index 9ffa79470eeb8..caccc2360e85c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -19,7 +19,11 @@ import static org.apache.arrow.vector.TestUtils.*; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -49,9 +53,9 @@ import org.apache.arrow.vector.types.pojo.DictionaryEncoding; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDictionaryVector { @@ -63,12 +67,12 @@ public class TestDictionaryVector { byte[][] data = new byte[][] {zero, one, two}; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -896,7 +900,7 @@ public void testNoMemoryLeak() { assertEquals("Dictionary encoding not defined for value:" + new Text(two), e.getMessage()); } } - assertEquals("encode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "encode memory leak"); // test no memory leak when decode try (final IntVector indices = newVector(IntVector.class, "", 
Types.MinorType.INT, allocator); @@ -914,7 +918,7 @@ public void testNoMemoryLeak() { assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); } } - assertEquals("decode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "decode memory leak"); } @Test @@ -942,7 +946,7 @@ public void testListNoMemoryLeak() { assertEquals("Dictionary encoding not defined for value:20", e.getMessage()); } } - assertEquals("list encode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "list encode memory leak"); try (final ListVector indices = ListVector.empty("indices", allocator); final ListVector dictionaryVector = ListVector.empty("dict", allocator)) { @@ -966,7 +970,7 @@ public void testListNoMemoryLeak() { assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); } } - assertEquals("list decode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "list decode memory leak"); } @Test @@ -1003,7 +1007,7 @@ public void testStructNoMemoryLeak() { assertEquals("Dictionary encoding not defined for value:baz", e.getMessage()); } } - assertEquals("struct encode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "struct encode memory leak"); try (final StructVector indices = StructVector.empty("indices", allocator); final VarCharVector dictVector1 = new VarCharVector("f0", allocator); @@ -1040,7 +1044,7 @@ public void testStructNoMemoryLeak() { assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); } } - assertEquals("struct decode memory leak", 0, allocator.getAllocatedMemory()); + assertEquals(0, allocator.getAllocatedMemory(), "struct decode memory leak"); } private void testDictionary(Dictionary dictionary, ToIntBiFunction valGetter) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java index c5d4d296cc024..6ed44be849726 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import java.time.Duration; @@ -28,19 +28,19 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestDurationVector { RootAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java index b9cd89e4ad731..4b52c7a41ff07 100644 --- 
a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java @@ -17,8 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -26,9 +30,9 @@ import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestFixedSizeBinaryVector { private static final int numValues = 123; @@ -85,7 +89,7 @@ private static void failWithException(String message) throws Exception { } - @Before + @BeforeEach public void init() throws Exception { allocator = new DirtyRootAllocator(Integer.MAX_VALUE, (byte) 100); vector = new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth); @@ -128,7 +132,7 @@ public void init() throws Exception { largeNullableHolder.buffer = largeBuf; } - @After + @AfterEach public void terminate() throws Exception { for (int i = 0; i < numValues; i++) { bufs[i].close(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java index bde6dd491dd71..54ce8e2ae0e7c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java @@ -17,11 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.ByteBuffer; @@ -41,21 +42,20 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestFixedSizeListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -76,12 +76,12 @@ public void testIntType() { UnionFixedSizeListReader reader = vector.getReader(); for (int i = 0; i < 10; i++) { reader.setPosition(i); - 
Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(i, reader.reader().readInteger().intValue()); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(i + 10, reader.reader().readInteger().intValue()); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(i, i + 10), reader.readObject()); } } @@ -107,16 +107,16 @@ public void testFloatTypeNullable() { for (int i = 0; i < 10; i++) { reader.setPosition(i); if (i % 2 == 0) { - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject()); } else { - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); } } } @@ -149,18 +149,18 @@ public void testNestedInList() { reader.setPosition(i); if (i % 2 == 0) { for (int j = 0; j < i % 7; j++) { - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); FieldReader innerListReader = reader.reader(); for (int k = 0; k < 2; k++) { - Assert.assertTrue(innerListReader.next()); + assertTrue(innerListReader.next()); assertEquals(k + j, innerListReader.reader().readInteger().intValue()); } - Assert.assertFalse(innerListReader.next()); + assertFalse(innerListReader.next()); } - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); } else { - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); } } } @@ -196,40 +196,40 @@ public void testTransferPair() { UnionFixedSizeListReader reader = to.getReader(); reader.setPosition(0); - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); reader.setPosition(1); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(0.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(10.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject()); reader.setPosition(2); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(2.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(12.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + assertFalse(reader.next()); assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject()); reader.setPosition(3); - Assert.assertTrue(reader.isSet()); - Assert.assertTrue(reader.next()); + assertTrue(reader.isSet()); + assertTrue(reader.next()); assertEquals(4.1f, reader.reader().readFloat(), 0.00001); - Assert.assertTrue(reader.next()); + assertTrue(reader.next()); assertEquals(14.1f, reader.reader().readFloat(), 0.00001); - Assert.assertFalse(reader.next()); + 
assertFalse(reader.next()); assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject()); for (int i = 4; i < 10; i++) { reader.setPosition(i); - Assert.assertFalse(reader.isSet()); - Assert.assertNull(reader.readObject()); + assertFalse(reader.isSet()); + assertNull(reader.readObject()); } } } @@ -238,11 +238,11 @@ public void testTransferPair() { public void testConsistentChildName() throws Exception { try (FixedSizeListVector listVector = FixedSizeListVector.empty("sourceVector", /*size=*/2, allocator)) { String emptyListStr = listVector.getField().toString(); - Assert.assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); + assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); String emptyVectorStr = listVector.getField().toString(); - Assert.assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); + assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); } } @@ -354,27 +354,29 @@ public void testDecimalIndexCheck() throws Exception { } - @Test(expected = IllegalStateException.class) + @Test public void testWriteIllegalData() throws Exception { - try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", /*size=*/3, allocator)) { + assertThrows(IllegalStateException.class, () -> { + try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", /*size=*/3, allocator)) { - UnionFixedSizeListWriter writer1 = vector1.getWriter(); - writer1.allocate(); + UnionFixedSizeListWriter writer1 = vector1.getWriter(); + writer1.allocate(); - int[] values1 = new int[] {1, 2, 3}; - int[] values2 = new int[] {4, 5, 6, 7, 8}; + int[] values1 = new int[]{1, 2, 3}; + int[] values2 = new int[]{4, 5, 6, 7, 8}; - //set some values - writeListVector(vector1, writer1, values1); - writeListVector(vector1, writer1, values2); - writer1.setValueCount(3); + //set some values + writeListVector(vector1, writer1, values1); + writeListVector(vector1, writer1, values2); + writer1.setValueCount(3); - assertEquals(3, vector1.getValueCount()); - int[] realValue1 = convertListToIntArray(vector1.getObject(0)); - assertTrue(Arrays.equals(values1, realValue1)); - int[] realValue2 = convertListToIntArray(vector1.getObject(1)); - assertTrue(Arrays.equals(values2, realValue2)); - } + assertEquals(3, vector1.getValueCount()); + int[] realValue1 = convertListToIntArray(vector1.getObject(0)); + assertTrue(Arrays.equals(values1, realValue1)); + int[] realValue2 = convertListToIntArray(vector1.getObject(1)); + assertTrue(Arrays.equals(values2, realValue2)); + } + }); } @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java index 681897b93c12c..82bf1dd423b5e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java @@ -17,8 +17,7 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; - +import static org.junit.jupiter.api.Assertions.assertEquals; import java.time.Duration; import java.time.Period; @@ -29,20 +28,20 @@ import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestIntervalMonthDayNanoVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java index 4b2ae2eb3d49b..6cb72f38307df 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java @@ -17,28 +17,28 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestIntervalYearVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index ffd87c99d508d..d4bb3d4c97bcf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -39,21 +39,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestLargeListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -91,11 +90,11 @@ public void testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - Assert.assertTrue("shouldn't be null", 
reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - Assert.assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -433,15 +432,15 @@ public void testSplitAndTransfer() throws Exception { dataLength2 = (int) toOffsetBuffer.getLong((i + 1) * LargeListVector.OFFSET_WIDTH) - (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH); offset2 = (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH); for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java index 36607903b01a2..3a51cca51706c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -32,20 +32,20 @@ import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestLargeVarBinaryVector { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java index 06b27a9eba156..aa9c7fed38a6b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java @@ -17,12 +17,14 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static 
org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -41,11 +43,9 @@ import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestLargeVarCharVector { @@ -58,12 +58,12 @@ public class TestLargeVarCharVector { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void shutdown() { allocator.close(); } @@ -162,7 +162,7 @@ public void testInvalidStartIndex() { final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); @@ -181,7 +181,7 @@ public void testInvalidLength() { final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); @@ -298,39 +298,43 @@ public void testSetLastSetUsage() { } } - @Test(expected = OutOfMemoryException.class) + @Test public void testVectorAllocateNew() { - try (RootAllocator smallAllocator = new RootAllocator(200); - LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) { - vector.allocateNew(); - } + assertThrows(OutOfMemoryException.class, () -> { + try (RootAllocator smallAllocator = new RootAllocator(200); + LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) { + vector.allocateNew(); + } + }); } - @Test(expected = OversizedAllocationException.class) + @Test public void testLargeVariableVectorReallocation() { - final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator); - // edge case 1: value count = MAX_VALUE_ALLOCATION - final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; - final int expectedOffsetSize = 10; - try { - vector.allocateNew(expectedAllocationInBytes, 10); - assertTrue(expectedOffsetSize <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); - vector.reAlloc(); - assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator); + // edge case 1: value count = MAX_VALUE_ALLOCATION + final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; 
+ final int expectedOffsetSize = 10; + try { + vector.allocateNew(expectedAllocationInBytes, 10); + assertTrue(expectedOffsetSize <= vector.getValueCapacity()); + assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); + vector.reAlloc(); + assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); + assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); + } finally { + vector.close(); + } - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this tests if it overflows - } finally { - vector.close(); - } + // common: value count < MAX_VALUE_ALLOCATION + try { + vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); + vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION + vector.reAlloc(); // this tests if it overflows + } finally { + vector.close(); + } + }); } @Test @@ -784,7 +788,7 @@ public void testNullableType() { try { vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index 97f2d9fd6def1..cbcb6cf9d7963 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -17,12 +17,12 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -44,21 +44,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -96,11 +95,11 @@ public void testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - Assert.assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - Assert.assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -439,15 +438,15 @@ public void 
testSplitAndTransfer() throws Exception { dataLength2 = toOffsetBuffer.getInt((i + 1) * ListVector.OFFSET_WIDTH) - toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH); offset2 = toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH); for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java index f1345e88ab8b9..51ad470bb6417 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java @@ -17,11 +17,13 @@ package org.apache.arrow.vector; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.lang.reflect.Field; import java.net.URLClassLoader; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test cases for {@link NullCheckingForGet}. @@ -63,7 +65,7 @@ public void testDefaultValue() throws Exception { ClassLoader classLoader = copyClassLoader(); if (classLoader != null) { boolean nullCheckingEnabled = getFlagValue(classLoader); - Assert.assertTrue(nullCheckingEnabled); + assertTrue(nullCheckingEnabled); } } @@ -79,7 +81,7 @@ public void testEnableSysProperty() throws Exception { ClassLoader classLoader = copyClassLoader(); if (classLoader != null) { boolean nullCheckingEnabled = getFlagValue(classLoader); - Assert.assertFalse(nullCheckingEnabled); + assertFalse(nullCheckingEnabled); } // restore system property diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java index 7f26b5c1b79f6..200786f54a92d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java @@ -17,12 +17,14 @@ package org.apache.arrow.vector; +import static org.junit.jupiter.api.Assertions.assertThrows; + import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.RootAllocator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * This class tests cases where we expect to receive {@link OutOfMemoryException}. 
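(For readers following the hunks below: the expected-exception tests in this file are rewritten with the standard JUnit 4 to JUnit 5 pattern, replacing `@Test(expected = ...)` with an explicit `assertThrows` call wrapping the test body in a lambda. The sketch below is illustrative only and is not part of the patch; the class name and the allocation size are hypothetical.)

```java
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.junit.jupiter.api.Test;

// Illustrative sketch (not part of the patch) of the JUnit 4 -> JUnit 5
// expected-exception rewrite applied throughout these hunks.
class ExpectedExceptionMigrationSketch {

  // JUnit 4 style (removed by the patch):
  //   @Test(expected = OutOfMemoryException.class)
  //   public void allocateTooMuch() { ... }

  // JUnit 5 style (added by the patch): the expected failure is asserted
  // explicitly, so the test fails if no exception is thrown.
  @Test
  void allocateTooMuch() {
    assertThrows(OutOfMemoryException.class, () -> {
      try (RootAllocator tiny = new RootAllocator(200); // deliberately small limit
           IntVector vector = new IntVector("ints", tiny)) {
        vector.allocateNew(1 << 20); // far exceeds the 200-byte limit
      }
    });
  }
}
```

One benefit of this form, visible in the hunks below, is that the try-with-resources cleanup still runs inside the lambda, while the assertion about the exception stays outside it.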
@@ -33,40 +35,48 @@ public class TestOutOfMemoryForValueVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(200); // Start with low memory limit } - @Test(expected = OutOfMemoryException.class) + @Test public void variableWidthVectorAllocateNew() { - try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(); - } + assertThrows(OutOfMemoryException.class, () -> { + try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(); + } + }); } - @Test(expected = OutOfMemoryException.class) + @Test public void variableWidthVectorAllocateNewCustom() { - try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(2342, 234); - } + assertThrows(OutOfMemoryException.class, () -> { + try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(2342, 234); + } + }); } - @Test(expected = OutOfMemoryException.class) + @Test public void fixedWidthVectorAllocateNew() { - try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(); - } + assertThrows(OutOfMemoryException.class, () -> { + try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(); + } + }); } - @Test(expected = OutOfMemoryException.class) + @Test public void fixedWidthVectorAllocateNewCustom() { - try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(2342); - } + assertThrows(OutOfMemoryException.class, () -> { + try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(2342); + } + }); } - @After + @AfterEach public void terminate() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java index 23414e9f5df1c..f89828e4ceeb2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java @@ -18,15 +18,16 @@ package org.apache.arrow.vector; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.util.OversizedAllocationException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * This class tests that OversizedAllocationException occurs when a large memory is allocated for a vector. 
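(The other two mechanical changes repeated throughout these files are the lifecycle annotations, where `@Before`/`@After` become `@BeforeEach`/`@AfterEach`, and the assertion failure message, which moves from the first argument of `org.junit.Assert` methods to the last argument of the `org.junit.jupiter.api.Assertions` methods. A minimal sketch, not part of the patch, with a hypothetical test class name:)

```java
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

// Illustrative sketch (not part of the patch) of the message-argument and
// lifecycle-annotation changes shown in the surrounding hunks.
class AssertionMessageMigrationSketch {

  private BufferAllocator allocator;

  @BeforeEach // was @Before in JUnit 4
  void init() {
    allocator = new RootAllocator(Long.MAX_VALUE);
  }

  @AfterEach // was @After in JUnit 4
  void terminate() {
    allocator.close();
  }

  @Test
  void messageIsLastArgument() {
    // JUnit 4: Assert.assertEquals("no memory should leak", 0, allocator.getAllocatedMemory());
    // JUnit 5: expected and actual come first, the message comes last.
    assertEquals(0, allocator.getAllocatedMemory(), "no memory should leak");
  }
}
```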
@@ -39,94 +40,100 @@ public class TestOversizedAllocationForValueVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } - @Test(expected = OversizedAllocationException.class) + @Test public void testFixedVectorReallocation() { - final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: buffer size = max value capacity - final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4); - try { - vector.allocateNew(expectedValueCapacity); - assertEquals(expectedValueCapacity, vector.getValueCapacity()); - vector.reAlloc(); - assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator); + // edge case 1: buffer size = max value capacity + final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4); + try { + vector.allocateNew(expectedValueCapacity); + assertEquals(expectedValueCapacity, vector.getValueCapacity()); + vector.reAlloc(); + assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); + } finally { + vector.close(); + } - // common case: value count < max value capacity - try { - vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8)); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this should throw an IOOB - } finally { - vector.close(); - } + // common case: value count < max value capacity + try { + vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8)); + vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION + vector.reAlloc(); // this should throw an IOOB + } finally { + vector.close(); + } + }); } - @Test(expected = OversizedAllocationException.class) + @Test public void testBitVectorReallocation() { - final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: buffer size ~ max value capacity - final int expectedValueCapacity = 1 << 29; - try { - vector.allocateNew(expectedValueCapacity); - assertEquals(expectedValueCapacity, vector.getValueCapacity()); - vector.reAlloc(); - assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator); + // edge case 1: buffer size ~ max value capacity + final int expectedValueCapacity = 1 << 29; + try { + vector.allocateNew(expectedValueCapacity); + assertEquals(expectedValueCapacity, vector.getValueCapacity()); + vector.reAlloc(); + assertEquals(expectedValueCapacity * 2, vector.getValueCapacity()); + } finally { + vector.close(); + } - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(expectedValueCapacity); - for (int i = 0; i < 3; i++) { - vector.reAlloc(); // expand buffer size + // common: value count < MAX_VALUE_ALLOCATION + try { + vector.allocateNew(expectedValueCapacity); + for (int i = 0; i < 3; i++) { + vector.reAlloc(); // expand buffer size + } + assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); + vector.reAlloc(); // buffer size ~ max allocation + assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); + vector.reAlloc(); // overflow + } finally 
{ + vector.close(); } - assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); - vector.reAlloc(); // buffer size ~ max allocation - assertEquals(Integer.MAX_VALUE, vector.getValueCapacity()); - vector.reAlloc(); // overflow - } finally { - vector.close(); - } + }); } - @Test(expected = OversizedAllocationException.class) + @Test public void testVariableVectorReallocation() { - final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); - // edge case 1: value count = MAX_VALUE_ALLOCATION - final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; - final int expectedOffsetSize = 10; - try { - vector.allocateNew(expectedAllocationInBytes, 10); - assertTrue(expectedOffsetSize <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); - vector.reAlloc(); - assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); - assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); - } finally { - vector.close(); - } + assertThrows(OversizedAllocationException.class, () -> { + final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator); + // edge case 1: value count = MAX_VALUE_ALLOCATION + final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE; + final int expectedOffsetSize = 10; + try { + vector.allocateNew(expectedAllocationInBytes, 10); + assertTrue(expectedOffsetSize <= vector.getValueCapacity()); + assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity()); + vector.reAlloc(); + assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity()); + assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity()); + } finally { + vector.close(); + } - // common: value count < MAX_VALUE_ALLOCATION - try { - vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); - vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION - vector.reAlloc(); // this tests if it overflows - } finally { - vector.close(); - } + // common: value count < MAX_VALUE_ALLOCATION + try { + vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0); + vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION + vector.reAlloc(); // this tests if it overflows + } finally { + vector.close(); + } + }); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java index 2b9f4cca8c22f..bf4cda6b4271a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import java.time.Duration; import java.time.LocalDate; @@ -26,7 +26,7 @@ import java.time.Period; import java.time.temporal.ChronoUnit; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestPeriodDuration { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java index 68f5e14dabb9b..ccb2890863314 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java @@ 
-17,7 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; import java.util.ArrayList; import java.util.HashMap; @@ -39,21 +43,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestStructVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -64,7 +67,7 @@ public void testFieldMetadata() throws Exception { metadata.put("k1", "v1"); FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata); try (StructVector vector = new StructVector("struct", allocator, type, null)) { - Assert.assertEquals(vector.getField().getMetadata(), type.getMetadata()); + assertEquals(vector.getField().getMetadata(), type.getMetadata()); } } @@ -108,8 +111,8 @@ public void testAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity); - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java index 1b0387feb73ff..10298112ddc98 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.HashMap; @@ -44,21 +44,21 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestUnionVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { 
allocator.close(); } @@ -283,8 +283,8 @@ public void testSplitAndTransfer() throws Exception { /* check the toVector output after doing the splitAndTransfer */ for (int i = 0; i < length; i++) { - assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i), - toVector.getObject(i)); + assertEquals(sourceVector.getObject(start + i), toVector.getObject(i), + "Different data at indexes: " + (start + i) + "and " + i); } } } @@ -373,7 +373,8 @@ public void testSplitAndTransferWithMixedVectors() throws Exception { /* check the toVector output after doing the splitAndTransfer */ for (int i = 0; i < length; i++) { - assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i)); + assertEquals(sourceVector.getObject(start + i), toVector.getObject(i), + "Different values at index: " + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java index bfe489fa5af4e..6d4e64837adbc 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java @@ -17,6 +17,8 @@ package org.apache.arrow.vector; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.nio.charset.StandardCharsets; import org.apache.arrow.memory.ArrowBuf; @@ -25,21 +27,20 @@ import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVarCharListVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -72,8 +73,8 @@ public void testVarCharListWithNulls() { writer.setValueCount(2); - Assert.assertEquals(2, vector.getValueCount()); - Assert.assertEquals(2, vector.getDataVector().getValueCount()); + assertEquals(2, vector.getValueCount()); + assertEquals(2, vector.getDataVector().getValueCount()); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java index b96f6ab6afedd..02a85faa20cd6 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Arrays; import java.util.Collections; @@ -39,23 +39,23 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorAlloc { private BufferAllocator rootAllocator; private 
BufferAllocator policyAllocator; - @Before + @BeforeEach public void init() { rootAllocator = new RootAllocator(Long.MAX_VALUE); policyAllocator = new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy()); } - @After + @AfterEach public void terminate() throws Exception { rootAllocator.close(); policyAllocator.close(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 9043bd4f8f2d4..21cbefae45161 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -17,7 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.nio.charset.StandardCharsets; @@ -37,22 +40,21 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DataSizeRoundingUtil; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorReAlloc { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -68,7 +70,7 @@ public void testFixedType() { try { vector.set(initialCapacity, 0); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -92,7 +94,7 @@ public void testNullableType() { try { vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -101,7 +103,7 @@ public void testNullableType() { assertTrue(vector.getValueCapacity() >= 2 * initialCapacity); vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8)); - assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8)); + assertEquals(new String(vector.get(initialCapacity), StandardCharsets.UTF_8), "foo"); } } @@ -117,7 +119,7 @@ public void testListType() { try { vector.getInnerValueCountAt(2014); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -140,7 +142,7 @@ public void testStructType() { try { vector.getObject(513); - Assert.fail("Expected out of bounds exception"); + fail("Expected out of bounds exception"); } catch (Exception e) { // ok } @@ -161,7 +163,7 @@ public void testVariableWidthTypeSetNullValues() { for (int i = 0; i < numNullValues1; i++) { v1.setNull(i); } - Assert.assertTrue(v1.getBufferSizeFor(numNullValues1) > 0); + assertTrue(v1.getBufferSizeFor(numNullValues1) > 0); } try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) { @@ -171,7 +173,7 @@ public void testVariableWidthTypeSetNullValues() { for (int i = 0; i < numNullValues2; i++) { v2.setNull(i); } - Assert.assertTrue(v2.getBufferSizeFor(numNullValues2) > 0); + assertTrue(v2.getBufferSizeFor(numNullValues2) 
> 0); } } @@ -194,7 +196,7 @@ public void testFixedAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -218,8 +220,8 @@ public void testVariableAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); - Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); } } @@ -243,8 +245,8 @@ public void testLargeVariableAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); - Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); } } @@ -256,8 +258,8 @@ public void testVarCharAllocateNew() throws Exception { vector.allocateNew(count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } } @@ -269,8 +271,8 @@ public void testLargeVarCharAllocateNew() throws Exception { vector.allocateNew(count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } } @@ -282,8 +284,8 @@ public void testVarCharAllocateNewUsingHelper() throws Exception { AllocationHelper.allocateNew(vector, count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } } @@ -295,8 +297,8 @@ public void testLargeVarCharAllocateNewUsingHelper() throws Exception { AllocationHelper.allocateNew(vector, count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. 
- Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } } @@ -314,7 +316,7 @@ public void testFixedRepeatedClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -333,7 +335,7 @@ public void testVariableRepeatedClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -359,7 +361,7 @@ public void testRepeatedValueVectorClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -386,7 +388,7 @@ public void testStructVectorClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -415,7 +417,7 @@ public void testFixedSizeListVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -440,7 +442,7 @@ public void testUnionVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -468,7 +470,7 @@ public void testDenseUnionVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. 
- Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 19700e02161c7..2a6f86426ae8a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -34,20 +34,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorReset { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 207962eb45b85..76500052fa632 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -17,10 +17,11 @@ package org.apache.arrow.vector; -import static junit.framework.TestCase.assertTrue; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -35,20 +36,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorSchemaRoot { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() { allocator.close(); } @@ -226,20 +227,22 @@ public void testSlice() { } } - @Test(expected = IllegalArgumentException.class) + @Test public void testSliceWithInvalidParam() { - try (final IntVector intVector = new IntVector("intVector", allocator); - final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { - intVector.setValueCount(10); - float4Vector.setValueCount(10); - for (int i = 0; i < 10; i++) { - intVector.setSafe(i, i); - float4Vector.setSafe(i, i + 0.1f); - } - final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); + 
assertThrows(IllegalArgumentException.class, () -> { + try (final IntVector intVector = new IntVector("intVector", allocator); + final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { + intVector.setValueCount(10); + float4Vector.setValueCount(10); + for (int i = 0; i < 10; i++) { + intVector.setSafe(i, i); + float4Vector.setSafe(i, i + 0.1f); + } + final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); - original.slice(0, 20); - } + original.slice(0, 20); + } + }); } @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java index eac72f4b2c893..82ae5c038cbc2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java @@ -18,9 +18,9 @@ package org.apache.arrow.vector; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -44,21 +44,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorUnloadLoad { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -116,9 +115,9 @@ public void testUnloadLoad() throws IOException { FieldReader bigIntReader = newRoot.getVector("bigInt").getReader(); for (int i = 0; i < count; i++) { intReader.setPosition(i); - Assert.assertEquals(i, intReader.readInteger().intValue()); + assertEquals(i, intReader.readInteger().intValue()); bigIntReader.setPosition(i); - Assert.assertEquals(i, bigIntReader.readLong().longValue()); + assertEquals(i, bigIntReader.readLong().longValue()); } } } @@ -188,7 +187,7 @@ public void testUnloadLoadAddPadding() throws IOException { for (int j = 0; j < i % 4 + 1; j++) { expected.add(i); } - Assert.assertEquals(expected, reader.readObject()); + assertEquals(expected, reader.readObject()); } } @@ -256,9 +255,9 @@ public void testLoadValidityBuffer() throws IOException { IntVector intDefinedVector = (IntVector) newRoot.getVector("intDefined"); IntVector intNullVector = (IntVector) newRoot.getVector("intNull"); for (int i = 0; i < count; i++) { - assertFalse("#" + i, intDefinedVector.isNull(i)); - assertEquals("#" + i, i, intDefinedVector.get(i)); - assertTrue("#" + i, intNullVector.isNull(i)); + assertFalse(intDefinedVector.isNull(i), "#" + i); + assertEquals(i, intDefinedVector.get(i), "#" + i); + assertTrue(intNullVector.isNull(i), "#" + i); } intDefinedVector.setSafe(count + 10, 1234); assertTrue(intDefinedVector.isNull(count + 1)); @@ -319,13 +318,13 @@ public void testUnloadLoadDuplicates() throws IOException { vectorLoader.load(recordBatch); List targets = newRoot.getFieldVectors(); - 
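The `testSliceWithInvalidParam` rewrite above is the usual replacement of `@Test(expected = ...)` with an explicit `assertThrows`, which pins the expectation to the exact call that should fail. A reduced sketch of the same idiom (single hypothetical vector, not the patch's two-vector setup):

import static org.junit.jupiter.api.Assertions.assertThrows;

import java.util.Collections;

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.junit.jupiter.api.Test;

class SliceExampleTest {
  @Test
  void sliceBeyondRowCountThrows() {
    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         IntVector intVector = new IntVector("intVector", allocator)) {
      intVector.setSafe(0, 1);
      intVector.setValueCount(1);
      VectorSchemaRoot root = new VectorSchemaRoot(Collections.singletonList(intVector));
      // JUnit 4: @Test(expected = IllegalArgumentException.class) on the method.
      // JUnit 5: assert the exception at the call that is expected to fail.
      assertThrows(IllegalArgumentException.class, () -> root.slice(0, 20));
    }
  }
}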
Assert.assertEquals(sources.size(), targets.size()); + assertEquals(sources.size(), targets.size()); for (int k = 0; k < sources.size(); k++) { IntVector src = (IntVector) sources.get(k); IntVector tgt = (IntVector) targets.get(k); - Assert.assertEquals(src.getValueCount(), tgt.getValueCount()); + assertEquals(src.getValueCount(), tgt.getValueCount()); for (int i = 0; i < count; i++) { - Assert.assertEquals(src.get(i), tgt.get(i)); + assertEquals(src.get(i), tgt.get(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java index 736b0f1b1aeac..6ff81faba73e8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.compare; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.HashMap; @@ -41,20 +41,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeEqualsVisitor { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java index 82ef7a479d05c..0e24fd0af6806 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java @@ -17,7 +17,8 @@ package org.apache.arrow.vector.complex; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index 29f25170332a2..67bdb9945fc94 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.complex.impl; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; @@ -39,9 +39,9 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DecimalUtility; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import 
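Alongside the JUnit 4 removal, wildcard static imports are expanded to explicit ones, which makes it obvious which Assertions methods a test uses and avoids star imports that style checks commonly flag. A small sketch of the two forms, with a hypothetical test body:

// Before (wildcard form being removed):
//   import static org.junit.jupiter.api.Assertions.*;
// After (explicit form, as in the hunks above):
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class ExplicitImportExampleTest {
  @Test
  void usesOnlyImportedAssertions() {
    assertDoesNotThrow(() -> Integer.parseInt("42"));
    assertEquals(42, Integer.parseInt("42"));
  }
}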
org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestComplexCopier { @@ -49,12 +49,12 @@ public class TestComplexCopier { private static final int COUNT = 100; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -90,7 +90,6 @@ public void testCopyFixedSizeListVector() { // validate equals assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index b7fc681c16118..3a54d539c290a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.complex.impl; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.ByteBuffer; @@ -50,21 +50,21 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestPromotableWriter { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -128,33 +128,33 @@ public void testPromoteToUnion() throws Exception { final UnionVector uv = v.getChild("A", UnionVector.class); - assertFalse("0 shouldn't be null", uv.isNull(0)); + assertFalse(uv.isNull(0), "0 shouldn't be null"); assertEquals(false, uv.getObject(0)); - assertFalse("1 shouldn't be null", uv.isNull(1)); + assertFalse(uv.isNull(1), "1 shouldn't be null"); assertEquals(true, uv.getObject(1)); - assertFalse("2 shouldn't be null", uv.isNull(2)); + assertFalse(uv.isNull(2), "2 shouldn't be null"); assertEquals(10, uv.getObject(2)); - assertNull("3 should be null", uv.getObject(3)); + assertNull(uv.getObject(3), "3 should be null"); - assertFalse("4 shouldn't be null", uv.isNull(4)); + assertFalse(uv.isNull(4), "4 shouldn't be null"); assertEquals(100, uv.getObject(4)); - assertFalse("5 shouldn't be null", uv.isNull(5)); + assertFalse(uv.isNull(5), "5 shouldn't be null"); assertEquals(123123L, uv.getObject(5)); - assertFalse("6 shouldn't be null", uv.isNull(6)); + assertFalse(uv.isNull(6), "6 shouldn't be null"); NullableTimeStampMilliTZHolder readBackHolder = new NullableTimeStampMilliTZHolder(); uv.getTimeStampMilliTZVector().get(6, readBackHolder); assertEquals(12345L, readBackHolder.value); assertEquals("UTC", readBackHolder.timezone); - assertFalse("7 shouldn't be null", uv.isNull(7)); + assertFalse(uv.isNull(7), "7 shouldn't be null"); assertEquals(444413L, 
((java.time.Duration) uv.getObject(7)).getSeconds()); - assertFalse("8 shouldn't be null", uv.isNull(8)); + assertFalse(uv.isNull(8), "8 shouldn't be null"); assertEquals(18978, ByteBuffer.wrap(uv.getFixedSizeBinaryVector().get(8)).order(ByteOrder.nativeOrder()).getInt()); @@ -172,10 +172,10 @@ public void testPromoteToUnion() throws Exception { Field childField1 = container.getField().getChildren().get(0).getChildren().get(0); Field childField2 = container.getField().getChildren().get(0).getChildren().get(1); - assertEquals("Child field should be union type: " + - childField1.getName(), ArrowTypeID.Union, childField1.getType().getTypeID()); - assertEquals("Child field should be decimal type: " + - childField2.getName(), ArrowTypeID.Decimal, childField2.getType().getTypeID()); + assertEquals(ArrowTypeID.Union, childField1.getType().getTypeID(), + "Child field should be union type: " + childField1.getName()); + assertEquals(ArrowTypeID.Decimal, childField2.getType().getTypeID(), + "Child field should be decimal type: " + childField2.getName()); buf.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 19f0ea9d4e392..c7ed893d4c340 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -17,7 +17,12 @@ package org.apache.arrow.vector.complex.writer; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.ByteBuffer; @@ -92,10 +97,9 @@ import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestComplexWriter { @@ -103,12 +107,12 @@ public class TestComplexWriter { private static final int COUNT = 100; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -119,8 +123,8 @@ public void simpleNestedTypes() { StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); for (int i = 0; i < COUNT; i++) { rootReader.setPosition(i); - Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); - Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); + assertEquals(i, rootReader.reader("int").readInteger().intValue()); + assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); } parent.close(); @@ -210,15 +214,15 @@ private void checkNullableStruct(NonNullableStructVector structVector) { StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root"); for (int i = 0; i < COUNT; i++) { rootReader.setPosition(i); - assertTrue("index is set: " + i, rootReader.isSet()); + 
assertTrue(rootReader.isSet(), "index is set: " + i); FieldReader struct = rootReader.reader("struct"); if (i % 2 == 0) { - assertTrue("index is set: " + i, struct.isSet()); - assertNotNull("index is set: " + i, struct.readObject()); + assertTrue(struct.isSet(), "index is set: " + i); + assertNotNull(struct.readObject(), "index is set: " + i); assertEquals(i, struct.reader("nested").readLong().longValue()); } else { - assertFalse("index is not set: " + i, struct.isSet()); - assertNull("index is not set: " + i, struct.readObject()); + assertFalse(struct.isSet(), "index is not set: " + i); + assertNull(struct.readObject(), "index is not set: " + i); } } } @@ -245,11 +249,11 @@ public void testList() { StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); rootReader.setPosition(0); - assertTrue("row 0 list is not set", rootReader.reader("list").isSet()); + assertTrue(rootReader.reader("list").isSet(), "row 0 list is not set"); assertEquals(Long.valueOf(0), rootReader.reader("list").reader().readLong()); rootReader.setPosition(1); - assertFalse("row 1 list is set", rootReader.reader("list").isSet()); + assertFalse(rootReader.reader("list").isSet(), "row 1 list is set"); } } @@ -312,9 +316,9 @@ public void testListScalarNull() { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { - assertTrue("index is not set: " + j, listReader.reader().isSet()); + assertTrue(listReader.reader().isSet(), "index is not set: " + j); assertEquals(j, listReader.reader().readInteger().intValue()); } } @@ -392,7 +396,7 @@ public void listTimeStampMilliTZType() { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { NullableTimeStampMilliTZHolder actual = new NullableTimeStampMilliTZHolder(); listReader.reader().read(actual); @@ -430,7 +434,7 @@ public void listDurationType() { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { NullableDurationHolder actual = new NullableDurationHolder(); listReader.reader().read(actual); @@ -472,7 +476,7 @@ public void listFixedSizeBinaryType() throws Exception { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { NullableFixedSizeBinaryHolder actual = new NullableFixedSizeBinaryHolder(); listReader.reader().read(actual); @@ -505,11 +509,11 @@ public void listScalarTypeNullable() { for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); if (i % 2 == 0) { - assertTrue("index is set: " + i, listReader.isSet()); - assertEquals("correct length at: " + i, i % 7, ((List) listReader.readObject()).size()); + assertTrue(listReader.isSet(), "index is set: " + i); + assertEquals(i % 7, ((List) listReader.readObject()).size(), "correct length at: " + i); } else { - assertFalse("index is not set: " + i, listReader.isSet()); - assertNull("index is not set: " + i, listReader.readObject()); + assertFalse(listReader.isSet(), "index is not set: " + i); + assertNull(listReader.readObject(), "index is not set: " + i); } } } @@ -537,8 +541,8 @@ 
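A recurring change in these hunks is the assertion message moving from the first parameter (JUnit 4) to the last parameter (JUnit 5). A minimal sketch of the reordering, with hypothetical local state standing in for the reader calls:

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.Test;

class MessageOrderExampleTest {
  @Test
  void messageGoesLast() {
    int i = 3;              // hypothetical loop index
    boolean isSet = true;   // hypothetical state under test
    // JUnit 4: Assert.assertTrue("index is set: " + i, isSet);
    // JUnit 5: the failure message is the trailing argument.
    assertTrue(isSet, "index is set: " + i);
    assertEquals(3, i, "unexpected index: " + i);
  }
}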
public void listStructType() { listReader.setPosition(i); for (int j = 0; j < i % 7; j++) { listReader.next(); - Assert.assertEquals("record: " + i, j, listReader.reader().reader("int").readInteger().intValue()); - Assert.assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue()); + assertEquals(j, listReader.reader().reader("int").readInteger().intValue(), "record: " + i); + assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue()); } } } @@ -601,7 +605,7 @@ private void checkListOfLists(final ListVector listVector) { FieldReader innerListReader = listReader.reader(); for (int k = 0; k < i % 13; k++) { innerListReader.next(); - Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue()); + assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); } } } @@ -673,9 +677,9 @@ private void checkUnionList(ListVector listVector) { for (int k = 0; k < i % 13; k++) { innerListReader.next(); if (k % 2 == 0) { - Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue()); + assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); } else { - Assert.assertEquals("record: " + i, k, innerListReader.reader().readLong().longValue()); + assertEquals(k, innerListReader.reader().readLong().longValue(), "record: " + i); } } } @@ -724,11 +728,11 @@ private void checkListMap(ListVector listVector) { UnionMapReader mapReader = (UnionMapReader) listReader.reader(); for (int k = 0; k < i % 13; k++) { mapReader.next(); - Assert.assertEquals("record key: " + i, k, mapReader.key().readInteger().intValue()); + assertEquals(k, mapReader.key().readInteger().intValue(), "record key: " + i); if (k % 2 == 0) { - Assert.assertEquals("record value: " + i, k, mapReader.value().readLong().longValue()); + assertEquals(k, mapReader.value().readLong().longValue(), "record value: " + i); } else { - Assert.assertNull("record value: " + i, mapReader.value().readLong()); + assertNull(mapReader.value().readLong(), "record value: " + i); } } } @@ -772,24 +776,24 @@ public void simpleUnion() throws Exception { for (int i = 0; i < COUNT; i++) { unionReader.setPosition(i); if (i % 5 == 0) { - Assert.assertEquals(i, unionReader.readInteger().intValue()); + assertEquals(i, unionReader.readInteger().intValue()); } else if (i % 5 == 1) { NullableTimeStampMilliTZHolder holder = new NullableTimeStampMilliTZHolder(); unionReader.read(holder); - Assert.assertEquals(i, holder.value); - Assert.assertEquals("AsdfTimeZone", holder.timezone); + assertEquals(i, holder.value); + assertEquals("AsdfTimeZone", holder.timezone); } else if (i % 5 == 2) { NullableDurationHolder holder = new NullableDurationHolder(); unionReader.read(holder); - Assert.assertEquals(i, holder.value); - Assert.assertEquals(TimeUnit.NANOSECOND, holder.unit); + assertEquals(i, holder.value); + assertEquals(TimeUnit.NANOSECOND, holder.unit); } else if (i % 5 == 3) { NullableFixedSizeBinaryHolder holder = new NullableFixedSizeBinaryHolder(); unionReader.read(holder); assertEquals(i, holder.buffer.getInt(0)); assertEquals(4, holder.byteWidth); } else { - Assert.assertEquals((float) i, unionReader.readFloat(), 1e-12); + assertEquals((float) i, unionReader.readFloat(), 1e-12); } } vector.close(); @@ -808,12 +812,12 @@ public void promotableWriter() { bigIntWriter.writeBigInt(i); } Field field = parent.getField().getChildren().get(0).getChildren().get(0); - Assert.assertEquals("a", field.getName()); - Assert.assertEquals(Int.TYPE_TYPE, 
field.getType().getTypeID()); + assertEquals("a", field.getName()); + assertEquals(Int.TYPE_TYPE, field.getType().getTypeID()); Int intType = (Int) field.getType(); - Assert.assertEquals(64, intType.getBitWidth()); - Assert.assertTrue(intType.getIsSigned()); + assertEquals(64, intType.getBitWidth()); + assertTrue(intType.getIsSigned()); for (int i = 100; i < 200; i++) { VarCharWriter varCharWriter = rootWriter.varChar("a"); varCharWriter.setPosition(i); @@ -824,23 +828,23 @@ public void promotableWriter() { tempBuf.close(); } field = parent.getField().getChildren().get(0).getChildren().get(0); - Assert.assertEquals("a", field.getName()); - Assert.assertEquals(Union.TYPE_TYPE, field.getType().getTypeID()); - Assert.assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID()); - Assert.assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID()); + assertEquals("a", field.getName()); + assertEquals(Union.TYPE_TYPE, field.getType().getTypeID()); + assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID()); + assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID()); StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); for (int i = 0; i < 100; i++) { rootReader.setPosition(i); FieldReader reader = rootReader.reader("a"); Long value = reader.readLong(); - Assert.assertNotNull("index: " + i, value); - Assert.assertEquals(i, value.intValue()); + assertNotNull(value, "index: " + i); + assertEquals(i, value.intValue()); } for (int i = 100; i < 200; i++) { rootReader.setPosition(i); FieldReader reader = rootReader.reader("a"); Text value = reader.readText(); - Assert.assertEquals(Integer.toString(i), value.toString()); + assertEquals(Integer.toString(i), value.toString()); } } } @@ -857,14 +861,14 @@ public void promotableWriterSchema() { rootWriter.varChar("a"); Field field = parent.getField().getChildren().get(0).getChildren().get(0); - Assert.assertEquals("a", field.getName()); - Assert.assertEquals(ArrowTypeID.Union, field.getType().getTypeID()); + assertEquals("a", field.getName()); + assertEquals(ArrowTypeID.Union, field.getType().getTypeID()); - Assert.assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID()); + assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID()); Int intType = (Int) field.getChildren().get(0).getType(); - Assert.assertEquals(64, intType.getBitWidth()); - Assert.assertTrue(intType.getIsSigned()); - Assert.assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID()); + assertEquals(64, intType.getBitWidth()); + assertTrue(intType.getIsSigned()); + assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID()); } } @@ -901,18 +905,18 @@ public void structWriterMixedCaseFieldNames() { List fieldsCaseSensitive = parent.getField().getChildren().get(0).getChildren(); Set fieldNamesCaseSensitive = getFieldNames(fieldsCaseSensitive); - Assert.assertEquals(11, fieldNamesCaseSensitive.size()); - Assert.assertTrue(fieldNamesCaseSensitive.contains("int_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("Int_Field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("float_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("Float_Field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field")); - 
Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field")); + assertEquals(11, fieldNamesCaseSensitive.size()); + assertTrue(fieldNamesCaseSensitive.contains("int_field")); + assertTrue(fieldNamesCaseSensitive.contains("Int_Field")); + assertTrue(fieldNamesCaseSensitive.contains("float_field")); + assertTrue(fieldNamesCaseSensitive.contains("Float_Field")); + assertTrue(fieldNamesCaseSensitive.contains("struct_field")); + assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field")); + assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field")); // test case-insensitive StructWriter ComplexWriter writerCaseInsensitive = new ComplexWriterImpl("rootCaseInsensitive", parent, false, false); @@ -932,14 +936,14 @@ public void structWriterMixedCaseFieldNames() { List fieldsCaseInsensitive = parent.getField().getChildren().get(1).getChildren(); Set fieldNamesCaseInsensitive = getFieldNames(fieldsCaseInsensitive); - Assert.assertEquals(7, fieldNamesCaseInsensitive.size()); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("int_field")); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("float_field")); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field")); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); + assertEquals(7, fieldNamesCaseInsensitive.size()); + assertTrue(fieldNamesCaseInsensitive.contains("int_field")); + assertTrue(fieldNamesCaseInsensitive.contains("float_field")); + assertTrue(fieldNamesCaseInsensitive.contains("struct_field")); + assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); } } @@ -976,15 +980,15 @@ public void timeStampSecWriter() throws Exception { FieldReader secReader = rootReader.reader("sec"); secReader.setPosition(0); LocalDateTime secDateTime = secReader.readLocalDateTime(); - Assert.assertEquals(expectedSecDateTime, secDateTime); + assertEquals(expectedSecDateTime, secDateTime); long secLong = secReader.readLong(); - Assert.assertEquals(expectedSecs, secLong); + assertEquals(expectedSecs, secLong); } { FieldReader secTZReader = rootReader.reader("secTZ"); secTZReader.setPosition(1); long secTZLong = secTZReader.readLong(); - Assert.assertEquals(expectedSecs, secTZLong); + assertEquals(expectedSecs, secTZLong); } } } @@ -1022,27 +1026,27 @@ public void timeStampMilliWriters() throws Exception { FieldReader milliReader = rootReader.reader("milli"); milliReader.setPosition(0); LocalDateTime milliDateTime = milliReader.readLocalDateTime(); - 
Assert.assertEquals(expectedMilliDateTime, milliDateTime); + assertEquals(expectedMilliDateTime, milliDateTime); long milliLong = milliReader.readLong(); - Assert.assertEquals(expectedMillis, milliLong); + assertEquals(expectedMillis, milliLong); } { FieldReader milliTZReader = rootReader.reader("milliTZ"); milliTZReader.setPosition(0); long milliTZLong = milliTZReader.readLong(); - Assert.assertEquals(expectedMillis, milliTZLong); + assertEquals(expectedMillis, milliTZLong); } } } private void checkTimestampField(Field field, String name) { - Assert.assertEquals(name, field.getName()); - Assert.assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID()); + assertEquals(name, field.getName()); + assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID()); } private void checkTimestampTZField(Field field, String name, String tz) { checkTimestampField(field, name); - Assert.assertEquals(tz, ((Timestamp) field.getType()).getTimezone()); + assertEquals(tz, ((Timestamp) field.getType()).getTimezone()); } @Test @@ -1079,15 +1083,15 @@ public void timeStampMicroWriters() throws Exception { FieldReader microReader = rootReader.reader("micro"); microReader.setPosition(0); LocalDateTime microDateTime = microReader.readLocalDateTime(); - Assert.assertEquals(expectedMicroDateTime, microDateTime); + assertEquals(expectedMicroDateTime, microDateTime); long microLong = microReader.readLong(); - Assert.assertEquals(expectedMicros, microLong); + assertEquals(expectedMicros, microLong); } { FieldReader microReader = rootReader.reader("microTZ"); microReader.setPosition(1); long microLong = microReader.readLong(); - Assert.assertEquals(expectedMicros, microLong); + assertEquals(expectedMicros, microLong); } } } @@ -1125,18 +1129,18 @@ public void timeStampNanoWriters() throws Exception { FieldReader nanoReader = rootReader.reader("nano"); nanoReader.setPosition(0); LocalDateTime nanoDateTime = nanoReader.readLocalDateTime(); - Assert.assertEquals(expectedNanoDateTime, nanoDateTime); + assertEquals(expectedNanoDateTime, nanoDateTime); long nanoLong = nanoReader.readLong(); - Assert.assertEquals(expectedNanos, nanoLong); + assertEquals(expectedNanos, nanoLong); } { FieldReader nanoReader = rootReader.reader("nanoTZ"); nanoReader.setPosition(0); long nanoLong = nanoReader.readLong(); - Assert.assertEquals(expectedNanos, nanoLong); + assertEquals(expectedNanos, nanoLong); NullableTimeStampNanoTZHolder h = new NullableTimeStampNanoTZHolder(); nanoReader.read(h); - Assert.assertEquals(expectedNanos, h.value); + assertEquals(expectedNanos, h.value); } } @@ -1173,8 +1177,8 @@ public void fixedSizeBinaryWriters() throws Exception { // schema List children = parent.getField().getChildren().get(0).getChildren(); - Assert.assertEquals(fieldName, children.get(0).getName()); - Assert.assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID()); + assertEquals(fieldName, children.get(0).getName()); + assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID()); // read StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); @@ -1183,7 +1187,7 @@ public void fixedSizeBinaryWriters() throws Exception { for (int i = 0; i < numValues; i++) { fixedSizeBinaryReader.setPosition(i); byte[] readValues = fixedSizeBinaryReader.readByteArray(); - Assert.assertArrayEquals(values[i], readValues); + assertArrayEquals(values[i], readValues); } } @@ -1369,17 +1373,17 @@ public void testListWriterWithNulls() { for (int i = 0; i < COUNT; 
i++) { listReader.setPosition(i); if (i % 2 == 0) { - Assert.assertTrue(listReader.isSet()); + assertTrue(listReader.isSet()); listReader.next(); if (i % 4 == 0) { - Assert.assertNull(listReader.reader().readInteger()); + assertNull(listReader.reader().readInteger()); } else { - Assert.assertEquals(i, listReader.reader().readInteger().intValue()); + assertEquals(i, listReader.reader().readInteger().intValue()); listReader.next(); - Assert.assertEquals(i * 2, listReader.reader().readInteger().intValue()); + assertEquals(i * 2, listReader.reader().readInteger().intValue()); } } else { - Assert.assertFalse(listReader.isSet()); + assertFalse(listReader.isSet()); } } } @@ -1419,20 +1423,20 @@ public void testListOfListWriterWithNulls() { for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); if (i % 2 == 0) { - Assert.assertTrue(listReader.isSet()); + assertTrue(listReader.isSet()); listReader.next(); if (i % 4 == 0) { - Assert.assertFalse(listReader.reader().isSet()); + assertFalse(listReader.reader().isSet()); } else { listReader.reader().next(); - Assert.assertFalse(listReader.reader().reader().isSet()); + assertFalse(listReader.reader().reader().isSet()); listReader.reader().next(); - Assert.assertEquals(i, listReader.reader().reader().readInteger().intValue()); + assertEquals(i, listReader.reader().reader().readInteger().intValue()); listReader.reader().next(); - Assert.assertEquals(i * 2, listReader.reader().reader().readInteger().intValue()); + assertEquals(i * 2, listReader.reader().reader().readInteger().intValue()); } } else { - Assert.assertFalse(listReader.isSet()); + assertFalse(listReader.isSet()); } } } @@ -1478,23 +1482,23 @@ public void testListOfListOfListWriterWithNulls() { for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); if (i % 4 == 0) { - Assert.assertFalse(listReader.isSet()); + assertFalse(listReader.isSet()); } else { - Assert.assertTrue(listReader.isSet()); + assertTrue(listReader.isSet()); listReader.next(); if (i % 4 == 1) { - Assert.assertFalse(listReader.reader().isSet()); + assertFalse(listReader.reader().isSet()); } else if (i % 4 == 2) { listReader.reader().next(); - Assert.assertFalse(listReader.reader().reader().isSet()); + assertFalse(listReader.reader().reader().isSet()); } else { listReader.reader().next(); listReader.reader().reader().next(); - Assert.assertFalse(listReader.reader().reader().reader().isSet()); + assertFalse(listReader.reader().reader().reader().isSet()); listReader.reader().reader().next(); - Assert.assertEquals(i, listReader.reader().reader().reader().readInteger().intValue()); + assertEquals(i, listReader.reader().reader().reader().readInteger().intValue()); listReader.reader().reader().next(); - Assert.assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue()); + assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue()); } } } @@ -1507,7 +1511,7 @@ public void testStructOfList() { structVector.addOrGetList("childList1"); NullableStructReaderImpl structReader = structVector.getReader(); FieldReader childListReader = structReader.reader("childList1"); - Assert.assertNotNull(childListReader); + assertNotNull(childListReader); } try (StructVector structVector = StructVector.empty("struct2", allocator)) { @@ -1523,9 +1527,9 @@ public void testStructOfList() { NullableStructReaderImpl structReader = structVector.getReader(); FieldReader childListReader = structReader.reader("childList2"); int size = childListReader.size(); - Assert.assertEquals(1, size); + assertEquals(1, 
size); int data = childListReader.reader().readInteger(); - Assert.assertEquals(10, data); + assertEquals(10, data); } try (StructVector structVector = StructVector.empty("struct3", allocator)) { @@ -1545,9 +1549,9 @@ public void testStructOfList() { structReader.setPosition(3); FieldReader childListReader = structReader.reader("childList3"); int size = childListReader.size(); - Assert.assertEquals(1, size); + assertEquals(1, size); int data = ((List) childListReader.readObject()).get(0); - Assert.assertEquals(3, data); + assertEquals(3, data); } try (StructVector structVector = StructVector.empty("struct4", allocator)) { @@ -1564,7 +1568,7 @@ public void testStructOfList() { structReader.setPosition(3); FieldReader childListReader = structReader.reader("childList4"); int size = childListReader.size(); - Assert.assertEquals(0, size); + assertEquals(0, size); } } @@ -1618,7 +1622,7 @@ public void testMapWithNulls() { mapWriter.endMap(); writer.setValueCount(1); UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); - Assert.assertNull(mapReader.key().readInteger()); + assertNull(mapReader.key().readInteger()); assertEquals(1, mapReader.value().readInteger().intValue()); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index 27b8f1796ee31..f17c370c89522 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -17,6 +17,9 @@ package org.apache.arrow.vector.complex.writer; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.nio.ByteBuffer; import org.apache.arrow.memory.BufferAllocator; @@ -30,21 +33,20 @@ import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestSimpleWriter { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -56,7 +58,7 @@ public void testWriteByteArrayToVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeVarBinary(input); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -67,7 +69,7 @@ public void testWriteByteArrayWithOffsetToVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeVarBinary(input, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -79,7 +81,7 @@ public void testWriteByteBufferToVarBinary() throws Exception { ByteBuffer buffer = ByteBuffer.wrap(input); writer.writeVarBinary(buffer); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -91,7 +93,7 @@ public void testWriteByteBufferWithOffsetToVarBinary() throws Exception { ByteBuffer buffer = 
ByteBuffer.wrap(input); writer.writeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -102,7 +104,7 @@ public void testWriteByteArrayToLargeVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeLargeVarBinary(input); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -113,7 +115,7 @@ public void testWriteByteArrayWithOffsetToLargeVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeLargeVarBinary(input, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -125,7 +127,7 @@ public void testWriteByteBufferToLargeVarBinary() throws Exception { ByteBuffer buffer = ByteBuffer.wrap(input); writer.writeLargeVarBinary(buffer); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -137,7 +139,7 @@ public void testWriteByteBufferWithOffsetToLargeVarBinary() throws Exception { ByteBuffer buffer = ByteBuffer.wrap(input); writer.writeLargeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -148,7 +150,7 @@ public void testWriteStringToVarChar() throws Exception { String input = "testInput"; writer.writeVarChar(input); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } @@ -159,7 +161,7 @@ public void testWriteTextToVarChar() throws Exception { String input = "testInput"; writer.writeVarChar(new Text(input)); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } @@ -170,7 +172,7 @@ public void testWriteStringToLargeVarChar() throws Exception { String input = "testInput"; writer.writeLargeVarChar(input); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } @@ -181,7 +183,7 @@ public void testWriteTextToLargeVarChar() throws Exception { String input = "testInput"; writer.writeLargeVarChar(new Text(input)); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java index de9187edb667e..77eeb3589058d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java @@ -18,9 +18,12 @@ package org.apache.arrow.vector.ipc; import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.math.BigDecimal; @@ -84,9 +87,8 @@ import 
org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,12 +100,12 @@ public class BaseFileTest { protected static final int COUNT = 10; protected BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void tearDown() { allocator.close(); } @@ -150,17 +152,20 @@ protected void writeData(int count, StructVector parent) { protected void validateContent(int count, VectorSchemaRoot root) { for (int i = 0; i < count; i++) { - Assert.assertEquals(i, root.getVector("int").getObject(i)); - Assert.assertEquals((Short) uint1Values[i % uint1Values.length], + assertEquals(i, root.getVector("int").getObject(i)); + assertEquals((Short) uint1Values[i % uint1Values.length], ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i)); - Assert.assertEquals("Failed for index: " + i, (Character) uint2Values[i % uint2Values.length], - (Character) ((UInt2Vector) root.getVector("uint2")).get(i)); - Assert.assertEquals("Failed for index: " + i, (Long) uint4Values[i % uint4Values.length], - ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i)); - Assert.assertEquals("Failed for index: " + i, uint8Values[i % uint8Values.length], - ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i)); - Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - Assert.assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i)); + assertEquals((Character) uint2Values[i % uint2Values.length], + (Character) ((UInt2Vector) root.getVector("uint2")).get(i), + "Failed for index: " + i); + assertEquals((Long) uint4Values[i % uint4Values.length], + ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i), + "Failed for index: " + i); + assertEquals(uint8Values[i % uint8Values.length], + ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i), + "Failed for index: " + i); + assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); + assertEquals(i == 0 ? 
Float.NaN : i, root.getVector("float").getObject(i)); } } @@ -210,23 +215,23 @@ public void printVectors(List vectors) { } protected void validateComplexContent(int count, VectorSchemaRoot root) { - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); printVectors(root.getFieldVectors()); for (int i = 0; i < count; i++) { Object intVal = root.getVector("int").getObject(i); if (i % 5 != 3) { - Assert.assertEquals(i, intVal); + assertEquals(i, intVal); } else { - Assert.assertNull(intVal); + assertNull(intVal); } - Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - Assert.assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size()); + assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); + assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size()); NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); FieldReader structReader = root.getVector("struct").getReader(); structReader.setPosition(i); structReader.reader("timestamp").read(h); - Assert.assertEquals(i, h.value); + assertEquals(i, h.value); } } @@ -235,7 +240,7 @@ private LocalDateTime makeDateTimeFromCount(int i) { } protected void writeDateTimeData(int count, StructVector parent) { - Assert.assertTrue(count < 100); + assertTrue(count < 100); ComplexWriter writer = new ComplexWriterImpl("root", parent); StructWriter rootWriter = writer.rootAsStruct(); DateMilliWriter dateWriter = rootWriter.dateMilli("date"); @@ -268,22 +273,22 @@ protected void writeDateTimeData(int count, StructVector parent) { } protected void validateDateTimeContent(int count, VectorSchemaRoot root) { - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); printVectors(root.getFieldVectors()); for (int i = 0; i < count; i++) { LocalDateTime dt = makeDateTimeFromCount(i); LocalDateTime dtMilli = dt.minusNanos(i); LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i); LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay(); - Assert.assertEquals(dateExpected, dateVal); + assertEquals(dateExpected, dateVal); LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime(); - Assert.assertEquals(dtMilli.toLocalTime(), timeVal); + assertEquals(dtMilli.toLocalTime(), timeVal); Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i); - Assert.assertEquals(dtMilli, timestampMilliVal); + assertEquals(dtMilli, timestampMilliVal); Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i); - Assert.assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal); + assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal); Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i); - Assert.assertEquals(dt, timestampNanoVal); + assertEquals(dt, timestampNanoVal); } } @@ -355,66 +360,66 @@ protected VectorSchemaRoot writeFlatDictionaryData( protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) { FieldVector vector1A = root.getVector("varcharA"); - Assert.assertNotNull(vector1A); + assertNotNull(vector1A); DictionaryEncoding encoding1A = vector1A.getField().getDictionary(); - Assert.assertNotNull(encoding1A); - Assert.assertEquals(1L, encoding1A.getId()); + assertNotNull(encoding1A); + assertEquals(1L, encoding1A.getId()); - Assert.assertEquals(6, vector1A.getValueCount()); - Assert.assertEquals(0, 
vector1A.getObject(0)); - Assert.assertEquals(1, vector1A.getObject(1)); - Assert.assertEquals(null, vector1A.getObject(2)); - Assert.assertEquals(2, vector1A.getObject(3)); - Assert.assertEquals(1, vector1A.getObject(4)); - Assert.assertEquals(2, vector1A.getObject(5)); + assertEquals(6, vector1A.getValueCount()); + assertEquals(0, vector1A.getObject(0)); + assertEquals(1, vector1A.getObject(1)); + assertEquals(null, vector1A.getObject(2)); + assertEquals(2, vector1A.getObject(3)); + assertEquals(1, vector1A.getObject(4)); + assertEquals(2, vector1A.getObject(5)); FieldVector vector1B = root.getVector("varcharB"); - Assert.assertNotNull(vector1B); + assertNotNull(vector1B); DictionaryEncoding encoding1B = vector1A.getField().getDictionary(); - Assert.assertNotNull(encoding1B); - Assert.assertTrue(encoding1A.equals(encoding1B)); - Assert.assertEquals(1L, encoding1B.getId()); - - Assert.assertEquals(6, vector1B.getValueCount()); - Assert.assertEquals(2, vector1B.getObject(0)); - Assert.assertEquals(1, vector1B.getObject(1)); - Assert.assertEquals(2, vector1B.getObject(2)); - Assert.assertEquals(null, vector1B.getObject(3)); - Assert.assertEquals(1, vector1B.getObject(4)); - Assert.assertEquals(0, vector1B.getObject(5)); + assertNotNull(encoding1B); + assertTrue(encoding1A.equals(encoding1B)); + assertEquals(1L, encoding1B.getId()); + + assertEquals(6, vector1B.getValueCount()); + assertEquals(2, vector1B.getObject(0)); + assertEquals(1, vector1B.getObject(1)); + assertEquals(2, vector1B.getObject(2)); + assertEquals(null, vector1B.getObject(3)); + assertEquals(1, vector1B.getObject(4)); + assertEquals(0, vector1B.getObject(5)); FieldVector vector2 = root.getVector("sizes"); - Assert.assertNotNull(vector2); + assertNotNull(vector2); DictionaryEncoding encoding2 = vector2.getField().getDictionary(); - Assert.assertNotNull(encoding2); - Assert.assertEquals(2L, encoding2.getId()); + assertNotNull(encoding2); + assertEquals(2L, encoding2.getId()); - Assert.assertEquals(6, vector2.getValueCount()); - Assert.assertEquals(null, vector2.getObject(0)); - Assert.assertEquals(2, vector2.getObject(1)); - Assert.assertEquals(1, vector2.getObject(2)); - Assert.assertEquals(1, vector2.getObject(3)); - Assert.assertEquals(2, vector2.getObject(4)); - Assert.assertEquals(null, vector2.getObject(5)); + assertEquals(6, vector2.getValueCount()); + assertEquals(null, vector2.getObject(0)); + assertEquals(2, vector2.getObject(1)); + assertEquals(1, vector2.getObject(2)); + assertEquals(1, vector2.getObject(3)); + assertEquals(2, vector2.getObject(4)); + assertEquals(null, vector2.getObject(5)); Dictionary dictionary1 = provider.lookup(1L); - Assert.assertNotNull(dictionary1); + assertNotNull(dictionary1); VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector()); - Assert.assertEquals(3, dictionaryVector.getValueCount()); - Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1)); - Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2)); + assertEquals(3, dictionaryVector.getValueCount()); + assertEquals(new Text("foo"), dictionaryVector.getObject(0)); + assertEquals(new Text("bar"), dictionaryVector.getObject(1)); + assertEquals(new Text("baz"), dictionaryVector.getObject(2)); Dictionary dictionary2 = provider.lookup(2L); - Assert.assertNotNull(dictionary2); + assertNotNull(dictionary2); dictionaryVector = ((VarCharVector) dictionary2.getVector()); - Assert.assertEquals(3, 
dictionaryVector.getValueCount()); - Assert.assertEquals(new Text("micro"), dictionaryVector.getObject(0)); - Assert.assertEquals(new Text("small"), dictionaryVector.getObject(1)); - Assert.assertEquals(new Text("large"), dictionaryVector.getObject(2)); + assertEquals(3, dictionaryVector.getValueCount()); + assertEquals(new Text("micro"), dictionaryVector.getObject(0)); + assertEquals(new Text("small"), dictionaryVector.getObject(1)); + assertEquals(new Text("large"), dictionaryVector.getObject(2)); } protected VectorSchemaRoot writeNestedDictionaryData( @@ -456,26 +461,26 @@ protected VectorSchemaRoot writeNestedDictionaryData( protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) { FieldVector vector = root.getFieldVectors().get(0); - Assert.assertNotNull(vector); - Assert.assertNull(vector.getField().getDictionary()); + assertNotNull(vector); + assertNull(vector.getField().getDictionary()); Field nestedField = vector.getField().getChildren().get(0); DictionaryEncoding encoding = nestedField.getDictionary(); - Assert.assertNotNull(encoding); - Assert.assertEquals(2L, encoding.getId()); - Assert.assertEquals(new ArrowType.Int(32, true), encoding.getIndexType()); + assertNotNull(encoding); + assertEquals(2L, encoding.getId()); + assertEquals(new ArrowType.Int(32, true), encoding.getIndexType()); - Assert.assertEquals(3, vector.getValueCount()); - Assert.assertEquals(Arrays.asList(0, 1), vector.getObject(0)); - Assert.assertEquals(Arrays.asList(0), vector.getObject(1)); - Assert.assertEquals(Arrays.asList(1), vector.getObject(2)); + assertEquals(3, vector.getValueCount()); + assertEquals(Arrays.asList(0, 1), vector.getObject(0)); + assertEquals(Arrays.asList(0), vector.getObject(1)); + assertEquals(Arrays.asList(1), vector.getObject(2)); Dictionary dictionary = provider.lookup(2L); - Assert.assertNotNull(dictionary); + assertNotNull(dictionary); VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector()); - Assert.assertEquals(2, dictionaryVector.getValueCount()); - Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1)); + assertEquals(2, dictionaryVector.getValueCount()); + assertEquals(new Text("foo"), dictionaryVector.getObject(0)); + assertEquals(new Text("bar"), dictionaryVector.getObject(1)); } protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) { @@ -509,26 +514,26 @@ protected void validateDecimalData(VectorSchemaRoot root) { DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2"); DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3"); int count = 10; - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); for (int i = 0; i < count; i++) { // Verify decimal 1 vector BigDecimal readValue = decimalVector1.getObject(i); ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType(); BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale()); - Assert.assertEquals(genValue, readValue); + assertEquals(genValue, readValue); // Verify decimal 2 vector readValue = decimalVector2.getObject(i); type = (ArrowType.Decimal) decimalVector2.getField().getType(); genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale()); - Assert.assertEquals(genValue, readValue); + assertEquals(genValue, readValue); // Verify decimal 3 vector readValue = decimalVector3.getObject(i); type = (ArrowType.Decimal) 
decimalVector3.getField().getType(); genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale()); - Assert.assertEquals(genValue, readValue); + assertEquals(genValue, readValue); } } @@ -558,18 +563,18 @@ public void validateUnionData(int count, VectorSchemaRoot root) { unionReader.setPosition(i); switch (i % 4) { case 0: - Assert.assertEquals(i, unionReader.readInteger().intValue()); + assertEquals(i, unionReader.readInteger().intValue()); break; case 1: - Assert.assertEquals(i, unionReader.readLong().longValue()); + assertEquals(i, unionReader.readLong().longValue()); break; case 2: - Assert.assertEquals(i % 3, unionReader.size()); + assertEquals(i % 3, unionReader.size()); break; case 3: NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); unionReader.reader("timestamp").read(h); - Assert.assertEquals(i, h.value); + assertEquals(i, h.value); break; default: assert false : "Unexpected value in switch statement: " + i; @@ -623,7 +628,7 @@ public void writeUnionData(int count, StructVector parent) { } protected void writeVarBinaryData(int count, StructVector parent) { - Assert.assertTrue(count < 100); + assertTrue(count < 100); ComplexWriter writer = new ComplexWriterImpl("root", parent); StructWriter rootWriter = writer.rootAsStruct(); ListWriter listWriter = rootWriter.list("list"); @@ -642,7 +647,7 @@ protected void writeVarBinaryData(int count, StructVector parent) { } protected void validateVarBinary(int count, VectorSchemaRoot root) { - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); ListVector listVector = (ListVector) root.getVector("list"); byte[] expectedArray = new byte[count]; int numVarBinaryValues = 0; @@ -650,23 +655,23 @@ protected void validateVarBinary(int count, VectorSchemaRoot root) { expectedArray[i] = (byte) i; List objList = listVector.getObject(i); if (i % 3 == 0) { - Assert.assertTrue(objList.isEmpty()); + assertTrue(objList.isEmpty()); } else { byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1); for (int j = 0; j < i % 3; j++) { byte[] result = (byte[]) objList.get(j); - Assert.assertArrayEquals(result, expected); + assertArrayEquals(result, expected); numVarBinaryValues++; } } } // ListVector lastSet should be the index of last value + 1 - Assert.assertEquals(listVector.getLastSet(), count - 1); + assertEquals(listVector.getLastSet(), count - 1); // VarBinaryVector lastSet should be the index of last value VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0); - Assert.assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1); + assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1); } protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) throws IOException { @@ -762,7 +767,7 @@ protected void validateMapData(VectorSchemaRoot root) { MapVector sortedMapVector = (MapVector) root.getVector("mapSorted"); final int count = 10; - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); UnionMapReader mapReader = new UnionMapReader(mapVector); UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector); @@ -833,7 +838,7 @@ protected void validateListAsMapData(VectorSchemaRoot root) { MapVector sortedMapVector = (MapVector) root.getVector("map"); final int count = 10; - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector); 
sortedMapReader.setKeyValueNames("myKey", "myValue"); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java index d3c91fd144356..52d093ae29ebf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; import java.io.FileInputStream; @@ -40,7 +40,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java index 79a4b249a8a89..d5120b70d01e9 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java @@ -19,10 +19,10 @@ import static java.util.Arrays.asList; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -48,9 +48,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; public class MessageSerializerTest { @@ -154,9 +152,6 @@ public void testSchemaDictionaryMessageSerialization() throws IOException { assertEquals(schema, deserialized); } - @Rule - public ExpectedException expectedEx = ExpectedException.none(); - @Test public void testSerializeRecordBatchV4() throws IOException { byte[] validity = new byte[]{(byte) 255, 0}; @@ -243,5 +238,4 @@ public static void verifyBatch(ArrowRecordBatch batch, byte[] validity, byte[] v assertArrayEquals(validity, MessageSerializerTest.array(buffers.get(0))); assertArrayEquals(values, MessageSerializerTest.array(buffers.get(1))); } - } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java index 4fb5822786083..d76e5263122fe 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java +++ 
b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java @@ -19,8 +19,8 @@ import static java.nio.channels.Channels.newChannel; import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -39,7 +39,7 @@ import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.types.pojo.Field; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java index 38c65bddeddea..beb6500ac2ca0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector.ipc; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -32,7 +32,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java index 07875b25029ea..ad9ca50a14979 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java @@ -23,11 +23,12 @@ import static org.apache.arrow.vector.TestUtils.newVarCharVector; import static org.apache.arrow.vector.TestUtils.newVector; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -87,10 +88,9 @@ import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; import org.apache.arrow.vector.util.DictionaryUtility; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestArrowReaderWriter { @@ -109,7 +109,7 @@ public class TestArrowReaderWriter { private Schema schema; private 
Schema encodedSchema; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); @@ -150,7 +150,7 @@ public void init() { new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, /*indexType=*/null)); } - @After + @AfterEach public void terminate() throws Exception { dictionaryVector1.close(); dictionaryVector2.close(); @@ -386,18 +386,17 @@ public void testWriteReadWithStructDictionaries() throws IOException { assertEquals(dictionaryVector4.getValueCount(), readDictionaryVector.getValueCount()); final BiFunction typeComparatorIgnoreName = (v1, v2) -> new TypeEqualsVisitor(v1, false, true).equals(v2); - assertTrue("Dictionary vectors are not equal", - new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector, - typeComparatorIgnoreName) - .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount()))); + assertTrue(new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector, typeComparatorIgnoreName) + .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())), + "Dictionary vectors are not equal"); // Assert the decoded vector is correct try (final ValueVector readVector = DictionaryEncoder.decode(readEncoded, readDictionary)) { assertEquals(vector.getValueCount(), readVector.getValueCount()); - assertTrue("Decoded vectors are not equal", - new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName) - .rangeEquals(new Range(0, 0, vector.getValueCount()))); + assertTrue(new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName) + .rangeEquals(new Range(0, 0, vector.getValueCount())), + "Decoded vectors are not equal"); } } } @@ -986,7 +985,7 @@ public void testFileFooterSizeOverflow() { System.arraycopy(magicBytes, 0, data, footerOffset + 4, ArrowMagic.MAGIC_LENGTH); // test file reader - InvalidArrowFileException e = Assertions.assertThrows(InvalidArrowFileException.class, () -> { + InvalidArrowFileException e = assertThrows(InvalidArrowFileException.class, () -> { try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data)); ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.getVectorSchemaRoot().getSchema(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java index 145bdd588e945..7f3541252772f 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -31,8 +31,7 @@ import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestArrowStream extends BaseFileTest { @Test @@ -44,15 +43,15 @@ public void testEmptyStream() throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out); writer.close(); - Assert.assertTrue(out.size() > 0); + 
assertTrue(out.size() > 0); ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) { assertEquals(schema, reader.getVectorSchemaRoot().getSchema()); // Empty should return false - Assert.assertFalse(reader.loadNextBatch()); + assertFalse(reader.loadNextBatch()); assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - Assert.assertFalse(reader.loadNextBatch()); + assertFalse(reader.loadNextBatch()); assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java index 422a63f57f7d8..4ba11fb05ff5d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java @@ -17,8 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.nio.channels.Pipe; @@ -33,8 +34,7 @@ import org.apache.arrow.vector.ipc.ArrowStreamWriter; import org.apache.arrow.vector.ipc.MessageSerializerTest; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestArrowStreamPipe { Schema schema = MessageSerializerTest.testSchema(); @@ -75,7 +75,7 @@ public void run() { root.close(); } catch (IOException e) { e.printStackTrace(); - Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread + fail(e.toString()); // have to explicitly fail since we're in a separate thread } } @@ -103,14 +103,14 @@ public boolean loadNextBatch() throws IOException { return false; } VectorSchemaRoot root = getVectorSchemaRoot(); - Assert.assertEquals(16, root.getRowCount()); + assertEquals(16, root.getRowCount()); TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - Assert.assertEquals((byte) (batchesRead - 1), vector.get(0)); + assertEquals((byte) (batchesRead - 1), vector.get(0)); for (int i = 1; i < 16; i++) { if (i < 8) { - Assert.assertEquals((byte) (i + 1), vector.get(i)); + assertEquals((byte) (i + 1), vector.get(i)); } else { - Assert.assertTrue(vector.isNull(i)); + assertTrue(vector.isNull(i)); } } @@ -129,7 +129,7 @@ public void run() { reader.close(); } catch (IOException e) { e.printStackTrace(); - Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread + fail(e.toString()); // have to explicitly fail since we're in a separate thread } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java index bd5bd4feabbd4..a90b97310a1cf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; 
+import static org.junit.jupiter.api.Assertions.assertNull; import java.io.File; import java.io.IOException; @@ -43,8 +43,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Validator; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -342,7 +341,7 @@ public void testSetStructLength() throws IOException { // initialize vectors try (VectorSchemaRoot root = reader.read();) { FieldVector vector = root.getVector("struct_nullable"); - Assert.assertEquals(7, vector.getValueCount()); + assertEquals(7, vector.getValueCount()); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java index 5f57e90f6ba19..d1a3a6db0da44 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java @@ -18,12 +18,13 @@ package org.apache.arrow.vector.ipc; import static org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -33,14 +34,13 @@ import java.io.IOException; import java.nio.channels.Channels; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; +import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -68,55 +68,47 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.AfterClass; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) public class TestRoundTrip extends BaseFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(TestRoundTrip.class); private static BufferAllocator allocator; - private final String name; - private final IpcOption writeOption; - public TestRoundTrip(String name, IpcOption writeOption) { - this.name = name; - this.writeOption = writeOption; - } - - @Parameterized.Parameters(name = "options = {0}") - public static Collection getWriteOption() { + 
static Stream getWriteOption() { final IpcOption legacy = new IpcOption(true, MetadataVersion.V4); final IpcOption version4 = new IpcOption(false, MetadataVersion.V4); - return Arrays.asList( + return Stream.of( new Object[] {"V4Legacy", legacy}, new Object[] {"V4", version4}, new Object[] {"V5", IpcOption.DEFAULT} ); } - @BeforeClass + @BeforeAll public static void setUpClass() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @AfterClass + @AfterAll public static void tearDownClass() { allocator.close(); } - @Test - public void testStruct() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testStruct(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { writeData(COUNT, parent); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent.getChild("root")), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -125,13 +117,16 @@ public void testStruct() throws Exception { } } - @Test - public void testComplex() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testComplex(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { writeComplexData(COUNT, parent); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent.getChild("root")), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -140,14 +135,17 @@ public void testComplex() throws Exception { } } - @Test - public void testMultipleRecordBatches() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testMultipleRecordBatches(String name, IpcOption writeOption) throws Exception { int[] counts = {10, 5}; try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { writeData(counts[0], parent); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent.getChild("root")), /* dictionaryProvider */null, (root, writer) -> { @@ -170,9 +168,10 @@ public void testMultipleRecordBatches() throws Exception { } } - @Test - public void testUnionV4() throws Exception { - Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V4); + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testUnionV4(String name, IpcOption writeOption) throws Exception { + assumeTrue(writeOption.metadataVersion == MetadataVersion.V4); final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow"); temp.deleteOnExit(); final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream(); @@ -188,17 +187,18 @@ public void testUnionV4() throws Exception { new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption); } }); - assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata")); + assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage()); e = assertThrows(IllegalArgumentException.class, () -> { new ArrowStreamWriter(root, null, 
Channels.newChannel(memoryStream), writeOption); }); - assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata")); + assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage()); } } - @Test - public void testUnionV5() throws Exception { - Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V5); + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testUnionV5(String name, IpcOption writeOption) throws Exception { + assumeTrue(writeOption.metadataVersion == MetadataVersion.V5); try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { @@ -206,6 +206,8 @@ public void testUnionV5() throws Exception { VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); validateUnionData(COUNT, root); roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -214,8 +216,9 @@ public void testUnionV5() throws Exception { } } - @Test - public void testTiny() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testTiny(String name, IpcOption writeOption) throws Exception { try (final VectorSchemaRoot root = VectorSchemaRoot.create(MessageSerializerTest.testSchema(), allocator)) { root.getFieldVectors().get(0).allocateNew(); int count = 16; @@ -227,6 +230,8 @@ public void testTiny() throws Exception { root.setRowCount(count); roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -247,8 +252,9 @@ private void validateTinyData(int count, VectorSchemaRoot root) { } } - @Test - public void testMetadata() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testMetadata(String name, IpcOption writeOption) throws Exception { List childFields = new ArrayList<>(); childFields.add(new Field("varchar-child", new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata(1)), null)); childFields.add(new Field("float-child", @@ -283,6 +289,8 @@ public void testMetadata() throws Exception { } }; roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -298,14 +306,17 @@ private Map metadata(int i) { return Collections.unmodifiableMap(map); } - @Test - public void testFlatDictionary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testFlatDictionary(String name, IpcOption writeOption) throws Exception { AtomicInteger numDictionaryBlocksWritten = new AtomicInteger(); MapDictionaryProvider provider = new MapDictionaryProvider(); try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final VectorSchemaRoot root = writeFlatDictionaryData(originalVectorAllocator, provider)) { roundTrip( + name, + writeOption, root, provider, (ignored, writer) -> { @@ -339,8 +350,9 @@ public void testFlatDictionary() throws Exception { } } - @Test - public void testNestedDictionary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testNestedDictionary(String name, IpcOption writeOption) throws Exception { AtomicInteger numDictionaryBlocksWritten = new AtomicInteger(); MapDictionaryProvider provider = new 
MapDictionaryProvider(); // data being written: @@ -356,6 +368,8 @@ public void testNestedDictionary() throws Exception { validateNestedDictionary(readRoot, streamReader); }; roundTrip( + name, + writeOption, root, provider, (ignored, writer) -> { @@ -376,8 +390,9 @@ public void testNestedDictionary() throws Exception { } } - @Test - public void testFixedSizeBinary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testFixedSizeBinary(String name, IpcOption writeOption) throws Exception { final int count = 10; final int typeWidth = 11; byte[][] byteValues = new byte[count][typeWidth]; @@ -405,6 +420,8 @@ public void testFixedSizeBinary() throws Exception { parent.setValueCount(count); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -413,8 +430,9 @@ public void testFixedSizeBinary() throws Exception { } } - @Test - public void testFixedSizeList() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testFixedSizeList(String name, IpcOption writeOption) throws Exception { BiConsumer validator = (expectedCount, root) -> { for (int i = 0; i < expectedCount; i++) { assertEquals(Collections2.asImmutableList(i + 0.1f, i + 10.1f), root.getVector("float-pairs") @@ -441,6 +459,8 @@ public void testFixedSizeList() throws Exception { parent.setValueCount(COUNT); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -449,8 +469,9 @@ public void testFixedSizeList() throws Exception { } } - @Test - public void testVarBinary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testVarBinary(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { @@ -459,6 +480,8 @@ public void testVarBinary() throws Exception { validateVarBinary(COUNT, root); roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -467,8 +490,9 @@ public void testVarBinary() throws Exception { } } - @Test - public void testReadWriteMultipleBatches() throws IOException { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testReadWriteMultipleBatches(String name, IpcOption writeOption) throws IOException { File file = new File("target/mytest_nulls_multibatch.arrow"); int numBlocksWritten = 0; @@ -491,12 +515,15 @@ public void testReadWriteMultipleBatches() throws IOException { } } - @Test - public void testMap() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testMap(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final VectorSchemaRoot root = writeMapData(originalVectorAllocator)) { roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -505,12 +532,15 @@ public void testMap() throws Exception { } } - @Test - public void testListAsMap() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void 
testListAsMap(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final VectorSchemaRoot root = writeListAsMapData(originalVectorAllocator)) { roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -539,10 +569,10 @@ private CheckedConsumer validateFileBatches( assertEquals(counts.length, recordBatches.size()); long previousOffset = 0; for (ArrowBlock rbBlock : recordBatches) { - assertTrue(rbBlock.getOffset() + " > " + previousOffset, rbBlock.getOffset() > previousOffset); + assertTrue(rbBlock.getOffset() > previousOffset, rbBlock.getOffset() + " > " + previousOffset); previousOffset = rbBlock.getOffset(); arrowReader.loadRecordBatch(rbBlock); - assertEquals("RB #" + i, counts[i], root.getRowCount()); + assertEquals(counts[i], root.getRowCount(), "RB #" + i); validator.accept(counts[i], root); try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { List buffersLayout = batch.getBuffersLayout(); @@ -566,7 +596,7 @@ private CheckedConsumer validateStreamBatches( for (int n = 0; n < counts.length; n++) { assertTrue(arrowReader.loadNextBatch()); - assertEquals("RB #" + i, counts[i], root.getRowCount()); + assertEquals(counts[i], root.getRowCount(), "RB #" + i); validator.accept(counts[i], root); try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { final List buffersLayout = batch.getBuffersLayout(); @@ -590,7 +620,7 @@ interface CheckedBiConsumer { void accept(T t, U u) throws Exception; } - private void roundTrip(VectorSchemaRoot root, DictionaryProvider provider, + private void roundTrip(String name, IpcOption writeOption, VectorSchemaRoot root, DictionaryProvider provider, CheckedBiConsumer writer, CheckedConsumer fileValidator, CheckedConsumer streamValidator) throws Exception { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java index ac95121eb73f2..db1e787d04d27 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java @@ -18,10 +18,10 @@ package org.apache.arrow.vector.ipc; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -29,9 +29,9 @@ import java.nio.channels.Channels; import java.nio.charset.StandardCharsets; import java.util.Arrays; -import java.util.Collection; import java.util.Map; import java.util.function.ToIntBiFunction; +import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -51,41 +51,34 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.After; -import 
org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; /** * Test the round-trip of dictionary encoding, * with unsigned integer as indices. */ -@RunWith(Parameterized.class) public class TestUIntDictionaryRoundTrip { - private final boolean streamMode; - - public TestUIntDictionaryRoundTrip(boolean streamMode) { - this.streamMode = streamMode; - } - private BufferAllocator allocator; private DictionaryProvider.MapDictionaryProvider dictionaryProvider; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); dictionaryProvider = new DictionaryProvider.MapDictionaryProvider(); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } - private byte[] writeData(FieldVector encodedVector) throws IOException { + private byte[] writeData(boolean streamMode, FieldVector encodedVector) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); VectorSchemaRoot root = new VectorSchemaRoot( @@ -102,6 +95,7 @@ private byte[] writeData(FieldVector encodedVector) throws IOException { } private void readData( + boolean streamMode, byte[] data, Field expectedField, ToIntBiFunction valGetter, @@ -156,8 +150,9 @@ private ValueVector createEncodedVector(int bitWidth, VarCharVector dictionaryVe return field.createVector(allocator); } - @Test - public void testUInt1RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt1RoundTrip(boolean streamMode) throws IOException { final int vectorLength = UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK; try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt1Vector encodedVector1 = (UInt1Vector) createEncodedVector(8, dictionaryVector)) { @@ -170,15 +165,15 @@ public void testUInt1RoundTrip() throws IOException { } encodedVector1.setValueCount(vectorLength); setVector(dictionaryVector, dictionaryItems); - byte[] data = writeData(encodedVector1); - readData( - data, encodedVector1.getField(), (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index), - 8L, indices, dictionaryItems); + byte[] data = writeData(streamMode, encodedVector1); + readData(streamMode, data, encodedVector1.getField(), + (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index), 8L, indices, dictionaryItems); } } - @Test - public void testUInt2RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt2RoundTrip(boolean streamMode) throws IOException { try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt2Vector encodedVector2 = (UInt2Vector) createEncodedVector(16, dictionaryVector)) { int[] indices = new int[]{1, 3, 5, 7, 9, UInt2Vector.MAX_UINT2}; @@ -190,14 +185,15 @@ public void testUInt2RoundTrip() throws IOException { setVector(encodedVector2, (char) 1, (char) 3, (char) 5, (char) 7, (char) 9, UInt2Vector.MAX_UINT2); setVector(dictionaryVector, dictItems); - byte[] data = writeData(encodedVector2); - readData(data, encodedVector2.getField(), (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index), - 16L, indices, dictItems); + byte[] data = 
writeData(streamMode, encodedVector2); + readData(streamMode, data, encodedVector2.getField(), + (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index), 16L, indices, dictItems); } } - @Test - public void testUInt4RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt4RoundTrip(boolean streamMode) throws IOException { final int dictLength = 10; try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt4Vector encodedVector4 = (UInt4Vector) createEncodedVector(32, dictionaryVector)) { @@ -211,14 +207,15 @@ public void testUInt4RoundTrip() throws IOException { setVector(dictionaryVector, dictItems); setVector(encodedVector4, 1, 3, 5, 7, 9); - byte[] data = writeData(encodedVector4); - readData(data, encodedVector4.getField(), (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index), - 32L, indices, dictItems); + byte[] data = writeData(streamMode, encodedVector4); + readData(streamMode, data, encodedVector4.getField(), + (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index), 32L, indices, dictItems); } } - @Test - public void testUInt8RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt8RoundTrip(boolean streamMode) throws IOException { final int dictLength = 10; try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt8Vector encodedVector8 = (UInt8Vector) createEncodedVector(64, dictionaryVector)) { @@ -231,17 +228,16 @@ public void testUInt8RoundTrip() throws IOException { setVector(encodedVector8, 1L, 3L, 5L, 7L, 9L); setVector(dictionaryVector, dictItems); - byte[] data = writeData(encodedVector8); - readData(data, encodedVector8.getField(), (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index), - 64L, indices, dictItems); + byte[] data = writeData(streamMode, encodedVector8); + readData(streamMode, data, encodedVector8.getField(), + (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index), 64L, indices, dictItems); } } - @Parameterized.Parameters(name = "stream mode = {0}") - public static Collection getRepeat() { - return Arrays.asList( - new Object[]{true}, - new Object[]{false} + static Stream getRepeat() { + return Stream.of( + Arguments.of(true), + Arguments.of(false) ); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java index 0505a18484b54..89cbb9f3f1b89 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector.ipc.message; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestMessageMetadataResult { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 5cc0d080053af..925f6ca254544 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -19,8 +19,8 @@ import static 
org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -46,7 +46,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java index 3c075c9293079..369fcc140a1b1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.testing; -import static junit.framework.TestCase.assertTrue; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -56,20 +56,20 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValueVectorPopulator { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 45e6e630792a9..66dc13d6ef545 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -17,7 +17,7 @@ package org.apache.arrow.vector.testing; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 872b2f3934b07..5ebfb62038919 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -17,10 +17,12 @@ package org.apache.arrow.vector.types.pojo; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static 
org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; import java.io.IOException; @@ -51,8 +53,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.util.VectorBatchAppender; import org.apache.arrow.vector.validate.ValidateVectorVisitor; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestExtensionType { /** @@ -85,21 +86,19 @@ public void roundtripUuid() throws IOException { final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.loadNextBatch(); final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - Assert.assertEquals(root.getSchema(), readerRoot.getSchema()); + assertEquals(root.getSchema(), readerRoot.getSchema()); final Field field = readerRoot.getSchema().getFields().get(0); final UuidType expectedType = new UuidType(); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount()); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); for (int i = 0; i < vector.getValueCount(); i++) { - Assert.assertEquals(vector.isNull(i), deserialized.isNull(i)); + assertEquals(vector.isNull(i), deserialized.isNull(i)); if (!vector.isNull(i)) { - Assert.assertEquals(vector.getObject(i), deserialized.getObject(i)); + assertEquals(vector.getObject(i), deserialized.getObject(i)); } } } @@ -138,29 +137,27 @@ public void readUnderlyingType() throws IOException { final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.loadNextBatch(); final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - Assert.assertEquals(1, readerRoot.getSchema().getFields().size()); - Assert.assertEquals("a", readerRoot.getSchema().getFields().get(0).getName()); - Assert.assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary); - Assert.assertEquals(16, + assertEquals(1, readerRoot.getSchema().getFields().size()); + assertEquals("a", readerRoot.getSchema().getFields().get(0).getName()); + assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary); + assertEquals(16, ((ArrowType.FixedSizeBinary) readerRoot.getSchema().getFields().get(0).getType()).getByteWidth()); final Field field = readerRoot.getSchema().getFields().get(0); final UuidType expectedType = new UuidType(); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final FixedSizeBinaryVector deserialized = (FixedSizeBinaryVector) 
readerRoot.getFieldVectors().get(0); - Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount()); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); for (int i = 0; i < vector.getValueCount(); i++) { - Assert.assertEquals(vector.isNull(i), deserialized.isNull(i)); + assertEquals(vector.isNull(i), deserialized.isNull(i)); if (!vector.isNull(i)) { final UUID uuid = vector.getObject(i); final ByteBuffer bb = ByteBuffer.allocate(16); bb.putLong(uuid.getMostSignificantBits()); bb.putLong(uuid.getLeastSignificantBits()); - Assert.assertArrayEquals(bb.array(), deserialized.get(i)); + assertArrayEquals(bb.array(), deserialized.get(i)); } } } @@ -210,26 +207,24 @@ public void roundtripLocation() throws IOException { final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.loadNextBatch(); final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - Assert.assertEquals(root.getSchema(), readerRoot.getSchema()); + assertEquals(root.getSchema(), readerRoot.getSchema()); final Field field = readerRoot.getSchema().getFields().get(0); final LocationType expectedType = new LocationType(); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - Assert.assertTrue(deserialized instanceof LocationVector); - Assert.assertEquals("location", deserialized.getName()); + assertTrue(deserialized instanceof LocationVector); + assertEquals("location", deserialized.getName()); StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector(); - Assert.assertNotNull(deserStruct.getChild("Latitude")); - Assert.assertNotNull(deserStruct.getChild("Longitude")); - Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount()); + assertNotNull(deserStruct.getChild("Latitude")); + assertNotNull(deserStruct.getChild("Longitude")); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); for (int i = 0; i < vector.getValueCount(); i++) { - Assert.assertEquals(vector.isNull(i), deserialized.isNull(i)); + assertEquals(vector.isNull(i), deserialized.isNull(i)); if (!vector.isNull(i)) { - Assert.assertEquals(vector.getObject(i), deserialized.getObject(i)); + assertEquals(vector.getObject(i), deserialized.getObject(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java index bc984fa642d52..8f98a9e9f8b53 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java @@ -19,8 +19,8 @@ import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY; import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.Collections; @@ -28,7 
+28,7 @@ import java.util.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestField { @@ -57,7 +57,7 @@ public void testMetadata() throws IOException { private void jsonContains(String json, String... strings) { for (String string : strings) { - assertTrue(json + " contains " + string, json.contains(string)); + assertTrue(json.contains(string), json + " contains " + string); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java index 7b62247c6e12d..e51e76737dfb7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java @@ -20,8 +20,8 @@ import static java.util.Arrays.asList; import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY; import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.nio.ByteBuffer; @@ -49,7 +49,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchema { @@ -280,13 +280,13 @@ private void validateFieldsHashcode(java.util.List schemaFields, java.uti private void validateHashCode(Object o1, Object o2) { assertEquals(o1, o2); - assertEquals(o1 + " == " + o2, o1.hashCode(), o2.hashCode()); + assertEquals(o1.hashCode(), o2.hashCode(), o1 + " == " + o2); } private void contains(Schema schema, String... 
s) { String json = schema.toJson(); for (String string : s) { - assertTrue(json + " contains " + string, json.contains(string)); + assertTrue(json.contains(string), json + " contains " + string); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java index 804092ed94ac7..21906cb89af24 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java @@ -17,14 +17,15 @@ package org.apache.arrow.vector.util; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.math.BigDecimal; import java.math.BigInteger; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class DecimalUtilityTest { private static final BigInteger[] MAX_BIG_INT = new BigInteger[]{BigInteger.valueOf(10).pow(38) @@ -45,7 +46,7 @@ public void testSetLongInDecimalArrowBuf() { DecimalUtility.writeLongToArrowBuf((long) val, buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } } } @@ -64,7 +65,7 @@ public void testSetByteArrayInDecimalArrowBuf() { DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE}; @@ -73,7 +74,7 @@ public void testSetByteArrayInDecimalArrowBuf() { DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]}; @@ -82,7 +83,7 @@ public void testSetByteArrayInDecimalArrowBuf() { DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = new BigDecimal(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } } } @@ -101,7 +102,7 @@ public void testSetBigDecimalInDecimalArrowBuf() { DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE}; @@ -110,7 +111,7 @@ public void testSetBigDecimalInDecimalArrowBuf() { DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); 
+ assertEquals(expected, actual); } BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]}; @@ -119,7 +120,7 @@ public void testSetBigDecimalInDecimalArrowBuf() { DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = new BigDecimal(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java index 4138ea9d7a181..636de9aab1f2b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.util; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test cases for {@link DataSizeRoundingUtil}. diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java index 1c8281c85981b..fb954413e9f29 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertNull; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import java.nio.charset.StandardCharsets; @@ -27,9 +27,9 @@ import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link ElementAddressableVectorIterator}. 
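The conversions in this patch series all follow the same mechanical JUnit 4 to JUnit 5 recipe: static assertion imports move from org.junit.Assert to org.junit.jupiter.api.Assertions, @Before/@After become @BeforeEach/@AfterEach, and an optional failure message moves from the first assertion argument to the last. The minimal sketch below only illustrates that target shape; the class name, vector, and values are invented for illustration and are not part of the Arrow test suite.

```java
// Illustrative only: a minimal JUnit 5 (Jupiter) test in the style this patch
// converges on. Hypothetical class name and data, not taken from Arrow.
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class ExampleJupiterMigrationTest {

  private BufferAllocator allocator;

  @BeforeEach // was @Before under JUnit 4
  public void setUp() {
    allocator = new RootAllocator(Long.MAX_VALUE);
  }

  @AfterEach // was @After under JUnit 4
  public void tearDown() {
    allocator.close();
  }

  @Test
  public void setsAndReadsValues() {
    try (IntVector vector = new IntVector("ints", allocator)) {
      vector.allocateNew(3);
      for (int i = 0; i < 3; i++) {
        vector.set(i, i * 10);
      }
      vector.setValueCount(3);

      // JUnit 4: assertEquals("row count", 3, vector.getValueCount());
      // JUnit 5: the optional failure message is the trailing argument.
      assertEquals(3, vector.getValueCount(), "row count");
      assertTrue(vector.get(2) > vector.get(0), vector.get(2) + " > " + vector.get(0));
    }
  }
}
```

The same message-position change is what drives diffs such as assertTrue(json.contains(string), json + " contains " + string) in TestField and TestSchema above.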
@@ -40,12 +40,12 @@ public class TestElementAddressableVectorIterator { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(1024 * 1024); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java index edd5221faf268..e0c9031c49b94 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java @@ -18,20 +18,20 @@ package org.apache.arrow.vector.util; import static junit.framework.TestCase.assertNull; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Collection; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestMapWithOrdinal { private MapWithOrdinal map; - @Before + @BeforeEach public void setUp() { map = new MapWithOrdinalImpl<>(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java index ea829060d1c04..0c03f3dfeac46 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java @@ -17,8 +17,11 @@ package org.apache.arrow.vector.util; -import org.junit.Assert; -import org.junit.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; public class TestMultiMapWithOrdinal { @@ -27,33 +30,33 @@ public void test() { MultiMapWithOrdinal map = new MultiMapWithOrdinal<>(); map.put("x", "1", false); - Assert.assertEquals(1, map.size()); + assertEquals(1, map.size()); map.remove("x", "1"); - Assert.assertTrue(map.isEmpty()); + assertTrue(map.isEmpty()); map.put("x", "1", false); map.put("x", "2", false); map.put("y", "0", false); - Assert.assertEquals(3, map.size()); - Assert.assertEquals(2, map.getAll("x").size()); - Assert.assertEquals("1", map.getAll("x").stream().findFirst().get()); - Assert.assertEquals("1", map.getByOrdinal(0)); - Assert.assertEquals("2", map.getByOrdinal(1)); - Assert.assertEquals("0", map.getByOrdinal(2)); - Assert.assertTrue(map.remove("x", "1")); - Assert.assertFalse(map.remove("x", "1")); - Assert.assertEquals("0", map.getByOrdinal(0)); - Assert.assertEquals(2, map.size()); + assertEquals(3, map.size()); + assertEquals(2, map.getAll("x").size()); + assertEquals("1", map.getAll("x").stream().findFirst().get()); + assertEquals("1", map.getByOrdinal(0)); + assertEquals("2", map.getByOrdinal(1)); + assertEquals("0", map.getByOrdinal(2)); + assertTrue(map.remove("x", "1")); + assertFalse(map.remove("x", "1")); + assertEquals("0", map.getByOrdinal(0)); + assertEquals(2, map.size()); map.put("x", "3", true); - Assert.assertEquals(1, map.getAll("x").size()); - Assert.assertEquals("3", map.getAll("x").stream().findFirst().get()); + 
assertEquals(1, map.getAll("x").size()); + assertEquals("3", map.getAll("x").stream().findFirst().get()); map.put("z", "4", false); - Assert.assertEquals(3, map.size()); + assertEquals(3, map.size()); map.put("z", "5", false); map.put("z", "6", false); - Assert.assertEquals(5, map.size()); + assertEquals(5, map.size()); map.removeAll("z"); - Assert.assertEquals(2, map.size()); - Assert.assertFalse(map.containsKey("z")); + assertEquals(2, map.size()); + assertFalse(map.containsKey("z")); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java index f562e63b4bf8d..80420608c3912 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector.util; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotSame; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -31,21 +31,21 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseValueVector; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestReusableByteArray { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { // Permit allocating 4 vectors of max size. 
allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java index 52b6584086832..4375ca6e690b7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector.util; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; @@ -28,7 +28,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.SchemaUtility; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchemaUtil { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java index 2db70ca5d5b8d..0f72ada76f933 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java @@ -18,10 +18,10 @@ package org.apache.arrow.vector.util; import static org.apache.arrow.vector.util.Validator.equalEnough; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestValidator { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index 93e7535947536..45563a69ba9e6 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertEquals; -import static junit.framework.TestCase.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -53,9 +53,9 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link VectorAppender}. @@ -64,13 +64,13 @@ public class TestVectorAppender { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { // Permit allocating 4 vectors of max size. 
allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java index 799c25c0ad71c..193736e70cadf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java @@ -17,15 +17,15 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link VectorBatchAppender}. @@ -34,12 +34,12 @@ public class TestVectorBatchAppender { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(1024 * 1024); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java index 6309d385870c9..82a4589c3ba64 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertEquals; import static org.apache.arrow.vector.util.TestVectorAppender.assertVectorsEqual; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import org.apache.arrow.memory.BufferAllocator; @@ -28,9 +28,9 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link VectorSchemaRootAppender}. 
@@ -39,12 +39,12 @@ public class TestVectorSchemaRootAppender { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(1024 * 1024); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java index 20492036dab99..837b865c30b26 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java @@ -19,8 +19,8 @@ import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; import static org.apache.arrow.vector.util.ValueVectorUtility.validate; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.Charset; import java.util.Arrays; @@ -44,15 +44,15 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValidateVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -62,7 +62,7 @@ public void init() { private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java index ca71a622bb8ea..fcf031fc33824 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java @@ -19,9 +19,9 @@ import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -48,20 +48,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValidateVectorFull { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java 
b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java index 1885fb21f17b6..bdb9ad3e8e530 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java @@ -19,8 +19,8 @@ import static org.apache.arrow.vector.util.ValueVectorUtility.validate; import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -29,20 +29,20 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValidateVectorSchemaRoot { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java index 0ddd790d6ffab..42297e1d37fe0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java @@ -70,9 +70,9 @@ import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link ValidateVectorTypeVisitor}. @@ -83,12 +83,12 @@ public class TestValidateVectorTypeVisitor { private ValidateVectorTypeVisitor visitor = new ValidateVectorTypeVisitor(); - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } From 3999384c3e05ef8ef804ab651e1bebee8bf7670c Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 8 Jun 2024 16:08:01 -0400 Subject: [PATCH 17/59] MINOR: [Python] spell "language" correctly in trove classifier (#42031) ### Rationale for this change Newer (possibly unreleased) version of the Python build tools check that the classifiers are valid and the build failed due to this typo. ### What changes are included in this PR? Fix the spelling of a word ### Are these changes tested? Build will fail without these changes, has no run-time effect. ### Are there any user-facing changes? 
no Authored-by: Thomas A Caswell Signed-off-by: Sutou Kouhei --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index f72c3a91eb436..86a90906d02f9 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -47,7 +47,7 @@ classifiers = [ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', - 'Programming Langauge :: Python :: 3.12', + 'Programming Language :: Python :: 3.12', ] maintainers = [ {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"} From 601be7687ba89f711b876397746b5f49503c0871 Mon Sep 17 00:00:00 2001 From: abandy Date: Sat, 8 Jun 2024 17:25:28 -0400 Subject: [PATCH 18/59] GH-42020: [Swift] Add Arrow decoding implementation for Swift Codable (#42023) ### Rationale for this change This change implements decode for the Arrow Swift Codable implementation. This allows the data in a RecordBatch to be copied to properties in a struct/class. The PR is a bit longer than desired but all three container types are required in order to implement the Decoder protocol. ### What changes are included in this PR? The ArrowDecoder class is included in this PR along with a class for each container type (keyed, unkeyed, and single). Most of the logic is encapsulated in the ArrowDecoder with minimal logic in each container class (Most of the methods in the container classes are a single line that calls the ArrowDecoder doDecode methods) ### Are these changes tested? Yes, a test has been added to test the three types of containers provided by the decoder. * GitHub Issue: #42020 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 347 ++++++++++++++++++ .../Arrow/Tests/ArrowTests/CodableTests.swift | 170 +++++++++ 2 files changed, 517 insertions(+) create mode 100644 swift/Arrow/Sources/Arrow/ArrowDecoder.swift create mode 100644 swift/Arrow/Tests/ArrowTests/CodableTests.swift diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift new file mode 100644 index 0000000000000..7e0c69b1e79e8 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift @@ -0,0 +1,347 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
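// A minimal usage sketch of the decoder introduced by this patch, assuming a
// RecordBatch `rb` whose column names match the Codable type's property names.
// The `Row` struct and its fields below are hypothetical, for illustration only:
//
//   struct Row: Codable {
//       var propInt32: Int32
//       var propString: String
//   }
//
//   let decoder = ArrowDecoder(rb)
//   let rows: [Row] = try decoder.decode(Row.self)  // one Row per record batch row
//
// Plain Int/UInt properties are rejected by this decoder; use the fixed-width
// types (Int8...Int64, UInt8...UInt64) instead.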
+ +import Foundation + +public class ArrowDecoder: Decoder { + var rbIndex: UInt = 0 + public var codingPath: [CodingKey] = [] + public var userInfo: [CodingUserInfoKey: Any] = [:] + public let rb: RecordBatch + public let nameToCol: [String: ArrowArrayHolder] + public let columns: [ArrowArrayHolder] + public init(_ decoder: ArrowDecoder) { + self.userInfo = decoder.userInfo + self.codingPath = decoder.codingPath + self.rb = decoder.rb + self.columns = decoder.columns + self.nameToCol = decoder.nameToCol + self.rbIndex = decoder.rbIndex + } + + public init(_ rb: RecordBatch) { + self.rb = rb + var colMapping = [String: ArrowArrayHolder]() + var columns = [ArrowArrayHolder]() + for index in 0..(_ type: T.Type) throws -> [T] { + var output = [T]() + for index in 0..(keyedBy type: Key.Type + ) -> KeyedDecodingContainer where Key: CodingKey { + let container = ArrowKeyedDecoding(self, codingPath: codingPath) + return KeyedDecodingContainer(container) + } + + public func unkeyedContainer() -> UnkeyedDecodingContainer { + return ArrowUnkeyedDecoding(self, codingPath: codingPath) + } + + public func singleValueContainer() -> SingleValueDecodingContainer { + return ArrowSingleValueDecoding(self, codingPath: codingPath) + } + + func getCol(_ name: String) throws -> AnyArray { + guard let col = self.nameToCol[name] else { + throw ArrowError.invalid("Column for key \"\(name)\" not found") + } + + guard let anyArray = col.array as? AnyArray else { + throw ArrowError.invalid("Unable to convert array to AnyArray") + } + + return anyArray + } + + func getCol(_ index: Int) throws -> AnyArray { + if index >= self.columns.count { + throw ArrowError.outOfBounds(index: Int64(index)) + } + + guard let anyArray = self.columns[index].array as? AnyArray else { + throw ArrowError.invalid("Unable to convert array to AnyArray") + } + + return anyArray + } + + func doDecode(_ key: CodingKey) throws -> T? { + let array: AnyArray = try self.getCol(key.stringValue) + return array.asAny(self.rbIndex) as? T + } + + func doDecode(_ col: Int) throws -> T? { + let array: AnyArray = try self.getCol(col) + return array.asAny(self.rbIndex) as? T + } +} + +private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer { + var codingPath: [CodingKey] + var count: Int? = 0 + var isAtEnd: Bool = false + var currentIndex: Int = 0 + let decoder: ArrowDecoder + + init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) { + self.decoder = decoder + self.codingPath = codingPath + self.count = self.decoder.columns.count + } + + mutating func increment() { + self.currentIndex += 1 + self.isAtEnd = self.currentIndex >= self.count! + } + + mutating func decodeNil() throws -> Bool { + defer {increment()} + return try self.decoder.doDecode(self.currentIndex) == nil + } + + mutating func decode(_ type: T.Type) throws -> T where T: Decodable { + if type == Int8.self || type == Int16.self || + type == Int32.self || type == Int64.self || + type == UInt8.self || type == UInt16.self || + type == UInt32.self || type == UInt64.self || + type == String.self || type == Double.self || + type == Float.self || type == Date.self { + defer {increment()} + return try self.decoder.doDecode(self.currentIndex)! 
+ } else { + throw ArrowError.invalid("Type \(type) is currently not supported") + } + } + + func nestedContainer( + keyedBy type: NestedKey.Type + ) throws -> KeyedDecodingContainer where NestedKey: CodingKey { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func nestedUnkeyedContainer() throws -> UnkeyedDecodingContainer { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func superDecoder() throws -> Decoder { + throw ArrowError.invalid("super decoding is currently not supported.") + } +} + +private struct ArrowKeyedDecoding: KeyedDecodingContainerProtocol { + var codingPath = [CodingKey]() + var allKeys = [Key]() + let decoder: ArrowDecoder + + init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) { + self.decoder = decoder + self.codingPath = codingPath + } + + func contains(_ key: Key) -> Bool { + return self.decoder.nameToCol.keys.contains(key.stringValue) + } + + func decodeNil(forKey key: Key) throws -> Bool { + return try self.decoder.doDecode(key) == nil + } + + func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: String.Type, forKey key: Key) throws -> String { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Double.Type, forKey key: Key) throws -> Double { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Float.Type, forKey key: Key) throws -> Float { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int.Type, forKey key: Key) throws -> Int { + throw ArrowError.invalid( + "Int type is not supported (please use Int8, Int16, Int32 or Int64)") + } + + func decode(_ type: Int8.Type, forKey key: Key) throws -> Int8 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int16.Type, forKey key: Key) throws -> Int16 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int32.Type, forKey key: Key) throws -> Int32 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int64.Type, forKey key: Key) throws -> Int64 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt.Type, forKey key: Key) throws -> UInt { + throw ArrowError.invalid( + "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)") + } + + func decode(_ type: UInt8.Type, forKey key: Key) throws -> UInt8 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt16.Type, forKey key: Key) throws -> UInt16 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt32.Type, forKey key: Key) throws -> UInt32 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt64.Type, forKey key: Key) throws -> UInt64 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable { + if type == Date.self { + return try self.decoder.doDecode(key)! 
+ } else { + throw ArrowError.invalid("Type \(type) is currently not supported") + } + } + + func nestedContainer( + keyedBy type: NestedKey.Type, + forKey key: Key + ) throws -> KeyedDecodingContainer where NestedKey: CodingKey { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func nestedUnkeyedContainer(forKey key: Key) throws -> UnkeyedDecodingContainer { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func superDecoder() throws -> Decoder { + throw ArrowError.invalid("super decoding is currently not supported.") + } + + func superDecoder(forKey key: Key) throws -> Decoder { + throw ArrowError.invalid("super decoding is currently not supported.") + } +} + +private struct ArrowSingleValueDecoding: SingleValueDecodingContainer { + var codingPath = [CodingKey]() + let decoder: ArrowDecoder + + init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) { + self.decoder = decoder + self.codingPath = codingPath + } + + func decodeNil() -> Bool { + do { + return try self.decoder.doDecode(0) == nil + } catch { + return false + } + } + + func decode(_ type: Bool.Type) throws -> Bool { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: String.Type) throws -> String { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Double.Type) throws -> Double { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Float.Type) throws -> Float { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int.Type) throws -> Int { + throw ArrowError.invalid( + "Int type is not supported (please use Int8, Int16, Int32 or Int64)") + } + + func decode(_ type: Int8.Type) throws -> Int8 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int16.Type) throws -> Int16 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int32.Type) throws -> Int32 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int64.Type) throws -> Int64 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt.Type) throws -> UInt { + throw ArrowError.invalid( + "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)") + } + + func decode(_ type: UInt8.Type) throws -> UInt8 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt16.Type) throws -> UInt16 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt32.Type) throws -> UInt32 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt64.Type) throws -> UInt64 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: T.Type) throws -> T where T: Decodable { + if type == Date.self { + return try self.decoder.doDecode(0)! + } else { + throw ArrowError.invalid("Type \(type) is currently not supported") + } + } +} diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift new file mode 100644 index 0000000000000..e7359467ae1c5 --- /dev/null +++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import XCTest +@testable import Arrow + +final class CodableTests: XCTestCase { + public class TestClass: Codable { + public var propBool: Bool + public var propInt8: Int8 + public var propInt16: Int16 + public var propInt32: Int32 + public var propInt64: Int64 + public var propUInt8: UInt8 + public var propUInt16: UInt16 + public var propUInt32: UInt32 + public var propUInt64: UInt64 + public var propFloat: Float + public var propDouble: Double + public var propString: String + public var propDate: Date + + public required init() { + self.propBool = false + self.propInt8 = 1 + self.propInt16 = 2 + self.propInt32 = 3 + self.propInt64 = 4 + self.propUInt8 = 5 + self.propUInt16 = 6 + self.propUInt32 = 7 + self.propUInt64 = 8 + self.propFloat = 9 + self.propDouble = 10 + self.propString = "11" + self.propDate = Date.now + } + } + + func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length + let date1 = Date(timeIntervalSinceReferenceDate: 86400 * 5000 + 352) + + let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder() + let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let int16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let int32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let int64Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint64Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let floatBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let doubleBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + let dateBuilder = try ArrowArrayBuilders.loadDate64ArrayBuilder() + + boolBuilder.append(false, true, false) + int8Builder.append(10, 11, 12) + int16Builder.append(20, 21, 22) + int32Builder.append(30, 31, 32) + int64Builder.append(40, 41, 42) + uint8Builder.append(50, 51, 52) + uint16Builder.append(60, 61, 62) + uint32Builder.append(70, 71, 72) + uint64Builder.append(80, 81, 82) + floatBuilder.append(90.1, 91.1, 92.1) + doubleBuilder.append(100.1, 101.1, 102.1) + stringBuilder.append("test0", "test1", "test2") + dateBuilder.append(date1, date1, date1) + let result = RecordBatch.Builder() + .addColumn("propBool", arrowArray: try boolBuilder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propInt16", arrowArray: try int16Builder.toHolder()) + .addColumn("propInt32", arrowArray: try int32Builder.toHolder()) + .addColumn("propInt64", arrowArray: try int64Builder.toHolder()) + .addColumn("propUInt8", arrowArray: try uint8Builder.toHolder()) + .addColumn("propUInt16", arrowArray: try uint16Builder.toHolder()) + .addColumn("propUInt32", 
arrowArray: try uint32Builder.toHolder()) + .addColumn("propUInt64", arrowArray: try uint64Builder.toHolder()) + .addColumn("propFloat", arrowArray: try floatBuilder.toHolder()) + .addColumn("propDouble", arrowArray: try doubleBuilder.toHolder()) + .addColumn("propString", arrowArray: try stringBuilder.toHolder()) + .addColumn("propDate", arrowArray: try dateBuilder.toHolder()) + .finish() + switch result { + case .success(let rb): + let decoder = ArrowDecoder(rb) + var testClasses = try decoder.decode(TestClass.self) + for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() + int8Builder.append(10, 11, 12, nil) + let result = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .finish() + switch result { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode(Int8?.self) + for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + int8Builder.append(10, 11, 12) + stringBuilder.append("test0", "test1", "test2") + let result = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propString", arrowArray: try stringBuilder.toHolder()) + .finish() + switch result { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode([Int8: String].self) + var index: Int8 = 0 + for data in testData { + let str = data[10 + index] + XCTAssertEqual(str, "test\(index)") + index += 1 + } + case .failure(let err): + throw err + } + } + +} From 399408cb273c47f490f65cdad95bc184a652826c Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 9 Jun 2024 14:50:25 +0900 Subject: [PATCH 19/59] GH-42039: [Docs][Go] Fix broken link (#42040) ### Rationale for this change Fix the broken link to the correct link due to a change in the path. ### What changes are included in this PR? Updating link from the incorrect `go/` path to change in the path. - old link: https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options - new link: https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options ### Are these changes tested? Yes. I have checked the link. ### Are there any user-facing changes? Yes, the updated link will be visible to users. * GitHub Issue: #42039 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- go/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/README.md b/go/README.md index 4f97c49e1c7e8..220b0a230a615 100644 --- a/go/README.md +++ b/go/README.md @@ -48,7 +48,7 @@ func main() { DSN option keys are expressed as `k=v`, delimited with `;`. Some options keys are defined in ADBC, others are defined in the FlightSQL ADBC driver. 
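For illustration of the `k=v;` syntax only, a DSN might look like `uri=grpc+tcp://localhost:12345;some.option=value` (hypothetical keys and address, not taken from this patch or the driver's documented options).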
-- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options) +- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options) - ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158) - FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81) From 7aaea3d9bb65ad37a17a9d3a52341f0fe2478903 Mon Sep 17 00:00:00 2001 From: abandy Date: Sun, 9 Jun 2024 19:55:16 -0400 Subject: [PATCH 20/59] GH-42041: [Swift] Fix nullable type decoder issue (#42043) ### Rationale for this change There is an issue when decoding nullable types. The previous method of checking for nil values always returned false for nullable types due to the ArrowArray types being non-nullable. ### What changes are included in this PR? This PR adds an IsNull method to the ArrowDecoder to be used for null checks. Also, a check for nullable types has been added to the Unkeyed decode method. ### Are these changes tested? Yes, tests have been added/modified to test this fix. * GitHub Issue: #42041 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 31 ++++++-- .../Arrow/Tests/ArrowTests/CodableTests.swift | 73 ++++++++++++++++--- 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift index 7e0c69b1e79e8..9aa8a65137d28 100644 --- a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift @@ -104,6 +104,16 @@ public class ArrowDecoder: Decoder { let array: AnyArray = try self.getCol(col) return array.asAny(self.rbIndex) as?
T } + + func isNull(_ key: CodingKey) throws -> Bool { + let array: AnyArray = try self.getCol(key.stringValue) + return array.asAny(self.rbIndex) == nil + } + + func isNull(_ col: Int) throws -> Bool { + let array: AnyArray = try self.getCol(col) + return array.asAny(self.rbIndex) == nil + } } private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer { @@ -126,11 +136,17 @@ private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer { mutating func decodeNil() throws -> Bool { defer {increment()} - return try self.decoder.doDecode(self.currentIndex) == nil + return try self.decoder.isNull(self.currentIndex) } mutating func decode(_ type: T.Type) throws -> T where T: Decodable { - if type == Int8.self || type == Int16.self || + if type == Int8?.self || type == Int16?.self || + type == Int32?.self || type == Int64?.self || + type == UInt8?.self || type == UInt16?.self || + type == UInt32?.self || type == UInt64?.self || + type == String?.self || type == Double?.self || + type == Float?.self || type == Date?.self || + type == Int8.self || type == Int16.self || type == Int32.self || type == Int64.self || type == UInt8.self || type == UInt16.self || type == UInt32.self || type == UInt64.self || @@ -173,7 +189,7 @@ private struct ArrowKeyedDecoding: KeyedDecodingContainerProtoco } func decodeNil(forKey key: Key) throws -> Bool { - return try self.decoder.doDecode(key) == nil + try self.decoder.isNull(key) } func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool { @@ -273,7 +289,7 @@ private struct ArrowSingleValueDecoding: SingleValueDecodingContainer { func decodeNil() -> Bool { do { - return try self.decoder.doDecode(0) == nil + return try self.decoder.isNull(0) } catch { return false } @@ -338,7 +354,12 @@ private struct ArrowSingleValueDecoding: SingleValueDecodingContainer { } func decode(_ type: T.Type) throws -> T where T: Decodable { - if type == Date.self { + if type == Int8.self || type == Int16.self || + type == Int32.self || type == Int64.self || + type == UInt8.self || type == UInt16.self || + type == UInt32.self || type == UInt64.self || + type == String.self || type == Double.self || + type == Float.self || type == Date.self { return try self.decoder.doDecode(0)! } else { throw ArrowError.invalid("Type \(type) is currently not supported") diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift index e7359467ae1c5..d7d3414cf6250 100644 --- a/swift/Arrow/Tests/ArrowTests/CodableTests.swift +++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift @@ -30,7 +30,7 @@ final class CodableTests: XCTestCase { public var propUInt32: UInt32 public var propUInt64: UInt64 public var propFloat: Float - public var propDouble: Double + public var propDouble: Double? 
public var propString: String public var propDate: Date @@ -53,7 +53,6 @@ final class CodableTests: XCTestCase { func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length let date1 = Date(timeIntervalSinceReferenceDate: 86400 * 5000 + 352) - let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder() let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() let int16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() @@ -78,7 +77,7 @@ final class CodableTests: XCTestCase { uint32Builder.append(70, 71, 72) uint64Builder.append(80, 81, 82) floatBuilder.append(90.1, 91.1, 92.1) - doubleBuilder.append(100.1, 101.1, 102.1) + doubleBuilder.append(101.1, nil, nil) stringBuilder.append("test0", "test1", "test2") dateBuilder.append(date1, date1, date1) let result = RecordBatch.Builder() @@ -102,7 +101,6 @@ final class CodableTests: XCTestCase { var testClasses = try decoder.decode(TestClass.self) for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() - int8Builder.append(10, 11, 12, nil) + int8Builder.append(10, 11, 12) let result = RecordBatch.Builder() .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) .finish() @@ -134,7 +136,28 @@ final class CodableTests: XCTestCase { let testData = try decoder.decode(Int8?.self) for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() + int8WNilBuilder.append(10, nil, 12, nil) + let resultWNil = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8WNilBuilder.toHolder()) + .finish() + switch resultWNil { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode(Int8?.self) + for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() - int8Builder.append(10, 11, 12) - stringBuilder.append("test0", "test1", "test2") + int8Builder.append(10, 11, 12, 13) + stringBuilder.append("test0", "test1", "test2", "test3") let result = RecordBatch.Builder() .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) .addColumn("propString", arrowArray: try stringBuilder.toHolder()) @@ -167,4 +190,32 @@ final class CodableTests: XCTestCase { } } + func testArrowUnkeyedDecoderWithNull() throws { + let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let stringWNilBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + int8Builder.append(10, 11, 12, 13) + stringWNilBuilder.append(nil, "test1", nil, "test3") + let resultWNil = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propString", arrowArray: try stringWNilBuilder.toHolder()) + .finish() + switch resultWNil { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode([Int8: String?].self) + var index: Int8 = 0 + for data in testData { + let str = data[10 + index] + if index % 2 == 0 { + XCTAssertNil(str!) + } else { + XCTAssertEqual(str, "test\(index)") + } + index += 1 + } + case .failure(let err): + throw err + } + + } } From 7c15568aa71c1366af5eadb6140fa445f6ce4cd0 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 10 Jun 2024 09:48:05 +0900 Subject: [PATCH 21/59] GH-42042: [Java] Update Unit Tests for Compressions Module (#42044) ### Rationale for this change Update package from JUnit 4(`org.junit`) to JUnit 5(`org.junit.jupiter`). ### What changes are included in this PR? - [x] Replacing `org.junit` with `org.junit.jupiter.api`. 
- [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports - [x] Updating annotations such as `@ After`. - `@ After` -> `@ AfterEach` - [x] Doing self review ### Are these changes tested? Yes, existing tests have passed. ### Are there any user-facing changes? No. * GitHub Issue: #42042 Authored-by: Hyunseok Seo Signed-off-by: David Li --- .../TestArrowReaderWriterWithCompression.java | 59 ++++++++++--------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java index af28333746290..24d6abf3cb7c3 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java @@ -17,6 +17,11 @@ package org.apache.arrow.compression; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; @@ -46,9 +51,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.After; -import org.junit.Assert; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -67,7 +70,7 @@ public void setup() { root = null; } - @After + @AfterEach public void tearDown() { if (root != null) { root.close(); @@ -134,19 +137,19 @@ public void testArrowFileZstdRoundTrip() throws Exception { try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertEquals(1, reader.getRecordBlocks().size()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } } @@ -158,17 +161,17 @@ public void testArrowStreamZstdRoundTrip() throws Exception { try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - 
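// A condensed view of the JUnit 4 -> JUnit 5 migration pattern that these test
// diffs follow (a generic summary, not code from the patched file): imports switch
// from org.junit.Assert / junit.framework.TestCase to static imports of
// org.junit.jupiter.api.Assertions, @Before/@After become @BeforeEach/@AfterEach,
// and the optional failure message moves from the first argument to the last:
//   assertTrue(message, condition);   // JUnit 4
//   assertTrue(condition, message);   // JUnit 5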
Assert.assertTrue(reader.loadNextBatch()); - Assert.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assert.assertFalse(reader.loadNextBatch()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assert.assertEquals( + assertEquals( "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage() ); @@ -189,19 +192,19 @@ public void testArrowFileZstdRoundTripWithDictionary() throws Exception { try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertEquals(1, reader.getRecordBlocks().size()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } dictionaryVector.close(); @@ -221,17 +224,17 @@ public void testArrowStreamZstdRoundTripWithDictionary() throws Exception { try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } dictionaryVector.close(); From f086b76fdd6bd3693bf3b5c9ac89081772d61e26 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 10 Jun 2024 06:26:39 +0530 Subject: [PATCH 22/59] GH-40819: [Java] 
Adding Spotless to Algorithm module (#41825) ### Rationale for this change Adding code style and formatting options for Algorithm module. ### What changes are included in this PR? Code formatting spotless plugin has been added. ### Are these changes tested? Yes, but doesn't involve test cases, the plugin itself corrects. ### Are there any user-facing changes? No * GitHub Issue: #40819 Lead-authored-by: Vibhatha Abeykoon Co-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: David Li Signed-off-by: David Li --- .gitignore | 4 +- docs/source/developers/java/development.rst | 46 ++- {.mvn => java/.mvn}/develocity.xml | 0 {.mvn => java/.mvn}/extensions.xml | 0 java/algorithm/pom.xml | 7 +- .../deduplicate/DeduplicationUtils.java | 16 +- .../deduplicate/VectorRunDeduplicator.java | 27 +- .../dictionary/DictionaryBuilder.java | 30 +- .../dictionary/DictionaryEncoder.java | 11 +- .../HashTableBasedDictionaryBuilder.java | 34 +-- .../HashTableDictionaryEncoder.java | 69 ++--- .../dictionary/LinearDictionaryEncoder.java | 44 +-- .../dictionary/SearchDictionaryEncoder.java | 41 ++- .../SearchTreeBasedDictionaryBuilder.java | 46 ++- .../arrow/algorithm/misc/PartialSumUtils.java | 41 ++- .../arrow/algorithm/rank/VectorRank.java | 15 +- .../algorithm/search/ParallelSearcher.java | 187 ++++++------ .../algorithm/search/VectorRangeSearcher.java | 213 +++++++------ .../algorithm/search/VectorSearcher.java | 26 +- .../sort/CompositeVectorComparator.java | 17 +- .../sort/DefaultVectorComparators.java | 126 ++++---- .../sort/FixedWidthInPlaceVectorSorter.java | 25 +- .../FixedWidthOutOfPlaceVectorSorter.java | 35 ++- .../sort/GeneralOutOfPlaceVectorSorter.java | 20 +- .../algorithm/sort/InPlaceVectorSorter.java | 7 +- .../arrow/algorithm/sort/IndexSorter.java | 33 +- .../arrow/algorithm/sort/InsertionSorter.java | 23 +- .../arrow/algorithm/sort/OffHeapIntStack.java | 5 +- .../sort/OutOfPlaceVectorSorter.java | 8 +- .../sort/StableVectorComparator.java | 13 +- .../VariableWidthOutOfPlaceVectorSorter.java | 56 ++-- .../algorithm/sort/VectorValueComparator.java | 56 ++-- .../deduplicate/TestDeduplicationUtils.java | 46 +-- .../TestVectorRunDeduplicator.java | 19 +- .../TestHashTableBasedDictionaryBuilder.java | 62 ++-- .../TestHashTableDictionaryEncoder.java | 72 +++-- .../TestLinearDictionaryEncoder.java | 72 +++-- .../TestSearchDictionaryEncoder.java | 84 ++--- .../TestSearchTreeBasedDictionaryBuilder.java | 90 ++++-- .../algorithm/misc/TestPartialSumUtils.java | 18 +- .../arrow/algorithm/rank/TestVectorRank.java | 20 +- .../search/TestParallelSearcher.java | 36 ++- .../search/TestVectorRangeSearcher.java | 30 +- .../algorithm/search/TestVectorSearcher.java | 30 +- .../sort/TestCompositeVectorComparator.java | 18 +- .../sort/TestDefaultVectorComparator.java | 167 ++++++---- .../TestFixedWidthInPlaceVectorSorter.java | 48 ++- .../TestFixedWidthOutOfPlaceVectorSorter.java | 69 +++-- .../algorithm/sort/TestFixedWidthSorting.java | 126 +++++--- .../TestGeneralOutOfPlaceVectorSorter.java | 79 ++--- .../arrow/algorithm/sort/TestIndexSorter.java | 31 +- .../algorithm/sort/TestInsertionSorter.java | 9 +- .../algorithm/sort/TestOffHeapIntStack.java | 5 +- .../sort/TestOutOfPlaceVectorSorter.java | 6 +- .../arrow/algorithm/sort/TestSortingUtil.java | 136 +++++---- .../sort/TestStableVectorComparator.java | 50 +-- ...stVariableWidthOutOfPlaceVectorSorter.java | 40 +-- .../sort/TestVariableWidthSorting.java | 44 +-- java/dev/checkstyle/checkstyle-spotless.xml | 286 ++++++++++++++++++ .../asf-java.license} | 0 
java/dev/license/asf-xml.license | 11 + java/maven/pom.xml | 2 +- java/pom.xml | 22 +- 63 files changed, 1716 insertions(+), 1293 deletions(-) rename {.mvn => java/.mvn}/develocity.xml (100%) rename {.mvn => java/.mvn}/extensions.xml (100%) create mode 100644 java/dev/checkstyle/checkstyle-spotless.xml rename java/dev/{checkstyle/checkstyle.license => license/asf-java.license} (100%) create mode 100644 java/dev/license/asf-xml.license diff --git a/.gitignore b/.gitignore index 3192069d1ac7a..52ffa6c6124c2 100644 --- a/.gitignore +++ b/.gitignore @@ -102,8 +102,8 @@ __debug_bin .envrc # Develocity -.mvn/.gradle-enterprise/ -.mvn/.develocity/ +java/.mvn/.gradle-enterprise/ +java/.mvn/.develocity/ # rat filtered_rat.txt diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst index 9f78eccf6c525..dd1839257a30e 100644 --- a/docs/source/developers/java/development.rst +++ b/docs/source/developers/java/development.rst @@ -110,7 +110,46 @@ integration tests, you would do: Code Style ========== -Java code style is enforced with Checkstyle. The configuration is located at `checkstyle`_. +The current Java code follows the `Google Java Style`_ with Apache license headers. + +Java code style is checked by `Spotless`_ during the build, and the continuous integration build will verify +that changes adhere to the style guide. + +Automatically fixing code style issues +-------------------------------------- + +- You can check the style without building the project with ``mvn spotless:check``. +- You can autoformat the source with ``mvn spotless:apply``. + +Example: + +.. code-block:: bash + + The following files had format violations: + src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java + @@ -15,7 +15,6 @@ + ·*·limitations·under·the·License. + ·*/ + + - + package·org.apache.arrow.algorithm.rank; + + import·java.util.stream.IntStream; + Run 'mvn spotless:apply' to fix these violations. + +Code Formatter for Intellij IDEA and Eclipse +-------------------------------------------- + +Follow the instructions to set up google-java-format for: + +- `Eclipse`_ +- `IntelliJ`_ + + +Checkstyle +---------- + +Checkstyle is also used for general linting. The configuration is located at `checkstyle`_. You can also just check the style without building the project. This checks the code style of all source code under the current directory or from within an individual module. @@ -137,7 +176,10 @@ This applies the style to all pom.xml files under the current directory or from .. _conbench: https://github.com/conbench/conbench .. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml .. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention - +.. _Spotless: https://github.com/diffplug/spotless +.. _Google Java Style: https://google.github.io/styleguide/javaguide.html +.. _Eclipse: https://github.com/google/google-java-format?tab=readme-ov-file#eclipse +.. 
_IntelliJ: https://github.com/google/google-java-format?tab=readme-ov-file#intellij-android-studio-and-other-jetbrains-ides Build Caching ============= diff --git a/.mvn/develocity.xml b/java/.mvn/develocity.xml similarity index 100% rename from .mvn/develocity.xml rename to java/.mvn/develocity.xml diff --git a/.mvn/extensions.xml b/java/.mvn/extensions.xml similarity index 100% rename from .mvn/extensions.xml rename to java/.mvn/extensions.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 0854da48b718a..5984cce766d9e 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -20,6 +20,11 @@ Arrow Algorithms (Experimental/Contrib) A collection of algorithms for working with ValueVectors. + + dev/checkstyle/checkstyle-spotless.xml + none + + org.apache.arrow @@ -47,6 +52,4 @@ value-annotations - - diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java index 8811e43d3d08d..e9364b2a85b7b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,18 @@ import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.util.DataSizeRoundingUtil; -/** - * Utilities for vector deduplication. - */ +/** Utilities for vector deduplication. */ class DeduplicationUtils { /** * Gets the start positions of the first distinct values in a vector. + * * @param vector the target vector. * @param runStarts the bit set to hold the start positions. * @param vector type. */ - public static void populateRunStartIndicators(V vector, ArrowBuf runStarts) { + public static void populateRunStartIndicators( + V vector, ArrowBuf runStarts) { int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount()); Preconditions.checkArgument(runStarts.capacity() >= bufSize); runStarts.setZero(0, bufSize); @@ -55,6 +54,7 @@ public static void populateRunStartIndicators(V vector, /** * Gets the run lengths, given the start positions. + * * @param runStarts the bit set for start positions. * @param runLengths the run length vector to populate. * @param valueCount the number of values in the bit set. @@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths, } /** - * Gets distinct values from the input vector by removing adjacent - * duplicated values. + * Gets distinct values from the input vector by removing adjacent duplicated values. + * * @param indicators the bit set containing the start positions of distinct values. * @param inputVector the input vector. * @param outputVector the output vector. * @param vector type. 
*/ public static void populateDeduplicatedValues( - ArrowBuf indicators, V inputVector, V outputVector) { + ArrowBuf indicators, V inputVector, V outputVector) { int dstIdx = 0; for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) { if (BitVectorHelper.get(indicators, srcIdx) != 0) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java index 5ef03cbe4a734..4e49de14f5956 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,29 +25,28 @@ import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** - * Remove adjacent equal elements from a vector. - * If the vector is sorted, it removes all duplicated values in the vector. + * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated + * values in the vector. + * * @param vector type. */ public class VectorRunDeduplicator implements AutoCloseable { /** - * Bit set for distinct values. - * If the value at some index is not equal to the previous value, - * its bit is set to 1, otherwise its bit is set to 0. + * Bit set for distinct values. If the value at some index is not equal to the previous value, its + * bit is set to 1, otherwise its bit is set to 0. */ private ArrowBuf distinctValueBuffer; - /** - * The vector to deduplicate. - */ + /** The vector to deduplicate. */ private final V vector; private final BufferAllocator allocator; /** * Constructs a vector run deduplicator for a given vector. - * @param vector the vector to deduplicate. Ownership is NOT taken. + * + * @param vector the vector to deduplicate. Ownership is NOT taken. * @param allocator the allocator used for allocating buffers for start indices. */ public VectorRunDeduplicator(V vector, BufferAllocator allocator) { @@ -65,17 +63,20 @@ private void createDistinctValueBuffer() { /** * Gets the number of values which are different from their predecessor. + * * @return the run count. */ public int getRunCount() { if (distinctValueBuffer == null) { createDistinctValueBuffer(); } - return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); + return vector.getValueCount() + - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); } /** * Gets the vector with deduplicated adjacent values removed. + * * @param outVector the output vector. */ public void populateDeduplicatedValues(V outVector) { @@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) { /** * Gets the length of each distinct value. + * * @param lengthVector the vector for holding length values. 
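// Illustrative usage sketch, not part of this patch: removing adjacent duplicates with
// VectorRunDeduplicator. The vector names and sample values below are invented for the example;
// the classes come from org.apache.arrow.memory, org.apache.arrow.vector and
// org.apache.arrow.algorithm.deduplicate.
try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
    IntVector input = new IntVector("input", allocator);
    IntVector deduplicated = new IntVector("deduplicated", allocator);
    IntVector runLengths = new IntVector("runLengths", allocator)) {
  int[] values = {1, 1, 2, 2, 2, 3};
  input.allocateNew(values.length);
  for (int i = 0; i < values.length; i++) {
    input.set(i, values[i]);
  }
  input.setValueCount(values.length);

  try (VectorRunDeduplicator<IntVector> deduplicator =
      new VectorRunDeduplicator<>(input, allocator)) {
    int runCount = deduplicator.getRunCount(); // 3: the runs are 1, 2, 3
    deduplicated.allocateNew(runCount);
    runLengths.allocateNew(runCount);
    deduplicator.populateDeduplicatedValues(deduplicated); // 1, 2, 3
    deduplicator.populateRunLengths(runLengths); // 2, 3, 1
  }
}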
*/ public void populateRunLengths(IntVector lengthVector) { @@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) { createDistinctValueBuffer(); } - DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount()); + DeduplicationUtils.populateRunLengths( + distinctValueBuffer, lengthVector, vector.getValueCount()); } @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java index 398368d1fc612..88c4e4dc65450 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java @@ -14,33 +14,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.ValueVector; /** - * A dictionary builder is intended for the scenario frequently encountered in practice: - * the dictionary is not known a priori, so it is generated dynamically. - * In particular, when a new value arrives, it is tested to check if it is already - * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary. - *

- * <p>
- * The dictionary builder is intended to build a single dictionary. - * So it cannot be used for different dictionaries. - *
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the + * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value + * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected, + * otherwise, it is added to the dictionary. + *
+ * <p>The dictionary builder is intended to build a single dictionary. So it cannot be used for + * different dictionaries. + *
 * <p>Below gives the sample code for using the dictionary builder
+ *
 * <pre>{@code
  * DictionaryBuilder dictionaryBuilder = ...
  * ...
  * dictionaryBuild.addValue(newValue);
  * ...
  * }</pre>
- *
- *
- * With the above code, the dictionary vector will be populated, - * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method. - * After that, dictionary encoding can proceed with the populated dictionary.. - *
+ *
With the above code, the dictionary vector will be populated, and it can be retrieved by the + * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed + * with the populated dictionary.. * * @param the dictionary vector type. */ @@ -58,7 +56,7 @@ public interface DictionaryBuilder { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ int addValue(V targetVector, int targetIndex); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java index cda7b3bf9540e..16e27c3a23e72 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -22,8 +21,9 @@ /** * A dictionary encoder translates one vector into another one based on a dictionary vector. - * According to Arrow specification, the encoded vector must be an integer based vector, which - * is the index of the original vector element in the dictionary. + * According to Arrow specification, the encoded vector must be an integer based vector, which is + * the index of the original vector element in the dictionary. + * * @param type of the encoded vector. * @param type of the vector to encode. It is also the type of the dictionary vector. */ @@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type. */ -public class HashTableBasedDictionaryBuilder implements DictionaryBuilder { +public class HashTableBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The hasher used for calculating the hash code. - */ + /** The hasher used for calculating the hash code. */ private final ArrowBufHasher hasher; - /** - * Next pointer to try to add to the hash table. - */ + /** Next pointer to try to add to the hash table. */ private ArrowBufPointer nextPointer; /** @@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) { * * @param dictionary the dictionary to populate. * @param encodeNull if null values should be added to the dictionary. - * @param hasher the hasher used to compute the hash code. + * @param hasher the hasher used to compute the hash code. 
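// Illustrative usage sketch, not part of this patch: building a dictionary on the fly with
// HashTableBasedDictionaryBuilder. The vectors and sample strings are invented for the example.
try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
    VarCharVector dictionary = new VarCharVector("dictionary", allocator);
    VarCharVector data = new VarCharVector("data", allocator)) {
  data.allocateNew();
  data.setSafe(0, "apple".getBytes(StandardCharsets.UTF_8));
  data.setSafe(1, "banana".getBytes(StandardCharsets.UTF_8));
  data.setSafe(2, "apple".getBytes(StandardCharsets.UTF_8));
  data.setValueCount(3);

  HashTableBasedDictionaryBuilder<VarCharVector> builder =
      new HashTableBasedDictionaryBuilder<>(dictionary, /*encodeNull=*/ false);
  int added = builder.addValues(data); // 2 distinct entries: "apple", "banana"
  VarCharVector populated = builder.getDictionary(); // the same vector passed to the constructor
}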
*/ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) { this.dictionary = dictionary; @@ -125,7 +115,7 @@ public int addValues(V targetVector) { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java index bea1a784c3d6a..ac7a7d32bf597 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.HashMap; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.memory.util.hash.SimpleHasher; @@ -27,43 +25,35 @@ /** * Dictionary encoder based on hash table. + * * @param encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class HashTableDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding/decoding. - * It must be sorted. - */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The hasher used to compute the hash code. - */ + /** The hasher used to compute the hash code. */ private final ArrowBufHasher hasher; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The pointer used to probe each element to encode. - */ + /** The pointer used to probe each element to encode. */ private ArrowBufPointer reusablePointer; /** * Constructs a dictionary encoder. - * @param dictionary the dictionary. * + * @param dictionary the dictionary. */ public HashTableDictionaryEncoder(D dictionary) { this(dictionary, false); @@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding/decoding. - *

- * <li>
- * For encoding, when a null is encountered in the input, - * 1) If the flag is set to true, - * the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. - *
- * <li>
- * For decoding, when a null is encountered in the input, - * 1) If the flag is set to true, the decoder should never expect a null in the input. - * 2) If set to false, the decoder simply produces a null in the output. - *
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding/decoding. + *
+ * <li>For encoding, when a null is encountered in the input, 1) If the flag is set to true, + * the encoder searches for the value in the dictionary, and outputs the index in the + * dictionary. 2) If the flag is set to false, the encoder simply produces a null in the + * output. + *
+ * <li>For decoding, when a null is encountered in the input, 1) If the flag is set to true, + * the decoder should never expect a null in the input. 2) If set to false, the decoder + * simply produces a null in the output.
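// Illustrative usage sketch, not part of this patch: encoding against an existing dictionary with
// HashTableDictionaryEncoder. It assumes `dictionary` already holds the distinct values (for
// instance, populated by a DictionaryBuilder) and `data` is the VarCharVector to encode, both
// allocated from `allocator`; with encodeNull=false a null input slot simply stays null.
HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
    new HashTableDictionaryEncoder<>(dictionary, /*encodeNull=*/ false);
try (IntVector encoded = new IntVector("encoded", allocator)) {
  encoded.allocateNew(data.getValueCount());
  encoder.encode(data, encoded);
  encoded.setValueCount(data.getValueCount());
  // encoded.get(i) is the position of data's i-th value inside the dictionary vector.
}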
- */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The criteria by which the dictionary is sorted. - */ + /** The criteria by which the dictionary is sorted. */ private final VectorValueComparator comparator; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. */ @@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. */ - public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchDictionaryEncoder( + D dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; } /** - * Encodes an input vector by binary search. - * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector, - * and m is the length of the dictionary. + * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is + * the length of the input vector, and m is the length of the dictionary. + * * @param input the input vector. - * @param output the output vector. Note that it must be in a fresh state. At least, - * all its validity bits should be clear. + * @param output the output vector. Note that it must be in a fresh state. At least, all its + * validity bits should be clear. */ @Override public void encode(D input, E output) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java index f9cd77daa2e76..fca7df067dcff 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java @@ -14,45 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.TreeSet; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; /** - * This class builds the dictionary based on a binary search tree. 
- * Each add operation can be finished in O(log(n)) time, - * where n is the current dictionary size. + * This class builds the dictionary based on a binary search tree. Each add operation can be + * finished in O(log(n)) time, where n is the current dictionary size. * * @param the dictionary vector type. */ -public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder { +public class SearchTreeBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * The criteria for sorting in the search tree. - */ + /** The criteria for sorting in the search tree. */ protected final VectorValueComparator comparator; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; - /** - * The search tree for storing the value index. - */ + /** The search tree for storing the value index. */ private TreeSet searchTree; /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. */ @@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. * @param encodeNull if null values should be added to the dictionary. */ - public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchTreeBasedDictionaryBuilder( + V dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; @@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c } /** - * Gets the dictionary built. - * Please note that the dictionary is not in sorted order. - * Instead, its order is determined by the order of element insertion. - * To get the dictionary in sorted order, please use - * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its + * order is determined by the order of element insertion. To get the dictionary in sorted order, + * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * * @return the dictionary. */ @Override @@ -90,6 +82,7 @@ public V getDictionary() { /** * Try to add all values from the target vector to the dictionary. + * * @param targetVector the target vector containing values to probe. * @return the number of values actually added to the dictionary. */ @@ -107,6 +100,7 @@ public int addValues(V targetVector) { /** * Try to add an element from the target vector to the dictionary. + * * @param targetVector the target vector containing new element. * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. @@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) { } /** - * Gets the sorted dictionary. - * Note that given the binary search tree, the sort can finish in O(n). + * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in + * O(n). 
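// Illustrative usage sketch, not part of this patch: the search-tree based builder can emit its
// entries in comparator order. `data` and `allocator` are assumed to exist as in the earlier
// sketches; the dictionary vectors here are created just for this example.
try (VarCharVector treeDictionary = new VarCharVector("treeDictionary", allocator);
    VarCharVector sortedDictionary = new VarCharVector("sortedDictionary", allocator)) {
  VectorValueComparator<VarCharVector> comparator =
      DefaultVectorComparators.createDefaultComparator(treeDictionary);
  SearchTreeBasedDictionaryBuilder<VarCharVector> builder =
      new SearchTreeBasedDictionaryBuilder<>(treeDictionary, comparator, /*encodeNull=*/ false);
  builder.addValues(data);
  builder.populateSortedDictionary(sortedDictionary); // entries emitted in comparator order
}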
*/ public void populateSortedDictionary(V sortedDictionary) { int idx = 0; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java index f5e95cf1033f5..5492676af1a2e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java @@ -14,26 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import org.apache.arrow.vector.BaseIntVector; -/** - * Partial sum related utilities. - */ +/** Partial sum related utilities. */ public class PartialSumUtils { /** - * Converts an input vector to a partial sum vector. - * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. - * Suppose we have input vector a and output vector b. - * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * Converts an input vector to a partial sum vector. This is an inverse operation of {@link + * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a + * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * * @param deltaVector the input vector. * @param partialSumVector the output vector. * @param sumBase the base of the partial sums. */ - public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { + public static void toPartialSumVector( + BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { long sum = sumBase; partialSumVector.setWithPossibleTruncate(0, sumBase); @@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p } /** - * Converts an input vector to the delta vector. - * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. - * Suppose we have input vector a and output vector b. - * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * Converts an input vector to the delta vector. This is an inverse operation of {@link + * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input + * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * * @param partialSumVector the input vector. * @param deltaVector the output vector. */ @@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d } /** - * Given a value and a partial sum vector, finds its position in the partial sum vector. - * In particular, given an integer value a and partial sum vector v, we try to find a - * position i, so that v(i) <= a < v(i + 1). - * The algorithm is based on binary search, so it takes O(log(n)) time, where n is - * the length of the partial sum vector. + * Given a value and a partial sum vector, finds its position in the partial sum vector. In + * particular, given an integer value a and partial sum vector v, we try to find a position i, so + * that v(i) <= a < v(i + 1). The algorithm is based on binary search, so it takes O(log(n)) time, + * where n is the length of the partial sum vector. + * * @param partialSumVector the input partial sum vector. * @param value the value to search. 
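// Illustrative worked example, not part of this patch: with deltas [3, 1, 4] and sumBase 0 the
// partial sums are [0, 3, 4, 8], and looking up value 5 returns position 2 because
// partialSums[2] <= 5 < partialSums[3]. The vectors below are invented for the example.
try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
    IntVector deltas = new IntVector("deltas", allocator);
    IntVector partialSums = new IntVector("partialSums", allocator)) {
  deltas.allocateNew(3);
  deltas.set(0, 3);
  deltas.set(1, 1);
  deltas.set(2, 4);
  deltas.setValueCount(3);

  partialSums.allocateNew(deltas.getValueCount() + 1); // one more slot than the delta vector
  PartialSumUtils.toPartialSumVector(deltas, partialSums, 0L); // 0, 3, 4, 8

  int position = PartialSumUtils.findPositionInPartialSumVector(partialSums, 5L); // 2
}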
* @return the position in the partial sum vector, if any, or -1, if none is found. */ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) { - if (value < partialSumVector.getValueAsLong(0) || - value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { + if (value < partialSumVector.getValueAsLong(0) + || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { return -1; } @@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, throw new IllegalStateException("Should never get here"); } - private PartialSumUtils() { - } + private PartialSumUtils() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java index 43c9a5b010e8c..baa2058ffc51f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import java.util.stream.IntStream; - import org.apache.arrow.algorithm.sort.IndexSorter; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -28,21 +26,21 @@ /** * Utility for calculating ranks of vector elements. + * * @param the vector type */ public class VectorRank { private VectorValueComparator comparator; - /** - * Vector indices. - */ + /** Vector indices. */ private IntVector indices; private final BufferAllocator allocator; /** * Constructs a vector rank utility. + * * @param allocator the allocator to use. */ public VectorRank(BufferAllocator allocator) { @@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) { } /** - * Given a rank r, gets the index of the element that is the rth smallest in the vector. - * The operation is performed without changing the vector, and takes O(n) time, - * where n is the length of the vector. + * Given a rank r, gets the index of the element that is the rth smallest in the vector. The + * operation is performed without changing the vector, and takes O(n) time, where n is the length + * of the vector. + * * @param vector the vector from which to get the element index. * @param comparator the criteria for vector element comparison. * @param rank the rank to determine. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java index 6226921b22ed6..6a48019edc3eb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java @@ -14,49 +14,40 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; /** - * Search for a value in the vector by multiple threads. - * This is often used in scenarios where the vector is large or - * low response time is required. + * Search for a value in the vector by multiple threads. This is often used in scenarios where the + * vector is large or low response time is required. + * * @param the vector type. */ public class ParallelSearcher { - /** - * The target vector to search. - */ + /** The target vector to search. */ private final V vector; - /** - * The thread pool. - */ + /** The thread pool. */ private final ExecutorService threadPool; - /** - * The number of threads to use. - */ + /** The number of threads to use. */ private final int numThreads; - /** - * The position of the key in the target vector, if any. - */ + /** The position of the key in the target vector, if any. */ private volatile int keyPosition = -1; /** * Constructs a parallel searcher. + * * @param vector the vector to search. * @param threadPool the thread pool to use. * @param numThreads the number of threads to use. @@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() { } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise - * comparison: equal and un-equal. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal + * and un-equal. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. * @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. 
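// Illustrative usage sketch, not part of this patch: searching a vector with several worker
// threads. `largeVector` and `keyVector` are assumed to be pre-populated IntVectors, and the
// enclosing method is assumed to declare throws ExecutionException, InterruptedException.
ExecutorService threadPool = Executors.newFixedThreadPool(4);
try {
  ParallelSearcher<IntVector> searcher = new ParallelSearcher<>(largeVector, threadPool, 4);
  int position = searcher.search(keyVector, 0); // -1 if no element equals keyVector's value at 0
} finally {
  threadPool.shutdown();
}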
*/ @@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); - Range range = new Range(0, 0, 1); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - range.setLeftStart(pos).setRightStart(keyIndex); - if (visitor.rangeEquals(range)) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. + futures[tid].complete(false); + return; + } + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); + Range range = new Range(0, 0, 1); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + range.setLeftStart(pos).setRightStart(keyIndex); + if (visitor.rangeEquals(range)) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); @@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link VectorValueComparator}, so there are three possible results for each element-wise - * comparison: less than, equal to and greater than. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * VectorValueComparator}, so there are three possible results for each element-wise comparison: + * less than, equal to and greater than. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. * @param comparator the comparator for comparing the key against vector elements. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. 
* @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. */ - public int search( - V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException { + public int search(V keyVector, int keyIndex, VectorValueComparator comparator) + throws ExecutionException, InterruptedException { final CompletableFuture[] futures = initSearch(); final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - VectorValueComparator localComparator = comparator.createNew(); - localComparator.attachVectors(vector, keyVector); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - if (localComparator.compare(pos, keyIndex) == 0) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. + futures[tid].complete(false); + return; + } + + VectorValueComparator localComparator = comparator.createNew(); + localComparator.attachVectors(vector, keyVector); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + if (localComparator.compare(pos, keyIndex) == 0) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java index 249194843f101..c7905dd8956c8 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java @@ -1,108 +1,105 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.arrow.algorithm.search; - -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** - * Search for the range of a particular element in the target vector. - */ -public class VectorRangeSearcher { - - /** - * Result returned when a search fails. - */ - public static final int SEARCH_FAIL_RESULT = -1; - - /** - * Search for the first occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the first matched element if any, and -1 otherwise. - */ - public static int getFirstMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found - // continue to go left-ward - ret = mid; - high = mid - 1; - } - } - return ret; - } - - /** - * Search for the last occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the last matched element if any, and -1 otherwise. - */ - public static int getLastMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found, - // continue to go right-ward - ret = mid; - low = mid + 1; - } - } - return ret; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.algorithm.search; + +import org.apache.arrow.algorithm.sort.VectorValueComparator; +import org.apache.arrow.vector.ValueVector; + +/** Search for the range of a particular element in the target vector. */ +public class VectorRangeSearcher { + + /** Result returned when a search fails. */ + public static final int SEARCH_FAIL_RESULT = -1; + + /** + * Search for the first occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the first matched element if any, and -1 otherwise. + */ + public static int getFirstMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found + // continue to go left-ward + ret = mid; + high = mid - 1; + } + } + return ret; + } + + /** + * Search for the last occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the last matched element if any, and -1 otherwise. + */ + public static int getLastMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found, + // continue to go right-ward + ret = mid; + low = mid + 1; + } + } + return ret; + } +} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java index 646bca01bb81d..dd0b4de5d8677 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; -/** - * Search for a particular element in the vector. - */ +/** Search for a particular element in the vector. 
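// Illustrative usage sketch, not part of this patch: binary search and range search on a sorted
// vector. `sortedVector` is assumed to be an IntVector sorted consistently with the comparator,
// and `keyVector` holds the probed value at index 0.
VectorValueComparator<IntVector> comparator =
    DefaultVectorComparators.createDefaultComparator(sortedVector);
int anyMatch = VectorSearcher.binarySearch(sortedVector, comparator, keyVector, 0);
int first = VectorRangeSearcher.getFirstMatch(sortedVector, comparator, keyVector, 0);
int last = VectorRangeSearcher.getLastMatch(sortedVector, comparator, keyVector, 0);
// first..last covers every occurrence of the key; all three results are -1 when it is absent.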
*/ public final class VectorSearcher { - /** - * Result returned when a search fails. - */ + /** Result returned when a search fails. */ public static final int SEARCH_FAIL_RESULT = -1; /** - * Search for a particular element from the key vector in the target vector by binary search. - * The target vector must be sorted. + * Search for a particular element from the key vector in the target vector by binary search. The + * target vector must be sorted. + * * @param targetVector the vector from which to perform the sort. * @param comparator the criterion for the sort. * @param keyVector the vector containing the element to search. @@ -41,7 +37,7 @@ public final class VectorSearcher { * @return the index of a matched element if any, and -1 otherwise. */ public static int binarySearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); // perform binary search @@ -63,7 +59,9 @@ public static int binarySearch( } /** - * Search for a particular element from the key vector in the target vector by traversing the vector in sequence. + * Search for a particular element from the key vector in the target vector by traversing the + * vector in sequence. + * * @param targetVector the vector from which to perform the search. * @param comparator the criterion for element equality. * @param keyVector the vector containing the element to search. @@ -72,7 +70,7 @@ public static int binarySearch( * @return the index of a matched element if any, and -1 otherwise. */ public static int linearSearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); for (int i = 0; i < targetVector.getValueCount(); i++) { if (comparator.compare(keyIndex, i) == 0) { @@ -82,7 +80,5 @@ public static int linearSearch( return SEARCH_FAIL_RESULT; } - private VectorSearcher() { - - } + private VectorSearcher() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java index ec74598e0eebf..77093d87bc489 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * A composite vector comparator compares a number of vectors - * by a number of inner comparators. - *

- * <p>
- * It works by first using the first comparator, if a non-zero value - * is returned, it simply returns it. Otherwise, it uses the second comparator, - * and so on, until a non-zero value is produced, or all inner comparators have - * been used. - *
+ * A composite vector comparator compares a number of vectors by a number of inner comparators. + *
+ * <p>
    It works by first using the first comparator, if a non-zero value is returned, it simply + * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is + * produced, or all inner comparators have been used. */ public class CompositeVectorComparator extends VectorValueComparator { @@ -62,7 +58,8 @@ public int compare(int index1, int index2) { @Override public VectorValueComparator createNew() { - VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length]; + VectorValueComparator[] newInnerComparators = + new VectorValueComparator[innerComparators.length]; for (int i = 0; i < innerComparators.length; i++) { newInnerComparators[i] = innerComparators[i].createNew(); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 588876aa99059..ec650cd9dc88b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import java.math.BigDecimal; import java.time.Duration; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -56,13 +54,12 @@ import org.apache.arrow.vector.complex.RepeatedValueVector; import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -/** - * Default comparator implementations for different types of vectors. - */ +/** Default comparator implementations for different types of vectors. */ public class DefaultVectorComparators { /** * Create the default comparator for the vector. + * * @param vector the vector. * @param the vector type. * @return the default comparator. 
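// Illustrative usage sketch, not part of this patch: obtaining a default comparator for a vector
// and comparing two of its slots directly; `vector` is assumed to be a populated IntVector.
// Nulls compare as smallest for the default comparators.
VectorValueComparator<IntVector> comparator =
    DefaultVectorComparators.createDefaultComparator(vector);
comparator.attachVectors(vector, vector); // both sides may be the same vector
int result = comparator.compare(0, 1); // negative, zero or positive, like java.util.Comparator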
@@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp } else if (vector instanceof IntervalDayVector) { return (VectorValueComparator) new IntervalDayComparator(); } else if (vector instanceof IntervalMonthDayNanoVector) { - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } else if (vector instanceof TimeMicroVector) { return (VectorValueComparator) new TimeMicroComparator(); } else if (vector instanceof TimeMilliVector) { @@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new VariableWidthComparator(); } else if (vector instanceof RepeatedValueVector) { VectorValueComparator innerComparator = - createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); + createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); return new RepeatedValueComparator(innerComparator); } else if (vector instanceof FixedSizeListVector) { VectorValueComparator innerComparator = @@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new NullComparator(); } - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } - /** - * Default comparator for bytes. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bytes. The comparison is based on values, with null comes first. */ public static class ByteComparator extends VectorValueComparator { public ByteComparator() { @@ -159,8 +155,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for short integers. - * The comparison is based on values, with null comes first. + * Default comparator for short integers. The comparison is based on values, with null comes + * first. */ public static class ShortComparator extends VectorValueComparator { @@ -182,8 +178,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for 32-bit integers. - * The comparison is based on int values, with null comes first. + * Default comparator for 32-bit integers. The comparison is based on int values, with null comes + * first. */ public static class IntComparator extends VectorValueComparator { @@ -205,8 +201,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for long integers. - * The comparison is based on values, with null comes first. + * Default comparator for long integers. The comparison is based on values, with null comes first. */ public static class LongComparator extends VectorValueComparator { @@ -229,8 +224,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned bytes. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned bytes. The comparison is based on values, with null comes + * first. */ public static class UInt1Comparator extends VectorValueComparator { @@ -253,8 +248,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned short integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned short integer. The comparison is based on values, with null + * comes first. 
*/ public static class UInt2Comparator extends VectorValueComparator { @@ -280,8 +275,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned integer. The comparison is based on values, with null comes + * first. */ public static class UInt4Comparator extends VectorValueComparator { @@ -303,8 +298,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned long integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned long integer. The comparison is based on values, with null + * comes first. */ public static class UInt8Comparator extends VectorValueComparator { @@ -326,8 +321,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for float type. - * The comparison is based on values, with null comes first. + * Default comparator for float type. The comparison is based on values, with null comes first. */ public static class Float4Comparator extends VectorValueComparator { @@ -363,8 +357,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for double type. - * The comparison is based on values, with null comes first. + * Default comparator for double type. The comparison is based on values, with null comes first. */ public static class Float8Comparator extends VectorValueComparator { @@ -399,10 +392,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for bit type. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bit type. The comparison is based on values, with null comes first. */ public static class BitComparator extends VectorValueComparator { public BitComparator() { @@ -424,8 +414,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateDay type. - * The comparison is based on values, with null comes first. + * Default comparator for DateDay type. The comparison is based on values, with null comes first. */ public static class DateDayComparator extends VectorValueComparator { @@ -447,8 +436,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for DateMilli type. The comparison is based on values, with null comes + * first. */ public static class DateMilliComparator extends VectorValueComparator { @@ -471,8 +460,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal256 type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal256 type. The comparison is based on values, with null comes + * first. */ public static class Decimal256Comparator extends VectorValueComparator { @@ -495,8 +484,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal type. The comparison is based on values, with null comes first. */ public static class DecimalComparator extends VectorValueComparator { @@ -519,8 +507,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Duration type. - * The comparison is based on values, with null comes first. + * Default comparator for Duration type. The comparison is based on values, with null comes first. 
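Each comparator above repeats the same contract: values are compared by magnitude and nulls sort first. That null handling lives in the shared `compare` method of `VectorValueComparator` (reformatted later in this patch); a sketch of the behaviour, with the exact branch layout assumed:

    // Null-first semantics shared by the default comparators: nulls compare equal
    // to each other and smaller than any non-null value; otherwise fall through
    // to the type-specific fast path.
    public int compare(int index1, int index2) {
      if (checkNullsOnCompare) {
        boolean isNull1 = vector1.isNull(index1);
        boolean isNull2 = vector2.isNull(index2);
        if (isNull1 || isNull2) {
          if (isNull1 && isNull2) {
            return 0;
          }
          return isNull1 ? -1 : 1; // "null comes first"
        }
      }
      return compareNotNull(index1, index2);
    }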
*/ public static class DurationComparator extends VectorValueComparator { @@ -543,8 +530,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for IntervalDay type. - * The comparison is based on values, with null comes first. + * Default comparator for IntervalDay type. The comparison is based on values, with null comes + * first. */ public static class IntervalDayComparator extends VectorValueComparator { @@ -567,8 +554,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMicro type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMicro type. The comparison is based on values, with null comes + * first. */ public static class TimeMicroComparator extends VectorValueComparator { @@ -591,8 +578,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMilli type. The comparison is based on values, with null comes + * first. */ public static class TimeMilliComparator extends VectorValueComparator { @@ -615,8 +602,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeNano type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeNano type. The comparison is based on values, with null comes first. */ public static class TimeNanoComparator extends VectorValueComparator { @@ -639,8 +625,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeSecComparator extends VectorValueComparator { @@ -663,8 +648,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeStampComparator extends VectorValueComparator { @@ -687,10 +671,11 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is + * in lexicographic order, with null comes first. */ - public static class FixedSizeBinaryComparator extends VectorValueComparator { + public static class FixedSizeBinaryComparator + extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -720,9 +705,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for {@link org.apache.arrow.vector.NullVector}. - */ + /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */ public static class NullComparator extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -742,8 +725,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is + * in lexicographic order, with null comes first. 
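For the variable-width and fixed-size-binary comparators above, "lexicographic order" means byte-wise comparison with a shorter prefix sorting first. An illustrative stand-alone version (the real comparators read ArrowBuf memory directly rather than materialising byte arrays):

    // Illustrative lexicographic comparison of two byte sequences.
    static int compareLexicographically(byte[] left, byte[] right) {
      int n = Math.min(left.length, right.length);
      for (int i = 0; i < n; i++) {
        int cmp = (left[i] & 0xff) - (right[i] & 0xff); // unsigned byte comparison
        if (cmp != 0) {
          return cmp;
        }
      }
      // A sequence that is a strict prefix of the other sorts first.
      return Integer.compare(left.length, right.length);
    }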
*/ public static class VariableWidthComparator extends VectorValueComparator { @@ -772,12 +755,13 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class RepeatedValueComparator - extends VectorValueComparator { + extends VectorValueComparator { private final VectorValueComparator innerComparator; @@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class FixedSizeListComparator @@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto } } - private DefaultVectorComparators() { - } + private DefaultVectorComparators() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java index aaa7ba117c3ba..ea2b344a1eabb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java @@ -14,20 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; /** - * Default in-place sorter for fixed-width vectors. - * It is based on quick-sort, with average time complexity O(n*log(n)). + * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthInPlaceVectorSorter implements InPlaceVectorSorter { +public class FixedWidthInPlaceVectorSorter + implements InPlaceVectorSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; @@ -35,15 +37,10 @@ public class FixedWidthInPlaceVectorSorter imple VectorValueComparator comparator; - /** - * The vector to sort. - */ + /** The vector to sort. */ V vec; - /** - * The buffer to hold the pivot. - * It always has length 1. - */ + /** The buffer to hold the pivot. It always has length 1. */ V pivotBuffer; @Override @@ -99,9 +96,7 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. 
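`FixedWidthInPlaceVectorSorter` ends here with the median-of-3 pivot rule mentioned in its Javadoc. A sketch of that selection, assuming a comparator already attached to the vector being sorted (the real `choosePivot` additionally honours `STOP_CHOOSING_PIVOT_THRESHOLD` and stores the pivot value in `pivotBuffer`):

    // Pick the median of the first, middle and last element of [low, high] as the
    // quick-sort pivot; three compare-and-swaps leave the median in b.
    static int medianOfThree(int low, int high, VectorValueComparator<?> comparator) {
      int a = low;
      int b = low + (high - low) / 2;
      int c = high;
      if (comparator.compare(a, b) > 0) { int t = a; a = b; b = t; }
      if (comparator.compare(b, c) > 0) { int t = b; b = c; c = t; }
      if (comparator.compare(a, b) > 0) { int t = a; a = b; b = t; }
      return b; // index of the median sample
    }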
*/ void choosePivot(int low, int high) { // we need at least 3 items if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java index 05a4585792dc2..817e890a5abe1 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,21 @@ import org.apache.arrow.vector.IntVector; /** - * Default out-of-place sorter for fixed-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class FixedWidthOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { if (srcVector instanceof BitVector) { - throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); + throw new IllegalArgumentException( + "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); } comparator.attachVector(srcVector); @@ -49,15 +51,18 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); Preconditions.checkArgument( dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()), - "Not enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity()); + "Not enough capacity for the data buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount() * srcVector.getTypeWidth(), + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -73,9 +78,9 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), - dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), - valueWidth); + srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), + dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), + valueWidth); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java index 9ea39f638aebe..18f5e94314f83 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; @@ -22,23 +21,26 @@ import org.apache.arrow.vector.ValueVector; /** - * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). - * Since it does not make any assumptions about the memory layout of the vector, its performance - * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), - * it should be used in preference. + * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). Since it + * does not make any assumptions about the memory layout of the vector, its performance can be + * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it + * should be used in preference. * * @param vector type. */ -public class GeneralOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class GeneralOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { comparator.attachVector(srcVector); // check vector capacity - Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(), - "Not enough capacity for the target vector. " + - "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity()); + Preconditions.checkArgument( + dstVector.getValueCapacity() >= srcVector.getValueCount(), + "Not enough capacity for the target vector. 
" + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount(), + dstVector.getValueCapacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java index 19817fe76b8ec..ba41bb9e4eac7 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector in-place. - * That is, the sorting is performed by modifying the input vector, - * without creating a new sorted vector. + * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the + * input vector, without creating a new sorted vector. * * @param the vector type. */ @@ -30,6 +28,7 @@ public interface InPlaceVectorSorter { /** * Sort a vector in-place. + * * @param vec the vector to sort. * @param comparator the criteria for sort. */ diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java index 3072717f43123..b8ce3289d2889 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java @@ -14,39 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.stream.IntStream; - import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; /** * Sorter for the indices of a vector. + * * @param vector type. */ public class IndexSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; - /** - * Comparator for vector indices. - */ + /** Comparator for vector indices. */ private VectorValueComparator comparator; - /** - * Vector indices to sort. - */ + /** Vector indices to sort. */ private IntVector indices; /** - * Sorts indices, by quick-sort. Suppose the vector is denoted by v. - * After calling this method, the following relations hold: - * v(indices[0]) <= v(indices[1]) <= ... + * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method, + * the following relations hold: v(indices[0]) <= v(indices[1]) <= ... + * * @param vector the vector whose indices need to be sorted. * @param indices the vector for storing the sorted indices. * @param comparator the comparator to sort indices. @@ -100,11 +96,9 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. 
*/ static int choosePivot( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { // we need at least 3 items if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) { return indices.get(low); @@ -149,8 +143,9 @@ static int choosePivot( /** * Partition a range of values in a vector into two parts, with elements in one part smaller than - * elements from the other part. The partition is based on the element indices, so it does - * not modify the underlying vector. + * elements from the other part. The partition is based on the element indices, so it does not + * modify the underlying vector. + * * @param low the lower bound of the range. * @param high the upper bound of the range. * @param indices vector element indices. @@ -159,7 +154,7 @@ static int choosePivot( * @return the index of the split point. */ public static int partition( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { int pivotIndex = choosePivot(low, high, indices, comparator); while (low < high) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java index dc12a5fefdb65..c058636d66d1e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java @@ -14,27 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; -/** - * Insertion sorter. - */ +/** Insertion sorter. */ class InsertionSorter { /** * Sorts the range of a vector by insertion sort. * - * @param vector the vector to be sorted. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). - * @param buffer an extra buffer with capacity 1 to hold the current key. + * @param vector the vector to be sorted. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). + * @param buffer an extra buffer with capacity 1 to hold the current key. * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. */ static void insertionSort( V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) { @@ -53,11 +50,11 @@ static void insertionSort( /** * Sorts the range of vector indices by insertion sort. * - * @param indices the vector indices. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). + * @param indices the vector indices. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. 
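The insertion sort described above is the small-range fall-back used once a quick-sort partition drops below `CHANGE_ALGORITHM_THRESHOLD`. An illustrative version over a plain `int[]` of indices, both bounds inclusive as in the Javadoc (the real code operates on an `IntVector`):

    // Insertion sort of indices[startIdx..endIdx] (inclusive), ordering the indices
    // by the values they point to in the attached vector.
    static void insertionSortIndices(
        int[] indices, int startIdx, int endIdx, VectorValueComparator<?> comparator) {
      for (int i = startIdx + 1; i <= endIdx; i++) {
        int key = indices[i];
        int j = i - 1;
        while (j >= startIdx && comparator.compare(indices[j], key) > 0) {
          indices[j + 1] = indices[j]; // shift larger entries to the right
          j--;
        }
        indices[j + 1] = key;
      }
    }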
*/ static void insertionSort( IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java index df96121f1f8f7..ccb7bea4e2bd3 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java @@ -14,15 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.IntVector; -/** - * An off heap implementation of stack with int elements. - */ +/** An off heap implementation of stack with int elements. */ class OffHeapIntStack implements AutoCloseable { private static final int INIT_SIZE = 128; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java index 41d6dadc49147..b18e9b35d0895 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java @@ -14,21 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector out-of-place. - * That is, the sorting is performed on a newly-created vector, - * and the original vector is not modified. + * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a + * newly-created vector, and the original vector is not modified. + * * @param the vector type. */ public interface OutOfPlaceVectorSorter { /** * Sort a vector out-of-place. + * * @param inVec the input vector. * @param outVec the output vector, which has the same size as the input vector. * @param comparator the criteria for sort. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java index 0b0c3bd55b271..3fcfa5f8f215c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java @@ -14,17 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.ValueVector; /** - * Stable sorter. It compares values like ordinary comparators. - * However, when values are equal, it breaks ties by the value indices. - * Therefore, sort algorithms using this comparator always produce + * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it + * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce * stable sort results. + * * @param type of the vector. 
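`StableVectorComparator` achieves stability with a simple tie-break: when the wrapped comparator reports equality, the smaller index wins. A sketch of that rule, using the `innerComparator` field named in this hunk (illustrative, not the verbatim implementation):

    // Equal values keep their original relative order because the lower index
    // is treated as the smaller element.
    int compareStable(VectorValueComparator<?> innerComparator, int index1, int index2) {
      int result = innerComparator.compare(index1, index2);
      return result != 0 ? result : Integer.compare(index1, index2);
    }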
*/ public class StableVectorComparator extends VectorValueComparator { @@ -33,6 +32,7 @@ public class StableVectorComparator extends VectorValueCo /** * Constructs a stable comparator from a given comparator. + * * @param innerComparator the comparator to convert to stable comparator.. */ public StableVectorComparator(VectorValueComparator innerComparator) { @@ -47,8 +47,9 @@ public void attachVector(V vector) { @Override public void attachVectors(V vector1, V vector2) { - Preconditions.checkArgument(vector1 == vector2, - "Stable comparator only supports comparing values from the same vector"); + Preconditions.checkArgument( + vector1 == vector2, + "Stable comparator only supports comparing values from the same vector"); super.attachVectors(vector1, vector2); innerComparator.attachVectors(vector1, vector2); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java index 863b07c348ef2..8f58dc0dcee0f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -25,12 +24,13 @@ import org.apache.arrow.vector.IntVector; /** - * Default sorter for variable-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity + * O(n*log(n)). + * * @param vector type. */ public class VariableWidthOutOfPlaceVectorSorter - implements OutOfPlaceVectorSorter { + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @@ -46,20 +46,29 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); Preconditions.checkArgument( - dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), - "Not enough capacity for the offset buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity()); - long dataSize = srcVector.getOffsetBuffer().getInt( - srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstOffsetBuffer.capacity() + >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), + "Not enough capacity for the offset buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, + dstOffsetBuffer.capacity()); + long dataSize = + srcVector + .getOffsetBuffer() + .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); Preconditions.checkArgument( - dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity()); + dstValueBuffer.capacity() >= dataSize, + "No enough capacity for the data buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + dataSize, + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -77,16 +86,19 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); - int srcOffset = srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + int srcOffset = + srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); int valueLength = - srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - srcOffset; + srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) + - srcOffset; MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcOffset, - dstValueBuffer.memoryAddress() + dstOffset, - valueLength); + srcValueBuffer.memoryAddress() + srcOffset, + dstValueBuffer.memoryAddress() + dstOffset, + valueLength); dstOffset += valueLength; } - dstOffsetBuffer.setInt((dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); + dstOffsetBuffer.setInt( + (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index d2c772ca8a819..0472f04109b1c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -14,54 +14,44 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Compare two values at the given indices in the vectors. - * This is used for vector sorting. + * Compare two values at the given indices in the vectors. This is used for vector sorting. + * * @param type of the vector. */ public abstract class VectorValueComparator { - /** - * The first vector to compare. - */ + /** The first vector to compare. */ protected V vector1; - /** - * The second vector to compare. - */ + /** The second vector to compare. */ protected V vector2; - /** - * Width of the vector value. For variable-length vectors, this value makes no sense. - */ + /** Width of the vector value. For variable-length vectors, this value makes no sense. */ protected int valueWidth; - private boolean checkNullsOnCompare = true; /** - * This value is true by default and re-computed when vectors are attached to the comparator. 
If both vectors cannot - * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited - * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc. + * This value is true by default and re-computed when vectors are attached to the comparator. If + * both vectors cannot contain nulls then this value is {@code false} and calls to {@code + * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up + * comparisons resulting in faster sorts etc. */ public boolean checkNullsOnCompare() { return this.checkNullsOnCompare; } - /** - * Constructor for variable-width vectors. - */ - protected VectorValueComparator() { - - } + /** Constructor for variable-width vectors. */ + protected VectorValueComparator() {} /** * Constructor for fixed-width vectors. + * * @param valueWidth the record width (in bytes). */ protected VectorValueComparator(int valueWidth) { @@ -74,6 +64,7 @@ public int getValueWidth() { /** * Attach both vectors to compare to the same input vector. + * * @param vector the vector to attach. */ public void attachVector(V vector) { @@ -82,6 +73,7 @@ public void attachVector(V vector) { /** * Attach vectors to compare. + * * @param vector1 the first vector to compare. * @param vector2 the second vector to compare. */ @@ -99,7 +91,7 @@ private boolean mayHaveNulls(V v) { if (v.getValueCount() == 0) { return true; } - if (! v.getField().isNullable()) { + if (!v.getField().isNullable()) { return false; } return v.getNullCount() > 0; @@ -107,11 +99,11 @@ private boolean mayHaveNulls(V v) { /** * Compare two values, given their indices. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public int compare(int index1, int index2) { if (checkNullsOnCompare) { @@ -133,19 +125,19 @@ public int compare(int index1, int index2) { } /** - * Compare two values, given their indices. - * This is a fast path for comparing non-null values, so the caller - * must make sure that values at both indices are not null. + * Compare two values, given their indices. This is a fast path for comparing non-null values, so + * the caller must make sure that values at both indices are not null. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public abstract int compareNotNull(int index1, int index2); /** * Creates a comparator of the same type. + * * @return the newly created comparator. 
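The abstract `compareNotNull`/`createNew` pair above is the extension point for custom orderings. A hypothetical subclass (not part of Arrow) showing how little a fixed-width comparator has to provide, with null handling left to the base class:

    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.vector.IntVector;

    /** Hypothetical example: sort IntVector values in descending order. */
    public class DescendingIntComparator extends VectorValueComparator<IntVector> {

      public DescendingIntComparator() {
        super(Integer.BYTES); // fixed-width constructor: 4 bytes per value
      }

      @Override
      public int compareNotNull(int index1, int index2) {
        // Reversed arguments give a descending order.
        return Integer.compare(vector2.get(index2), vector1.get(index1));
      }

      @Override
      public VectorValueComparator<IntVector> createNew() {
        return new DescendingIntComparator();
      }
    }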
*/ public abstract VectorValueComparator createNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java index ac083b84f1611..537189013a731 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link DeduplicationUtils}. - */ +/** Test cases for {@link DeduplicationUtils}. */ public class TestDeduplicationUtils { private static final int VECTOR_LENGTH = 100; @@ -57,10 +53,11 @@ public void shutdown() { @Test public void testDeduplicateFixedWidth() { try (IntVector origVec = new IntVector("original vec", allocator); - IntVector dedupVec = new IntVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + IntVector dedupVec = new IntVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -73,9 +70,10 @@ public void testDeduplicateFixedWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals( VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -84,7 +82,8 @@ public void testDeduplicateFixedWidth() { assertEquals(i, dedupVec.get(i)); } - DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + DeduplicationUtils.populateRunLengths( + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { @@ -96,12 +95,12 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector 
dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -114,9 +113,10 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals(VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -126,7 +126,7 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunLengths( - distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java index 788213b162870..820cadccae537 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -30,9 +28,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VectorRunDeduplicator}. - */ +/** Test cases for {@link VectorRunDeduplicator}. 
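The deduplication tests above drive `DeduplicationUtils` in three steps: mark where each run of equal values starts in a bit buffer, copy one value per run, then derive the run lengths. A condensed usage sketch following the calls visible in the test (the sample data is an assumption):

    import org.apache.arrow.algorithm.deduplicate.DeduplicationUtils;
    import org.apache.arrow.memory.ArrowBuf;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class DeduplicationExample {
      public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator();
            IntVector orig = new IntVector("original", allocator);
            IntVector dedup = new IntVector("deduplicated", allocator);
            IntVector lengths = new IntVector("run lengths", allocator);
            // one bit per element, rounded up to whole bytes
            // (the tests use DataSizeRoundingUtil.divideBy8Ceil for this)
            ArrowBuf runStarts = allocator.buffer((6 + 7) / 8)) {
          orig.allocateNew(6);
          // Runs: 1 1 | 2 2 2 | 3
          orig.set(0, 1);
          orig.set(1, 1);
          orig.set(2, 2);
          orig.set(3, 2);
          orig.set(4, 2);
          orig.set(5, 3);
          orig.setValueCount(6);
          dedup.allocateNew();
          lengths.allocateNew();

          // 1) Set a bit at every position where a new run starts.
          DeduplicationUtils.populateRunStartIndicators(orig, runStarts);
          // 2) Copy the first element of each run: dedup becomes [1, 2, 3].
          DeduplicationUtils.populateDeduplicatedValues(runStarts, orig, dedup);
          // 3) Record how long each run is: lengths becomes [2, 3, 1].
          DeduplicationUtils.populateRunLengths(runStarts, lengths, 6);
        }
      }
    }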
*/ public class TestVectorRunDeduplicator { private static final int VECTOR_LENGTH = 100; @@ -57,7 +53,7 @@ public void testDeduplicateFixedWidth() { IntVector dedupVec = new IntVector("deduplicated vec", allocator); IntVector lengthVec = new IntVector("length vec", allocator); VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { + new VectorRunDeduplicator<>(origVec, allocator)) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -93,12 +89,11 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + VectorRunDeduplicator deduplicator = + new VectorRunDeduplicator<>(origVec, allocator)) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java index 45c47626b720e..bfda86f26883d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -23,7 +22,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableBasedDictionaryBuilder}. - */ +/** Test cases for {@link HashTableBasedDictionaryBuilder}. 
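The dictionary-builder test that follows constructs the builder with a flag that controls whether nulls become dictionary entries, and `addValues` reports how many distinct values were added. A hedged usage sketch mirroring that pattern (the sample strings are assumptions):

    import java.nio.charset.StandardCharsets;
    import org.apache.arrow.algorithm.dictionary.HashTableBasedDictionaryBuilder;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VarCharVector;

    public class DictionaryBuilderExample {
      public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator();
            VarCharVector values = new VarCharVector("values", allocator);
            VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
          values.allocateNew(64, 4);
          values.set(0, "hello".getBytes(StandardCharsets.UTF_8));
          values.set(1, "world".getBytes(StandardCharsets.UTF_8));
          values.set(2, "hello".getBytes(StandardCharsets.UTF_8));
          values.setNull(3);
          values.setValueCount(4);

          // Second argument: whether nulls should be encoded into the dictionary.
          HashTableBasedDictionaryBuilder<VarCharVector> builder =
              new HashTableBasedDictionaryBuilder<>(dictionary, true);
          int distinct = builder.addValues(values);
          // distinct == 3: "hello", "world" and the null each get one dictionary slot.
        }
      }
    }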
*/ public class TestHashTableBasedDictionaryBuilder { private BufferAllocator allocator; @@ -52,7 +48,7 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -72,27 +68,34 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); assertEquals(7, result); assertEquals(7, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); assertNull(dictionary.get(2)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -112,27 +115,33 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); assertEquals(6, result); assertEquals(6, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - + assertEquals( + "hello", new 
String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -151,7 +160,7 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); @@ -169,7 +178,7 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -188,7 +197,7 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); @@ -199,7 +208,6 @@ public void testBuildFixedWidthDictionaryWithoutNull() { assertEquals(8, dictionary.get(1)); assertEquals(32, dictionary.get(2)); assertEquals(16, dictionary.get(3)); - } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java index 60efbf58bebda..b9646284a015b 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -38,9 +36,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableDictionaryEncoder}. - */ +/** Test cases for {@link HashTableDictionaryEncoder}. 
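The encoder tests that follow pair `HashTableDictionaryEncoder.encode` with the stock `DictionaryEncoder.decode` round trip. A hedged sketch of that flow (type parameters and sample data are assumptions; the constructor, `encode`, `Dictionary` and `decode` calls mirror the test code):

    import java.nio.charset.StandardCharsets;
    import org.apache.arrow.algorithm.dictionary.HashTableDictionaryEncoder;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;
    import org.apache.arrow.vector.VarCharVector;
    import org.apache.arrow.vector.dictionary.Dictionary;
    import org.apache.arrow.vector.dictionary.DictionaryEncoder;
    import org.apache.arrow.vector.types.pojo.DictionaryEncoding;

    public class DictionaryEncodeExample {
      public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator();
            VarCharVector raw = new VarCharVector("raw", allocator);
            IntVector encoded = new IntVector("encoded", allocator);
            VarCharVector dictionary = new VarCharVector("dict", allocator)) {
          // Dictionary entries: index 0 -> "aa", index 1 -> "bb".
          dictionary.allocateNew(16, 2);
          dictionary.set(0, "aa".getBytes(StandardCharsets.UTF_8));
          dictionary.set(1, "bb".getBytes(StandardCharsets.UTF_8));
          dictionary.setValueCount(2);

          raw.allocateNew(16, 3);
          raw.set(0, "bb".getBytes(StandardCharsets.UTF_8));
          raw.set(1, "aa".getBytes(StandardCharsets.UTF_8));
          raw.set(2, "bb".getBytes(StandardCharsets.UTF_8));
          raw.setValueCount(3);

          // Encode: every value is replaced by its dictionary index (1, 0, 1).
          encoded.allocateNew();
          new HashTableDictionaryEncoder<IntVector, VarCharVector>(dictionary, false)
              .encode(raw, encoded);

          // Decode back through the standard dictionary machinery, as in the tests.
          Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
          try (VarCharVector decoded = (VarCharVector) DictionaryEncoder.decode(encoded, dict)) {
            // decoded now holds the same strings as raw.
          }
        }
      }
    }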
*/ public class TestHashTableDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -53,7 +49,7 @@ public class TestHashTableDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -69,8 +65,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -89,7 +85,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, false); + new HashTableDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -98,17 +94,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -119,8 +119,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -144,7 +144,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -156,20 +156,24 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new 
Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -180,8 +184,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -199,13 +203,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. - assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -213,8 +219,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -235,7 +241,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -262,8 +268,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -281,7 +287,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -305,8 +311,8 @@ public void 
testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -327,7 +333,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java index a76aedffa308d..a4641704198cb 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -39,9 +37,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link LinearDictionaryEncoder}. - */ +/** Test cases for {@link LinearDictionaryEncoder}. 
*/ public class TestLinearDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -54,7 +50,7 @@ public class TestLinearDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -70,8 +66,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -90,7 +86,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, false); + new LinearDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -99,17 +95,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -120,8 +120,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -145,7 +145,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -157,13 +157,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, 
new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -171,7 +174,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -182,8 +186,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -201,13 +205,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. - assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -215,8 +221,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -237,7 +243,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -263,8 +269,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -282,7 +288,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -306,8 +312,8 @@ public void testEncodeLargeVector() { public void 
testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -328,7 +334,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java index e01c2e7905b46..e783e1f76818c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -40,9 +38,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchDictionaryEncoder}. - */ +/** Test cases for {@link SearchDictionaryEncoder}. 
*/ public class TestSearchDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -55,7 +51,7 @@ public class TestSearchDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -71,8 +67,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -91,8 +87,8 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); // perform encoding encodedVector.allocateNew(); @@ -101,17 +97,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -122,8 +122,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -147,8 +147,8 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // perform encoding encodedVector.allocateNew(); @@ -160,13 +160,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), 
String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -174,7 +177,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -185,8 +189,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -204,14 +208,16 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. 
- assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -219,8 +225,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -241,8 +247,8 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); // verify indices @@ -268,8 +274,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -287,8 +293,8 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -312,8 +318,8 @@ public void testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -334,8 +340,8 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java index 340b7e67e861f..6c8a57c1a4648 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java +++ 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchTreeBasedDictionaryBuilder}. - */ +/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. */ public class TestSearchTreeBasedDictionaryBuilder { private BufferAllocator allocator; @@ -53,8 +49,8 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -74,9 +70,10 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -86,20 +83,32 @@ public void testBuildVariableWidthDictionaryWithNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); assertTrue(sortedDictionary.isNull(0)); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void 
testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -119,9 +128,10 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); @@ -130,20 +140,32 @@ public void testBuildVariableWidthDictionaryWithoutNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -162,9 +184,10 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -184,8 +207,8 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public 
void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -204,9 +227,10 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java index 630dd80b44084..e3ab981670e9e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import static org.junit.Assert.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link PartialSumUtils}. - */ +/** Test cases for {@link PartialSumUtils}. */ public class TestPartialSumUtils { private static final int PARTIAL_SUM_VECTOR_LENGTH = 101; @@ -50,7 +47,7 @@ public void shutdown() { @Test public void testToPartialSumVector() { try (IntVector delta = new IntVector("delta", allocator); - IntVector partialSum = new IntVector("partial sum", allocator)) { + IntVector partialSum = new IntVector("partial sum", allocator)) { delta.allocateNew(DELTA_VECTOR_LENGTH); delta.setValueCount(DELTA_VECTOR_LENGTH); @@ -75,7 +72,7 @@ public void testToPartialSumVector() { @Test public void testToDeltaVector() { try (IntVector partialSum = new IntVector("partial sum", allocator); - IntVector delta = new IntVector("delta", allocator)) { + IntVector delta = new IntVector("delta", allocator)) { partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); @@ -111,7 +108,8 @@ public void testFindPositionInPartialSumVector() { // search and verify results for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) { - assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); + assertEquals( + i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); } } } @@ -131,8 +129,10 @@ public void testFindPositionInPartialSumVectorNegative() { // search and verify results assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase)); assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1)); - assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, - sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); + assertEquals( + -1, + PartialSumUtils.findPositionInPartialSumVector( + partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); } } } diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java index 0e6627eb4822a..4b7c6a9756780 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. */ public class TestVectorRank { private BufferAllocator allocator; @@ -70,7 +66,7 @@ public void testFixedWidthRank() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); assertEquals(6, rank.indexAtRank(vector, comparator, 2)); @@ -103,7 +99,7 @@ public void testVariableWidthRank() { vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); @@ -137,11 +133,13 @@ public void testRankNegative() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); - assertThrows(IllegalArgumentException.class, () -> { - rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java index 9ccecfa84a73a..7ff86a743effd 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -26,7 +25,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -39,9 +37,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link ParallelSearcher}. - */ +/** Test cases for {@link ParallelSearcher}. */ @RunWith(Parameterized.class) public class TestParallelSearcher { @@ -97,8 +93,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.set(i, i); @@ -107,9 +105,13 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { @@ -122,13 +124,15 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept @Test public void testParallelStringSearch() throws ExecutionException, InterruptedException { try (VarCharVector targetVector = new VarCharVector("targetVector", allocator); - VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { + VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { targetVector.allocateNew(VECTOR_LENGTH); keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); @@ -137,9 +141,13 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? 
searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java index 18f4fa0355f4f..39f2f609f7df4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.junit.Assert.assertEquals; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link VectorRangeSearcher}. - */ +/** Test cases for {@link VectorRangeSearcher}. */ @RunWith(Parameterized.class) public class TestVectorRangeSearcher { @@ -78,9 +74,11 @@ public void testGetLowerBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); + int result = + VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); assertEquals(i * ((long) repeat), result); } } @@ -112,7 +110,8 @@ public void testGetLowerBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -141,7 +140,8 @@ public void testGetUpperBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getLastMatch(intVector, comparator, intVector, i * repeat); assertEquals((i + 1) * repeat - 1, result); @@ -153,7 +153,7 @@ public void testGetUpperBounds() { public void testGetUpperBoundsNegative() { final int maxValue = 100; try (IntVector intVector = new IntVector("int vec", allocator); - IntVector negVector = new IntVector("neg vec", allocator)) { + IntVector negVector = new IntVector("neg vec", allocator)) { // allocate vector intVector.allocateNew(maxValue * repeat); intVector.setValueCount(maxValue * repeat); @@ -175,7 +175,8 @@ public void testGetUpperBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int 
result = VectorRangeSearcher.getLastMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -185,11 +186,6 @@ public void testGetUpperBoundsNegative() { @Parameterized.Parameters(name = "repeat = {0}") public static Collection getRepeat() { - return Arrays.asList( - new Object[]{1}, - new Object[]{2}, - new Object[]{5}, - new Object[]{10} - ); + return Arrays.asList(new Object[] {1}, new Object[] {2}, new Object[] {5}, new Object[] {10}); } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index 32fa10bbd98d0..629d900b479b6 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import static org.junit.Assert.assertEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. */ public class TestVectorSearcher { private final int VECTOR_LENGTH = 100; @@ -59,7 +55,7 @@ public void shutdown() { @Test public void testBinarySearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -77,7 +73,7 @@ public void testBinarySearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -91,7 +87,7 @@ public void testBinarySearchInt() { @Test public void testLinearSearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -109,7 +105,7 @@ public void testLinearSearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -123,7 +119,7 @@ public void testLinearSearchInt() { @Test public void testBinarySearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); 
rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -148,7 +144,7 @@ public void testBinarySearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -162,7 +158,7 @@ public void testBinarySearchVarChar() { @Test public void testLinearSearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -187,7 +183,7 @@ public void testLinearSearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -260,11 +256,11 @@ private ListVector createNegativeListVector() { @Test public void testBinarySearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -281,11 +277,11 @@ public void testBinarySearchList() { @Test public void testLinearSearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java index 9624432924b5a..21f6c0217c376 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link CompositeVectorComparator}. 
- */ +/** Test cases for {@link CompositeVectorComparator}. */ public class TestCompositeVectorComparator { private BufferAllocator allocator; @@ -60,7 +56,7 @@ public void testCompareVectorSchemaRoot() { VarCharVector strVec2 = new VarCharVector("str2", allocator); try (VectorSchemaRoot batch1 = new VectorSchemaRoot(Arrays.asList(intVec1, strVec1)); - VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { + VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { intVec1.allocateNew(vectorLength); strVec1.allocateNew(vectorLength * 10, vectorLength); @@ -75,15 +71,15 @@ public void testCompareVectorSchemaRoot() { } VectorValueComparator innerComparator1 = - DefaultVectorComparators.createDefaultComparator(intVec1); + DefaultVectorComparators.createDefaultComparator(intVec1); innerComparator1.attachVectors(intVec1, intVec2); VectorValueComparator innerComparator2 = - DefaultVectorComparators.createDefaultComparator(strVec1); + DefaultVectorComparators.createDefaultComparator(strVec1); innerComparator2.attachVectors(strVec1, strVec2); - VectorValueComparator comparator = new CompositeVectorComparator( - new VectorValueComparator[]{innerComparator1, innerComparator2} - ); + VectorValueComparator comparator = + new CompositeVectorComparator( + new VectorValueComparator[] {innerComparator1, innerComparator2}); // verify results diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index c40854fb17410..f1b3d6fb5aa1d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; @@ -67,9 +66,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link DefaultVectorComparators}. - */ +/** Test cases for {@link DefaultVectorComparators}. 
*/ public class TestDefaultVectorComparator { private BufferAllocator allocator; @@ -111,9 +108,9 @@ private ListVector createListVector(int count) { @Test public void testCompareLists() { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // prefix is smaller @@ -121,11 +118,11 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(11); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // breaking tie by the last element @@ -133,10 +130,10 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(10)) { + ListVector listVector2 = createListVector(10)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // list vector elements equal @@ -149,9 +146,9 @@ public void testCopiedComparatorForLists() { for (int i = 1; i < 10; i++) { for (int j = 1; j < 10; j++) { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); VectorValueComparator copyComparator = comparator.createNew(); @@ -185,7 +182,7 @@ private FixedSizeListVector createFixedSizeListVector(int count) { @Test public void testCompareFixedSizeLists() { try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); @@ -195,7 +192,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(11); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = @@ -207,7 +204,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); @@ -236,7 +233,7 @@ public void testCompareUInt1() { vec.set(9, Byte.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + 
DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -259,14 +256,21 @@ public void testCompareUInt2() { vec.allocateNew(10); ValueVectorDataPopulator.setVector( - vec, null, (char) (Character.MAX_VALUE - 1), Character.MAX_VALUE, (char) 0, (char) 1, - (char) 2, (char) (Character.MAX_VALUE - 1), null, + vec, + null, + (char) (Character.MAX_VALUE - 1), + Character.MAX_VALUE, + (char) 0, + (char) 1, + (char) 2, + (char) (Character.MAX_VALUE - 1), + null, '\u7FFF', // value for the max 16-byte signed integer '\u8000' // value for the min 16-byte signed integer - ); + ); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -301,7 +305,7 @@ public void testCompareUInt4() { vec.set(9, Integer.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -336,7 +340,7 @@ public void testCompareUInt8() { vec.set(9, Long.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -358,7 +362,16 @@ public void testCompareFloat4() { try (Float4Vector vec = new Float4Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1f, 0.0f, 1.0f, null, 1.0f, 2.0f, Float.NaN, Float.NaN, Float.POSITIVE_INFINITY, + vec, + -1.1f, + 0.0f, + 1.0f, + null, + 1.0f, + 2.0f, + Float.NaN, + Float.NaN, + Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -393,7 +406,16 @@ public void testCompareFloat8() { try (Float8Vector vec = new Float8Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1, 0.0, 1.0, null, 1.0, 2.0, Double.NaN, Double.NaN, Double.POSITIVE_INFINITY, + vec, + -1.1, + 0.0, + 1.0, + null, + 1.0, + 2.0, + Double.NaN, + Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -488,8 +510,15 @@ public void testCompareShort() { try (SmallIntVector vec = new SmallIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (short) -1, (short) 0, (short) 1, null, (short) 1, (short) 5, - (short) (Short.MIN_VALUE + 1), Short.MAX_VALUE); + vec, + (short) -1, + (short) 0, + (short) 1, + null, + (short) 1, + (short) 5, + (short) (Short.MIN_VALUE + 1), + Short.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -519,8 +548,15 @@ public void testCompareByte() { try (TinyIntVector vec = new TinyIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (byte) -1, (byte) 0, (byte) 1, null, (byte) 1, (byte) 5, - (byte) (Byte.MIN_VALUE + 1), Byte.MAX_VALUE); + vec, + (byte) -1, + (byte) 0, + (byte) 1, + null, + (byte) 1, + (byte) 5, + (byte) (Byte.MIN_VALUE + 1), + Byte.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -549,8 +585,7 @@ public void testCompareByte() { public void testCompareBit() { try (BitVector vec = new BitVector("", allocator)) { vec.allocateNew(6); - ValueVectorDataPopulator.setVector( - vec, 
1, 2, 0, 0, -1, null); + ValueVectorDataPopulator.setVector(vec, 1, 2, 0, 0, -1, null); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -691,7 +726,8 @@ public void testCompareDecimal256() { @Test public void testCompareDuration() { try (DurationVector vec = - new DurationVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new DurationVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -722,7 +758,8 @@ public void testCompareDuration() { @Test public void testCompareIntervalDay() { try (IntervalDayVector vec = - new IntervalDayVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new IntervalDayVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); vec.set(0, -1, 0); vec.set(1, 0, 0); @@ -755,8 +792,7 @@ public void testCompareIntervalDay() { @Test public void testCompareTimeMicro() { - try (TimeMicroVector vec = - new TimeMicroVector("", allocator)) { + try (TimeMicroVector vec = new TimeMicroVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -816,8 +852,7 @@ public void testCompareTimeMilli() { @Test public void testCompareTimeNano() { - try (TimeNanoVector vec = - new TimeNanoVector("", allocator)) { + try (TimeNanoVector vec = new TimeNanoVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -877,8 +912,7 @@ public void testCompareTimeSec() { @Test public void testCompareTimeStamp() { - try (TimeStampMilliVector vec = - new TimeStampMilliVector("", allocator)) { + try (TimeStampMilliVector vec = new TimeStampMilliVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -909,7 +943,7 @@ public void testCompareTimeStamp() { @Test public void testCompareFixedSizeBinary() { try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 2); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1}); @@ -923,7 +957,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 0}); @@ -937,7 +971,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 1}); @@ -953,8 +987,8 @@ public void testCompareFixedSizeBinary() { @Test public void testCompareNull() { - try (NullVector vec = new 
NullVector("test", - FieldType.notNullable(new ArrowType.Int(32, false)))) { + try (NullVector vec = + new NullVector("test", FieldType.notNullable(new ArrowType.Int(32, false)))) { vec.setValueCount(2); VectorValueComparator comparator = @@ -967,12 +1001,14 @@ public void testCompareNull() { @Test public void testCheckNullsOnCompareIsFalseForNonNullableVector() { - try (IntVector vec = new IntVector("not nullable", - FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { + try (IntVector vec = + new IntVector( + "not nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -981,16 +1017,17 @@ public void testCheckNullsOnCompareIsFalseForNonNullableVector() { @Test public void testCheckNullsOnCompareIsTrueForNullableVector() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("not-nullable", FieldType.notNullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "not-nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, null, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.checkNullsOnCompare()); @@ -1001,17 +1038,18 @@ public void testCheckNullsOnCompareIsTrueForNullableVector() { @Test public void testCheckNullsOnCompareIsFalseWithNoNulls() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { // no null values ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -1022,13 +1060,14 @@ public void testCheckNullsOnCompareIsFalseWithNoNulls() { @Test public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new 
ArrowType.Int(32, false)), allocator)) { - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec2); assertTrue(comparator.checkNullsOnCompare()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java index 91ef52017df4d..ed5aadfcda04c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthInPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthInPlaceVectorSorter}. */ public class TestFixedWidthInPlaceVectorSorter { private BufferAllocator allocator; @@ -69,7 +65,8 @@ public void testSortInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -90,8 +87,8 @@ public void testSortInt() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. */ @Test public void testSortLargeIncreasingInt() { @@ -107,7 +104,8 @@ public void testSortLargeIncreasingInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -133,7 +131,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -164,16 +163,15 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. 
*/ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("", allocator)) { vec.allocateNew(3); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -216,25 +214,25 @@ public void testChoosePivotAllPermutes() { @Test public void testSortInt2() { try (IntVector vector = new IntVector("vector", allocator)) { - ValueVectorDataPopulator.setVector(vector, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vector, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, + 11, 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); int[] actual = new int[vector.getValueCount()]; - IntStream.range(0, vector.getValueCount()).forEach( - i -> actual[i] = vector.get(i)); + IntStream.range(0, vector.getValueCount()).forEach(i -> actual[i] = vector.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java index cc13e7f8ceaee..4096897c20a05 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. */ public class TestFixedWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,7 +45,9 @@ public TestFixedWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new FixedWidthOutOfPlaceVectorSorter<>(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new FixedWidthOutOfPlaceVectorSorter<>(); } @Before @@ -82,10 +80,11 @@ public void testSortByte() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); TinyIntVector sortedVec = - (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -129,10 +128,11 @@ public void testSortShort() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SmallIntVector sortedVec = - (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -176,9 +176,11 @@ public void testSortInt() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -222,9 +224,11 @@ public void testSortLong() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + BigIntVector sortedVec = + (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -268,9 +272,11 @@ public void testSortFloat() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float4Vector sortedVec = + (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -314,9 +320,11 @@ public void testSortDouble() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float8Vector sortedVec = (Float8Vector) 
vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float8Vector sortedVec = + (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -343,17 +351,17 @@ public void testSortDouble() { @Test public void testSortInt2() { try (IntVector vec = new IntVector("", allocator)) { - ValueVectorDataPopulator.setVector(vec, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vec, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, 11, + 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - try (IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -361,13 +369,14 @@ public void testSortInt2() { // verify results int[] actual = new int[sortedVec.getValueCount()]; - IntStream.range(0, sortedVec.getValueCount()).forEach( - i -> actual[i] = sortedVec.get(i)); + IntStream.range(0, sortedVec.getValueCount()).forEach(i -> actual[i] = sortedVec.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java index 80c72b4e21a27..a92cc77818f4a 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting fixed width vectors with random data. - */ +/** Test sorting fixed width vectors with random data. 
*/ @RunWith(Parameterized.class) public class TestFixedWidthSorting> { @@ -70,8 +66,12 @@ public void shutdown() { } public TestFixedWidthSorting( - int length, double nullFraction, boolean inPlace, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + boolean inPlace, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.inPlace = inPlace; @@ -94,7 +94,8 @@ void sortInPlace() { TestSortingUtil.sortArray(array); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); @@ -109,9 +110,11 @@ void sortOutOfPlace() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -123,47 +126,78 @@ void sortOutOfPlace() { } } - @Parameterized.Parameters(name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") + @Parameterized.Parameters( + name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { for (boolean inPlace : new boolean[] {true, false}) { - params.add(new Object[] { - length, nullFrac, inPlace, "TinyIntVector", - (Function) allocator -> new TinyIntVector("vector", allocator), - TestSortingUtil.TINY_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "SmallIntVector", - (Function) allocator -> new SmallIntVector("vector", allocator), - TestSortingUtil.SMALL_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "IntVector", - (Function) allocator -> new IntVector("vector", allocator), - TestSortingUtil.INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "BigIntVector", - (Function) allocator -> new BigIntVector("vector", allocator), - TestSortingUtil.LONG_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float4Vector", - (Function) allocator -> new Float4Vector("vector", allocator), - TestSortingUtil.FLOAT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float8Vector", - (Function) allocator -> new Float8Vector("vector", allocator), - TestSortingUtil.DOUBLE_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "TinyIntVector", + (Function) + allocator -> new TinyIntVector("vector", allocator), + TestSortingUtil.TINY_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "SmallIntVector", + (Function) + allocator -> new SmallIntVector("vector", allocator), + TestSortingUtil.SMALL_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + 
"IntVector", + (Function) + allocator -> new IntVector("vector", allocator), + TestSortingUtil.INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "BigIntVector", + (Function) + allocator -> new BigIntVector("vector", allocator), + TestSortingUtil.LONG_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float4Vector", + (Function) + allocator -> new Float4Vector("vector", allocator), + TestSortingUtil.FLOAT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float8Vector", + (Function) + allocator -> new Float8Vector("vector", allocator), + TestSortingUtil.DOUBLE_GENERATOR + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java index 07a6b545ddaa2..9e796a98ab790 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -30,9 +29,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link GeneralOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link GeneralOutOfPlaceVectorSorter}. */ public class TestGeneralOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,30 +46,33 @@ public void shutdown() { VectorValueComparator getComparator(StructVector structVector) { IntVector child0 = structVector.getChild("column0", IntVector.class); - VectorValueComparator childComp0 = DefaultVectorComparators.createDefaultComparator(child0); + VectorValueComparator childComp0 = + DefaultVectorComparators.createDefaultComparator(child0); childComp0.attachVector(child0); IntVector child1 = structVector.getChild("column1", IntVector.class); - VectorValueComparator childComp1 = DefaultVectorComparators.createDefaultComparator(child1); + VectorValueComparator childComp1 = + DefaultVectorComparators.createDefaultComparator(child1); childComp1.attachVector(child1); - VectorValueComparator comp = new VectorValueComparator() { - - @Override - public int compareNotNull(int index1, int index2) { - // compare values by lexicographic order - int result0 = childComp0.compare(index1, index2); - if (result0 != 0) { - return result0; - } - return childComp1.compare(index1, index2); - } - - @Override - public VectorValueComparator createNew() { - return this; - } - }; + VectorValueComparator comp = + new VectorValueComparator() { + + @Override + public int compareNotNull(int index1, int index2) { + // compare values by lexicographic order + int result0 = childComp0.compare(index1, index2); + if (result0 != 0) { + return result0; + } + return childComp1.compare(index1, index2); + } + + @Override + public VectorValueComparator createNew() { + return this; + } + }; return comp; } @@ -81,17 +81,21 @@ public VectorValueComparator createNew() { public void testSortStructVector() { final int vectorLength = 7; try (StructVector srcVector = StructVector.empty("src struct", allocator); - StructVector dstVector = StructVector.empty("dst struct", allocator)) { + StructVector dstVector = StructVector.empty("dst struct", allocator)) { IntVector srcChild0 = - 
srcVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector srcChild1 = - srcVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild0 = - dstVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild1 = - dstVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); // src struct vector values: // [ @@ -128,15 +132,16 @@ public void testSortStructVector() { // validate results assertEquals(vectorLength, dstVector.getValueCount()); assertEquals( - "[" + - "null, " + - "{\"column1\":3}, " + - "{\"column0\":2,\"column1\":1}, " + - "{\"column0\":3,\"column1\":4}, " + - "{\"column0\":5,\"column1\":4}, " + - "{\"column0\":6,\"column1\":6}, " + - "{\"column0\":7}" + - "]", dstVector.toString()); + "[" + + "null, " + + "{\"column1\":3}, " + + "{\"column0\":2,\"column1\":1}, " + + "{\"column0\":3,\"column1\":4}, " + + "{\"column0\":5,\"column1\":4}, " + + "{\"column0\":6,\"column1\":6}, " + + "{\"column0\":7}" + + "]", + dstVector.toString()); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java index 99e22f8bdcd5c..bc8aac08b61e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link IndexSorter}. - */ +/** Test cases for {@link IndexSorter}. */ public class TestIndexSorter { private BufferAllocator allocator; @@ -56,14 +53,15 @@ public void testIndexSort() { // sort the index IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); IntVector indices = new IntVector("", allocator); indices.setValueCount(10); indexSorter.sort(vec, indices, intComparator); - int[] expected = new int[]{6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; + int[] expected = new int[] {6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; for (int i = 0; i < expected.length; i++) { assertTrue(!indices.isNull(i)); @@ -74,8 +72,8 @@ public void testIndexSort() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. 
*/ @Test public void testSortLargeIncreasingInt() { @@ -91,7 +89,8 @@ public void testSortLargeIncreasingInt() { // sort the vector IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); try (IntVector indices = new IntVector("", allocator)) { @@ -110,7 +109,7 @@ public void testSortLargeIncreasingInt() { public void testChoosePivot() { final int vectorLength = 100; try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(vectorLength); indices.allocateNew(vectorLength); @@ -122,7 +121,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); indices.setValueCount(vectorLength); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); @@ -147,17 +147,16 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. */ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(); indices.allocateNew(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java index ba9c42913c0d9..3b16ac30d4ff4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertFalse; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link InsertionSorter}. - */ +/** Test cases for {@link InsertionSorter}. 
*/ public class TestInsertionSorter { private BufferAllocator allocator; @@ -49,7 +46,7 @@ public void shutdown() { private void testSortIntVectorRange(int start, int end, int[] expected) { try (IntVector vector = new IntVector("vector", allocator); - IntVector buffer = new IntVector("buffer", allocator)) { + IntVector buffer = new IntVector("buffer", allocator)) { buffer.allocateNew(1); @@ -81,7 +78,7 @@ public void testSortIntVector() { private void testSortIndicesRange(int start, int end, int[] expectedIndices) { try (IntVector vector = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); ValueVectorDataPopulator.setVector(indices, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java index 321ca226d7e1d..025576f08e248 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static junit.framework.TestCase.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link OffHeapIntStack}. - */ +/** Test cases for {@link OffHeapIntStack}. */ public class TestOffHeapIntStack { private BufferAllocator allocator; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java index 66b75cbccac3e..4f6a8489c43ea 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java @@ -14,19 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; - import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for out-of-place sorters. - */ +/** Test cases for out-of-place sorters. */ @RunWith(Parameterized.class) public abstract class TestOutOfPlaceVectorSorter { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java index e22b22d4e6757..24b2c752d0863 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -25,7 +24,6 @@ import java.util.Random; import java.util.function.BiConsumer; import java.util.function.Supplier; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; @@ -37,50 +35,59 @@ import org.apache.arrow.vector.testing.RandomDataGenerator; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -/** - * Utilities for sorting related utilities. - */ +/** Utilities for sorting related utilities. */ public class TestSortingUtil { static final Random random = new Random(0); - static final DataGenerator TINY_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.TINY_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Byte.class); - - static final DataGenerator SMALL_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.SMALL_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Short.class); - - static final DataGenerator INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Integer.class); - - static final DataGenerator LONG_GENERATOR = new DataGenerator<>( - RandomDataGenerator.LONG_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Long.class); - - static final DataGenerator FLOAT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.FLOAT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Float.class); - - static final DataGenerator DOUBLE_GENERATOR = new DataGenerator<>( - RandomDataGenerator.DOUBLE_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Double.class); - - static final DataGenerator STRING_GENERATOR = new DataGenerator<>( - () -> { - int strLength = random.nextInt(20) + 1; - return generateRandomString(strLength); - }, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), String.class); - - private TestSortingUtil() { - } - - /** - * Verify that a vector is equal to an array. 
- */ + static final DataGenerator TINY_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.TINY_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Byte.class); + + static final DataGenerator SMALL_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.SMALL_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Short.class); + + static final DataGenerator INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Integer.class); + + static final DataGenerator LONG_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.LONG_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Long.class); + + static final DataGenerator FLOAT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.FLOAT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Float.class); + + static final DataGenerator DOUBLE_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.DOUBLE_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Double.class); + + static final DataGenerator STRING_GENERATOR = + new DataGenerator<>( + () -> { + int strLength = random.nextInt(20) + 1; + return generateRandomString(strLength); + }, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + String.class); + + private TestSortingUtil() {} + + /** Verify that a vector is equal to an array. */ public static void verifyResults(V vector, U[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { @@ -88,30 +95,28 @@ public static void verifyResults(V vector, U[] expect } } - /** - * Sort an array with null values come first. - */ + /** Sort an array with null values come first. */ public static > void sortArray(U[] array) { - Arrays.sort(array, (a, b) -> { - if (a == null || b == null) { - if (a == null && b == null) { - return 0; - } - - // exactly one is null - if (a == null) { - return -1; - } else { - return 1; - } - } - return a.compareTo(b); - }); + Arrays.sort( + array, + (a, b) -> { + if (a == null || b == null) { + if (a == null && b == null) { + return 0; + } + + // exactly one is null + if (a == null) { + return -1; + } else { + return 1; + } + } + return a.compareTo(b); + }); } - /** - * Generate a string with alphabetic characters only. - */ + /** Generate a string with alphabetic characters only. */ static String generateRandomString(int length) { byte[] str = new byte[length]; final int lower = 'a'; @@ -128,6 +133,7 @@ static String generateRandomString(int length) { /** * Utility to generate data for testing. + * * @param vector type. * @param data element type. */ @@ -139,8 +145,7 @@ static class DataGenerator> { final Class clazz; - DataGenerator( - Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { + DataGenerator(Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { this.dataGenerator = dataGenerator; this.vectorPopulator = vectorPopulator; this.clazz = clazz; @@ -148,6 +153,7 @@ static class DataGenerator> { /** * Populate the vector according to the specified parameters. + * * @param vector the vector to populate. * @param length vector length. * @param nullFraction the fraction of null values. 
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java index f2de5d23fce89..ce15940c1df3d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VarCharVector; @@ -31,9 +29,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link StableVectorComparator}. - */ +/** Test cases for {@link StableVectorComparator}. */ public class TestStableVectorComparator { private BufferAllocator allocator; @@ -62,7 +58,8 @@ public void testCompare() { vec.set(4, "a".getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); stableComparator.attachVector(vec); assertTrue(stableComparator.compare(0, 1) > 0); @@ -95,10 +92,12 @@ public void testStableSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); try (VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + (VarCharVector) + vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -107,23 +106,32 @@ public void testStableSortString() { // verify results // the results are stable - assertEquals("0", new String(Objects.requireNonNull(sortedVec.get(0)), StandardCharsets.UTF_8)); - assertEquals("01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); - assertEquals("0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("abcdefg", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "0", new String(Objects.requireNonNull(sortedVec.get(0)), 
StandardCharsets.UTF_8)); + assertEquals( + "01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "abcdefg", + new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); } } } - /** - * Utility comparator that compares varchars by the first character. - */ + /** Utility comparator that compares varchars by the first character. */ private static class TestVarCharSorter extends VectorValueComparator { @Override diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 2486034f1fa32..b3f2539fa53c2 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. */ public class TestVariableWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -44,10 +40,11 @@ public TestVariableWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new VariableWidthOutOfPlaceVectorSorter(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new VariableWidthOutOfPlaceVectorSorter(); } - @Before public void prepare() { allocator = new RootAllocator(1024 * 1024); @@ -79,10 +76,10 @@ public void testSortString() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -96,14 +93,23 @@ public void testSortString() { assertTrue(sortedVec.isNull(0)); assertTrue(sortedVec.isNull(1)); - assertEquals("12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); sortedVec.close(); } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java index 7951c39d550d2..5c37ddf9284e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertArrayEquals; @@ -28,7 +27,6 @@ import java.util.Comparator; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -41,9 +39,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting variable width vectors with random data. - */ +/** Test sorting variable width vectors with random data. */ @RunWith(Parameterized.class) public class TestVariableWidthSorting> { @@ -72,8 +68,11 @@ public void shutdown() { } public TestVariableWidthSorting( - int length, double nullFraction, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.vectorGenerator = vectorGenerator; @@ -92,9 +91,11 @@ void sortOutOfPlace() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { int dataSize = vector.getOffsetBuffer().getInt(vector.getValueCount() * 4L); sortedVec.allocateNew(dataSize, vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -112,33 +113,36 @@ public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { - params.add(new Object[]{ - length, nullFrac, "VarCharVector", - (Function) allocator -> new VarCharVector("vector", allocator), - TestSortingUtil.STRING_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + "VarCharVector", + (Function) + allocator -> new VarCharVector("vector", allocator), + TestSortingUtil.STRING_GENERATOR + }); } } return params; } - /** - * Verify results as byte arrays. - */ + /** Verify results as byte arrays. */ public static void verifyResults(V vector, String[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { if (expected[i] == null) { assertTrue(vector.isNull(i)); } else { - assertArrayEquals(((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + ((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); } } } /** - * String comparator with the same behavior as that of - * {@link DefaultVectorComparators.VariableWidthComparator}. + * String comparator with the same behavior as that of {@link + * DefaultVectorComparators.VariableWidthComparator}. 
*/ static class StringComparator implements Comparator { diff --git a/java/dev/checkstyle/checkstyle-spotless.xml b/java/dev/checkstyle/checkstyle-spotless.xml new file mode 100644 index 0000000000000..cbaec1a39bf2c --- /dev/null +++ b/java/dev/checkstyle/checkstyle-spotless.xml @@ -0,0 +1,286 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/java/dev/checkstyle/checkstyle.license b/java/dev/license/asf-java.license similarity index 100% rename from java/dev/checkstyle/checkstyle.license rename to java/dev/license/asf-java.license diff --git a/java/dev/license/asf-xml.license b/java/dev/license/asf-xml.license new file mode 100644 index 0000000000000..a43b97bca8f0f --- /dev/null +++ b/java/dev/license/asf-xml.license @@ -0,0 +1,11 @@ + + \ No newline at end of file diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 72140dd6570d0..8a4043016e770 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -237,7 +237,7 @@ maven-checkstyle-plugin ../dev/checkstyle/checkstyle.xml - ../dev/checkstyle/checkstyle.license + ../dev/license/asf-java.license ../dev/checkstyle/suppressions.xml true UTF-8 diff --git a/java/pom.xml b/java/pom.xml index 9624444cf422d..bcb8b46843f2d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -92,12 +92,15 @@ 1.11.3 2 + dev/checkstyle/checkstyle.xml true 9+181-r4173-1 2.28.0 5.11.0 5.2.0 3.43.0 + + **/*.java none -Xdoclint:none @@ -701,8 +704,8 @@ maven-checkstyle-plugin **/module-info.java - dev/checkstyle/checkstyle.xml - dev/checkstyle/checkstyle.license + ${checkstyle.config.location} + dev/license/asf-java.license dev/checkstyle/suppressions.xml true UTF-8 @@ -803,6 +806,19 @@ + + + ${spotless.java.excludes} + + + 1.7 + + + + ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license + package + + @@ -929,7 +945,7 @@ Error Prone 2.10.0 is the latest version to support running on JDK 8. With right flags it could be upgraded, - but we choose to keep this unchanged for now. + but we choose to keep this unchanged for now. --> 2.10.0 From 95db23e9e02c7d724269b0a0f241e53ecfbe76be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:41:04 +0900 Subject: [PATCH 23/59] MINOR: [Java] Bump org.jacoco:jacoco-maven-plugin from 0.8.11 to 0.8.12 in /java (#41516) Bumps [org.jacoco:jacoco-maven-plugin](https://github.com/jacoco/jacoco) from 0.8.11 to 0.8.12.

    Release notes

    Sourced from org.jacoco:jacoco-maven-plugin's releases.

    0.8.12

    New Features

    • JaCoCo now officially supports Java 22 (GitHub #1596).
    • Experimental support for Java 23 class files (GitHub #1553).

    Fixed bugs

    • Branches added by the Kotlin compiler for functions with default arguments and more than 32 parameters are filtered out during report generation (GitHub #1556).
    • The branch added by Kotlin compiler versions 1.5.0 and above for reading a lateinit property is filtered out during report generation (GitHub #1568).

    Non-functional Changes

    • JaCoCo now depends on ASM 9.7 (GitHub #1600).
    Commits

    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.jacoco:jacoco-maven-plugin&package-manager=maven&previous-version=0.8.11&new-version=0.8.12)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) You can trigger a rebase of this PR by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    > **Note** > Automatic rebases have been disabled on this pull request as it has been open for over 30 days. Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/java/pom.xml b/java/pom.xml index bcb8b46843f2d..085546573596a 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -342,7 +342,7 @@ org.jacoco jacoco-maven-plugin - 0.8.11 + 0.8.12

  • STRING --> ArrowType.Utf8
  • INT --> ArrowType.Int(32, signed)
  • LONG --> ArrowType.Int(64, signed)
  • FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
  • DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
  • BOOLEAN --> ArrowType.Bool
  • BYTES --> ArrowType.Binary
  • ARRAY --> ArrowType.List
  • MAP --> ArrowType.Map
  • FIXED --> ArrowType.FixedSizeBinary
  • RECORD --> ArrowType.Struct
  • UNION --> ArrowType.Union
  • ENUM --> ArrowType.Int
  • DECIMAL --> ArrowType.Decimal
  • Date --> ArrowType.Date(DateUnit.DAY)
  • TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
  • TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)
  • TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)
  • TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null)
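To make the mapping above concrete, here is a minimal, illustrative sketch of driving the Avro adapter end to end. It is not part of the patch: it assumes the adapter's `AvroToArrowConfigBuilder` and the `AvroToArrowVectorIterator.create(decoder, schema, config)` entry point (the latter appears in the `AvroToArrowVectorIterator` diff further down), and it uses a hypothetical three-field record schema. With real binary Avro input, each `VectorSchemaRoot` it yields carries an Arrow schema derived from the table above.

```java
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import org.apache.arrow.adapter.avro.AvroToArrowConfig;
import org.apache.arrow.adapter.avro.AvroToArrowConfigBuilder;
import org.apache.arrow.adapter.avro.AvroToArrowVectorIterator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

public class AvroToArrowSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical Avro record schema exercising a few rows of the mapping table:
    // string -> Utf8, long -> Int(64, signed), double -> FloatingPoint(DOUBLE).
    String json =
        "{\"type\":\"record\",\"name\":\"Reading\",\"fields\":["
            + "{\"name\":\"id\",\"type\":\"string\"},"
            + "{\"name\":\"ts\",\"type\":\"long\"},"
            + "{\"name\":\"value\",\"type\":\"double\"}]}";
    Schema avroSchema = new Schema.Parser().parse(json);

    // Binary Avro data would normally come from a file or network stream; an empty
    // stream is used here only to show the wiring (it simply yields no batches).
    InputStream in = new ByteArrayInputStream(new byte[0]);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(in, null);

    try (BufferAllocator allocator = new RootAllocator()) {
      // AvroToArrowConfigBuilder is assumed here; the patch itself only shows AvroToArrowConfig.
      AvroToArrowConfig config = new AvroToArrowConfigBuilder(allocator).build();
      try (AvroToArrowVectorIterator batches =
          AvroToArrowVectorIterator.create(decoder, avroSchema, config)) {
        while (batches.hasNext()) {
          try (VectorSchemaRoot root = batches.next()) {
            // Each field's ArrowType follows the javadoc mapping reproduced above.
            System.out.println(root.getSchema());
          }
        }
      }
    }
  }
}
```

The same config object also carries the skip-field set and target batch size that the consumers in this patch read via `config.getSkipFieldNames()` and `config.getTargetBatchSize()`.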
  • TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null) * */ - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config) { return createConsumer(schema, name, false, config, null); } - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { + private static Consumer createConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { return createConsumer(schema, name, false, config, vector); } @@ -144,7 +143,8 @@ private static Consumer createConsumer(Schema schema, String name, AvroToArrowCo * * @param schema avro schema * @param name arrow field name - * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via field. + * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via + * field. * @return consumer */ private static Consumer createConsumer( @@ -185,7 +185,7 @@ private static Consumer createConsumer( break; case STRING: arrowType = new ArrowType.Utf8(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroStringConsumer((VarCharVector) vector); break; @@ -193,12 +193,18 @@ private static Consumer createConsumer( Map extProps = createExternalProps(schema); if (logicalType instanceof LogicalTypes.Decimal) { arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + fieldType = + new FieldType( + nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroDecimalConsumer.FixedDecimalConsumer((DecimalVector) vector, schema.getFixedSize()); + consumer = + new AvroDecimalConsumer.FixedDecimalConsumer( + (DecimalVector) vector, schema.getFixedSize()); } else { arrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + fieldType = + new FieldType( + nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroFixedConsumer((FixedSizeBinaryVector) vector, schema.getFixedSize()); } @@ -206,84 +212,85 @@ private static Consumer createConsumer( case INT: if (logicalType instanceof LogicalTypes.Date) { arrowType = new ArrowType.Date(DateUnit.DAY); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDateConsumer((DateDayVector) vector); } else if (logicalType instanceof LogicalTypes.TimeMillis) { arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector); } else { - arrowType = new ArrowType.Int(32, /*isSigned=*/true); - fieldType 
= new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + arrowType = new ArrowType.Int(32, /*isSigned=*/ true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroIntConsumer((IntVector) vector); } break; case BOOLEAN: arrowType = new ArrowType.Bool(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroBooleanConsumer((BitVector) vector); break; case LONG: if (logicalType instanceof LogicalTypes.TimeMicros) { arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMicroConsumer((TimeMicroVector) vector); } else if (logicalType instanceof LogicalTypes.TimestampMillis) { arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMillisConsumer((TimeStampMilliVector) vector); } else if (logicalType instanceof LogicalTypes.TimestampMicros) { arrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector); } else { - arrowType = new ArrowType.Int(64, /*isSigned=*/true); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + arrowType = new ArrowType.Int(64, /*isSigned=*/ true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroLongConsumer((BigIntVector) vector); } break; case FLOAT: arrowType = new ArrowType.FloatingPoint(SINGLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroFloatConsumer((Float4Vector) vector); break; case DOUBLE: arrowType = new ArrowType.FloatingPoint(DOUBLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDoubleConsumer((Float8Vector) vector); break; case BYTES: if (logicalType instanceof LogicalTypes.Decimal) { arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, 
/*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDecimalConsumer.BytesDecimalConsumer((DecimalVector) vector); } else { arrowType = new ArrowType.Binary(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroBytesConsumer((VarBinaryVector) vector); } break; case NULL: arrowType = new ArrowType.Null(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); - vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/null); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); + vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/ null); consumer = new AvroNullConsumer((NullVector) vector); break; default: // no-op, shouldn't get here - throw new UnsupportedOperationException("Can't convert avro type %s to arrow type." + type.getName()); + throw new UnsupportedOperationException( + "Can't convert avro type %s to arrow type." + type.getName()); } return consumer; } @@ -291,15 +298,16 @@ private static Consumer createConsumer( private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType) { final int scale = logicalType.getScale(); final int precision = logicalType.getPrecision(); - Preconditions.checkArgument(precision > 0 && precision <= 38, - "Precision must be in range of 1 to 38"); - Preconditions.checkArgument(scale >= 0 && scale <= 38, - "Scale must be in range of 0 to 38."); - Preconditions.checkArgument(scale <= precision, - "Invalid decimal scale: %s (greater than precision: %s)", scale, precision); + Preconditions.checkArgument( + precision > 0 && precision <= 38, "Precision must be in range of 1 to 38"); + Preconditions.checkArgument(scale >= 0 && scale <= 38, "Scale must be in range of 0 to 38."); + Preconditions.checkArgument( + scale <= precision, + "Invalid decimal scale: %s (greater than precision: %s)", + scale, + precision); return new ArrowType.Decimal(precision, scale, 128); - } private static Consumer createSkipConsumer(Schema schema) { @@ -309,41 +317,46 @@ private static Consumer createSkipConsumer(Schema schema) { switch (type) { case UNION: - List unionDelegates = schema.getTypes().stream().map(s -> - createSkipConsumer(s)).collect(Collectors.toList()); + List unionDelegates = + schema.getTypes().stream().map(s -> createSkipConsumer(s)).collect(Collectors.toList()); skipFunction = decoder -> unionDelegates.get(decoder.readInt()).consume(decoder); break; case ARRAY: Consumer elementDelegate = createSkipConsumer(schema.getElementType()); - skipFunction = decoder -> { - for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { - for (long j = 0; j < i; j++) { - elementDelegate.consume(decoder); - } - } - }; + skipFunction = + decoder -> { + for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { + for (long j = 0; j < i; j++) { + elementDelegate.consume(decoder); + } + } + }; break; case MAP: Consumer valueDelegate = createSkipConsumer(schema.getValueType()); - skipFunction = decoder -> { - for (long i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { - for (long j = 0; j < i; j++) { - decoder.skipString(); // Discard key - valueDelegate.consume(decoder); - } - } - }; + skipFunction = + decoder -> { + for (long 
i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { + for (long j = 0; j < i; j++) { + decoder.skipString(); // Discard key + valueDelegate.consume(decoder); + } + } + }; break; case RECORD: - List delegates = schema.getFields().stream().map(field -> - createSkipConsumer(field.schema())).collect(Collectors.toList()); + List delegates = + schema.getFields().stream() + .map(field -> createSkipConsumer(field.schema())) + .collect(Collectors.toList()); - skipFunction = decoder -> { - for (Consumer consumer : delegates) { - consumer.consume(decoder); - } - }; + skipFunction = + decoder -> { + for (Consumer consumer : delegates) { + consumer.consume(decoder); + } + }; break; case ENUM: @@ -374,7 +387,7 @@ private static Consumer createSkipConsumer(Schema schema) { skipFunction = decoder -> decoder.skipBytes(); break; case NULL: - skipFunction = decoder -> { }; + skipFunction = decoder -> {}; break; default: // no-op, shouldn't get here @@ -384,8 +397,7 @@ private static Consumer createSkipConsumer(Schema schema) { return new SkipConsumer(skipFunction); } - static CompositeAvroConsumer createCompositeConsumer( - Schema schema, AvroToArrowConfig config) { + static CompositeAvroConsumer createCompositeConsumer(Schema schema, AvroToArrowConfig config) { List consumers = new ArrayList<>(); final Set skipFieldNames = config.getSkipFieldNames(); @@ -399,7 +411,6 @@ static CompositeAvroConsumer createCompositeConsumer( Consumer consumer = createConsumer(field.schema(), field.name(), config); consumers.add(consumer); } - } } else { Consumer consumer = createConsumer(schema, "", config); @@ -409,9 +420,11 @@ static CompositeAvroConsumer createCompositeConsumer( return new CompositeAvroConsumer(consumers); } - private static FieldVector createVector(FieldVector consumerVector, FieldType fieldType, - String name, BufferAllocator allocator) { - return consumerVector != null ? consumerVector : fieldType.createNewSingleVector(name, allocator, null); + private static FieldVector createVector( + FieldVector consumerVector, FieldType fieldType, String name, BufferAllocator allocator) { + return consumerVector != null + ? 
consumerVector + : fieldType.createNewSingleVector(name, allocator, null); } private static String getDefaultFieldName(ArrowType type) { @@ -424,10 +437,7 @@ private static Field avroSchemaToField(Schema schema, String name, AvroToArrowCo } private static Field avroSchemaToField( - Schema schema, - String name, - AvroToArrowConfig config, - Map externalProps) { + Schema schema, String name, AvroToArrowConfig config, Map externalProps) { final Schema.Type type = schema.getType(); final LogicalType logicalType = schema.getLogicalType(); @@ -441,7 +451,8 @@ private static Field avroSchemaToField( // Union child vector should use default name children.add(avroSchemaToField(childSchema, null, config)); } - fieldType = createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); + fieldType = + createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); break; case ARRAY: Schema elementSchema = schema.getElementType(); @@ -450,14 +461,18 @@ private static Field avroSchemaToField( break; case MAP: // MapVector internal struct field and key field should be non-nullable - FieldType keyFieldType = new FieldType(/*nullable=*/false, new ArrowType.Utf8(), /*dictionary=*/null); - Field keyField = new Field("key", keyFieldType, /*children=*/null); + FieldType keyFieldType = + new FieldType(/*nullable=*/ false, new ArrowType.Utf8(), /*dictionary=*/ null); + Field keyField = new Field("key", keyFieldType, /*children=*/ null); Field valueField = avroSchemaToField(schema.getValueType(), "value", config); - FieldType structFieldType = new FieldType(false, new ArrowType.Struct(), /*dictionary=*/null); - Field structField = new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); + FieldType structFieldType = + new FieldType(false, new ArrowType.Struct(), /*dictionary=*/ null); + Field structField = + new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); children.add(structField); - fieldType = createFieldType(new ArrowType.Map(/*keysSorted=*/false), schema, externalProps); + fieldType = + createFieldType(new ArrowType.Map(/*keysSorted=*/ false), schema, externalProps); break; case RECORD: final Set skipFieldNames = config.getSkipFieldNames(); @@ -486,8 +501,12 @@ private static Field avroSchemaToField( int enumCount = schema.getEnumSymbols().size(); ArrowType.Int indexType = DictionaryEncoder.getIndexType(enumCount); - fieldType = createFieldType(indexType, schema, externalProps, - new DictionaryEncoding(current, /*ordered=*/false, /*indexType=*/indexType)); + fieldType = + createFieldType( + indexType, + schema, + externalProps, + new DictionaryEncoding(current, /*ordered=*/ false, /*indexType=*/ indexType)); break; case STRING: @@ -509,7 +528,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimeMillis) { intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); } else { - intArrowType = new ArrowType.Int(32, /*isSigned=*/true); + intArrowType = new ArrowType.Int(32, /*isSigned=*/ true); } fieldType = createFieldType(intArrowType, schema, externalProps); break; @@ -525,7 +544,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimestampMicros) { longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); } else { - longArrowType = new ArrowType.Int(64, /*isSigned=*/true); + longArrowType = new ArrowType.Int(64, /*isSigned=*/ true); } fieldType = createFieldType(longArrowType, schema, externalProps); break; @@ -558,8 +577,8 @@ 
private static Field avroSchemaToField( return new Field(name, fieldType, children.size() == 0 ? null : children); } - private static Consumer createArrayConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createArrayConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { ListVector listVector; if (consumerVector == null) { @@ -578,8 +597,8 @@ private static Consumer createArrayConsumer(Schema schema, String name, AvroToAr return new AvroArraysConsumer(listVector, delegate); } - private static Consumer createStructConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createStructConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { final Set skipFieldNames = config.getSkipFieldNames(); @@ -601,19 +620,22 @@ private static Consumer createStructConsumer(Schema schema, String name, AvroToA if (skipFieldNames.contains(fullChildName)) { delegate = createSkipConsumer(childField.schema()); } else { - delegate = createConsumer(childField.schema(), fullChildName, config, - structVector.getChildrenFromFields().get(vectorIndex++)); + delegate = + createConsumer( + childField.schema(), + fullChildName, + config, + structVector.getChildrenFromFields().get(vectorIndex++)); } delegates[i] = delegate; } return new AvroStructConsumer(structVector, delegates); - } - private static Consumer createEnumConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createEnumConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { BaseIntVector indexVector; if (consumerVector == null) { @@ -630,16 +652,14 @@ private static Consumer createEnumConsumer(Schema schema, String name, AvroToArr for (int i = 0; i < valueCount; i++) { dictVector.set(i, schema.getEnumSymbols().get(i).getBytes(StandardCharsets.UTF_8)); } - Dictionary dictionary = - new Dictionary(dictVector, indexVector.getField().getDictionary()); + Dictionary dictionary = new Dictionary(dictVector, indexVector.getField().getDictionary()); config.getProvider().put(dictionary); return new AvroEnumConsumer(indexVector); - } - private static Consumer createMapConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createMapConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { MapVector mapVector; if (consumerVector == null) { @@ -653,10 +673,14 @@ private static Consumer createMapConsumer(Schema schema, String name, AvroToArro StructVector structVector = (StructVector) mapVector.getDataVector(); // keys in avro map are always assumed to be strings. 
- Consumer keyConsumer = new AvroStringConsumer( - (VarCharVector) structVector.getChildrenFromFields().get(0)); - Consumer valueConsumer = createConsumer(schema.getValueType(), schema.getValueType().getName(), - config, structVector.getChildrenFromFields().get(1)); + Consumer keyConsumer = + new AvroStringConsumer((VarCharVector) structVector.getChildrenFromFields().get(0)); + Consumer valueConsumer = + createConsumer( + schema.getValueType(), + schema.getValueType().getName(), + config, + structVector.getChildrenFromFields().get(1)); AvroStructConsumer internalConsumer = new AvroStructConsumer(structVector, new Consumer[] {keyConsumer, valueConsumer}); @@ -664,11 +688,12 @@ private static Consumer createMapConsumer(Schema schema, String name, AvroToArro return new AvroMapConsumer(mapVector, internalConsumer); } - private static Consumer createUnionConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createUnionConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { final int size = schema.getTypes().size(); - final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); + final boolean nullable = + schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); UnionVector unionVector; if (consumerVector == null) { @@ -695,14 +720,12 @@ private static Consumer createUnionConsumer(Schema schema, String name, AvroToAr /** * Read data from {@link Decoder} and generate a {@link VectorSchemaRoot}. + * * @param schema avro schema * @param decoder avro decoder to read data from */ static VectorSchemaRoot avroToArrowVectors( - Schema schema, - Decoder decoder, - AvroToArrowConfig config) - throws IOException { + Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { List vectors = new ArrayList<>(); List consumers = new ArrayList<>(); @@ -726,8 +749,8 @@ static VectorSchemaRoot avroToArrowVectors( } long validConsumerCount = consumers.stream().filter(c -> !c.skippable()).count(); - Preconditions.checkArgument(vectors.size() == validConsumerCount, - "vectors size not equals consumers size."); + Preconditions.checkArgument( + vectors.size() == validConsumerCount, "vectors size not equals consumers size."); List fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); @@ -767,9 +790,7 @@ private static Map getMetaData(Schema schema, Map createExternalProps(Schema schema) { final Map extProps = new HashMap<>(); String doc = schema.getDoc(); @@ -783,8 +804,9 @@ private static Map createExternalProps(Schema schema) { return extProps; } - private static FieldType createFieldType(ArrowType arrowType, Schema schema, Map externalProps) { - return createFieldType(arrowType, schema, externalProps, /*dictionary=*/null); + private static FieldType createFieldType( + ArrowType arrowType, Schema schema, Map externalProps) { + return createFieldType(arrowType, schema, externalProps, /*dictionary=*/ null); } private static FieldType createFieldType( @@ -793,8 +815,8 @@ private static FieldType createFieldType( Map externalProps, DictionaryEncoding dictionary) { - return new FieldType(/*nullable=*/false, arrowType, dictionary, - getMetaData(schema, externalProps)); + return new FieldType( + /*nullable=*/ false, arrowType, dictionary, getMetaData(schema, externalProps)); } private static String convertAliases(Set aliases) { diff --git 
a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java index 9a0cfd97a49a1..4123370061794 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import java.io.EOFException; @@ -22,7 +21,6 @@ import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; - import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; @@ -32,9 +30,7 @@ import org.apache.avro.Schema; import org.apache.avro.io.Decoder; -/** - * VectorSchemaRoot iterator for partially converting avro data. - */ +/** VectorSchemaRoot iterator for partially converting avro data. */ public class AvroToArrowVectorIterator implements Iterator, AutoCloseable { public static final int NO_LIMIT_BATCH_SIZE = -1; @@ -53,28 +49,18 @@ public class AvroToArrowVectorIterator implements Iterator, Au private final int targetBatchSize; - /** - * Construct an instance. - */ - private AvroToArrowVectorIterator( - Decoder decoder, - Schema schema, - AvroToArrowConfig config) { + /** Construct an instance. */ + private AvroToArrowVectorIterator(Decoder decoder, Schema schema, AvroToArrowConfig config) { this.decoder = decoder; this.schema = schema; this.config = config; this.targetBatchSize = config.getTargetBatchSize(); - } - /** - * Create a ArrowVectorIterator to partially convert data. - */ + /** Create a ArrowVectorIterator to partially convert data. */ public static AvroToArrowVectorIterator create( - Decoder decoder, - Schema schema, - AvroToArrowConfig config) { + Decoder decoder, Schema schema, AvroToArrowConfig config) { AvroToArrowVectorIterator iterator = new AvroToArrowVectorIterator(decoder, schema, config); try { @@ -136,9 +122,10 @@ private void load(VectorSchemaRoot root) { ValueVectorUtility.preAllocate(root, targetBatchSize); } - long validConsumerCount = compositeConsumer.getConsumers().stream().filter(c -> - !c.skippable()).count(); - Preconditions.checkArgument(root.getFieldVectors().size() == validConsumerCount, + long validConsumerCount = + compositeConsumer.getConsumers().stream().filter(c -> !c.skippable()).count(); + Preconditions.checkArgument( + root.getFieldVectors().size() == validConsumerCount, "Schema root vectors size not equals to consumers size."); compositeConsumer.resetConsumerVectors(root); @@ -159,9 +146,7 @@ public boolean hasNext() { return nextBatch != null; } - /** - * Gets the next vector. The user is responsible for freeing its resources. - */ + /** Gets the next vector. The user is responsible for freeing its resources. */ @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); @@ -175,9 +160,7 @@ public VectorSchemaRoot next() { return returned; } - /** - * Clean up resources. - */ + /** Clean up resources. 
*/ @Override public void close() { if (nextBatch != null) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java index fd25986c32b95..4555ce7a295f7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java @@ -14,25 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.complex.ListVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume array type values from avro decoder. - * Write the data to {@link ListVector}. + * Consumer which consume array type values from avro decoder. Write the data to {@link ListVector}. */ public class AvroArraysConsumer extends BaseAvroConsumer { private final Consumer delegate; - /** - * Instantiate a ArrayConsumer. - */ + /** Instantiate a ArrayConsumer. */ public AvroArraysConsumer(ListVector vector, Consumer delegate) { super(vector); this.delegate = delegate; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java index bf41828d19f7a..09eb5f3b255d5 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BitVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume boolean type values from avro decoder. - * Write the data to {@link BitVector}. + * Consumer which consume boolean type values from avro decoder. Write the data to {@link + * BitVector}. */ public class AvroBooleanConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroBooleanConsumer. - */ + /** Instantiate a AvroBooleanConsumer. */ public AvroBooleanConsumer(BitVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java index c8370e480608d..86b6cbb13d881 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.vector.VarBinaryVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume bytes type values from avro decoder. - * Write the data to {@link VarBinaryVector}. + * Consumer which consume bytes type values from avro decoder. Write the data to {@link + * VarBinaryVector}. 
*/ public class AvroBytesConsumer extends BaseAvroConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a AvroBytesConsumer. - */ + /** Instantiate a AvroBytesConsumer. */ public AvroBytesConsumer(VarBinaryVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java index 7cc7dd33b15a9..011cbccc09c5b 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.Float8Vector; import org.apache.avro.io.Decoder; /** - * Consumer which consume double type values from avro decoder. - * Write the data to {@link Float8Vector}. + * Consumer which consume double type values from avro decoder. Write the data to {@link + * Float8Vector}. */ public class AvroDoubleConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDoubleConsumer. - */ + /** Instantiate a AvroDoubleConsumer. */ public AvroDoubleConsumer(Float8Vector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java index 32a2c85f6fc50..f47988fb962a1 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java @@ -14,24 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BaseIntVector; import org.apache.arrow.vector.IntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume enum type values from avro decoder. - * Write the data to {@link IntVector}. + * Consumer which consume enum type values from avro decoder. Write the data to {@link IntVector}. */ public class AvroEnumConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroEnumConsumer. - */ + /** Instantiate a AvroEnumConsumer. */ public AvroEnumConsumer(BaseIntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java index 16b70898fd36a..6b78afd3c95d4 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume fixed type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.FixedSizeBinaryVector}. 
+ * Consumer which consume fixed type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.FixedSizeBinaryVector}. */ public class AvroFixedConsumer extends BaseAvroConsumer { private final byte[] reuseBytes; - /** - * Instantiate a AvroFixedConsumer. - */ + /** Instantiate a AvroFixedConsumer. */ public AvroFixedConsumer(FixedSizeBinaryVector vector, int size) { super(vector); reuseBytes = new byte[size]; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java index b09d2881875b6..2c6d4aa5a05f6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.Float4Vector; import org.apache.avro.io.Decoder; /** - * Consumer which consume float type values from avro decoder. - * Write the data to {@link Float4Vector}. + * Consumer which consume float type values from avro decoder. Write the data to {@link + * Float4Vector}. */ public class AvroFloatConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroFloatConsumer. - */ + /** Instantiate a AvroFloatConsumer. */ public AvroFloatConsumer(Float4Vector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java index ae5a2719c5642..22c7b10aa65f7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java @@ -14,23 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.IntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume int type values from avro decoder. - * Write the data to {@link IntVector}. + * Consumer which consume int type values from avro decoder. Write the data to {@link IntVector}. */ public class AvroIntConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroIntConsumer. - */ + /** Instantiate a AvroIntConsumer. */ public AvroIntConsumer(IntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java index 4db836acc4586..90c5313417d7c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BigIntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume long type values from avro decoder. 
- * Write the data to {@link BigIntVector}. + * Consumer which consume long type values from avro decoder. Write the data to {@link + * BigIntVector}. */ public class AvroLongConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroLongConsumer. - */ + /** Instantiate a AvroLongConsumer. */ public AvroLongConsumer(BigIntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java index 1ea97e63b61e5..543471533ec01 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java @@ -14,27 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume map type values from avro decoder. - * Write the data to {@link MapVector}. + * Consumer which consume map type values from avro decoder. Write the data to {@link MapVector}. */ public class AvroMapConsumer extends BaseAvroConsumer { private final Consumer delegate; - /** - * Instantiate a AvroMapConsumer. - */ + /** Instantiate a AvroMapConsumer. */ public AvroMapConsumer(MapVector vector, Consumer delegate) { super(vector); this.delegate = delegate; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java index 4c7bb8c03bad3..0f80c2b7b2db3 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java @@ -14,17 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.NullVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume null type values from avro decoder. - * Corresponding to {@link org.apache.arrow.vector.NullVector}. + * Consumer which consume null type values from avro decoder. Corresponding to {@link + * org.apache.arrow.vector.NullVector}. */ public class AvroNullConsumer extends BaseAvroConsumer { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java index 072270aa6c081..164d595e9c6ac 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.vector.VarCharVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume string type values from avro decoder. - * Write the data to {@link VarCharVector}. + * Consumer which consume string type values from avro decoder. Write the data to {@link + * VarCharVector}. */ public class AvroStringConsumer extends BaseAvroConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a AvroStringConsumer. - */ + /** Instantiate a AvroStringConsumer. */ public AvroStringConsumer(VarCharVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java index a02b1577f9fa8..94c2f611e84b7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume nested record type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.complex.StructVector}. + * Consumer which consume nested record type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.complex.StructVector}. */ public class AvroStructConsumer extends BaseAvroConsumer { private final Consumer[] delegates; - /** - * Instantiate a AvroStructConsumer. - */ + /** Instantiate a AvroStructConsumer. */ public AvroStructConsumer(StructVector vector, Consumer[] delegates) { super(vector); this.delegates = delegates; @@ -49,7 +45,6 @@ public void consume(Decoder decoder) throws IOException { } vector.setIndexDefined(currentIndex); currentIndex++; - } @Override diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java index 76287543b0646..5a8e23e62892c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.UnionVector; @@ -26,17 +24,15 @@ import org.apache.avro.io.Decoder; /** - * Consumer which consume unions type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.complex.UnionVector}. + * Consumer which consume unions type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.complex.UnionVector}. 
*/ public class AvroUnionsConsumer extends BaseAvroConsumer { private Consumer[] delegates; private Types.MinorType[] types; - /** - * Instantiate an AvroUnionConsumer. - */ + /** Instantiate an AvroUnionConsumer. */ public AvroUnionsConsumer(UnionVector vector, Consumer[] delegates, Types.MinorType[] types) { super(vector); @@ -53,7 +49,8 @@ public void consume(Decoder decoder) throws IOException { vector.setType(currentIndex, types[fieldIndex]); // In UnionVector we need to set sub vector writer position before consume a value - // because in the previous iterations we might not have written to the specific union sub vector. + // because in the previous iterations we might not have written to the specific union sub + // vector. delegate.setPosition(currentIndex); delegate.consume(decoder); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java index 66a6cda68401e..9430d83cb4372 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import org.apache.arrow.vector.FieldVector; /** * Base class for non-skippable avro consumers. + * * @param vector type. */ public abstract class BaseAvroConsumer implements Consumer { @@ -30,6 +30,7 @@ public abstract class BaseAvroConsumer implements Consume /** * Constructs a base avro consumer. + * * @param vector the vector to consume. */ public BaseAvroConsumer(T vector) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java index 97812226180ac..11c1f7712ef19 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java @@ -14,20 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.util.List; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.avro.io.Decoder; -/** - * Composite consumer which hold all consumers. - * It manages the consume and cleanup process. - */ +/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ public class CompositeAvroConsumer implements AutoCloseable { private final List consumers; @@ -40,18 +35,14 @@ public CompositeAvroConsumer(List consumers) { this.consumers = consumers; } - /** - * Consume decoder data. - */ + /** Consume decoder data. */ public void consume(Decoder decoder) throws IOException { for (Consumer consumer : consumers) { consumer.consume(decoder); } } - /** - * Reset vector of consumers with the given {@link VectorSchemaRoot}. - */ + /** Reset vector of consumers with the given {@link VectorSchemaRoot}. 
*/ public void resetConsumerVectors(VectorSchemaRoot root) { int index = 0; for (Consumer consumer : consumers) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java index 8eaaf74cff68a..0c07f90bf5f39 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java @@ -14,59 +14,49 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.avro.io.Decoder; /** * Interface that is used to consume values from avro decoder. + * * @param The vector within consumer or its delegate, used for partially consume purpose. */ public interface Consumer extends AutoCloseable { /** * Consume a specific type value from avro decoder and write it to vector. + * * @param decoder avro decoder to read data * @throws IOException on error */ void consume(Decoder decoder) throws IOException; - /** - * Add null value to vector by making writer position + 1. - */ + /** Add null value to vector by making writer position + 1. */ void addNull(); - /** - * Set the position to write value into vector. - */ + /** Set the position to write value into vector. */ void setPosition(int index); - /** - * Get the vector within the consumer. - */ + /** Get the vector within the consumer. */ FieldVector getVector(); - /** - * Close this consumer when occurs exception to avoid potential leak. - */ + /** Close this consumer when occurs exception to avoid potential leak. */ @Override void close() throws Exception; /** * Reset the vector within consumer for partial read purpose. + * * @return true if reset is successful, false if reset is not needed. */ boolean resetValueVector(T vector); - /** - * Indicates whether the consumer is type of {@link SkipConsumer}. - */ + /** Indicates whether the consumer is type of {@link SkipConsumer}. */ default boolean skippable() { return false; } - } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java index 1ac0a6d71557b..2c104728ce620 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.avro.io.Decoder; -/** - * Consumer which skip (throw away) data from the decoder. - */ +/** Consumer which skip (throw away) data from the decoder. 
*/ public class SkipConsumer implements Consumer { private final SkipFunction skipFunction; @@ -39,12 +35,10 @@ public void consume(Decoder decoder) throws IOException { } @Override - public void addNull() { - } + public void addNull() {} @Override - public void setPosition(int index) { - } + public void setPosition(int index) {} @Override public FieldVector getVector() { @@ -52,8 +46,7 @@ public FieldVector getVector() { } @Override - public void close() throws Exception { - } + public void close() throws Exception {} @Override public boolean resetValueVector(FieldVector vector) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java index 93fc4a7fede3f..3d72d03104f3c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java @@ -14,16 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.avro.io.Decoder; -/** - * Adapter function to skip (throw away) data from the decoder. - */ +/** Adapter function to skip (throw away) data from the decoder. */ @FunctionalInterface public interface SkipFunction { void apply(Decoder decoder) throws IOException; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java index a5c36d88fb76a..0f557297a3cb7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.DateDayVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date type values from avro decoder. - * Write the data to {@link DateDayVector}. + * Consumer which consume date type values from avro decoder. Write the data to {@link + * DateDayVector}. */ public class AvroDateConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDateConsumer. - */ + /** Instantiate a AvroDateConsumer. */ public AvroDateConsumer(DateDayVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java index ebe5ca3884e5e..fa1a12ac8a6ed 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java @@ -14,40 +14,32 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.DecimalVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume decimal type values from avro decoder. - * Write the data to {@link DecimalVector}. + * Consumer which consume decimal type values from avro decoder. Write the data to {@link + * DecimalVector}. */ public abstract class AvroDecimalConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDecimalConsumer. - */ + /** Instantiate a AvroDecimalConsumer. */ public AvroDecimalConsumer(DecimalVector vector) { super(vector); } - /** - * Consumer for decimal logical type with original bytes type. - */ + /** Consumer for decimal logical type with original bytes type. */ public static class BytesDecimalConsumer extends AvroDecimalConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a BytesDecimalConsumer. - */ + /** Instantiate a BytesDecimalConsumer. */ public BytesDecimalConsumer(DecimalVector vector) { super(vector); } @@ -60,19 +52,14 @@ public void consume(Decoder decoder) throws IOException { cacheBuffer.get(bytes); vector.setBigEndian(currentIndex++, bytes); } - } - /** - * Consumer for decimal logical type with original fixed type. - */ + /** Consumer for decimal logical type with original fixed type. */ public static class FixedDecimalConsumer extends AvroDecimalConsumer { private byte[] reuseBytes; - /** - * Instantiate a FixedDecimalConsumer. - */ + /** Instantiate a FixedDecimalConsumer. */ public FixedDecimalConsumer(DecimalVector vector, int size) { super(vector); Preconditions.checkArgument(size <= 16, "Decimal bytes length should <= 16."); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java index 89216d4ad1436..60e7d15bf16d6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeMicroVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date time-micro values from avro decoder. - * Write the data to {@link TimeMicroVector}. + * Consumer which consume date time-micro values from avro decoder. Write the data to {@link + * TimeMicroVector}. */ public class AvroTimeMicroConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimeMicroConsumer. - */ + /** Instantiate a AvroTimeMicroConsumer. 
*/ public AvroTimeMicroConsumer(TimeMicroVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java index ab5df8d4bc8ac..e0b232e9abd5e 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeMilliVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date time-millis values from avro decoder. - * Write the data to {@link TimeMilliVector}. + * Consumer which consume date time-millis values from avro decoder. Write the data to {@link + * TimeMilliVector}. */ public class AvroTimeMillisConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimeMilliConsumer. - */ + /** Instantiate a AvroTimeMilliConsumer. */ public AvroTimeMillisConsumer(TimeMilliVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java index 93b39d479ff0e..88acf7b329569 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date timestamp-micro values from avro decoder. - * Write the data to {@link TimeStampMicroVector}. + * Consumer which consume date timestamp-micro values from avro decoder. Write the data to {@link + * TimeStampMicroVector}. */ public class AvroTimestampMicrosConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimestampMicroConsumer. - */ + /** Instantiate a AvroTimestampMicroConsumer. */ public AvroTimestampMicrosConsumer(TimeStampMicroVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java index 9e651c3959f81..ec50d7902319c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date timestamp-millis values from avro decoder. - * Write the data to {@link TimeStampMilliVector}. + * Consumer which consume date timestamp-millis values from avro decoder. Write the data to {@link + * TimeStampMilliVector}. */ public class AvroTimestampMillisConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimestampMillisConsumer. - */ + /** Instantiate a AvroTimestampMillisConsumer. */ public AvroTimestampMillisConsumer(TimeStampMilliVector vector) { super(vector); } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java index 6ee04e33a5ce1..d8eefc715f611 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static junit.framework.TestCase.assertNull; @@ -27,7 +26,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.util.DateUtility; @@ -43,13 +41,13 @@ public void testTimestampMicros() throws Exception { Schema schema = getSchema("logical/test_timestamp_micros.avsc"); List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMicro(10000), - DateUtility.getLocalDateTimeFromEpochMicro(20000), - DateUtility.getLocalDateTimeFromEpochMicro(30000), - DateUtility.getLocalDateTimeFromEpochMicro(40000), - DateUtility.getLocalDateTimeFromEpochMicro(50000) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMicro(10000), + DateUtility.getLocalDateTimeFromEpochMicro(20000), + DateUtility.getLocalDateTimeFromEpochMicro(30000), + DateUtility.getLocalDateTimeFromEpochMicro(40000), + DateUtility.getLocalDateTimeFromEpochMicro(50000)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -62,13 +60,13 @@ public void testTimestampMillis() throws Exception { Schema schema = getSchema("logical/test_timestamp_millis.avsc"); List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(10000), - DateUtility.getLocalDateTimeFromEpochMilli(20000), - DateUtility.getLocalDateTimeFromEpochMilli(30000), - DateUtility.getLocalDateTimeFromEpochMilli(40000), - DateUtility.getLocalDateTimeFromEpochMilli(50000) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(10000), + DateUtility.getLocalDateTimeFromEpochMilli(20000), + DateUtility.getLocalDateTimeFromEpochMilli(30000), + DateUtility.getLocalDateTimeFromEpochMilli(40000), + DateUtility.getLocalDateTimeFromEpochMilli(50000)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -93,13 +91,13 @@ public void testTimeMillis() throws Exception { Schema schema = 
getSchema("logical/test_time_millis.avsc"); List data = Arrays.asList(100, 200, 300, 400, 500); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(100), - DateUtility.getLocalDateTimeFromEpochMilli(200), - DateUtility.getLocalDateTimeFromEpochMilli(300), - DateUtility.getLocalDateTimeFromEpochMilli(400), - DateUtility.getLocalDateTimeFromEpochMilli(500) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(100), + DateUtility.getLocalDateTimeFromEpochMilli(200), + DateUtility.getLocalDateTimeFromEpochMilli(300), + DateUtility.getLocalDateTimeFromEpochMilli(400), + DateUtility.getLocalDateTimeFromEpochMilli(500)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -137,7 +135,6 @@ public void testDecimalWithOriginalBytes() throws Exception { VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); checkPrimitiveResult(expected, vector); - } @Test @@ -174,10 +171,9 @@ public void testInvalidDecimalPrecision() throws Exception { data.add(buffer); } - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, - () -> writeAndRead(schema, data)); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, () -> writeAndRead(schema, data)); assertTrue(e.getMessage().contains("Precision must be in range of 1 to 38")); - } @Test @@ -197,5 +193,4 @@ public void testFailedToCreateDecimalLogicalType() throws Exception { Schema schema3 = getSchema("logical/test_decimal_invalid4.avsc"); assertNull(schema3.getLogicalType()); } - } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java index 54fa26afe3fa8..3335ee5a8f6dc 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Set; - import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.types.Types; @@ -41,7 +39,10 @@ public class AvroSkipFieldTest extends AvroTestBase { public void testSkipUnionWithOneField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_one_field_expected.avsc"); @@ -70,7 +71,10 @@ public void testSkipUnionWithOneField() throws Exception { public void testSkipUnionWithNullableOneField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_nullable_field_expected.avsc"); @@ -99,7 +103,10 @@ public void testSkipUnionWithNullableOneField() throws Exception { public void testSkipUnionWithMultiFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_multi_fields_expected.avsc"); @@ -128,7 +135,10 @@ public void testSkipUnionWithMultiFields() throws Exception { public void testSkipMapField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_map_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_map_expected.avsc"); @@ -160,7 +170,10 @@ public void testSkipMapField() throws Exception { public void testSkipArrayField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_array_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_array_expected.avsc"); @@ -189,7 +202,10 @@ public void testSkipMultiFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = 
getSchema("test_record.avsc"); Schema expectedSchema = getSchema("skip/test_skip_multi_fields_expected.avsc"); @@ -216,7 +232,10 @@ public void testSkipMultiFields() throws Exception { public void testSkipStringField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_string_expected.avsc"); @@ -229,7 +248,8 @@ public void testSkipStringField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -250,7 +270,10 @@ public void testSkipStringField() throws Exception { public void testSkipBytesField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f3"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_bytes_expected.avsc"); @@ -263,7 +286,8 @@ public void testSkipBytesField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -284,7 +308,10 @@ public void testSkipBytesField() throws Exception { public void testSkipFixedField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); @@ -297,7 +324,8 @@ public void testSkipFixedField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -318,7 +346,10 @@ public void testSkipFixedField() throws Exception { public void testSkipEnumField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new 
AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); @@ -331,7 +362,8 @@ public void testSkipEnumField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -352,7 +384,10 @@ public void testSkipEnumField() throws Exception { public void testSkipBooleanField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_boolean_expected.avsc"); @@ -385,7 +420,10 @@ public void testSkipBooleanField() throws Exception { public void testSkipIntField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_int_expected.avsc"); @@ -418,7 +456,10 @@ public void testSkipIntField() throws Exception { public void testSkipLongField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_long_expected.avsc"); @@ -451,7 +492,10 @@ public void testSkipLongField() throws Exception { public void testSkipFloatField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f3"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_float_expected.avsc"); @@ -484,7 +528,10 @@ public void testSkipFloatField() throws Exception { public void testSkipDoubleField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f4"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_double_expected.avsc"); @@ -517,7 
+564,10 @@ public void testSkipDoubleField() throws Exception { public void testSkipRecordField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_record_before.avsc"); Schema nestedSchema = schema.getFields().get(0).schema(); ArrayList data = new ArrayList<>(); @@ -547,7 +597,10 @@ public void testSkipRecordField() throws Exception { public void testSkipNestedFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0.f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("test_nested_record.avsc"); Schema nestedSchema = schema.getFields().get(0).schema(); ArrayList data = new ArrayList<>(); @@ -603,21 +656,26 @@ public void testSkipThirdLevelField() throws Exception { assertEquals(Types.MinorType.STRUCT, root1.getFieldVectors().get(0).getMinorType()); StructVector secondLevelVector = (StructVector) root1.getFieldVectors().get(0); assertEquals(1, secondLevelVector.getChildrenFromFields().size()); - assertEquals(Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); + assertEquals( + Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); StructVector thirdLevelVector = (StructVector) secondLevelVector.getChildrenFromFields().get(0); assertEquals(3, thirdLevelVector.getChildrenFromFields().size()); // skip third level field and validate Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0.f0.f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); VectorSchemaRoot root2 = writeAndRead(firstLevelSchema, data); assertEquals(1, root2.getFieldVectors().size()); assertEquals(Types.MinorType.STRUCT, root2.getFieldVectors().get(0).getMinorType()); StructVector secondStruct = (StructVector) root2.getFieldVectors().get(0); assertEquals(1, secondStruct.getChildrenFromFields().size()); - assertEquals(Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); + assertEquals( + Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); StructVector thirdStruct = (StructVector) secondStruct.getChildrenFromFields().get(0); assertEquals(2, thirdStruct.getChildrenFromFields().size()); diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java index a91bba7b84fb4..534c2cc18c572 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -29,7 +28,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; @@ -51,8 +49,7 @@ public class AvroTestBase { - @ClassRule - public static final TemporaryFolder TMP = new TemporaryFolder(); + @ClassRule public static final TemporaryFolder TMP = new TemporaryFolder(); protected AvroToArrowConfig config; @@ -64,18 +61,21 @@ public void init() { public static Schema getSchema(String schemaName) throws Exception { try { - // Attempt to use JDK 9 behavior of getting the module then the resource stream from the module. + // Attempt to use JDK 9 behavior of getting the module then the resource stream from the + // module. // Note that this code is caller-sensitive. Method getModuleMethod = Class.class.getMethod("getModule"); Object module = getModuleMethod.invoke(TestWriteReadAvroRecord.class); - Method getResourceAsStreamFromModule = module.getClass().getMethod("getResourceAsStream", String.class); - try (InputStream is = (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { - return new Schema.Parser() - .parse(is); + Method getResourceAsStreamFromModule = + module.getClass().getMethod("getResourceAsStream", String.class); + try (InputStream is = + (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { + return new Schema.Parser().parse(is); } } catch (NoSuchMethodException ex) { // Use JDK8 behavior. - try (InputStream is = TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { + try (InputStream is = + TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { return new Schema.Parser().parse(is); } } @@ -84,11 +84,11 @@ public static Schema getSchema(String schemaName) throws Exception { protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception { File dataFile = TMP.newFile(); - BinaryEncoder - encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + BinaryEncoder encoder = + new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); DatumWriter writer = new GenericDatumWriter(schema); - BinaryDecoder - decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + BinaryDecoder decoder = + new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); for (Object value : data) { writer.write(value, encoder); @@ -157,10 +157,10 @@ protected void checkRecordResult(Schema schema, List data, Vector checkPrimitiveResult(fieldData, root.getFieldVectors().get(i)); } - } - protected void checkNestedRecordResult(Schema schema, List data, VectorSchemaRoot root) { + protected void checkNestedRecordResult( + Schema schema, List data, VectorSchemaRoot root) { assertEquals(data.size(), root.getRowCount()); assertTrue(schema.getFields().size() == 1); @@ -176,10 +176,8 @@ protected void checkNestedRecordResult(Schema schema, List data, checkPrimitiveResult(fieldData, structVector.getChildrenFromFields().get(i)); } - } - // belows are for iterator api protected void checkArrayResult(List> expected, List vectors) { @@ -194,10 +192,12 @@ protected void checkArrayResult(List> expected, List vectors } } - protected void checkRecordResult(Schema schema, List data, List roots) { - roots.forEach(root -> { - 
assertEquals(schema.getFields().size(), root.getFieldVectors().size()); - }); + protected void checkRecordResult( + Schema schema, List data, List roots) { + roots.forEach( + root -> { + assertEquals(schema.getFields().size(), root.getFieldVectors().size()); + }); for (int i = 0; i < schema.getFields().size(); i++) { List fieldData = new ArrayList(); @@ -210,7 +210,6 @@ protected void checkRecordResult(Schema schema, List data, List vectors) { diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java index 7f2edb08fdabc..7e73b2d6c7038 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -28,7 +27,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; @@ -59,11 +57,11 @@ public void init() { private AvroToArrowVectorIterator convert(Schema schema, List data) throws Exception { File dataFile = TMP.newFile(); - BinaryEncoder - encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + BinaryEncoder encoder = + new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); DatumWriter writer = new GenericDatumWriter(schema); - BinaryDecoder - decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + BinaryDecoder decoder = + new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); for (Object value : data) { writer.write(value, encoder); @@ -107,7 +105,7 @@ public void testNullableStringType() throws Exception { List roots = new ArrayList<>(); List vectors = new ArrayList<>(); - try (AvroToArrowVectorIterator iterator = convert(schema, data);) { + try (AvroToArrowVectorIterator iterator = convert(schema, data); ) { while (iterator.hasNext()) { VectorSchemaRoot root = iterator.next(); FieldVector vector = root.getFieldVectors().get(0); @@ -117,7 +115,6 @@ public void testNullableStringType() throws Exception { } checkPrimitiveResult(expected, vectors); AutoCloseables.close(roots); - } @Test @@ -140,18 +137,18 @@ public void testRecordType() throws Exception { } checkRecordResult(schema, data, roots); AutoCloseables.close(roots); - } @Test public void testArrayType() throws Exception { Schema schema = getSchema("test_array.avsc"); - List> data = Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); + List> data = + Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); List roots = new ArrayList<>(); List vectors = new ArrayList<>(); @@ -172,8 +169,9 @@ public void runLargeNumberOfRows() throws Exception { int x = 0; final int targetRows = 600000; Decoder fakeDecoder = new FakeDecoder(targetRows); - try (AvroToArrowVectorIterator iter = 
AvroToArrow.avroToArrowIterator(schema, fakeDecoder, - new AvroToArrowConfigBuilder(config.getAllocator()).build())) { + try (AvroToArrowVectorIterator iter = + AvroToArrow.avroToArrowIterator( + schema, fakeDecoder, new AvroToArrowConfigBuilder(config.getAllocator()).build())) { while (iter.hasNext()) { VectorSchemaRoot root = iter.next(); x += root.getRowCount(); @@ -184,9 +182,7 @@ public void runLargeNumberOfRows() throws Exception { assertEquals(targetRows, x); } - /** - * Fake avro decoder to test large data. - */ + /** Fake avro decoder to test large data. */ private static class FakeDecoder extends Decoder { private int numRows; @@ -204,8 +200,7 @@ private void validate() throws EOFException { } @Override - public void readNull() throws IOException { - } + public void readNull() throws IOException {} @Override public boolean readBoolean() throws IOException { @@ -243,9 +238,7 @@ public String readString() throws IOException { } @Override - public void skipString() throws IOException { - - } + public void skipString() throws IOException {} @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { @@ -253,9 +246,7 @@ public ByteBuffer readBytes(ByteBuffer old) throws IOException { } @Override - public void skipBytes() throws IOException { - - } + public void skipBytes() throws IOException {} @Override public void readFixed(byte[] bytes, int start, int length) throws IOException { @@ -264,9 +255,7 @@ public void readFixed(byte[] bytes, int start, int length) throws IOException { } @Override - public void skipFixed(int length) throws IOException { - - } + public void skipFixed(int length) throws IOException {} @Override public int readEnum() throws IOException { diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java index 26f72173b6b7e..59317c3be033f 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; @@ -105,12 +103,13 @@ public void testFixedAttributes() throws Exception { @Test public void testEnumAttributes() throws Exception { Schema schema = getSchema("attrs/test_enum_attrs.avsc"); - List data = Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); + List data = + Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -172,12 +171,13 @@ public void testNestedRecordType() throws Exception { @Test public void testEnumType() throws Exception { Schema schema = getSchema("test_primitive_enum.avsc"); - List data = Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); + List data = + Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); List expectedIndices = Arrays.asList(0, 1, 2, 3, 0); @@ -302,12 +302,13 @@ public void testNullableDoubleType() throws Exception { @Test public void testBytesType() throws Exception { Schema schema = getSchema("test_primitive_bytes.avsc"); - List data = Arrays.asList( - ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); + List data = + Arrays.asList( + ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -322,7 +323,8 @@ public void testNullableBytesType() throws Exception { ArrayList data = new ArrayList<>(); for (int i = 0; i < 5; i++) { GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); + record.put( + 0, i % 2 == 0 ? 
ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); data.add(record); } @@ -359,12 +361,13 @@ public void testNullableBooleanType() throws Exception { @Test public void testArrayType() throws Exception { Schema schema = getSchema("test_array.avsc"); - List> data = Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); + List> data = + Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -471,5 +474,4 @@ public void testNullableUnionType() throws Exception { checkPrimitiveResult(expected, vector); } - } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java index afbddaa6ed87a..a721a1e4cc6a8 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.io.File; import java.util.ArrayList; import java.util.List; - import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; @@ -36,11 +34,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; - public class TestWriteReadAvroRecord { - @ClassRule - public static final TemporaryFolder TMP = new TemporaryFolder(); + @ClassRule public static final TemporaryFolder TMP = new TemporaryFolder(); @Test public void testWriteAndRead() throws Exception { @@ -48,7 +44,7 @@ public void testWriteAndRead() throws Exception { File dataFile = TMP.newFile(); Schema schema = AvroTestBase.getSchema("test.avsc"); - //write data to disk + // write data to disk GenericRecord user1 = new GenericData.Record(schema); user1.put("name", "Alyssa"); user1.put("favorite_number", 256); @@ -65,10 +61,10 @@ public void testWriteAndRead() throws Exception { dataFileWriter.append(user2); dataFileWriter.close(); - //read data from disk + // read data from disk DatumReader datumReader = new GenericDatumReader(schema); - DataFileReader - dataFileReader = new DataFileReader(dataFile, datumReader); + DataFileReader dataFileReader = + new DataFileReader(dataFile, datumReader); List result = new ArrayList<>(); while (dataFileReader.hasNext()) { GenericRecord user = dataFileReader.next(); @@ -86,5 +82,4 @@ public void testWriteAndRead() throws Exception { assertEquals(7, deUser2.get("favorite_number")); assertEquals("red", deUser2.get("favorite_color").toString()); } - } diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 2f2911dd9da95..b444eff56277d 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -24,6 +24,11 @@ (Contrib/Experimental)A library for converting JDBC data to Arrow data. 
http://maven.apache.org + + dev/checkstyle/checkstyle-spotless.xml + none + + diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java index 427c766982f30..d30cf32a04996 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable; @@ -23,7 +22,6 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Iterator; - import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; @@ -35,9 +33,7 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ValueVectorUtility; -/** - * VectorSchemaRoot iterator for partially converting JDBC data. - */ +/** VectorSchemaRoot iterator for partially converting JDBC data. */ public class ArrowVectorIterator implements Iterator, AutoCloseable { private final ResultSet resultSet; @@ -54,13 +50,12 @@ public class ArrowVectorIterator implements Iterator, AutoClos private final int targetBatchSize; - // This is used to track whether the ResultSet has been fully read, and is needed specifically for cases where there + // This is used to track whether the ResultSet has been fully read, and is needed specifically for + // cases where there // is a ResultSet having zero rows (empty): private boolean readComplete = false; - /** - * Construct an instance. - */ + /** Construct an instance. */ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { this.resultSet = resultSet; this.config = config; @@ -73,12 +68,8 @@ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throw this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null; } - /** - * Create a ArrowVectorIterator to partially convert data. - */ - public static ArrowVectorIterator create( - ResultSet resultSet, - JdbcToArrowConfig config) + /** Create a ArrowVectorIterator to partially convert data. 
*/ + public static ArrowVectorIterator create(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { ArrowVectorIterator iterator = null; try { @@ -142,10 +133,18 @@ private VectorSchemaRoot createVectorSchemaRoot() throws SQLException { private void initialize(VectorSchemaRoot root) throws SQLException { for (int i = 1; i <= consumers.length; i++) { - final JdbcFieldInfo columnFieldInfo = JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); + final JdbcFieldInfo columnFieldInfo = + JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - consumers[i - 1] = config.getJdbcConsumerGetter().apply( - arrowType, i, isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), root.getVector(i - 1), config); + consumers[i - 1] = + config + .getJdbcConsumerGetter() + .apply( + arrowType, + i, + isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), + root.getVector(i - 1), + config); } } @@ -170,16 +169,17 @@ public boolean hasNext() { } /** - * Gets the next vector. - * If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, - * the client is responsible for freeing its resources. + * Gets the next vector. If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, the + * client is responsible for freeing its resources. + * * @throws JdbcConsumerException on error from VectorConsumer */ @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); try { - VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); + VectorSchemaRoot ret = + config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); load(ret); return ret; } catch (Exception e) { @@ -193,8 +193,9 @@ public VectorSchemaRoot next() { } /** - * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a new VectorSchemaRoot - * is created for each batch, each root must be closed manually by the client code. + * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a + * new VectorSchemaRoot is created for each batch, each root must be closed manually by the client + * code. */ @Override public void close() { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java index f95133fc7e44c..30e734a68d511 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java @@ -14,20 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; -/** - * String constants used for metadata returned on Vectors. - */ +/** String constants used for metadata returned on Vectors. 
*/ public class Constants { - private Constants() { - } + private Constants() {} public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME"; public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; public static final String SQL_TYPE_KEY = "SQL_TYPE"; - } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java index d16964ea14417..6becac0bbc10c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java @@ -14,25 +14,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.util.Preconditions; /** - * This class represents the information about a JDBC ResultSet Field that is - * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}. - * Currently, this is: + * This class represents the information about a JDBC ResultSet Field that is needed to construct an + * {@link org.apache.arrow.vector.types.pojo.ArrowType}. Currently, this is: + * *
<ul>
- *   <li>The JDBC {@link java.sql.Types} type.</li>
- *   <li>The nullability.</li>
- *   <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types).</li>
- *   <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types).</li>
+ *   <li>The JDBC {@link java.sql.Types} type.
+ *   <li>The nullability.
+ *   <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link
+ *       java.sql.Types#NUMERIC} types).
+ *   <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link
+ *       java.sql.Types#NUMERIC} types).
 * </ul>
    */ public class JdbcFieldInfo { @@ -45,12 +45,13 @@ public class JdbcFieldInfo { private final int displaySize; /** - * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this constructor - * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and - * scale will be set to 0. + * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this + * constructor if the field type is {@link java.sql.Types#DECIMAL} or {@link + * java.sql.Types#NUMERIC}; the precision and scale will be set to 0. * * @param jdbcType The {@link java.sql.Types} type. - * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}. + * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link + * java.sql.Types#NUMERIC}. */ public JdbcFieldInfo(int jdbcType) { Preconditions.checkArgument( @@ -67,7 +68,8 @@ public JdbcFieldInfo(int jdbcType) { /** * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, precision, and scale. - * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types. + * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} + * types. * * @param jdbcType The {@link java.sql.Types} type. * @param precision The field's numeric precision. @@ -84,11 +86,13 @@ public JdbcFieldInfo(int jdbcType, int precision, int scale) { } /** - * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, precision, and scale. + * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, + * precision, and scale. * * @param jdbcType The {@link java.sql.Types} type. * @param nullability The nullability. Must be one of {@link ResultSetMetaData#columnNoNulls}, - * {@link ResultSetMetaData#columnNullable}, or {@link ResultSetMetaData#columnNullableUnknown}. + * {@link ResultSetMetaData#columnNullable}, or {@link + * ResultSetMetaData#columnNullableUnknown}. * @param precision The field's numeric precision. * @param scale The field's numeric scale. */ @@ -103,7 +107,8 @@ public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { } /** - * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} column. + * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} + * column. * * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. * @param column The column to get the field information for (on a 1-based index). 
@@ -113,10 +118,12 @@ public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { */ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); - Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); + Preconditions.checkArgument( + column > 0, "ResultSetMetaData columns have indices starting at 1."); Preconditions.checkArgument( column <= rsmd.getColumnCount(), - "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); + "The index must be within the number of columns (1 to %s, inclusive)", + rsmd.getColumnCount()); this.column = column; this.jdbcType = rsmd.getColumnType(column); @@ -128,8 +135,8 @@ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { } /** - * Builds a JdbcFieldInfo from the corresponding row from a {@link java.sql.DatabaseMetaData#getColumns} - * ResultSet. + * Builds a JdbcFieldInfo from the corresponding row from a {@link + * java.sql.DatabaseMetaData#getColumns} ResultSet. * * @param rs The {@link java.sql.ResultSet} to get the field information from. * @throws SQLException If the column information cannot be retrieved. @@ -144,51 +151,42 @@ public JdbcFieldInfo(ResultSet rs) throws SQLException { this.displaySize = rs.getInt("CHAR_OCTET_LENGTH"); } - /** - * The {@link java.sql.Types} type. - */ + /** The {@link java.sql.Types} type. */ public int getJdbcType() { return jdbcType; } - /** - * The nullability. - */ + /** The nullability. */ public int isNullable() { return nullability; } /** - * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} + * types. */ public int getPrecision() { return precision; } /** - * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. */ public int getScale() { return scale; } - /** - * The column index for query column. - */ + /** The column index for query column. */ public int getColumn() { return column; } - /** - * The type name as reported by the database. - */ + /** The type name as reported by the database. */ public String getTypeName() { return typeName; } - /** - * The max number of characters for the column. - */ + /** The max number of characters for the column. */ public int getDisplaySize() { return displaySize; } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java index 2dfc0658cb8d1..fd4721bcd9c4e 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.VectorSchemaRoot; @@ -29,8 +27,8 @@ /** * A binder binds JDBC prepared statement parameters to rows of Arrow data from a VectorSchemaRoot. * - * Each row of the VectorSchemaRoot will be bound to the configured parameters of the PreparedStatement. - * One row of data is bound at a time. + *
<p>
    Each row of the VectorSchemaRoot will be bound to the configured parameters of the + * PreparedStatement. One row of data is bound at a time. */ public class JdbcParameterBinder { private final PreparedStatement statement; @@ -44,8 +42,10 @@ public class JdbcParameterBinder { * * @param statement The statement to bind parameters to. * @param root The VectorSchemaRoot to pull data from. - * @param binders Column binders to translate from Arrow data to JDBC parameters, one per parameter. - * @param parameterIndices For each binder in binders, the index of the parameter to bind to. + * @param binders Column binders to translate from Arrow data to JDBC parameters, one per + * parameter. + * @param parameterIndices For each binder in binders, the index of the parameter to bind + * to. */ private JdbcParameterBinder( final PreparedStatement statement, @@ -55,7 +55,8 @@ private JdbcParameterBinder( Preconditions.checkArgument( binders.length == parameterIndices.length, "Number of column binders (%s) must equal number of parameter indices (%s)", - binders.length, parameterIndices.length); + binders.length, + parameterIndices.length); this.statement = statement; this.root = root; this.binders = binders; @@ -66,9 +67,10 @@ private JdbcParameterBinder( /** * Initialize a binder with a builder. * - * @param statement The statement to bind to. The binder does not maintain ownership of the statement. - * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain ownership - * of the vector schema root. + * @param statement The statement to bind to. The binder does not maintain ownership of the + * statement. + * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain + * ownership of the vector schema root. */ public static Builder builder(final PreparedStatement statement, final VectorSchemaRoot root) { return new Builder(statement, root); @@ -82,8 +84,8 @@ public void reset() { /** * Bind the next row of data to the parameters of the statement. * - * After this, the application should call the desired method on the prepared statement, - * such as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. + *

    After this, the application should call the desired method on the prepared statement, such + * as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. * * @return true if a row was bound, false if rows were exhausted */ @@ -99,9 +101,7 @@ public boolean next() throws SQLException { return true; } - /** - * A builder for a {@link JdbcParameterBinder}. - */ + /** A builder for a {@link JdbcParameterBinder}. */ public static class Builder { private final PreparedStatement statement; private final VectorSchemaRoot root; @@ -123,9 +123,7 @@ public Builder bindAll() { /** Bind the given parameter to the given column using the default binder. */ public Builder bind(int parameterIndex, int columnIndex) { - return bind( - parameterIndex, - ColumnBinder.forVector(root.getVector(columnIndex))); + return bind(parameterIndex, ColumnBinder.forVector(root.getVector(columnIndex))); } /** Bind the given parameter using the given binder. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index 246451b5b22f9..493e53056f945 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -29,44 +27,32 @@ * *
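As a point of reference for the JdbcParameterBinder hunks above, here is a minimal usage sketch. It assumes an already-prepared INSERT statement and a populated VectorSchemaRoot whose columns line up with the statement's parameters; the class and variable names are illustrative and not part of this patch.

```java
import java.sql.PreparedStatement;
import org.apache.arrow.adapter.jdbc.JdbcParameterBinder;
import org.apache.arrow.vector.VectorSchemaRoot;

public class BinderSketch {
  // Bind each Arrow row to the statement parameters, then batch the writes.
  static void writeAll(PreparedStatement statement, VectorSchemaRoot root) throws Exception {
    JdbcParameterBinder binder =
        JdbcParameterBinder.builder(statement, root)
            .bindAll() // default binders: column i maps to parameter i + 1
            .build();
    while (binder.next()) { // binds one row; the caller drives the statement
      statement.addBatch();
    }
    statement.executeBatch();
  }
}
```

If the same root is refilled with new data, calling reset() starts binding again from row zero.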

    This utility uses following data mapping to map JDBC/SQL datatype to Arrow data types. * - *

CHAR --> ArrowType.Utf8
- * NCHAR --> ArrowType.Utf8
- * VARCHAR --> ArrowType.Utf8
- * NVARCHAR --> ArrowType.Utf8
- * LONGVARCHAR --> ArrowType.Utf8
- * LONGNVARCHAR --> ArrowType.Utf8
- * NUMERIC --> ArrowType.Decimal(precision, scale)
- * DECIMAL --> ArrowType.Decimal(precision, scale)
- * BIT --> ArrowType.Bool
- * TINYINT --> ArrowType.Int(8, signed)
- * SMALLINT --> ArrowType.Int(16, signed)
- * INTEGER --> ArrowType.Int(32, signed)
- * BIGINT --> ArrowType.Int(64, signed)
- * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
- * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
- * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
- * BINARY --> ArrowType.Binary
- * VARBINARY --> ArrowType.Binary
- * LONGVARBINARY --> ArrowType.Binary
- * DATE --> ArrowType.Date(DateUnit.MILLISECOND)
- * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
- * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null)
- * CLOB --> ArrowType.Utf8
- * BLOB --> ArrowType.Binary
+ *

    CHAR --> ArrowType.Utf8 NCHAR --> ArrowType.Utf8 VARCHAR --> ArrowType.Utf8 NVARCHAR --> + * ArrowType.Utf8 LONGVARCHAR --> ArrowType.Utf8 LONGNVARCHAR --> ArrowType.Utf8 NUMERIC --> + * ArrowType.Decimal(precision, scale) DECIMAL --> ArrowType.Decimal(precision, scale) BIT --> + * ArrowType.Bool TINYINT --> ArrowType.Int(8, signed) SMALLINT --> ArrowType.Int(16, signed) + * INTEGER --> ArrowType.Int(32, signed) BIGINT --> ArrowType.Int(64, signed) REAL --> + * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) FLOAT --> + * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) DOUBLE --> + * ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) BINARY --> ArrowType.Binary VARBINARY --> + * ArrowType.Binary LONGVARBINARY --> ArrowType.Binary DATE --> ArrowType.Date(DateUnit.MILLISECOND) + * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) TIMESTAMP --> + * ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) CLOB --> ArrowType.Utf8 BLOB --> + * ArrowType.Binary * * @since 0.10.0 */ public class JdbcToArrow { /*----------------------------------------------------------------* - | | - | Partial Convert API | - | | - *----------------------------------------------------------------*/ + | | + | Partial Convert API | + | | + *----------------------------------------------------------------*/ /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note here uses the default targetBatchSize = 1024. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. Note here uses the default targetBatchSize = 1024. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator @@ -74,28 +60,25 @@ public class JdbcToArrow { * @throws SQLException on error */ public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - BufferAllocator allocator) - throws SQLException, IOException { + ResultSet resultSet, BufferAllocator allocator) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); + JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); return sqlToArrowVectorIterator(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value 1024. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value + * 1024. + * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. + * @param config Configuration of the conversion from JDBC to Arrow. 
* @return Arrow Data Objects {@link ArrowVectorIterator} * @throws SQLException on error */ public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - JdbcToArrowConfig config) - throws SQLException, IOException { + ResultSet resultSet, JdbcToArrowConfig config) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); return ArrowVectorIterator.create(resultSet, config); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index 68851f4a98bc9..1bfcfc8fe00aa 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.math.RoundingMode; import java.util.Calendar; import java.util.Map; import java.util.function.Function; - import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -30,25 +28,23 @@ /** * This class configures the JDBC-to-Arrow conversion process. - *
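For the iterator entry point reformatted above, a minimal end-to-end sketch follows. The H2 in-memory URL and the query are illustrative; any JDBC source whose driver is on the classpath would do, and the simple overload uses the default configuration (UTC calendar, targetBatchSize = 1024).

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

public class JdbcToArrowSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
        Connection conn = DriverManager.getConnection("jdbc:h2:mem:demo");
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT 1 AS id");
        ArrowVectorIterator it = JdbcToArrow.sqlToArrowVectorIterator(rs, allocator)) {
      while (it.hasNext()) {
        // With the default config each batch is a fresh root owned by the caller.
        try (VectorSchemaRoot root = it.next()) {
          System.out.println("rows in this batch: " + root.getRowCount());
        }
      }
    }
  }
}
```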

    - * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, - * and the calendar is used to define the time zone of any - * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} - * fields that are created during the conversion. Neither field may be null. - *

    - *

    - * If the includeMetadata flag is set, the Arrow field metadata will contain information - * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the - * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding - * {@link org.apache.arrow.vector.FieldVector}. - *

    - *

    - * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding - * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type - * information cannot be retrieved from all JDBC implementations (H2 for example, returns - * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index - * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. - *

    + * + *

    The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, and + * the calendar is used to define the time zone of any {@link + * org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} fields that are created during the + * conversion. Neither field may be null. + * + *

    If the includeMetadata flag is set, the Arrow field metadata will contain + * information from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the + * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding {@link + * org.apache.arrow.vector.FieldVector}. + * + *

    If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the + * corresponding {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, + * the sub-type information cannot be retrieved from all JDBC implementations (H2 for example, + * returns {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The + * column index or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the + * conversion. */ public final class JdbcToArrowConfig { @@ -66,14 +62,12 @@ public final class JdbcToArrowConfig { private final Map> columnMetadataByColumnIndex; private final RoundingMode bigDecimalRoundingMode; /** - * The maximum rowCount to read each time when partially convert data. - * Default value is 1024 and -1 means disable partial read. - * default is -1 which means disable partial read. - * Note that this flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} - * 1) if targetBatchSize != -1, it will convert full data into multiple vectors - * with valueCount no more than targetBatchSize. - * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link ArrowVectorIterator} - *
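Because drivers such as H2 report java.sql.Types#NULL for array elements, the element type has to be registered ahead of time, as described above. A minimal sketch using the builder follows; the column index, the INTEGER element type, and the single-argument JdbcFieldInfo constructor are used here illustratively.

```java
import java.sql.Types;
import java.util.HashMap;
import java.util.Map;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;

public class ArraySubTypeSketch {
  // Column 3 (1-based) is a JDBC ARRAY whose elements should be read as INTEGER.
  static JdbcToArrowConfig configWithArraySubType(BufferAllocator allocator) {
    Map<Integer, JdbcFieldInfo> arraySubTypes = new HashMap<>();
    arraySubTypes.put(3, new JdbcFieldInfo(Types.INTEGER));
    return new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
        .setArraySubTypeByColumnIndexMap(arraySubTypes)
        .build();
  }
}
```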

    + * The maximum rowCount to read each time when partially convert data. Default value is 1024 and + * -1 means disable partial read. default is -1 which means disable partial read. Note that this + * flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} 1) if targetBatchSize != -1, + * it will convert full data into multiple vectors with valueCount no more than targetBatchSize. + * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link + * ArrowVectorIterator} */ private final int targetBatchSize; @@ -81,81 +75,100 @@ public final class JdbcToArrowConfig { private final JdbcConsumerFactory jdbcConsumerGetter; /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is + * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC + * ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based + * results. */ JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) { - this(allocator, calendar, + this( + allocator, + calendar, /* include metadata */ false, /* reuse vector schema root */ false, /* array sub-types by column index */ null, /* array sub-types by column name */ null, - DEFAULT_TARGET_BATCH_SIZE, null, null); + DEFAULT_TARGET_BATCH_SIZE, + null, + null); } JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter) { - this(allocator, calendar, includeMetadata, reuseVectorSchemaRoot, arraySubTypesByColumnIndex, - arraySubTypesByColumnName, targetBatchSize, jdbcToArrowTypeConverter, null); + BufferAllocator allocator, + Calendar calendar, + boolean includeMetadata, + boolean reuseVectorSchemaRoot, + Map arraySubTypesByColumnIndex, + Map arraySubTypesByColumnName, + int targetBatchSize, + Function jdbcToArrowTypeConverter) { + this( + allocator, + calendar, + includeMetadata, + reuseVectorSchemaRoot, + arraySubTypesByColumnIndex, + arraySubTypesByColumnName, + targetBatchSize, + jdbcToArrowTypeConverter, + null); } /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is + * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC + * ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. 
- * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. - * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based + * results. + * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field + * metadata. * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). - * @param arraySubTypesByColumnName The type of the JDBC array at the column name. - * @param targetBatchSize The target batch size to be used in preallocation of the resulting vectors. - * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow type. If set to null, - * the default mapping will be used, which is defined as: - *
<ul>
- *   <li>CHAR --> ArrowType.Utf8</li>
- *   <li>NCHAR --> ArrowType.Utf8</li>
- *   <li>VARCHAR --> ArrowType.Utf8</li>
- *   <li>NVARCHAR --> ArrowType.Utf8</li>
- *   <li>LONGVARCHAR --> ArrowType.Utf8</li>
- *   <li>LONGNVARCHAR --> ArrowType.Utf8</li>
- *   <li>NUMERIC --> ArrowType.Decimal(precision, scale)</li>
- *   <li>DECIMAL --> ArrowType.Decimal(precision, scale)</li>
- *   <li>BIT --> ArrowType.Bool</li>
- *   <li>TINYINT --> ArrowType.Int(8, signed)</li>
- *   <li>SMALLINT --> ArrowType.Int(16, signed)</li>
- *   <li>INTEGER --> ArrowType.Int(32, signed)</li>
- *   <li>BIGINT --> ArrowType.Int(64, signed)</li>
- *   <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
- *   <li>BINARY --> ArrowType.Binary</li>
- *   <li>VARBINARY --> ArrowType.Binary</li>
- *   <li>LONGVARBINARY --> ArrowType.Binary</li>
- *   <li>DATE --> ArrowType.Date(DateUnit.DAY)</li>
- *   <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
- *   <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)</li>
- *   <li>CLOB --> ArrowType.Utf8</li>
- *   <li>BLOB --> ArrowType.Binary</li>
- *   <li>ARRAY --> ArrowType.List</li>
- *   <li>STRUCT --> ArrowType.Struct</li>
- *   <li>NULL --> ArrowType.Null</li>
- * </ul>
    - * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal from a - * ResultSet having a scale which does not match that of the target vector. Use null - * (default value) to require strict scale matching. + * @param arraySubTypesByColumnName The type of the JDBC array at the column name. + * @param targetBatchSize The target batch size to be used in preallocation of the resulting + * vectors. + * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow + * type. If set to null, the default mapping will be used, which is defined as: + *
<ul>
+ *   <li>CHAR --> ArrowType.Utf8
+ *   <li>NCHAR --> ArrowType.Utf8
+ *   <li>VARCHAR --> ArrowType.Utf8
+ *   <li>NVARCHAR --> ArrowType.Utf8
+ *   <li>LONGVARCHAR --> ArrowType.Utf8
+ *   <li>LONGNVARCHAR --> ArrowType.Utf8
+ *   <li>NUMERIC --> ArrowType.Decimal(precision, scale)
+ *   <li>DECIMAL --> ArrowType.Decimal(precision, scale)
+ *   <li>BIT --> ArrowType.Bool
+ *   <li>TINYINT --> ArrowType.Int(8, signed)
+ *   <li>SMALLINT --> ArrowType.Int(16, signed)
+ *   <li>INTEGER --> ArrowType.Int(32, signed)
+ *   <li>BIGINT --> ArrowType.Int(64, signed)
+ *   <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ *   <li>BINARY --> ArrowType.Binary
+ *   <li>VARBINARY --> ArrowType.Binary
+ *   <li>LONGVARBINARY --> ArrowType.Binary
+ *   <li>DATE --> ArrowType.Date(DateUnit.DAY)
+ *   <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ *   <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
+ *   <li>CLOB --> ArrowType.Utf8
+ *   <li>BLOB --> ArrowType.Binary
+ *   <li>ARRAY --> ArrowType.List
+ *   <li>STRUCT --> ArrowType.Struct
+ *   <li>NULL --> ArrowType.Null
+ * </ul>
    + * + * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal + * from a ResultSet having a scale which does not match that of the target vector. Use null + * (default value) to require strict scale matching. */ JdbcToArrowConfig( BufferAllocator allocator, @@ -245,16 +258,19 @@ public final class JdbcToArrowConfig { this.bigDecimalRoundingMode = bigDecimalRoundingMode; // set up type converter - this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter : - (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); + this.jdbcToArrowTypeConverter = + jdbcToArrowTypeConverter != null + ? jdbcToArrowTypeConverter + : (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); - this.jdbcConsumerGetter = jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; + this.jdbcConsumerGetter = + jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; } /** - * The calendar to use when defining Arrow Timestamp fields - * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp} - * data types from the {@link java.sql.ResultSet}, or null if not converting. + * The calendar to use when defining Arrow Timestamp fields and retrieving {@link java.sql.Date}, + * {@link java.sql.Time}, or {@link java.sql.Timestamp} data types from the {@link + * java.sql.ResultSet}, or null if not converting. * * @return the calendar. */ @@ -280,30 +296,22 @@ public boolean shouldIncludeMetadata() { return includeMetadata; } - /** - * Get the target batch size for partial read. - */ + /** Get the target batch size for partial read. */ public int getTargetBatchSize() { return targetBatchSize; } - /** - * Get whether it is allowed to reuse the vector schema root. - */ + /** Get whether it is allowed to reuse the vector schema root. */ public boolean isReuseVectorSchemaRoot() { return reuseVectorSchemaRoot; } - /** - * Gets the mapping between JDBC type information to Arrow type. - */ + /** Gets the mapping between JDBC type information to Arrow type. */ public Function getJdbcToArrowTypeConverter() { return jdbcToArrowTypeConverter; } - /** - * Gets the JDBC consumer getter. - */ + /** Gets the JDBC consumer getter. */ public JdbcConsumerFactory getJdbcConsumerGetter() { return jdbcConsumerGetter; } @@ -311,8 +319,10 @@ public JdbcConsumerFactory getJdbcConsumerGetter() { /** * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. * - * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. + * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link + * java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not + * defined. */ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { if (arraySubTypesByColumnIndex == null) { @@ -325,8 +335,10 @@ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { /** * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. * - * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. 
+ * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link + * java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not + * defined. */ public JdbcFieldInfo getArraySubTypeByColumnName(String name) { if (arraySubTypesByColumnName == null) { @@ -339,7 +351,8 @@ public JdbcFieldInfo getArraySubTypeByColumnName(String name) { /** * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column index. * - * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type mapping. + * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type + * mapping. * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. */ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { @@ -353,7 +366,8 @@ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { /** * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column name. * - * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type mapping. + * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type + * mapping. * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. */ public JdbcFieldInfo getExplicitTypeByColumnName(String name) { @@ -364,17 +378,12 @@ public JdbcFieldInfo getExplicitTypeByColumnName(String name) { } } - /** - * Return schema level metadata or null if not provided. - */ + /** Return schema level metadata or null if not provided. */ public Map getSchemaMetadata() { return schemaMetadata; } - /** - * Return metadata from columnIndex->meta map on per field basis - * or null if not provided. - */ + /** Return metadata from columnIndex->meta map on per field basis or null if not provided. */ public Map> getColumnMetadataByColumnIndex() { return columnMetadataByColumnIndex; } @@ -383,12 +392,14 @@ public RoundingMode getBigDecimalRoundingMode() { return bigDecimalRoundingMode; } - /** - * Interface for a function that gets a JDBC consumer for the given values. - */ + /** Interface for a function that gets a JDBC consumer for the given values. */ @FunctionalInterface public interface JdbcConsumerFactory { - JdbcConsumer apply(ArrowType arrowType, int columnIndex, boolean nullable, FieldVector vector, - JdbcToArrowConfig config); + JdbcConsumer apply( + ArrowType arrowType, + int columnIndex, + boolean nullable, + FieldVector vector, + JdbcToArrowConfig config); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java index 7d88c23832067..783a373c6d0a7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE; @@ -23,15 +22,12 @@ import java.util.Calendar; import java.util.Map; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * This class builds {@link JdbcToArrowConfig}s. - */ +/** This class builds {@link JdbcToArrowConfig}s. */ public class JdbcToArrowConfigBuilder { private Calendar calendar; private BufferAllocator allocator; @@ -49,9 +45,9 @@ public class JdbcToArrowConfigBuilder { private RoundingMode bigDecimalRoundingMode; /** - * Default constructor for the JdbcToArrowConfigBuilder}. - * Use the setter methods for the allocator and calendar; the allocator must be - * set. Otherwise, {@link #build()} will throw a {@link NullPointerException}. + * Default constructor for the JdbcToArrowConfigBuilder}. Use the setter methods for + * the allocator and calendar; the allocator must be set. Otherwise, {@link #build()} will throw a + * {@link NullPointerException}. */ public JdbcToArrowConfigBuilder() { this.allocator = null; @@ -68,16 +64,13 @@ public JdbcToArrowConfigBuilder() { } /** - * Constructor for the JdbcToArrowConfigBuilder. The - * allocator is required, and a {@link NullPointerException} - * will be thrown if it is null. - *

    - * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *

    + * Constructor for the JdbcToArrowConfigBuilder. The allocator is required, and a + * {@link NullPointerException} will be thrown if it is null. + * + *

    The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is + * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link + * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, + * common time zone when reading from the result set. * * @param allocator The Arrow Vector memory allocator. * @param calendar The calendar to use when constructing timestamp fields. @@ -95,26 +88,23 @@ public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) { } /** - * Constructor for the JdbcToArrowConfigBuilder. Both the - * allocator and calendar are required. A {@link NullPointerException} - * will be thrown if either of those arguments is null. - *

    - * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *

    - *

    - * The includeMetadata argument, if true will cause - * various information about each database field to be added to the Vector - * Schema's field metadata. - *

    + * Constructor for the JdbcToArrowConfigBuilder. Both the allocator and calendar are + * required. A {@link NullPointerException} will be thrown if either of those arguments is + * null. + * + *

    The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is + * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link + * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, + * common time zone when reading from the result set. + * + *

    The includeMetadata argument, if true will cause various + * information about each database field to be added to the Vector Schema's field metadata. * * @param allocator The Arrow Vector memory allocator. * @param calendar The calendar to use when constructing timestamp fields. */ - public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { + public JdbcToArrowConfigBuilder( + BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { this(allocator, calendar); this.includeMetadata = includeMetadata; } @@ -132,8 +122,8 @@ public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) { } /** - * Sets the {@link Calendar} to use when constructing timestamp fields in the - * Arrow schema, and reading time-based fields from the JDBC ResultSet. + * Sets the {@link Calendar} to use when constructing timestamp fields in the Arrow schema, and + * reading time-based fields from the JDBC ResultSet. * * @param calendar the calendar to set. */ @@ -145,7 +135,8 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { /** * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. * - * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. + * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field + * metadata. * @return This instance of the JdbcToArrowConfig, for chaining. */ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { @@ -154,8 +145,8 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { } /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. - * The column index is 1-based, to match the JDBC column index. + * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link + * java.sql.Types#ARRAY}. The column index is 1-based, to match the JDBC column index. * * @param map The mapping. * @return This instance of the JdbcToArrowConfig, for chaining. @@ -166,7 +157,8 @@ public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(MapJdbcToArrowConfig, for chaining. @@ -178,11 +170,12 @@ public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map - * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. - *

    - * The column index is 1-based, to match the JDBC column index. + * + *

    This can be useful to override type information from JDBC drivers that provide incomplete + * type info, e.g. DECIMAL with precision = scale = 0. + * + *

    The column index is 1-based, to match the JDBC column index. + * * @param map The mapping. */ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map map) { @@ -192,9 +185,10 @@ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map - * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. + * + *

    This can be useful to override type information from JDBC drivers that provide incomplete + * type info, e.g. DECIMAL with precision = scale = 0. + * * @param map The mapping. */ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map map) { @@ -204,8 +198,8 @@ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map - * Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. + * + *
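Tying the two explicit-type setters above to a concrete case, here is a sketch that pins a DECIMAL column whose driver reports precision = scale = 0, and opts into scale coercion rather than strict matching. The column name, precision, scale, and rounding mode are all illustrative.

```java
import java.math.RoundingMode;
import java.sql.Types;
import java.util.HashMap;
import java.util.Map;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;

public class ExplicitTypeSketch {
  static JdbcToArrowConfig configWithExplicitDecimal(BufferAllocator allocator) {
    Map<String, JdbcFieldInfo> overrides = new HashMap<>();
    // The driver says DECIMAL(0, 0); declare the real shape for the AMOUNT column.
    overrides.put("AMOUNT", new JdbcFieldInfo(Types.DECIMAL, 38, 10));
    return new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
        .setExplicitTypesByColumnName(overrides)
        // Coerce values whose scale differs from the vector's scale instead of erroring.
        .setBigDecimalRoundingMode(RoundingMode.HALF_UP)
        .build();
  }
}
```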

    Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. */ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { this.targetBatchSize = targetBatchSize; @@ -214,8 +208,9 @@ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { /** * Set the function used to convert JDBC types to Arrow types. - *

    - * Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, Calendar)}. + * + *

    Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, + * Calendar)}. */ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( Function jdbcToArrowTypeConverter) { @@ -225,9 +220,9 @@ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( /** * Set the function used to get a JDBC consumer for a given type. - *
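For the converter hook above, a sketch of overriding one mapping while delegating the rest to the default follows. The Decimal(38, 10) target and 128-bit width are illustrative choices, not part of this patch.

```java
import java.sql.Types;
import java.util.function.Function;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.types.pojo.ArrowType;

public class TypeConverterSketch {
  static JdbcToArrowConfig configWithWideDecimals(BufferAllocator allocator) {
    Function<JdbcFieldInfo, ArrowType> converter =
        fieldInfo -> {
          // Illustrative override: map NUMERIC/DECIMAL to a fixed Decimal(38, 10);
          // everything else falls through to the default mapping.
          if (fieldInfo.getJdbcType() == Types.NUMERIC
              || fieldInfo.getJdbcType() == Types.DECIMAL) {
            return new ArrowType.Decimal(38, 10, 128);
          }
          return JdbcToArrowUtils.getArrowTypeFromJdbcType(
              fieldInfo, JdbcToArrowUtils.getUtcCalendar());
        };
    return new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
        .setJdbcToArrowTypeConverter(converter)
        .build();
  }
}
```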

    - * Defaults to wrapping {@link - * JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, FieldVector, JdbcToArrowConfig)}. + * + *

    Defaults to wrapping {@link JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, + * FieldVector, JdbcToArrowConfig)}. */ public JdbcToArrowConfigBuilder setJdbcConsumerGetter( JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter) { @@ -236,35 +231,32 @@ public JdbcToArrowConfigBuilder setJdbcConsumerGetter( } /** - * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each iteration, - * or to allocate a new one. + * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each + * iteration, or to allocate a new one. */ public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) { this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; return this; } - /** - * Set metadata for schema. - */ + /** Set metadata for schema. */ public JdbcToArrowConfigBuilder setSchemaMetadata(Map schemaMetadata) { this.schemaMetadata = schemaMetadata; return this; } - /** - * Set metadata from columnIndex->meta map on per field basis. - */ + /** Set metadata from columnIndex->meta map on per field basis. */ public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex( - Map> columnMetadataByColumnIndex) { + Map> columnMetadataByColumnIndex) { this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; return this; } /** - * Set the rounding mode used when the scale of the actual value does not match the declared scale. - *

    - * By default, an error is raised in such cases. + * Set the rounding mode used when the scale of the actual value does not match the declared + * scale. + * + *

    By default, an error is raised in such cases. */ public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) { this.bigDecimalRoundingMode = bigDecimalRoundingMode; @@ -272,8 +264,8 @@ public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecima } /** - * This builds the {@link JdbcToArrowConfig} from the provided - * {@link BufferAllocator} and {@link Calendar}. + * This builds the {@link JdbcToArrowConfig} from the provided {@link BufferAllocator} and {@link + * Calendar}. * * @return The built {@link JdbcToArrowConfig} * @throws NullPointerException if either the allocator or calendar was not set. diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index eaee49936079f..8397d4c9e0dc4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; @@ -38,7 +37,6 @@ import java.util.Locale; import java.util.Map; import java.util.TimeZone; - import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer; import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer; import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer; @@ -91,7 +89,8 @@ import org.apache.arrow.vector.util.ValueVectorUtility; /** - * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector objects. + * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector + * objects. * * @since 0.10.0 */ @@ -99,9 +98,7 @@ public class JdbcToArrowUtils { private static final int JDBC_ARRAY_VALUE_COLUMN = 2; - /** - * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. - */ + /** Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. */ public static Calendar getUtcCalendar() { return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); } @@ -114,7 +111,8 @@ public static Calendar getUtcCalendar() { * @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) + throws SQLException { Preconditions.checkNotNull(calendar, "Calendar object can't be null"); return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); @@ -123,25 +121,28 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar /** * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. * - * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. + * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC + * metadata from. + * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. 
* @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData, final Calendar calendar) - throws SQLException { + public static Schema jdbcToArrowSchema( + final ParameterMetaData parameterMetaData, final Calendar calendar) throws SQLException { Preconditions.checkNotNull(calendar, "Calendar object can't be null"); Preconditions.checkNotNull(parameterMetaData); final List parameterFields = new ArrayList<>(parameterMetaData.getParameterCount()); - for (int parameterCounter = 1; parameterCounter <= parameterMetaData.getParameterCount(); - parameterCounter++) { + for (int parameterCounter = 1; + parameterCounter <= parameterMetaData.getParameterCount(); + parameterCounter++) { final int jdbcDataType = parameterMetaData.getParameterType(parameterCounter); final int jdbcIsNullable = parameterMetaData.isNullable(parameterCounter); final boolean arrowIsNullable = jdbcIsNullable != ParameterMetaData.parameterNoNulls; final int precision = parameterMetaData.getPrecision(parameterCounter); final int scale = parameterMetaData.getScale(parameterCounter); - final ArrowType arrowType = getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); - final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/null); + final ArrowType arrowType = + getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); + final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/ null); parameterFields.add(new Field(null, fieldType, null)); } @@ -152,10 +153,11 @@ public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData * Converts the provided JDBC type to its respective {@link ArrowType} counterpart. * * @param fieldInfo the {@link JdbcFieldInfo} with information about the original JDBC type. - * @param calendar the {@link Calendar} to use for datetime data types. + * @param calendar the {@link Calendar} to use for datetime data types. * @return a new {@link ArrowType}. */ - public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, final Calendar calendar) { + public static ArrowType getArrowTypeFromJdbcType( + final JdbcFieldInfo fieldInfo, final Calendar calendar) { switch (fieldInfo.getJdbcType()) { case Types.BOOLEAN: case Types.BIT: @@ -222,30 +224,34 @@ public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, /** * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}. * - *

    - * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the following fields - * will be added to the {@link FieldType#getMetadata()}: + *

    If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the + * following fields will be added to the {@link FieldType#getMetadata()}: + * *

      - *
<li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}</li>
- *   <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}</li>
- *   <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}</li>
- *   <li>{@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}</li>
+ *   <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link
+ *       ResultSetMetaData#getCatalogName(int)}
+ *   <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link
+ *       ResultSetMetaData#getTableName(int)}
+ *   <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link
+ *       ResultSetMetaData#getColumnLabel(int)}
+ *   <li>{@link Constants#SQL_TYPE_KEY} representing {@link
+ *       ResultSetMetaData#getColumnTypeName(int)}
    - *

    - *

    - * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up - * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be - * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. - *

    + * + *

    If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be + * used to look up the array sub-type field. The {@link + * JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be checked first, followed by + * the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. * * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. * @param config The configuration to use when constructing the schema. * @return {@link Schema} * @throws SQLException on error - * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} but the - * config does not have a sub-type definition for it. + * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} + * but the config does not have a sub-type definition for it. */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) + throws SQLException { Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(config, "The configuration object must not be null"); @@ -254,8 +260,10 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig for (int i = 1; i <= columnCount; i++) { final String columnName = rsmd.getColumnLabel(i); - final Map columnMetadata = config.getColumnMetadataByColumnIndex() != null ? - config.getColumnMetadataByColumnIndex().get(i) : null; + final Map columnMetadata = + config.getColumnMetadataByColumnIndex() != null + ? config.getColumnMetadataByColumnIndex().get(i) + : null; final Map metadata; if (config.shouldIncludeMetadata()) { metadata = new HashMap<>(); @@ -278,14 +286,19 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); if (arrowType != null) { - final FieldType fieldType = new FieldType( - isColumnNullable(rsmd, i, columnFieldInfo), arrowType, /* dictionary encoding */ null, metadata); + final FieldType fieldType = + new FieldType( + isColumnNullable(rsmd, i, columnFieldInfo), + arrowType, /* dictionary encoding */ + null, + metadata); List children = null; if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); if (arrayFieldInfo == null) { - throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i); + throw new IllegalArgumentException( + "Configuration does not provide a mapping for array column " + i); } children = new ArrayList(); final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo); @@ -295,9 +308,13 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig FieldType keyType = new FieldType(false, new ArrowType.Utf8(), null, null); FieldType valueType = new FieldType(false, new ArrowType.Utf8(), null, null); children = new ArrayList<>(); - children.add(new Field("child", mapType, - Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, valueType, null)))); + children.add( + new Field( + "child", + mapType, + Arrays.asList( + new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, valueType, null)))); } fields.add(new 
Field(columnName, fieldType, children)); @@ -307,18 +324,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig } static JdbcFieldInfo getJdbcFieldInfoForColumn( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { + ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); Preconditions.checkNotNull(config, "Configuration must not be null"); Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); + arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); + arrayColumn <= rsmd.getColumnCount(), + "Column number cannot be more than the number of columns"); JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn); if (fieldInfo == null) { @@ -334,16 +347,12 @@ static JdbcFieldInfo getJdbcFieldInfoForColumn( * If no sub-type can be found, returns null. */ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { + ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); Preconditions.checkNotNull(config, "Configuration must not be null"); Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); + arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); Preconditions.checkArgument( arrayColumn <= rsmd.getColumnCount(), "Column number cannot be more than the number of columns"); @@ -359,10 +368,10 @@ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp} - * data types from the {@link ResultSet}, or null if not converting. + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link + * Timestamp} data types from the {@link ResultSet}, or null if not converting. 
* @throws SQLException on error */ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) @@ -373,29 +382,30 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); } - static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) - throws SQLException { + static boolean isColumnNullable( + ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) throws SQLException { int nullableValue; if (info != null && info.isNullable() != ResultSetMetaData.columnNullableUnknown) { nullableValue = info.isNullable(); } else { nullableValue = resultSetMetadata.isNullable(index); } - return nullableValue == ResultSetMetaData.columnNullable || - nullableValue == ResultSetMetaData.columnNullableUnknown; + return nullableValue == ResultSetMetaData.columnNullable + || nullableValue == ResultSetMetaData.columnNullableUnknown; } /** * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate * @param config The configuration to use when reading the data. * @throws SQLException on error * @throws JdbcConsumerException on error from VectorConsumer */ - public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) + public static void jdbcToArrowVectors( + ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) throws SQLException, IOException { ResultSetMetaData rsmd = rs.getMetaData(); @@ -405,8 +415,13 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT for (int i = 1; i <= columnCount; i++) { FieldVector vector = root.getVector(rsmd.getColumnLabel(i)); final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - consumers[i - 1] = getConsumer( - vector.getField().getType(), i, isColumnNullable(rsmd, i, columnFieldInfo), vector, config); + consumers[i - 1] = + getConsumer( + vector.getField().getType(), + i, + isColumnNullable(rsmd, i, columnFieldInfo), + vector, + config); } CompositeJdbcConsumer compositeConsumer = null; @@ -439,18 +454,22 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT } /** - * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the - * given column based on the Arrow type and provided vector. + * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the given + * column based on the Arrow type and provided vector. * - * @param arrowType Arrow type for the column. + * @param arrowType Arrow type for the column. * @param columnIndex Column index to fetch from the ResultSet - * @param nullable Whether the value is nullable or not - * @param vector Vector to store the consumed value - * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. + * @param nullable Whether the value is nullable or not + * @param vector Vector to store the consumed value + * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. 
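Putting the schema and vector helpers in this file together, here is a manual-conversion sketch. The connection handling and query are illustrative; includeMetadata is enabled so the SQL_* keys listed above end up in the field metadata.

```java
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.Schema;

public class ManualConversionSketch {
  // Build the schema from ResultSetMetaData, then let the JDBC consumers fill the root.
  static void convert(Connection conn) throws Exception {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT * FROM my_table")) {
      JdbcToArrowConfig config =
          new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar(), true)
              .build();
      Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), config);
      try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
        JdbcToArrowUtils.jdbcToArrowVectors(rs, root, config);
        // root now holds the converted rows; with includeMetadata = true each field
        // carries the SQL_* metadata keys listed above.
      }
    }
  }
}
```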
* @return {@link JdbcConsumer} */ - public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, - FieldVector vector, JdbcToArrowConfig config) { + public static JdbcConsumer getConsumer( + ArrowType arrowType, + int columnIndex, + boolean nullable, + FieldVector vector, + JdbcToArrowConfig config) { final Calendar calendar = config.getCalendar(); switch (arrowType.getTypeID()) { @@ -472,10 +491,11 @@ public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boo case Decimal: final RoundingMode bigDecimalRoundingMode = config.getBigDecimalRoundingMode(); if (((ArrowType.Decimal) arrowType).getBitWidth() == 256) { - return Decimal256Consumer.createConsumer((Decimal256Vector) vector, columnIndex, nullable, - bigDecimalRoundingMode); + return Decimal256Consumer.createConsumer( + (Decimal256Vector) vector, columnIndex, nullable, bigDecimalRoundingMode); } else { - return DecimalConsumer.createConsumer((DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); + return DecimalConsumer.createConsumer( + (DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); } case FloatingPoint: switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) { @@ -495,17 +515,25 @@ public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boo case Date: return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar); case Time: - return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar); + return TimeConsumer.createConsumer( + (TimeMilliVector) vector, columnIndex, nullable, calendar); case Timestamp: if (config.getCalendar() == null) { - return TimestampConsumer.createConsumer((TimeStampMilliVector) vector, columnIndex, nullable); + return TimestampConsumer.createConsumer( + (TimeStampMilliVector) vector, columnIndex, nullable); } else { - return TimestampTZConsumer.createConsumer((TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); + return TimestampTZConsumer.createConsumer( + (TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); } case List: FieldVector childVector = ((ListVector) vector).getDataVector(); - JdbcConsumer delegate = getConsumer(childVector.getField().getType(), JDBC_ARRAY_VALUE_COLUMN, - childVector.getField().isNullable(), childVector, config); + JdbcConsumer delegate = + getConsumer( + childVector.getField().getType(), + JDBC_ARRAY_VALUE_COLUMN, + childVector.getField().isNullable(), + childVector, + config); return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable); case Map: return MapConsumer.createConsumer((MapVector) vector, columnIndex, nullable); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java index f24f409072c0d..d7b62c43acf6f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import org.apache.arrow.vector.FieldVector; /** * Base class for ColumnBinder implementations. + * * @param The concrete FieldVector subtype. 
*/ public abstract class BaseColumnBinder implements ColumnBinder { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java index fde4642ef90a5..b9dfcb0d6c956 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.BigIntVector; /** A column binder for 8-bit integers. */ @@ -34,7 +32,8 @@ public BigIntBinder(BigIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final long value = vector.getDataBuffer().getLong((long) rowIndex * BigIntVector.TYPE_WIDTH); statement.setLong(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java index adae513e99e7c..c9db194f652ff 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.BitVector; /** A column binder for booleans. */ @@ -34,7 +32,8 @@ public BitBinder(BitVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // See BitVector#getBit final int byteIndex = rowIndex >> 3; final byte b = vector.getDataBuffer().getByte(byteIndex); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java index c2b1259e1424b..c38db68234ecf 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; -/** - * A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. - */ +/** A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. */ public interface ColumnBinder { /** * Bind the given row to the given parameter. @@ -43,14 +39,10 @@ public interface ColumnBinder { */ int getJdbcType(); - /** - * Get the vector used by this binder. 
- */ + /** Get the vector used by this binder. */ FieldVector getVector(); - /** - * Create a column binder for a vector, using the default JDBC type code for null values. - */ + /** Create a column binder for a vector, using the default JDBC type code for null values. */ static ColumnBinder forVector(FieldVector vector) { return forVector(vector, /*jdbcType*/ null); } @@ -62,7 +54,8 @@ static ColumnBinder forVector(FieldVector vector) { * @param jdbcType The JDBC type code to use (or null to use the default). */ static ColumnBinder forVector(FieldVector vector, Integer jdbcType) { - final ColumnBinder binder = vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); + final ColumnBinder binder = + vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); if (vector.getField().isNullable()) { return new NullableColumnBinder(binder); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index 7420a8c23dd48..30b2305f3f916 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Types; import java.time.ZoneId; import java.util.Calendar; import java.util.TimeZone; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; @@ -50,8 +48,8 @@ /** * Visitor to create the base ColumnBinder for a vector. - *
- * To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. + * + * <p>
    To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. */ public class ColumnBinderArrowTypeVisitor implements ArrowType.ArrowTypeVisitor { private final FieldVector vector; @@ -111,17 +109,21 @@ public ColumnBinder visit(ArrowType.Int type) { } switch (type.getBitWidth()) { case 8: - return jdbcType == null ? new TinyIntBinder((TinyIntVector) vector) : - new TinyIntBinder((TinyIntVector) vector, jdbcType); + return jdbcType == null + ? new TinyIntBinder((TinyIntVector) vector) + : new TinyIntBinder((TinyIntVector) vector, jdbcType); case 16: - return jdbcType == null ? new SmallIntBinder((SmallIntVector) vector) : - new SmallIntBinder((SmallIntVector) vector, jdbcType); + return jdbcType == null + ? new SmallIntBinder((SmallIntVector) vector) + : new SmallIntBinder((SmallIntVector) vector, jdbcType); case 32: - return jdbcType == null ? new IntBinder((IntVector) vector) : - new IntBinder((IntVector) vector, jdbcType); + return jdbcType == null + ? new IntBinder((IntVector) vector) + : new IntBinder((IntVector) vector, jdbcType); case 64: - return jdbcType == null ? new BigIntBinder((BigIntVector) vector) : - new BigIntBinder((BigIntVector) vector, jdbcType); + return jdbcType == null + ? new BigIntBinder((BigIntVector) vector) + : new BigIntBinder((BigIntVector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -131,11 +133,13 @@ public ColumnBinder visit(ArrowType.Int type) { public ColumnBinder visit(ArrowType.FloatingPoint type) { switch (type.getPrecision()) { case SINGLE: - return jdbcType == null ? new Float4Binder((Float4Vector) vector) : - new Float4Binder((Float4Vector) vector, jdbcType); + return jdbcType == null + ? new Float4Binder((Float4Vector) vector) + : new Float4Binder((Float4Vector) vector, jdbcType); case DOUBLE: - return jdbcType == null ? new Float8Binder((Float8Vector) vector) : - new Float8Binder((Float8Vector) vector, jdbcType); + return jdbcType == null + ? new Float8Binder((Float8Vector) vector) + : new Float8Binder((Float8Vector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -144,61 +148,74 @@ public ColumnBinder visit(ArrowType.FloatingPoint type) { @Override public ColumnBinder visit(ArrowType.Utf8 type) { VarCharVector varChar = (VarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.VARCHAR) : - new VarCharBinder<>(varChar, jdbcType); + return jdbcType == null + ? new VarCharBinder<>(varChar, Types.VARCHAR) + : new VarCharBinder<>(varChar, jdbcType); } @Override public ColumnBinder visit(ArrowType.Utf8View type) { - throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported"); + throw new UnsupportedOperationException( + "Column binder implemented for type " + type + " is not supported"); } @Override public ColumnBinder visit(ArrowType.LargeUtf8 type) { LargeVarCharVector varChar = (LargeVarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.LONGVARCHAR) : - new VarCharBinder<>(varChar, jdbcType); + return jdbcType == null + ? new VarCharBinder<>(varChar, Types.LONGVARCHAR) + : new VarCharBinder<>(varChar, jdbcType); } @Override public ColumnBinder visit(ArrowType.Binary type) { VarBinaryVector varBinary = (VarBinaryVector) vector; - return jdbcType == null ? 
new VarBinaryBinder<>(varBinary, Types.VARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); + return jdbcType == null + ? new VarBinaryBinder<>(varBinary, Types.VARBINARY) + : new VarBinaryBinder<>(varBinary, jdbcType); } @Override public ColumnBinder visit(ArrowType.BinaryView type) { - throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported"); + throw new UnsupportedOperationException( + "Column binder implemented for type " + type + " is not supported"); } @Override public ColumnBinder visit(ArrowType.LargeBinary type) { LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector; - return jdbcType == null ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); + return jdbcType == null + ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) + : new VarBinaryBinder<>(varBinary, jdbcType); } @Override public ColumnBinder visit(ArrowType.FixedSizeBinary type) { FixedSizeBinaryVector binary = (FixedSizeBinaryVector) vector; - return jdbcType == null ? new FixedSizeBinaryBinder(binary, Types.BINARY) : - new FixedSizeBinaryBinder(binary, jdbcType); + return jdbcType == null + ? new FixedSizeBinaryBinder(binary, Types.BINARY) + : new FixedSizeBinaryBinder(binary, jdbcType); } @Override public ColumnBinder visit(ArrowType.Bool type) { - return jdbcType == null ? new BitBinder((BitVector) vector) : new BitBinder((BitVector) vector, jdbcType); + return jdbcType == null + ? new BitBinder((BitVector) vector) + : new BitBinder((BitVector) vector, jdbcType); } @Override public ColumnBinder visit(ArrowType.Decimal type) { if (type.getBitWidth() == 128) { DecimalVector decimalVector = (DecimalVector) vector; - return jdbcType == null ? new Decimal128Binder(decimalVector) : new Decimal128Binder(decimalVector, jdbcType); + return jdbcType == null + ? new Decimal128Binder(decimalVector) + : new Decimal128Binder(decimalVector, jdbcType); } else if (type.getBitWidth() == 256) { Decimal256Vector decimalVector = (Decimal256Vector) vector; - return jdbcType == null ? new Decimal256Binder(decimalVector) : new Decimal256Binder(decimalVector, jdbcType); + return jdbcType == null + ? new Decimal256Binder(decimalVector) + : new Decimal256Binder(decimalVector, jdbcType); } throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -207,11 +224,13 @@ public ColumnBinder visit(ArrowType.Decimal type) { public ColumnBinder visit(ArrowType.Date type) { switch (type.getUnit()) { case DAY: - return jdbcType == null ? new DateDayBinder((DateDayVector) vector) : - new DateDayBinder((DateDayVector) vector, /*calendar*/null, jdbcType); + return jdbcType == null + ? new DateDayBinder((DateDayVector) vector) + : new DateDayBinder((DateDayVector) vector, /*calendar*/ null, jdbcType); case MILLISECOND: - return jdbcType == null ? new DateMilliBinder((DateMilliVector) vector) : - new DateMilliBinder((DateMilliVector) vector, /*calendar*/null, jdbcType); + return jdbcType == null + ? new DateMilliBinder((DateMilliVector) vector) + : new DateMilliBinder((DateMilliVector) vector, /*calendar*/ null, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -221,17 +240,21 @@ public ColumnBinder visit(ArrowType.Date type) { public ColumnBinder visit(ArrowType.Time type) { switch (type.getUnit()) { case SECOND: - return jdbcType == null ? 
new Time32Binder((TimeSecVector) vector) : - new Time32Binder((TimeSecVector) vector, jdbcType); + return jdbcType == null + ? new Time32Binder((TimeSecVector) vector) + : new Time32Binder((TimeSecVector) vector, jdbcType); case MILLISECOND: - return jdbcType == null ? new Time32Binder((TimeMilliVector) vector) : - new Time32Binder((TimeMilliVector) vector, jdbcType); + return jdbcType == null + ? new Time32Binder((TimeMilliVector) vector) + : new Time32Binder((TimeMilliVector) vector, jdbcType); case MICROSECOND: - return jdbcType == null ? new Time64Binder((TimeMicroVector) vector) : - new Time64Binder((TimeMicroVector) vector, jdbcType); + return jdbcType == null + ? new Time64Binder((TimeMicroVector) vector) + : new Time64Binder((TimeMicroVector) vector, jdbcType); case NANOSECOND: - return jdbcType == null ? new Time64Binder((TimeNanoVector) vector) : - new Time64Binder((TimeNanoVector) vector, jdbcType); + return jdbcType == null + ? new Time64Binder((TimeNanoVector) vector) + : new Time64Binder((TimeNanoVector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java index bc16790c8f391..b9eae464c8aa2 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Date; @@ -22,12 +21,9 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.DateDayVector; -/** - * A column binder for 32-bit dates. - */ +/** A column binder for 32-bit dates. */ public class DateDayBinder extends BaseColumnBinder { private static final long MILLIS_PER_DAY = 86_400_000; private final Calendar calendar; @@ -46,7 +42,8 @@ public DateDayBinder(DateDayVector vector, Calendar calendar, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: multiply with overflow final long index = (long) rowIndex * DateDayVector.TYPE_WIDTH; final Date value = new Date(vector.getDataBuffer().getInt(index) * MILLIS_PER_DAY); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java index 5cb91b46ac179..f320391fbed5b 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Date; @@ -22,12 +21,9 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.DateMilliVector; -/** - * A column binder for 64-bit dates. - */ +/** A column binder for 64-bit dates. 
*/ public class DateMilliBinder extends BaseColumnBinder { private final Calendar calendar; @@ -39,14 +35,14 @@ public DateMilliBinder(DateMilliVector vector, Calendar calendar) { this(vector, calendar, Types.DATE); } - public DateMilliBinder(DateMilliVector vector, Calendar calendar, int jdbcType) { super(vector, jdbcType); this.calendar = calendar; } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final long index = (long) rowIndex * DateMilliVector.TYPE_WIDTH; final Date value = new Date(vector.getDataBuffer().getLong(index)); if (calendar == null) { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java index 9e9d0e4fdb25b..07ef52f2e594c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.math.BigDecimal; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.util.DecimalUtility; -/** - * A binder for 128-bit decimals. - */ +/** A binder for 128-bit decimals. */ public class Decimal128Binder extends BaseColumnBinder { public Decimal128Binder(DecimalVector vector) { this(vector, Types.DECIMAL); @@ -38,9 +34,11 @@ public Decimal128Binder(DecimalVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final BigDecimal value = + DecimalUtility.getBigDecimalFromArrowBuf( + vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); statement.setBigDecimal(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java index bd29e083b4513..5a4222f6b84db 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.math.BigDecimal; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Decimal256Vector; import org.apache.arrow.vector.util.DecimalUtility; -/** - * A binder for 256-bit decimals. - */ +/** A binder for 256-bit decimals. 
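As a standalone sketch of what Decimal128Binder.bind above does: write a BigDecimal into a DecimalVector, then read it back through the same DecimalUtility call that feeds setBigDecimal. The vector name, precision, scale, and value are illustrative.

```java
import java.math.BigDecimal;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.util.DecimalUtility;

public class DecimalRoundTripSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator();
        DecimalVector vector = new DecimalVector("amount", allocator, /*precision*/ 38, /*scale*/ 2)) {
      vector.allocateNew(1);
      vector.setSafe(0, new BigDecimal("12345.67"));
      vector.setValueCount(1);

      // The same call Decimal128Binder.bind uses to produce the value passed to setBigDecimal.
      BigDecimal value =
          DecimalUtility.getBigDecimalFromArrowBuf(
              vector.getDataBuffer(), /*rowIndex*/ 0, vector.getScale(), DecimalVector.TYPE_WIDTH);
      System.out.println(value); // 12345.67
    }
  }
}
```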
*/ public class Decimal256Binder extends BaseColumnBinder { public Decimal256Binder(Decimal256Vector vector) { this(vector, Types.DECIMAL); @@ -38,9 +34,11 @@ public Decimal256Binder(Decimal256Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final BigDecimal value = + DecimalUtility.getBigDecimalFromArrowBuf( + vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); statement.setBigDecimal(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java index 7edc5e4532985..4f74b1fa8cfd4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java @@ -14,22 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FixedSizeBinaryVector; -/** - * A binder for fixed-width binary types. - */ +/** A binder for fixed-width binary types. */ public class FixedSizeBinaryBinder extends BaseColumnBinder { /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { @@ -37,9 +33,12 @@ public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { byte[] binaryData = new byte[vector.getByteWidth()]; - vector.getDataBuffer().getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); + vector + .getDataBuffer() + .getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); statement.setBytes(parameterIndex, binaryData); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java index a471c1ebadd66..466a67a2dbc89 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Float4Vector; -/** - * A binder for 32-bit floats. - */ +/** A binder for 32-bit floats. 
*/ public class Float4Binder extends BaseColumnBinder { public Float4Binder(Float4Vector vector) { this(vector, Types.REAL); @@ -36,7 +32,8 @@ public Float4Binder(Float4Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final float value = vector.getDataBuffer().getFloat((long) rowIndex * Float4Vector.TYPE_WIDTH); statement.setFloat(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java index 4710c3b59860d..222bebf115372 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Float8Vector; -/** - * A binder for 64-bit floats. - */ +/** A binder for 64-bit floats. */ public class Float8Binder extends BaseColumnBinder { public Float8Binder(Float8Vector vector) { this(vector, Types.DOUBLE); @@ -36,8 +32,10 @@ public Float8Binder(Float8Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final double value = vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final double value = + vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); statement.setDouble(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java index 7d47f585a39d9..6b49eeb5352b1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.IntVector; /** A column binder for 32-bit integers. 
*/ @@ -34,7 +32,8 @@ public IntBinder(IntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final int value = vector.getDataBuffer().getInt((long) rowIndex * IntVector.TYPE_WIDTH); statement.setInt(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java index b8aa61234f4e9..25172c0c1f0aa 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.util.Text; -/** - * A column binder for list of primitive values. - */ +/** A column binder for list of primitive values. */ public class ListBinder extends BaseColumnBinder { private final UnionListReader listReader; @@ -52,7 +48,9 @@ public ListBinder(ListVector vector, int jdbcType) { try { arrayElementClass = dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType(); } catch (NoSuchMethodException e) { - final String message = String.format("Issue to determine type for getObject method of data vector class %s ", + final String message = + String.format( + "Issue to determine type for getObject method of data vector class %s ", dataVectorClass.getName()); throw new RuntimeException(message); } @@ -60,7 +58,8 @@ public ListBinder(ListVector vector, int jdbcType) { } @Override - public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex)throws java.sql.SQLException { + public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex) + throws java.sql.SQLException { listReader.setPosition(rowIndex); ArrayList sourceArray = (ArrayList) listReader.readObject(); Object array; @@ -69,7 +68,9 @@ public void bind(java.sql.PreparedStatement statement, int parameterIndex, int r Arrays.setAll((Object[]) array, sourceArray::get); } else { array = new String[sourceArray.size()]; - Arrays.setAll((Object[]) array, idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); + Arrays.setAll( + (Object[]) array, + idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); } statement.setObject(parameterIndex, array); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java index 07391eb7cbfb4..e94f186453581 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; @@ -23,16 +22,13 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Objects; - import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.JsonStringHashMap; -/** - * A column binder for map of primitive values. - */ +/** A column binder for map of primitive values. */ public class MapBinder extends BaseColumnBinder { private UnionMapReader reader; @@ -58,8 +54,8 @@ public MapBinder(MapVector vector, int jdbcType) { } List keyValueFields = Objects.requireNonNull(structField.get(0)).getChildren(); if (keyValueFields.size() != 2) { - throw new IllegalArgumentException("Expected two children fields " + - "inside nested Struct field in Map"); + throw new IllegalArgumentException( + "Expected two children fields " + "inside nested Struct field in Map"); } ArrowType keyType = Objects.requireNonNull(keyValueFields.get(0)).getType(); ArrowType valueType = Objects.requireNonNull(keyValueFields.get(1)).getType(); @@ -68,15 +64,16 @@ public MapBinder(MapVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, - int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { reader.setPosition(rowIndex); LinkedHashMap tags = new JsonStringHashMap<>(); while (reader.next()) { Object key = reader.key().readObject(); Object value = reader.value().readObject(); - tags.put(isTextKey && key != null ? key.toString() : key, - isTextValue && value != null ? value.toString() : value); + tags.put( + isTextKey && key != null ? key.toString() : key, + isTextValue && value != null ? value.toString() : value); } switch (jdbcType) { case Types.VARCHAR: diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java index 123b587ca50d4..bf5288b173341 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; -/** - * A ColumnBinder that checks for nullability before deferring to a type-specific binder. - */ +/** A ColumnBinder that checks for nullability before deferring to a type-specific binder. 
*/ public class NullableColumnBinder implements ColumnBinder { private final ColumnBinder wrapped; @@ -33,7 +29,8 @@ public NullableColumnBinder(ColumnBinder wrapped) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { if (wrapped.getVector().isNull(rowIndex)) { statement.setNull(parameterIndex, wrapped.getJdbcType()); } else { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java index f9d744b9f5497..aa636c9336f55 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.SmallIntVector; /** A column binder for 8-bit integers. */ @@ -34,8 +32,10 @@ public SmallIntBinder(SmallIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final short value = vector.getDataBuffer().getShort((short) rowIndex * SmallIntVector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final short value = + vector.getDataBuffer().getShort((short) rowIndex * SmallIntVector.TYPE_WIDTH); statement.setShort(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java index 5dc7e3f513f97..4e09c3be23264 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Types; - import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.TimeMilliVector; import org.apache.arrow.vector.TimeSecVector; -/** - * A binder for 32-bit time types. - */ +/** A binder for 32-bit time types. 
*/ public class Time32Binder extends BaseColumnBinder { private static final long TYPE_WIDTH = 4; @@ -43,11 +39,11 @@ public Time32Binder(TimeMilliVector vector) { } public Time32Binder(TimeSecVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); + this(vector, /*factor*/ 1_000, jdbcType); } public Time32Binder(TimeMilliVector vector, int jdbcType) { - this(vector, /*factor*/1, jdbcType); + this(vector, /*factor*/ 1, jdbcType); } Time32Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { @@ -56,7 +52,8 @@ public Time32Binder(TimeMilliVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: multiply with overflow // TODO: take in a Calendar as well? final Time value = new Time(vector.getDataBuffer().getInt(rowIndex * TYPE_WIDTH) * factor); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java index 8d62ae0eb36df..01c85fb32f1b5 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Types; - import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.TimeMicroVector; import org.apache.arrow.vector.TimeNanoVector; -/** - * A binder for 64-bit time types. - */ +/** A binder for 64-bit time types. */ public class Time64Binder extends BaseColumnBinder { private static final long TYPE_WIDTH = 8; @@ -43,11 +39,11 @@ public Time64Binder(TimeNanoVector vector) { } public Time64Binder(TimeMicroVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); + this(vector, /*factor*/ 1_000, jdbcType); } public Time64Binder(TimeNanoVector vector, int jdbcType) { - this(vector, /*factor*/1_000_000, jdbcType); + this(vector, /*factor*/ 1_000_000, jdbcType); } Time64Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { @@ -56,7 +52,8 @@ public Time64Binder(TimeNanoVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: option to throw on truncation (vendor Guava IntMath#multiply) final Time value = new Time(vector.getDataBuffer().getLong(rowIndex * TYPE_WIDTH) / factor); statement.setTime(parameterIndex, value); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java index 6677e5909901a..942d7ae58dcd5 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; @@ -22,7 +21,6 @@ import java.sql.Timestamp; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.TimeStampVector; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -32,15 +30,17 @@ public class TimeStampBinder extends BaseColumnBinder { private final long unitsPerSecond; private final long nanosPerUnit; - /** - * Create a binder for a timestamp vector using the default JDBC type code. - */ + /** Create a binder for a timestamp vector using the default JDBC type code. */ public TimeStampBinder(TimeStampVector vector, Calendar calendar) { - this(vector, calendar, isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); + this( + vector, + calendar, + isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); } /** * Create a binder for a timestamp vector. + * * @param vector The vector to pull values from. * @param calendar Optionally, the calendar to pass to JDBC. * @param jdbcType The JDBC type code to use for null values. @@ -73,19 +73,23 @@ public TimeStampBinder(TimeStampVector vector, Calendar calendar, int jdbcType) } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: option to throw on truncation (vendor Guava IntMath#multiply) or overflow - final long rawValue = vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); + final long rawValue = + vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); final long seconds = rawValue / unitsPerSecond; final int nanos = (int) ((rawValue - (seconds * unitsPerSecond)) * nanosPerUnit); final Timestamp value = new Timestamp(seconds * 1_000); value.setNanos(nanos); if (calendar != null) { - // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC value with a + // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC + // value with a // zone offset, so we don't need to do any conversion. statement.setTimestamp(parameterIndex, value, calendar); } else { - // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is technically wrong, + // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is + // technically wrong, // but there is no 'correct' interpretation here. The application should provide a calendar. statement.setTimestamp(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java index f51d139be863a..0580456d37983 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.TinyIntVector; /** A column binder for 8-bit integers. 
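The unit arithmetic in TimeStampBinder.bind above splits the raw value into whole seconds plus a nanosecond remainder before building the java.sql.Timestamp. A small worked sketch for a microsecond-precision vector follows; the per-unit constants and the raw value are assumed for illustration and are not part of this hunk.

```java
import java.sql.Timestamp;

public class TimestampUnitsSketch {
  public static void main(String[] args) {
    // Microsecond precision: 1_000_000 units per second, 1_000 nanoseconds per unit (assumed).
    final long unitsPerSecond = 1_000_000L;
    final long nanosPerUnit = 1_000L;

    final long rawValue = 1_718_000_000_123_456L; // microseconds since the epoch (made up)
    final long seconds = rawValue / unitsPerSecond; // 1_718_000_000
    final int nanos = (int) ((rawValue - seconds * unitsPerSecond) * nanosPerUnit); // 123_456_000

    // Mirrors the bind() logic above: millisecond-based Timestamp, then the exact nano remainder.
    Timestamp value = new Timestamp(seconds * 1_000);
    value.setNanos(nanos);
    System.out.println(value.getTime() + " ms, " + value.getNanos() + " ns");
  }
}
```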
*/ @@ -34,7 +32,8 @@ public TinyIntBinder(TinyIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final byte value = vector.getDataBuffer().getByte((long) rowIndex * TinyIntVector.TYPE_WIDTH); statement.setByte(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java index a94cff6a00496..41807efc611b1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java @@ -14,12 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.ElementAddressableVector; import org.apache.arrow.vector.FieldVector; @@ -29,13 +27,14 @@ * * @param The binary vector. */ -public class VarBinaryBinder extends BaseColumnBinder { +public class VarBinaryBinder + extends BaseColumnBinder { private final ArrowBufPointer element; /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public VarBinaryBinder(T vector, int jdbcType) { @@ -44,15 +43,18 @@ public VarBinaryBinder(T vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { vector.getDataPointer(rowIndex, element); if (element.getBuf() == null) { statement.setNull(parameterIndex, jdbcType); return; } if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); + final String message = + String.format( + "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", + rowIndex, element.getLength()); throw new RuntimeException(message); } byte[] binaryData = new byte[(int) element.getLength()]; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java index 73bd55981490b..926e1da28c9a0 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.nio.charset.StandardCharsets; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VariableWidthVector; @@ -30,13 +28,14 @@ * * @param The text vector. 
*/ -public class VarCharBinder extends BaseColumnBinder { +public class VarCharBinder + extends BaseColumnBinder { private final ArrowBufPointer element; /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public VarCharBinder(T vector, int jdbcType) { @@ -45,15 +44,18 @@ public VarCharBinder(T vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { vector.getDataPointer(rowIndex, element); if (element.getBuf() == null) { statement.setNull(parameterIndex, jdbcType); return; } if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); + final String message = + String.format( + "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", + rowIndex, element.getLength()); throw new RuntimeException(message); } byte[] utf8Bytes = new byte[(int) element.getLength()]; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java index 4f8936e0c27bf..945c3c9f84fa8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java @@ -15,8 +15,5 @@ * limitations under the License. */ -/** - * Utilities to bind Arrow data as JDBC prepared statement parameters. - */ - +/** Utilities to bind Arrow data as JDBC prepared statement parameters. */ package org.apache.arrow.adapter.jdbc.binder; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java index 2f18b8a416d34..4676e8204eed4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java @@ -14,29 +14,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.ListVector; /** - * Consumer which consume array type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.complex.ListVector}. + * Consumer which consume array type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.complex.ListVector}. */ public abstract class ArrayConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link ListVector}. - */ + /** Creates a consumer for {@link ListVector}. 
*/ public static ArrayConsumer createConsumer( - ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { + ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { if (nullable) { return new ArrayConsumer.NullableArrayConsumer(vector, delegate, index); } else { @@ -50,9 +46,7 @@ public static ArrayConsumer createConsumer( protected int innerVectorIndex = 0; - /** - * Instantiate a ArrayConsumer. - */ + /** Instantiate a ArrayConsumer. */ public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, index); this.delegate = delegate; @@ -81,14 +75,10 @@ void ensureInnerVectorCapacity(int targetCapacity) { } } - /** - * Nullable consumer for {@link ListVector}. - */ + /** Nullable consumer for {@link ListVector}. */ static class NullableArrayConsumer extends ArrayConsumer { - /** - * Instantiate a nullable array consumer. - */ + /** Instantiate a nullable array consumer. */ public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, delegate, index); } @@ -113,14 +103,10 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } } - /** - * Non-nullable consumer for {@link ListVector}. - */ + /** Non-nullable consumer for {@link ListVector}. */ static class NonNullableArrayConsumer extends ArrayConsumer { - /** - * Instantiate a nullable array consumer. - */ + /** Instantiate a nullable array consumer. */ public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, delegate, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java index 2db128d3e2b2d..9ca3c98a7eb98 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import org.apache.arrow.vector.ValueVector; /** * Base class for all consumers. + * * @param vector type. */ public abstract class BaseConsumer implements JdbcConsumer { @@ -33,6 +33,7 @@ public abstract class BaseConsumer implements JdbcConsume /** * Constructs a new consumer. + * * @param vector the underlying vector for the consumer. * @param index the column id for the consumer. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java index 19c8efa91719f..b7c547a9391b6 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.BigIntVector; /** - * Consumer which consume bigint type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.BigIntVector}. + * Consumer which consume bigint type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.BigIntVector}. 
*/ public class BigIntConsumer { - /** - * Creates a consumer for {@link BigIntVector}. - */ - public static JdbcConsumer createConsumer(BigIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link BigIntVector}. */ + public static JdbcConsumer createConsumer( + BigIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableBigIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(BigIntVector vector, int } } - /** - * Nullable consumer for big int. - */ + /** Nullable consumer for big int. */ static class NullableBigIntConsumer extends BaseConsumer { - /** - * Instantiate a BigIntConsumer. - */ + /** Instantiate a BigIntConsumer. */ public NullableBigIntConsumer(BigIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for big int. - */ + /** Non-nullable consumer for big int. */ static class NonNullableBigIntConsumer extends BaseConsumer { - /** - * Instantiate a BigIntConsumer. - */ + /** Instantiate a BigIntConsumer. */ public NonNullableBigIntConsumer(BigIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java index 538d161f9e9c7..edbc6360df6bf 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.io.InputStream; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.VarBinaryVector; /** - * Consumer which consume binary type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarBinaryVector}. + * Consumer which consume binary type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarBinaryVector}. */ public abstract class BinaryConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link VarBinaryVector}. - */ + /** Creates a consumer for {@link VarBinaryVector}. */ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) { if (nullable) { return new NullableBinaryConsumer(vector, index); @@ -45,9 +41,7 @@ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, b private final byte[] reuseBytes = new byte[1024]; - /** - * Instantiate a BinaryConsumer. - */ + /** Instantiate a BinaryConsumer. */ public BinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); if (vector != null) { @@ -55,9 +49,7 @@ public BinaryConsumer(VarBinaryVector vector, int index) { } } - /** - * consume a InputStream. - */ + /** consume a InputStream. 
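A usage sketch for the factory above: consume one BIGINT column of a ResultSet into a BigIntVector, one row per consume call. The connection, table, and column are placeholders.

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;

public class BigIntConsumerSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = new RootAllocator();
        BigIntVector vector = new BigIntVector("id", allocator);
        Connection conn = DriverManager.getConnection("jdbc:h2:mem:demo");
        ResultSet rs = conn.createStatement().executeQuery("SELECT id FROM t")) {
      vector.allocateNew();
      // The index is the 1-based ResultSet column index; nullable=true picks the null-checking consumer.
      JdbcConsumer<BigIntVector> consumer = BigIntConsumer.createConsumer(vector, 1, true);
      int rows = 0;
      while (rs.next()) {
        consumer.consume(rs);
        rows++;
      }
      vector.setValueCount(rows);
      System.out.println(vector);
    }
  }
}
```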
*/ public void consume(InputStream is) throws IOException { if (is != null) { while (currentIndex >= vector.getValueCapacity()) { @@ -74,7 +66,8 @@ public void consume(InputStream is) throws IOException { vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read); dataLength += read; } - offsetBuffer.setInt((currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); + offsetBuffer.setInt( + (currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } @@ -91,14 +84,10 @@ public void resetValueVector(VarBinaryVector vector) { this.currentIndex = 0; } - /** - * Consumer for nullable binary data. - */ + /** Consumer for nullable binary data. */ static class NullableBinaryConsumer extends BinaryConsumer { - - /** - * Instantiate a BinaryConsumer. - */ + + /** Instantiate a BinaryConsumer. */ public NullableBinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); } @@ -113,14 +102,10 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } } - /** - * Consumer for non-nullable binary data. - */ + /** Consumer for non-nullable binary data. */ static class NonNullableBinaryConsumer extends BinaryConsumer { - /** - * Instantiate a BinaryConsumer. - */ + /** Instantiate a BinaryConsumer. */ public NonNullableBinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java index d2d94d0a40e2f..287b9509b5054 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.BitVector; /** - * Consumer which consume bit type values from {@link ResultSet}. - * Write the data to {@link BitVector}. + * Consumer which consume bit type values from {@link ResultSet}. Write the data to {@link + * BitVector}. */ public class BitConsumer { - /** - * Creates a consumer for {@link BitVector}. - */ - public static JdbcConsumer createConsumer(BitVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link BitVector}. */ + public static JdbcConsumer createConsumer( + BitVector vector, int index, boolean nullable) { if (nullable) { return new NullableBitConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(BitVector vector, int index } } - /** - * Nullable consumer for {@link BitVector}. - */ + /** Nullable consumer for {@link BitVector}. */ static class NullableBitConsumer extends BaseConsumer { - /** - * Instantiate a BitConsumer. - */ + /** Instantiate a BitConsumer. */ public NullableBitConsumer(BitVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for {@link BitVector}. - */ + /** Non-nullable consumer for {@link BitVector}. */ static class NonNullableBitConsumer extends BaseConsumer { - /** - * Instantiate a BitConsumer. 
- */ + /** Instantiate a BitConsumer. */ public NonNullableBitConsumer(BitVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java index e57ecdf91707a..a4fc789494e0f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.Blob; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.VarBinaryVector; /** - * Consumer which consume blob type values from {@link ResultSet}. - * Write the data to {@link VarBinaryVector}. + * Consumer which consume blob type values from {@link ResultSet}. Write the data to {@link + * VarBinaryVector}. */ public class BlobConsumer extends BaseConsumer { @@ -34,17 +32,12 @@ public class BlobConsumer extends BaseConsumer { private final boolean nullable; - /** - * Creates a consumer for {@link VarBinaryVector}. - */ - public static BlobConsumer createConsumer( - BinaryConsumer delegate, int index, boolean nullable) { + /** Creates a consumer for {@link VarBinaryVector}. */ + public static BlobConsumer createConsumer(BinaryConsumer delegate, int index, boolean nullable) { return new BlobConsumer(delegate, index, nullable); } - /** - * Instantiate a BlobConsumer. - */ + /** Instantiate a BlobConsumer. */ public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) { super(null, index); this.delegate = delegate; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java index 3ed0c2d3cbb2f..7deba1cbffebd 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java @@ -14,28 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.nio.charset.StandardCharsets; import java.sql.Clob; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.VarCharVector; /** - * Consumer which consume clob type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + * Consumer which consume clob type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarCharVector}. */ public abstract class ClobConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link VarCharVector}. - */ + /** Creates a consumer for {@link VarCharVector}. */ public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { if (nullable) { return new NullableClobConsumer(vector, index); @@ -46,9 +42,7 @@ public static ClobConsumer createConsumer(VarCharVector vector, int index, boole private static final int BUFFER_SIZE = 256; - /** - * Instantiate a ClobConsumer. 
- */ + /** Instantiate a ClobConsumer. */ public ClobConsumer(VarCharVector vector, int index) { super(vector, index); if (vector != null) { @@ -63,14 +57,10 @@ public void resetValueVector(VarCharVector vector) { this.currentIndex = 0; } - /** - * Nullable consumer for clob data. - */ + /** Nullable consumer for clob data. */ static class NullableClobConsumer extends ClobConsumer { - - /** - * Instantiate a ClobConsumer. - */ + + /** Instantiate a ClobConsumer. */ public NullableClobConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -97,11 +87,11 @@ public void consume(ResultSet resultSet) throws SQLException { vector.reallocDataBuffer(); } MemoryUtil.UNSAFE.copyMemory( - bytes, - MemoryUtil.BYTE_ARRAY_BASE_OFFSET, - null, - dataBuffer.memoryAddress() + startIndex + totalBytes, - bytes.length); + bytes, + MemoryUtil.BYTE_ARRAY_BASE_OFFSET, + null, + dataBuffer.memoryAddress() + startIndex + totalBytes, + bytes.length); totalBytes += bytes.length; read += readSize; @@ -115,14 +105,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for clob data. - */ + /** Non-nullable consumer for clob data. */ static class NonNullableClobConsumer extends ClobConsumer { - /** - * Instantiate a ClobConsumer. - */ + /** Instantiate a ClobConsumer. */ public NonNullableClobConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -148,11 +134,11 @@ public void consume(ResultSet resultSet) throws SQLException { vector.reallocDataBuffer(); } MemoryUtil.UNSAFE.copyMemory( - bytes, - MemoryUtil.BYTE_ARRAY_BASE_OFFSET, - null, - dataBuffer.memoryAddress() + startIndex + totalBytes, - bytes.length); + bytes, + MemoryUtil.BYTE_ARRAY_BASE_OFFSET, + null, + dataBuffer.memoryAddress() + startIndex + totalBytes, + bytes.length); totalBytes += bytes.length; read += readSize; @@ -161,7 +147,7 @@ public void consume(ResultSet resultSet) throws SQLException { BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } - + currentIndex++; } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java index e6d780956d538..2366116fd0d18 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; import org.apache.arrow.util.AutoCloseables; @@ -28,17 +26,12 @@ import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * Composite consumer which hold all consumers. - * It manages the consume and cleanup process. - */ +/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ public class CompositeJdbcConsumer implements JdbcConsumer { private final JdbcConsumer[] consumers; - /** - * Construct an instance. - */ + /** Construct an instance. 
*/ public CompositeJdbcConsumer(JdbcConsumer[] consumers) { this.consumers = consumers; } @@ -51,9 +44,11 @@ public void consume(ResultSet rs) throws SQLException, IOException { } catch (Exception e) { if (consumers[i] instanceof BaseConsumer) { BaseConsumer consumer = (BaseConsumer) consumers[i]; - JdbcFieldInfo fieldInfo = new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); + JdbcFieldInfo fieldInfo = + new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); ArrowType arrowType = consumer.vector.getMinorType().getType(); - throw new JdbcConsumerException("Exception while consuming JDBC value", e, fieldInfo, arrowType); + throw new JdbcConsumerException( + "Exception while consuming JDBC value", e, fieldInfo, arrowType); } else { throw e; } @@ -70,17 +65,12 @@ public void close() { } catch (Exception e) { throw new RuntimeException("Error occurred while releasing resources.", e); } - } @Override - public void resetValueVector(ValueVector vector) { + public void resetValueVector(ValueVector vector) {} - } - - /** - * Reset inner consumers through vectors in the vector schema root. - */ + /** Reset inner consumers through vectors in the vector schema root. */ public void resetVectorSchemaRoot(VectorSchemaRoot root) { assert root.getFieldVectors().size() == consumers.length; for (int i = 0; i < consumers.length; i++) { @@ -88,4 +78,3 @@ public void resetVectorSchemaRoot(VectorSchemaRoot root) { } } } - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java index b9b83daccc25a..c271b900682a1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.Date; @@ -22,19 +21,16 @@ import java.sql.SQLException; import java.util.Calendar; import java.util.concurrent.TimeUnit; - import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; /** - * Consumer which consume date type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.DateDayVector}. + * Consumer which consume date type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.DateDayVector}. */ public class DateConsumer { - /** - * Creates a consumer for {@link DateMilliVector}. - */ + /** Creates a consumer for {@link DateMilliVector}. */ public static JdbcConsumer createConsumer( DateDayVector vector, int index, boolean nullable, Calendar calendar) { if (nullable) { @@ -44,23 +40,17 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for date. - */ + /** Nullable consumer for date. */ static class NullableDateConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. 
*/ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -68,8 +58,10 @@ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) @Override public void consume(ResultSet resultSet) throws SQLException { - Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : - resultSet.getDate(columnIndexInResultSet, calendar); + Date date = + calendar == null + ? resultSet.getDate(columnIndexInResultSet) + : resultSet.getDate(columnIndexInResultSet, calendar); if (!resultSet.wasNull()) { // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. @@ -79,23 +71,17 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for date. - */ + /** Non-nullable consumer for date. */ static class NonNullableDateConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NonNullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -103,8 +89,10 @@ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calenda @Override public void consume(ResultSet resultSet) throws SQLException { - Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : - resultSet.getDate(columnIndexInResultSet, calendar); + Date date = + calendar == null + ? resultSet.getDate(columnIndexInResultSet) + : resultSet.getDate(columnIndexInResultSet, calendar); // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); @@ -112,5 +100,3 @@ public void consume(ResultSet resultSet) throws SQLException { } } } - - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java index ad00d9b5a2492..eb33ea5038b98 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.math.BigDecimal; import java.math.RoundingMode; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Decimal256Vector; /** - * Consumer which consume decimal type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Decimal256Vector}. + * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Decimal256Vector}. */ public abstract class Decimal256Consumer extends BaseConsumer { private final RoundingMode bigDecimalRoundingMode; @@ -36,7 +34,7 @@ public abstract class Decimal256Consumer extends BaseConsumer * Constructs a new consumer. 
* * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. + * @param index the column id for the consumer. */ public Decimal256Consumer(Decimal256Vector vector, int index) { this(vector, index, null); @@ -44,27 +42,23 @@ public Decimal256Consumer(Decimal256Vector vector, int index) { /** * Constructs a new consumer, with optional coercibility. + * * @param vector the underlying vector for the consumer. * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that - * of the target vector. Set to null to retain strict matching behavior (scale of - * source and target vector must match exactly). + * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does + * not match that of the target vector. Set to null to retain strict matching behavior (scale + * of source and target vector must match exactly). */ - public Decimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + public Decimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index); this.bigDecimalRoundingMode = bigDecimalRoundingMode; this.scale = vector.getScale(); } - /** - * Creates a consumer for {@link Decimal256Vector}. - */ + /** Creates a consumer for {@link Decimal256Vector}. */ public static JdbcConsumer createConsumer( - Decimal256Vector vector, - int index, - boolean nullable, - RoundingMode bigDecimalRoundingMode - ) { + Decimal256Vector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { if (nullable) { return new NullableDecimal256Consumer(vector, index, bigDecimalRoundingMode); } else { @@ -79,16 +73,12 @@ protected void set(BigDecimal value) { vector.set(currentIndex, value); } - - /** - * Consumer for nullable decimal. - */ + /** Consumer for nullable decimal. */ static class NullableDecimal256Consumer extends Decimal256Consumer { - /** - * Instantiate a Decimal256Consumer. - */ - public NullableDecimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a Decimal256Consumer. */ + public NullableDecimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } @@ -104,15 +94,12 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Consumer for non-nullable decimal. - */ + /** Consumer for non-nullable decimal. */ static class NonNullableDecimal256Consumer extends Decimal256Consumer { - /** - * Instantiate a Decimal256Consumer. - */ - public NonNullableDecimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a Decimal256Consumer. 
*/ + public NonNullableDecimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java index bed96dda8b65d..05b4d27de1022 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.math.BigDecimal; import java.math.RoundingMode; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.DecimalVector; /** - * Consumer which consume decimal type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.DecimalVector}. + * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.DecimalVector}. */ public abstract class DecimalConsumer extends BaseConsumer { private final RoundingMode bigDecimalRoundingMode; @@ -36,7 +34,7 @@ public abstract class DecimalConsumer extends BaseConsumer { * Constructs a new consumer. * * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. + * @param index the column id for the consumer. */ public DecimalConsumer(DecimalVector vector, int index) { this(vector, index, null); @@ -44,11 +42,12 @@ public DecimalConsumer(DecimalVector vector, int index) { /** * Constructs a new consumer, with optional coercibility. + * * @param vector the underlying vector for the consumer. * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that - * of the target vector. Set to null to retain strict matching behavior (scale of - * source and target vector must match exactly). + * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does + * not match that of the target vector. Set to null to retain strict matching behavior (scale + * of source and target vector must match exactly). */ public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index); @@ -56,15 +55,9 @@ public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalR this.scale = vector.getScale(); } - /** - * Creates a consumer for {@link DecimalVector}. - */ + /** Creates a consumer for {@link DecimalVector}. */ public static JdbcConsumer createConsumer( - DecimalVector vector, - int index, - boolean nullable, - RoundingMode bigDecimalRoundingMode - ) { + DecimalVector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { if (nullable) { return new NullableDecimalConsumer(vector, index, bigDecimalRoundingMode); } else { @@ -79,16 +72,12 @@ protected void set(BigDecimal value) { vector.set(currentIndex, value); } - - /** - * Consumer for nullable decimal. - */ + /** Consumer for nullable decimal. */ static class NullableDecimalConsumer extends DecimalConsumer { - /** - * Instantiate a DecimalConsumer. 
- */ - public NullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a DecimalConsumer. */ + public NullableDecimalConsumer( + DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } @@ -104,15 +93,12 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Consumer for non-nullable decimal. - */ + /** Consumer for non-nullable decimal. */ static class NonNullableDecimalConsumer extends DecimalConsumer { - /** - * Instantiate a DecimalConsumer. - */ - public NonNullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a DecimalConsumer. */ + public NonNullableDecimalConsumer( + DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java index e3db95d1535af..9cd31e9245472 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Float8Vector; /** - * Consumer which consume double type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Float8Vector}. + * Consumer which consume double type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Float8Vector}. */ public class DoubleConsumer { - /** - * Creates a consumer for {@link Float8Vector}. - */ - public static JdbcConsumer createConsumer(Float8Vector vector, int index, boolean nullable) { + /** Creates a consumer for {@link Float8Vector}. */ + public static JdbcConsumer createConsumer( + Float8Vector vector, int index, boolean nullable) { if (nullable) { return new NullableDoubleConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(Float8Vector vector, int } } - /** - * Nullable double consumer. - */ + /** Nullable double consumer. */ static class NullableDoubleConsumer extends BaseConsumer { - /** - * Instantiate a DoubleConsumer. - */ + /** Instantiate a DoubleConsumer. */ public NullableDoubleConsumer(Float8Vector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable double consumer. - */ + /** Non-nullable double consumer. */ static class NonNullableDoubleConsumer extends BaseConsumer { - /** - * Instantiate a DoubleConsumer. - */ + /** Instantiate a DoubleConsumer. 
*/ public NonNullableDoubleConsumer(Float8Vector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java index 830348fe94c6b..0f16a68da883e 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Float4Vector; /** - * Consumer which consume float type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Float4Vector}. + * Consumer which consume float type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Float4Vector}. */ public class FloatConsumer { - /** - * Creates a consumer for {@link Float4Vector}. - */ - public static JdbcConsumer createConsumer(Float4Vector vector, int index, boolean nullable) { + /** Creates a consumer for {@link Float4Vector}. */ + public static JdbcConsumer createConsumer( + Float4Vector vector, int index, boolean nullable) { if (nullable) { return new NullableFloatConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(Float4Vector vector, int } } - /** - * Nullable float consumer. - */ + /** Nullable float consumer. */ static class NullableFloatConsumer extends BaseConsumer { - /** - * Instantiate a FloatConsumer. - */ + /** Instantiate a FloatConsumer. */ public NullableFloatConsumer(Float4Vector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable float consumer. - */ + /** Non-nullable float consumer. */ static class NonNullableFloatConsumer extends BaseConsumer { - /** - * Instantiate a FloatConsumer. - */ + /** Instantiate a FloatConsumer. */ public NonNullableFloatConsumer(Float4Vector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java index 4e537d682ff7c..302be697fbf07 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.IntVector; /** - * Consumer which consume int type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.IntVector}. + * Consumer which consume int type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.IntVector}. */ public class IntConsumer { - /** - * Creates a consumer for {@link IntVector}. - */ - public static JdbcConsumer createConsumer(IntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link IntVector}. 
*/ + public static JdbcConsumer createConsumer( + IntVector vector, int index, boolean nullable) { if (nullable) { return new NullableIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(IntVector vector, int index } } - /** - * Nullable consumer for int. - */ + /** Nullable consumer for int. */ static class NullableIntConsumer extends BaseConsumer { - /** - * Instantiate a IntConsumer. - */ + /** Instantiate a IntConsumer. */ public NullableIntConsumer(IntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for int. - */ + /** Non-nullable consumer for int. */ static class NonNullableIntConsumer extends BaseConsumer { - /** - * Instantiate a IntConsumer. - */ + /** Instantiate a IntConsumer. */ public NonNullableIntConsumer(IntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java index 7c867c7ad64d3..1ec6ad7eb9266 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java @@ -14,34 +14,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.ValueVector; /** * An abstraction that is used to consume values from {@link ResultSet}. + * * @param The vector within consumer or its delegate, used for partially consume purpose. */ public interface JdbcConsumer extends AutoCloseable { - /** - * Consume a specific type value from {@link ResultSet} and write it to vector. - */ + /** Consume a specific type value from {@link ResultSet} and write it to vector. */ void consume(ResultSet resultSet) throws SQLException, IOException; - /** - * Close this consumer, do some clean work such as clear reuse ArrowBuf. - */ + /** Close this consumer, do some clean work such as clear reuse ArrowBuf. */ @Override void close() throws Exception; - /** - * Reset the vector within consumer for partial read purpose. - */ + /** Reset the vector within consumer for partial read purpose. */ void resetValueVector(T vector); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java index 07a071bfc096e..6223650ff2c04 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java @@ -14,46 +14,39 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.consumer; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Map; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.util.ObjectMapperFactory; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - /** - * Consumer which consume map type values from {@link ResultSet}. - * Write the data into {@link org.apache.arrow.vector.complex.MapVector}. + * Consumer which consume map type values from {@link ResultSet}. Write the data into {@link + * org.apache.arrow.vector.complex.MapVector}. */ public class MapConsumer extends BaseConsumer { - private final UnionMapWriter writer; private final ObjectMapper objectMapper = ObjectMapperFactory.newObjectMapper(); - private final TypeReference> typeReference = new TypeReference>() {}; + private final TypeReference> typeReference = + new TypeReference>() {}; private int currentRow; - /** - * Creates a consumer for {@link MapVector}. - */ + /** Creates a consumer for {@link MapVector}. */ public static MapConsumer createConsumer(MapVector mapVector, int index, boolean nullable) { return new MapConsumer(mapVector, index); } - /** - * Instantiate a MapConsumer. - */ + /** Instantiate a MapConsumer. */ public MapConsumer(MapVector vector, int index) { super(vector, index); writer = vector.getWriter(); @@ -69,7 +62,8 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } else if (map instanceof Map) { writeJavaMapIntoVector((Map) map); } else { - throw new IllegalArgumentException("Unknown type of map type column from JDBC " + map.getClass().getName()); + throw new IllegalArgumentException( + "Unknown type of map type column from JDBC " + map.getClass().getName()); } } else { writer.writeNull(); @@ -79,26 +73,25 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { private void writeJavaMapIntoVector(Map map) { BufferAllocator allocator = vector.getAllocator(); writer.startMap(); - map.forEach((key, value) -> { - byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; - try ( - ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = valueBytes != null ? allocator.buffer(valueBytes.length) : null; - ) { - writer.startEntry(); - keyBuf.writeBytes(keyBytes); - writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - if (valueBytes != null) { - valueBuf.writeBytes(valueBytes); - writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - } else { - writer.value().varChar().writeNull(); - } - writer.endEntry(); - } - }); + map.forEach( + (key, value) -> { + byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); + byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; + try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); + ArrowBuf valueBuf = + valueBytes != null ? 
allocator.buffer(valueBytes.length) : null; ) { + writer.startEntry(); + keyBuf.writeBytes(keyBytes); + writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); + if (valueBytes != null) { + valueBuf.writeBytes(valueBytes); + writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); + } else { + writer.value().varChar().writeNull(); + } + writer.endEntry(); + } + }); writer.endMap(); } } - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java index a79a029f45d06..9d7a760f697a7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java @@ -14,17 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.NullVector; /** - * Consumer which consume null type values from ResultSet. - * Corresponding to {@link org.apache.arrow.vector.NullVector}. + * Consumer which consume null type values from ResultSet. Corresponding to {@link + * org.apache.arrow.vector.NullVector}. */ public class NullConsumer extends BaseConsumer { @@ -33,6 +31,5 @@ public NullConsumer(NullVector vector) { } @Override - public void consume(ResultSet resultSet) throws SQLException { - } + public void consume(ResultSet resultSet) throws SQLException {} } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java index 2edb3605b177a..9f45c077ed0a8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.SmallIntVector; /** - * Consumer which consume smallInt type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.SmallIntVector}. + * Consumer which consume smallInt type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.SmallIntVector}. */ public class SmallIntConsumer { - /** - * Creates a consumer for {@link SmallIntVector}. - */ - public static BaseConsumer createConsumer(SmallIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link SmallIntVector}. */ + public static BaseConsumer createConsumer( + SmallIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableSmallIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static BaseConsumer createConsumer(SmallIntVector vector, } } - /** - * Nullable consumer for small int. - */ + /** Nullable consumer for small int. */ static class NullableSmallIntConsumer extends BaseConsumer { - /** - * Instantiate a SmallIntConsumer. - */ + /** Instantiate a SmallIntConsumer. 
*/ public NullableSmallIntConsumer(SmallIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for small int. - */ + /** Non-nullable consumer for small int. */ static class NonNullableSmallIntConsumer extends BaseConsumer { - /** - * Instantiate a SmallIntConsumer. - */ + /** Instantiate a SmallIntConsumer. */ public NonNullableSmallIntConsumer(SmallIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java index 4fa15ad79039e..bee19d0e4deab 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Time; import java.util.Calendar; - import org.apache.arrow.vector.TimeMilliVector; /** - * Consumer which consume time type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.TimeMilliVector}. + * Consumer which consume time type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.TimeMilliVector}. */ public abstract class TimeConsumer { - /** - * Creates a consumer for {@link TimeMilliVector}. - */ + /** Creates a consumer for {@link TimeMilliVector}. */ public static JdbcConsumer createConsumer( - TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { + TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { if (nullable) { return new NullableTimeConsumer(vector, index, calendar); } else { @@ -42,23 +38,17 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for {@link TimeMilliVector}. - */ + /** Nullable consumer for {@link TimeMilliVector}. */ static class NullableTimeConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -66,8 +56,10 @@ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar @Override public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); + Time time = + calendar == null + ? resultSet.getTime(columnIndexInResultSet) + : resultSet.getTime(columnIndexInResultSet, calendar); if (!resultSet.wasNull()) { // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. @@ -77,23 +69,17 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for {@link TimeMilliVector}. - */ + /** Non-nullable consumer for {@link TimeMilliVector}. 
*/ static class NonNullableTimeConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NonNullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -101,8 +87,10 @@ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calen @Override public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); + Time time = + calendar == null + ? resultSet.getTime(columnIndexInResultSet) + : resultSet.getTime(columnIndexInResultSet, calendar); // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. vector.set(currentIndex, (int) time.getTime()); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java index 3351e7e78a7e4..cc6269c21f04a 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; - import org.apache.arrow.vector.TimeStampMilliVector; /** - * Consumer which consume timestamp type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliVector}. + * Consumer which consume timestamp type values from {@link ResultSet}. Write the data to {@link + * TimeStampMilliVector}. */ public abstract class TimestampConsumer { - /** - * Creates a consumer for {@link TimeStampMilliVector}. - */ + /** Creates a consumer for {@link TimeStampMilliVector}. */ public static JdbcConsumer createConsumer( - TimeStampMilliVector vector, int index, boolean nullable) { + TimeStampMilliVector vector, int index, boolean nullable) { if (nullable) { return new NullableTimestampConsumer(vector, index); } else { @@ -41,14 +37,10 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for timestamp. - */ + /** Nullable consumer for timestamp. */ static class NullableTimestampConsumer extends BaseConsumer { - /** - * Instantiate a TimestampConsumer. - */ + /** Instantiate a TimestampConsumer. */ public NullableTimestampConsumer(TimeStampMilliVector vector, int index) { super(vector, index); } @@ -65,14 +57,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for timestamp. - */ + /** Non-nullable consumer for timestamp. */ static class NonNullableTimestampConsumer extends BaseConsumer { - /** - * Instantiate a TimestampConsumer. - */ + /** Instantiate a TimestampConsumer. 
*/ public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java index f08671f0be61a..3e4911ac1a161 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; import java.util.Calendar; - import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.TimeStampMilliTZVector; /** - * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliTZVector}. + * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. Write the + * data to {@link TimeStampMilliTZVector}. */ public class TimestampTZConsumer { - /** - * Creates a consumer for {@link TimeStampMilliTZVector}. - */ + /** Creates a consumer for {@link TimeStampMilliTZVector}. */ public static JdbcConsumer createConsumer( TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) { Preconditions.checkArgument(calendar != null, "Calendar cannot be null"); @@ -43,17 +39,14 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for timestamp (with time zone). - */ + /** Nullable consumer for timestamp (with time zone). */ static class NullableTimestampTZConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimestampConsumer. - */ - public NullableTimestampTZConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + /** Instantiate a TimestampConsumer. */ + public NullableTimestampTZConsumer( + TimeStampMilliTZVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; } @@ -70,17 +63,14 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for timestamp (with time zone). - */ + /** Non-nullable consumer for timestamp (with time zone). */ static class NonNullableTimestampConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimestampConsumer. - */ - public NonNullableTimestampConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + /** Instantiate a TimestampConsumer. */ + public NonNullableTimestampConsumer( + TimeStampMilliTZVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java index 40cf087a5ec66..b75b87dd81cc4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.TinyIntVector; /** - * Consumer which consume tinyInt type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.TinyIntVector}. + * Consumer which consume tinyInt type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.TinyIntVector}. */ public abstract class TinyIntConsumer { - /** - * Creates a consumer for {@link TinyIntVector}. - */ - public static JdbcConsumer createConsumer(TinyIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link TinyIntVector}. */ + public static JdbcConsumer createConsumer( + TinyIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableTinyIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(TinyIntVector vector, i } } - /** - * Nullable consumer for tiny int. - */ + /** Nullable consumer for tiny int. */ static class NullableTinyIntConsumer extends BaseConsumer { - /** - * Instantiate a TinyIntConsumer. - */ + /** Instantiate a TinyIntConsumer. */ public NullableTinyIntConsumer(TinyIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for tiny int. - */ + /** Non-nullable consumer for tiny int. */ static class NonNullableTinyIntConsumer extends BaseConsumer { - /** - * Instantiate a TinyIntConsumer. - */ + /** Instantiate a TinyIntConsumer. */ public NonNullableTinyIntConsumer(TinyIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java index 05333715b8c2f..c81c4f0db124b 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java @@ -14,25 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.VarCharVector; /** - * Consumer which consume varchar type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + * Consumer which consume varchar type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarCharVector}. */ public abstract class VarCharConsumer { - /** - * Creates a consumer for {@link VarCharVector}. - */ - public static JdbcConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link VarCharVector}. */ + public static JdbcConsumer createConsumer( + VarCharVector vector, int index, boolean nullable) { if (nullable) { return new NullableVarCharConsumer(vector, index); } else { @@ -40,14 +37,10 @@ public static JdbcConsumer createConsumer(VarCharVector vector, i } } - /** - * Nullable consumer for var char. - */ + /** Nullable consumer for var char. */ static class NullableVarCharConsumer extends BaseConsumer { - /** - * Instantiate a VarCharConsumer. - */ + /** Instantiate a VarCharConsumer. 
*/ public NullableVarCharConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for var char. - */ + /** Non-nullable consumer for var char. */ static class NonNullableVarCharConsumer extends BaseConsumer { - /** - * Instantiate a VarCharConsumer. - */ + /** Instantiate a VarCharConsumer. */ public NonNullableVarCharConsumer(VarCharVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java index b235be173cf10..04e26d640c04d 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java @@ -14,15 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer.exceptions; import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.vector.types.pojo.ArrowType; /** - * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and the - * ArrowType for the corresponding vector for easier debugging. + * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and + * the ArrowType for the corresponding vector for easier debugging. */ public class JdbcConsumerException extends RuntimeException { final JdbcFieldInfo fieldInfo; @@ -31,12 +30,13 @@ public class JdbcConsumerException extends RuntimeException { /** * Construct JdbcConsumerException with all fields. * - * @param message error message - * @param cause original exception + * @param message error message + * @param cause original exception * @param fieldInfo JdbcFieldInfo for the column * @param arrowType ArrowType for the corresponding vector */ - public JdbcConsumerException(String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { + public JdbcConsumerException( + String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { super(message, cause); this.fieldInfo = fieldInfo; this.arrowType = arrowType; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java index 88a66a31aa2c9..1ad4492b35d18 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; @@ -30,7 +31,6 @@ import java.util.Map; import java.util.TimeZone; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.Preconditions; @@ -41,12 +41,7 @@ import org.junit.Before; import org.junit.Test; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; - -/** - * Class to abstract out some common test functionality for testing JDBC to Arrow. - */ +/** Class to abstract out some common test functionality for testing JDBC to Arrow. */ public abstract class AbstractJdbcToArrowTest { protected static final String BIGINT = "BIGINT_FIELD5"; @@ -69,7 +64,8 @@ public abstract class AbstractJdbcToArrowTest { protected static final String TINYINT = "TINYINT_FIELD3"; protected static final String VARCHAR = "VARCHAR_FIELD13"; protected static final String NULL = "NULL_FIELD18"; - protected static final Map ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP = new HashMap<>(); + protected static final Map ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP = + new HashMap<>(); static { ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP.put(LIST, new JdbcFieldInfo(Types.INTEGER)); @@ -86,12 +82,12 @@ public abstract class AbstractJdbcToArrowTest { * @return Table object * @throws IOException on error */ - protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException { - return new ObjectMapper(new YAMLFactory()).readValue( - clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); + protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) + throws IOException { + return new ObjectMapper(new YAMLFactory()) + .readValue(clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); } - /** * This method creates Connection object and DB table and also populate data into table for test. * @@ -105,7 +101,7 @@ public void setUp() throws SQLException, ClassNotFoundException { String driver = "org.h2.Driver"; Class.forName(driver); conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement();) { + try (Statement stmt = conn.createStatement(); ) { stmt.executeUpdate(table.getCreate()); for (String insert : table.getData()) { stmt.executeUpdate(insert); @@ -136,12 +132,13 @@ public void destroy() throws SQLException { * @throws ClassNotFoundException on error * @throws IOException on error */ - public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss) + public static Object[][] prepareTestData( + String[] testFiles, @SuppressWarnings("rawtypes") Class clss) throws SQLException, ClassNotFoundException, IOException { Object[][] tableArr = new Object[testFiles.length][]; int i = 0; for (String testFile : testFiles) { - tableArr[i++] = new Object[]{getTable(testFile, clss)}; + tableArr[i++] = new Object[] {getTable(testFile, clss)}; } return tableArr; } @@ -159,86 +156,90 @@ public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings(" * Abstract method to implement logic to assert test various datatype values. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. 
- * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ public abstract void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector); /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. - * This method uses the default Calendar instance with default TimeZone and Locale as returned by the JVM. - * If you wish to use specific TimeZone or Locale for any Date, Time and Timestamp datasets, you may want use - * overloaded API that taken Calendar object instance. + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow + * objects. This method uses the default Calendar instance with default TimeZone and Locale as + * returned by the JVM. If you wish to use specific TimeZone or Locale for any Date, Time and + * Timestamp datasets, you may want use overloaded API that taken Calendar object instance. * - * This method is for test only. + *

    This method is for test only. * - * @param connection Database connection to be used. This method will not close the passed connection object. Since - * the caller has passed the connection object it's the responsibility of the caller to close or - * return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param allocator Memory allocator + * @param connection Database connection to be used. This method will not close the passed + * connection object. Since the caller has passed the connection object it's the + * responsibility of the caller to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param allocator Memory allocator * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as - * ResultSet and Statement objects. + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources + * opened such as ResultSet and Statement objects. */ public VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(connection, query, config); } /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * - * @param connection Database connection to be used. This method will not close the passed connection object. Since - * the caller has passed the connection object it's the responsibility of the caller to close or - * return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param allocator Memory allocator - * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets. + * @param connection Database connection to be used. This method will not close the passed + * connection object. Since the caller has passed the connection object it's the + * responsibility of the caller to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param allocator Memory allocator + * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets. * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as - * ResultSet and Statement objects. + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources + * opened such as ResultSet and Statement objects. */ public VectorSchemaRoot sqlToArrow( - Connection connection, - String query, - BufferAllocator allocator, - Calendar calendar) throws SQLException, IOException { + Connection connection, String query, BufferAllocator allocator, Calendar calendar) + throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); Preconditions.checkNotNull(calendar, "Calendar object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(connection, query, config); } /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *
<p>
    This method is for test only. * - * @param connection Database connection to be used. This method will not close the passed connection object. - * Since the caller has passed the connection object it's the responsibility of the caller - * to close or return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param config Configuration + * @param connection Database connection to be used. This method will not close the passed + * connection object. Since the caller has passed the connection object it's the + * responsibility of the caller to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param config Configuration * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as - * ResultSet and Statement objects. + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources + * opened such as ResultSet and Statement objects. */ - public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) + public static VectorSchemaRoot sqlToArrow( + Connection connection, String query, JdbcToArrowConfig config) throws SQLException, IOException { Preconditions.checkNotNull(connection, "JDBC connection object cannot be null"); - Preconditions.checkArgument(query != null && query.length() > 0, "SQL query cannot be null or empty"); + Preconditions.checkArgument( + query != null && query.length() > 0, "SQL query cannot be null or empty"); try (Statement stmt = connection.createStatement()) { return sqlToArrow(stmt.executeQuery(query), config); @@ -246,10 +247,10 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This - * method uses the default RootAllocator and Calendar object. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. This method uses the default RootAllocator and Calendar object. * - * This method is for test only. + *
<p>
    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database * @return Arrow Data Objects {@link VectorSchemaRoot} @@ -262,9 +263,10 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLExcepti } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *
<p>
    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator @@ -275,62 +277,69 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator a throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *
<p>
    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none. + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null + * if none. * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException { + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) + throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *
<p>
    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator to use. - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none. + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null + * if none. * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ public static VectorSchemaRoot sqlToArrow( - ResultSet resultSet, - BufferAllocator allocator, - Calendar calendar) + ResultSet resultSet, BufferAllocator allocator, Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *
<p>
    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. + * @param config Configuration of the conversion from JDBC to Arrow. * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ @@ -339,8 +348,10 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); - VectorSchemaRoot root = VectorSchemaRoot.create( - JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator()); + VectorSchemaRoot root = + VectorSchemaRoot.create( + JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), + config.getAllocator()); if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); } @@ -350,12 +361,14 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig /** * Register MAP_FIELD20 as ArrowType.Map - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none. + * + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null + * if none. * @param rsmd ResultSetMetaData to lookup column name from result set metadata * @return typeConverter instance with mapping column to Map type */ protected Function jdbcToArrowTypeConverter( - Calendar calendar, ResultSetMetaData rsmd) { + Calendar calendar, ResultSetMetaData rsmd) { return (jdbcFieldInfo) -> { String columnLabel = null; try { @@ -377,5 +390,4 @@ protected Function jdbcToArrowTypeConverter( protected ResultSetMetaData getQueryMetaData(String query) throws SQLException { return conn.createStatement().executeQuery(query).getMetaData(); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java index b1a8b8f226753..cd6a78eae2b1a 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.*; import java.sql.Types; - import org.junit.Test; public class JdbcFieldInfoTest { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java index a94f0aa454f1d..a05130f18e4ac 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
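As a rough usage sketch of the config-driven conversion that the sqlToArrow helpers above funnel into: the class name JdbcToArrowSketch, the method schemaRootFor, and the caller-supplied connection, query, and allocator are illustrative assumptions; only the calls that actually appear in this patch (JdbcToArrowConfigBuilder, JdbcToArrowUtils.getUtcCalendar, JdbcToArrowUtils.jdbcToArrowSchema, VectorSchemaRoot.create, JdbcToArrowConfig.getAllocator) are used.

package org.apache.arrow.adapter.jdbc;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

public class JdbcToArrowSketch {

  // Illustrative only: "connection", "query" and "allocator" are assumed to be managed
  // by the caller, matching the ownership contract described in the Javadoc above.
  static VectorSchemaRoot schemaRootFor(
      Connection connection, String query, BufferAllocator allocator) throws SQLException {
    JdbcToArrowConfig config =
        new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()).build();
    try (Statement stmt = connection.createStatement();
        ResultSet rs = stmt.executeQuery(query)) {
      // Same pattern as the helper in this patch: derive the Arrow schema from the JDBC
      // metadata, then create an (initially empty) root on the configured allocator.
      // Copying the result-set rows into the vectors is a separate step not shown here.
      return VectorSchemaRoot.create(
          JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), config), config.getAllocator());
    }
  }
}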
*/ - package org.apache.arrow.adapter.jdbc; import static org.assertj.core.api.Assertions.assertThat; @@ -32,7 +31,6 @@ import java.util.List; import java.util.Map; import java.util.function.BiConsumer; - import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -106,7 +104,7 @@ void bindOrder() throws SQLException { Field.nullable("ints1", new ArrowType.Int(32, true)), Field.nullable("ints2", new ArrowType.Int(32, true)))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root) .bind(/*parameterIndex=*/ 1, /*columnIndex=*/ 2) @@ -161,17 +159,17 @@ void bindOrder() throws SQLException { @Test void customBinder() throws SQLException { final Schema schema = - new Schema(Collections.singletonList( - Field.nullable("ints0", new ArrowType.Int(32, true)))); + new Schema(Collections.singletonList(Field.nullable("ints0", new ArrowType.Int(32, true)))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root) .bind( /*parameterIndex=*/ 1, new ColumnBinder() { private final IntVector vector = (IntVector) root.getVector(0); + @Override public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { @@ -212,7 +210,9 @@ public FieldVector getVector() { @Test void bool() throws SQLException { - testSimpleType(ArrowType.Bool.INSTANCE, Types.BOOLEAN, + testSimpleType( + ArrowType.Bool.INSTANCE, + Types.BOOLEAN, (BitVector vector, Integer index, Boolean value) -> vector.setSafe(index, value ? 
1 : 0), BitVector::setNull, Arrays.asList(true, false, true)); @@ -220,53 +220,76 @@ void bool() throws SQLException { @Test void int8() throws SQLException { - testSimpleType(new ArrowType.Int(8, true), Types.TINYINT, - TinyIntVector::setSafe, TinyIntVector::setNull, + testSimpleType( + new ArrowType.Int(8, true), + Types.TINYINT, + TinyIntVector::setSafe, + TinyIntVector::setNull, Arrays.asList(Byte.MAX_VALUE, Byte.MIN_VALUE, (byte) 42)); } @Test void int16() throws SQLException { - testSimpleType(new ArrowType.Int(16, true), Types.SMALLINT, - SmallIntVector::setSafe, SmallIntVector::setNull, + testSimpleType( + new ArrowType.Int(16, true), + Types.SMALLINT, + SmallIntVector::setSafe, + SmallIntVector::setNull, Arrays.asList(Short.MAX_VALUE, Short.MIN_VALUE, (short) 42)); } @Test void int32() throws SQLException { - testSimpleType(new ArrowType.Int(32, true), Types.INTEGER, - IntVector::setSafe, IntVector::setNull, + testSimpleType( + new ArrowType.Int(32, true), + Types.INTEGER, + IntVector::setSafe, + IntVector::setNull, Arrays.asList(Integer.MAX_VALUE, Integer.MIN_VALUE, 42)); } @Test void int64() throws SQLException { - testSimpleType(new ArrowType.Int(64, true), Types.BIGINT, - BigIntVector::setSafe, BigIntVector::setNull, + testSimpleType( + new ArrowType.Int(64, true), + Types.BIGINT, + BigIntVector::setSafe, + BigIntVector::setNull, Arrays.asList(Long.MAX_VALUE, Long.MIN_VALUE, 42L)); } @Test void float32() throws SQLException { - testSimpleType(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), Types.REAL, - Float4Vector::setSafe, Float4Vector::setNull, + testSimpleType( + new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), + Types.REAL, + Float4Vector::setSafe, + Float4Vector::setNull, Arrays.asList(Float.MIN_VALUE, Float.MAX_VALUE, Float.POSITIVE_INFINITY)); } @Test void float64() throws SQLException { - testSimpleType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), Types.DOUBLE, - Float8Vector::setSafe, Float8Vector::setNull, + testSimpleType( + new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), + Types.DOUBLE, + Float8Vector::setSafe, + Float8Vector::setNull, Arrays.asList(Double.MIN_VALUE, Double.MAX_VALUE, Double.POSITIVE_INFINITY)); } @Test void time32() throws SQLException { - testSimpleType(new ArrowType.Time(TimeUnit.SECOND, 32), Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() / 1_000)), + testSimpleType( + new ArrowType.Time(TimeUnit.SECOND, 32), + Types.TIME, + (valueVectors, index, value) -> + valueVectors.setSafe(index, (int) (value.getTime() / 1_000)), TimeSecVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - testSimpleType(new ArrowType.Time(TimeUnit.MILLISECOND, 32), Types.TIME, + testSimpleType( + new ArrowType.Time(TimeUnit.MILLISECOND, 32), + Types.TIME, (valueVectors, index, value) -> valueVectors.setSafe(index, (int) value.getTime()), TimeMilliVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); @@ -274,11 +297,15 @@ void time32() throws SQLException { @Test void time64() throws SQLException { - testSimpleType(new ArrowType.Time(TimeUnit.MICROSECOND, 64), Types.TIME, + testSimpleType( + new ArrowType.Time(TimeUnit.MICROSECOND, 64), + Types.TIME, (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000)), TimeMicroVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - testSimpleType(new ArrowType.Time(TimeUnit.NANOSECOND, 64), 
Types.TIME, + testSimpleType( + new ArrowType.Time(TimeUnit.NANOSECOND, 64), + Types.TIME, (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000_000)), TimeNanoVector::setNull, Arrays.asList(new Time(-128), new Time(104), new Time(-42))); @@ -286,57 +313,92 @@ void time64() throws SQLException { @Test void date32() throws SQLException { - testSimpleType(new ArrowType.Date(DateUnit.DAY), Types.DATE, - (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)), + testSimpleType( + new ArrowType.Date(DateUnit.DAY), + Types.DATE, + (valueVectors, index, value) -> + valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)), DateDayVector::setNull, - Arrays.asList(new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); + Arrays.asList( + new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); } @Test void date64() throws SQLException { - testSimpleType(new ArrowType.Date(DateUnit.MILLISECOND), Types.DATE, + testSimpleType( + new ArrowType.Date(DateUnit.MILLISECOND), + Types.DATE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), DateMilliVector::setNull, - Arrays.asList(new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); + Arrays.asList( + new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); } @Test void timestamp() throws SQLException { - List values = Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); - testSimpleType(new ArrowType.Timestamp(TimeUnit.SECOND, null), Types.TIMESTAMP, + List values = + Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.SECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000), - TimeStampSecVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), Types.TIMESTAMP, + TimeStampSecVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - TimeStampMilliVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), Types.TIMESTAMP, + TimeStampMilliVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000), - TimeStampMicroVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), Types.TIMESTAMP, + TimeStampMicroVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000), - TimeStampNanoVector::setNull, values); + TimeStampNanoVector::setNull, + values); } @Test void timestampTz() throws SQLException { - List values = Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); - testSimpleType(new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + List values = + Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, 
(valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000), - TimeStampSecTZVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + TimeStampSecTZVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - TimeStampMilliTZVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + TimeStampMilliTZVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000), - TimeStampMicroTZVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + TimeStampMicroTZVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000), - TimeStampNanoTZVector::setNull, values); + TimeStampNanoTZVector::setNull, + values); } @Test void utf8() throws SQLException { - testSimpleType(ArrowType.Utf8.INSTANCE, Types.VARCHAR, + testSimpleType( + ArrowType.Utf8.INSTANCE, + Types.VARCHAR, (VarCharVector vector, Integer index, String value) -> vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)), BaseVariableWidthVector::setNull, @@ -345,7 +407,9 @@ void utf8() throws SQLException { @Test void largeUtf8() throws SQLException { - testSimpleType(ArrowType.LargeUtf8.INSTANCE, Types.LONGVARCHAR, + testSimpleType( + ArrowType.LargeUtf8.INSTANCE, + Types.LONGVARCHAR, (LargeVarCharVector vector, Integer index, String value) -> vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)), BaseLargeVariableWidthVector::setNull, @@ -354,155 +418,200 @@ void largeUtf8() throws SQLException { @Test void binary() throws SQLException { - testSimpleType(ArrowType.Binary.INSTANCE, Types.VARBINARY, - (VarBinaryVector vector, Integer index, byte[] value) -> - vector.setSafe(index, value), + testSimpleType( + ArrowType.Binary.INSTANCE, + Types.VARBINARY, + (VarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value), BaseVariableWidthVector::setNull, Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128})); } @Test void largeBinary() throws SQLException { - testSimpleType(ArrowType.LargeBinary.INSTANCE, Types.LONGVARBINARY, - (LargeVarBinaryVector vector, Integer index, byte[] value) -> - vector.setSafe(index, value), + testSimpleType( + ArrowType.LargeBinary.INSTANCE, + Types.LONGVARBINARY, + (LargeVarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value), BaseLargeVariableWidthVector::setNull, Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128})); } @Test void fixedSizeBinary() throws SQLException { - testSimpleType(new ArrowType.FixedSizeBinary(3), Types.BINARY, - FixedSizeBinaryVector::setSafe, FixedSizeBinaryVector::setNull, + testSimpleType( + new ArrowType.FixedSizeBinary(3), + Types.BINARY, + FixedSizeBinaryVector::setSafe, + FixedSizeBinaryVector::setNull, Arrays.asList(new byte[3], new byte[] {1, 2, -4}, new byte[] {-1, 127, -128})); } @Test void decimal128() throws SQLException { - testSimpleType(new 
ArrowType.Decimal(/*precision*/ 12, /*scale*/3, 128), Types.DECIMAL, - DecimalVector::setSafe, DecimalVector::setNull, - Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); + testSimpleType( + new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 128), + Types.DECIMAL, + DecimalVector::setSafe, + DecimalVector::setNull, + Arrays.asList( + new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); } @Test void decimal256() throws SQLException { - testSimpleType(new ArrowType.Decimal(/*precision*/ 12, /*scale*/3, 256), Types.DECIMAL, - Decimal256Vector::setSafe, Decimal256Vector::setNull, - Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); + testSimpleType( + new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 256), + Types.DECIMAL, + Decimal256Vector::setSafe, + Decimal256Vector::setNull, + Arrays.asList( + new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); } @Test void listOfDouble() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Double[]{0.0, Math.PI}, new Double[]{1.1, -352346.2, 2355.6}, - new Double[]{-1024.3}, new Double[]{}); - testListType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), setValue, ListVector::setNull, values); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Double[] {0.0, Math.PI}, new Double[] {1.1, -352346.2, 2355.6}, + new Double[] {-1024.3}, new Double[] {}); + testListType( + new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), + setValue, + ListVector::setNull, + values); } @Test void listOfInt64() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Long[]{1L, 2L, 3L}, new Long[]{4L, 5L}, - new Long[]{512L, 1024L, 2048L, 4096L}, new Long[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Long[] {1L, 2L, 3L}, + new Long[] {4L, 5L}, + new Long[] {512L, 1024L, 2048L, 4096L}, + new Long[] {}); testListType((ArrowType) new ArrowType.Int(64, true), setValue, ListVector::setNull, values); } @Test void listOfInt32() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = 
listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Integer[]{1, 2, 3}, new Integer[]{4, 5}, - new Integer[]{512, 1024, 2048, 4096}, new Integer[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Integer[] {1, 2, 3}, + new Integer[] {4, 5}, + new Integer[] {512, 1024, 2048, 4096}, + new Integer[] {}); testListType((ArrowType) new ArrowType.Int(32, true), setValue, ListVector::setNull, values); } @Test void listOfBoolean() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Boolean[]{true, false}, - new Boolean[]{false, false}, new Boolean[]{true, true, false, true}, new Boolean[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values) + .forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Boolean[] {true, false}, + new Boolean[] {false, false}, + new Boolean[] {true, true, false, true}, + new Boolean[] {}); testListType((ArrowType) new ArrowType.Bool(), setValue, ListVector::setNull, values); } @Test void listOfString() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(stringValue -> { - if (stringValue != null) { - byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8); - try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) { - stringBuffer.writeBytes(stringValueBytes); - writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer); - } - } else { - writer.varChar().writeNull(); - } - }); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new String[]{"aaaa", "b1"}, - new String[]{"c", null, "d"}, new String[]{"e", "f", "g", "h"}, new String[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values) + .forEach( + stringValue -> { + if (stringValue != null) { + byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8); + try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) { + stringBuffer.writeBytes(stringValueBytes); + writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer); + } + } else { + writer.varChar().writeNull(); + } + }); + 
writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new String[] {"aaaa", "b1"}, + new String[] {"c", null, "d"}, + new String[] {"e", "f", "g", "h"}, + new String[] {}); testListType((ArrowType) new ArrowType.Utf8(), setValue, ListVector::setNull, values); } @Test void mapOfString() throws SQLException { - TriConsumer> setValue = (mapVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(index); - mapWriter.startMap(); - values.entrySet().forEach(mapValue -> { - if (mapValue != null) { - byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8); - try ( - ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = allocator.buffer(valueBytes.length); - ) { - mapWriter.startEntry(); - keyBuf.writeBytes(keyBytes); - valueBuf.writeBytes(valueBytes); - mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - mapWriter.endEntry(); - } - } else { - mapWriter.writeNull(); - } - }); - mapWriter.endMap(); - }; + TriConsumer> setValue = + (mapVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(index); + mapWriter.startMap(); + values + .entrySet() + .forEach( + mapValue -> { + if (mapValue != null) { + byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8); + byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8); + try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); + ArrowBuf valueBuf = allocator.buffer(valueBytes.length); ) { + mapWriter.startEntry(); + keyBuf.writeBytes(keyBytes); + valueBuf.writeBytes(valueBytes); + mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); + mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); + mapWriter.endEntry(); + } + } else { + mapWriter.writeNull(); + } + }); + mapWriter.endMap(); + }; JsonStringHashMap value1 = new JsonStringHashMap(); value1.put("a", "b"); @@ -514,28 +623,34 @@ void mapOfString() throws SQLException { JsonStringHashMap value3 = new JsonStringHashMap(); value3.put("y", "z"); value3.put("arrow", "cool"); - List> values = Arrays.asList(value1, value2, value3, Collections.emptyMap()); - testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8()); + List> values = + Arrays.asList(value1, value2, value3, Collections.emptyMap()); + testMapType( + new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8()); } @Test void mapOfInteger() throws SQLException { - TriConsumer> setValue = (mapVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(index); - mapWriter.startMap(); - values.entrySet().forEach(mapValue -> { - if (mapValue != null) { - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(mapValue.getKey()); - mapWriter.value().integer().writeInt(mapValue.getValue()); - mapWriter.endEntry(); - } else { - mapWriter.writeNull(); - } - }); - mapWriter.endMap(); - }; + TriConsumer> setValue = + (mapVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(index); + mapWriter.startMap(); + values + .entrySet() + .forEach( + mapValue -> { + if 
(mapValue != null) { + mapWriter.startEntry(); + mapWriter.key().integer().writeInt(mapValue.getKey()); + mapWriter.value().integer().writeInt(mapValue.getValue()); + mapWriter.endEntry(); + } else { + mapWriter.writeNull(); + } + }); + mapWriter.endMap(); + }; JsonStringHashMap value1 = new JsonStringHashMap(); value1.put(1, 2); @@ -547,8 +662,10 @@ void mapOfInteger() throws SQLException { JsonStringHashMap value3 = new JsonStringHashMap(); value3.put(Integer.MIN_VALUE, Integer.MAX_VALUE); value3.put(0, 4096); - List> values = Arrays.asList(value1, value2, value3, Collections.emptyMap()); - testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true)); + List> values = + Arrays.asList(value1, value2, value3, Collections.emptyMap()); + testMapType( + new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true)); } @FunctionalInterface @@ -556,11 +673,16 @@ interface TriConsumer { void accept(T value1, U value2, V value3); } - void testSimpleType(ArrowType arrowType, int jdbcType, TriConsumer setValue, - BiConsumer setNull, List values) throws SQLException { + void testSimpleType( + ArrowType arrowType, + int jdbcType, + TriConsumer setValue, + BiConsumer setNull, + List values) + throws SQLException { Schema schema = new Schema(Collections.singletonList(Field.nullable("field", arrowType))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -610,7 +732,7 @@ void testSimpleType(ArrowType arrowType, int jdbcType // Non-nullable (since some types have a specialized binder) schema = new Schema(Collections.singletonList(Field.notNullable("field", arrowType))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -650,15 +772,23 @@ void testSimpleType(ArrowType arrowType, int jdbcType } } - void testListType(ArrowType arrowType, TriConsumer setValue, - BiConsumer setNull, List values) throws SQLException { + void testListType( + ArrowType arrowType, + TriConsumer setValue, + BiConsumer setNull, + List values) + throws SQLException { int jdbcType = Types.ARRAY; - Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable( - new ArrowType.List()), Collections.singletonList( - new Field("element", FieldType.notNullable(arrowType), null) - )))); + Schema schema = + new Schema( + Collections.singletonList( + new Field( + "field", + FieldType.nullable(new ArrowType.List()), + Collections.singletonList( + new Field("element", FieldType.notNullable(arrowType), null))))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -706,12 +836,16 @@ void testListType(ArrowType 
arrowType, TriConsumer void testListType(ArrowType arrowType, TriConsumer void testMapType(ArrowType arrowType, TriConsumer setValue, - BiConsumer setNull, List values, - ArrowType elementType) throws SQLException { + void testMapType( + ArrowType arrowType, + TriConsumer setValue, + BiConsumer setNull, + List values, + ArrowType elementType) + throws SQLException { int jdbcType = Types.VARCHAR; FieldType keyType = new FieldType(false, elementType, null, null); FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable(arrowType), - Collections.singletonList(new Field(MapVector.KEY_NAME, mapType, - Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, keyType, null))))))); + Schema schema = + new Schema( + Collections.singletonList( + new Field( + "field", + FieldType.nullable(arrowType), + Collections.singletonList( + new Field( + MapVector.KEY_NAME, + mapType, + Arrays.asList( + new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, keyType, null))))))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -810,18 +957,31 @@ void testMapType(ArrowType arrowType, TriConsumer metadata) { + private static Field field( + String name, boolean nullable, ArrowType type, Map metadata) { return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList()); } @@ -90,16 +91,26 @@ private static Map metadata(String... 
entries) { public void schemaComment() throws Exception { boolean includeMetadata = false; Schema schema = getSchemaWithCommentFromQuery(includeMetadata); - Schema expectedSchema = new Schema(Arrays.asList( - field("ID", false, Types.MinorType.BIGINT.getType(), - metadata("comment", "Record identifier")), - field("NAME", true, Types.MinorType.VARCHAR.getType(), - metadata("comment", "Name of record")), - field("COLUMN1", true, Types.MinorType.BIT.getType(), - metadata()), - field("COLUMNN", true, Types.MinorType.INT.getType(), - metadata("comment", "Informative description of columnN")) - ), metadata("comment", "This is super special table with valuable data")); + Schema expectedSchema = + new Schema( + Arrays.asList( + field( + "ID", + false, + Types.MinorType.BIGINT.getType(), + metadata("comment", "Record identifier")), + field( + "NAME", + true, + Types.MinorType.VARCHAR.getType(), + metadata("comment", "Name of record")), + field("COLUMN1", true, Types.MinorType.BIT.getType(), metadata()), + field( + "COLUMNN", + true, + Types.MinorType.INT.getType(), + metadata("comment", "Informative description of columnN"))), + metadata("comment", "This is super special table with valuable data")); assertThat(schema).isEqualTo(expectedSchema); } @@ -107,47 +118,60 @@ public void schemaComment() throws Exception { public void schemaCommentWithDatabaseMetadata() throws Exception { boolean includeMetadata = true; Schema schema = getSchemaWithCommentFromQuery(includeMetadata); - Schema expectedSchema = new Schema(Arrays.asList( - field("ID", false, Types.MinorType.BIGINT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "ID", - "SQL_TYPE", "BIGINT", - "comment", "Record identifier" - )), - field("NAME", true, Types.MinorType.VARCHAR.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "NAME", - "SQL_TYPE", "CHARACTER VARYING", - "comment", "Name of record")), - field("COLUMN1", true, Types.MinorType.BIT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "COLUMN1", - "SQL_TYPE", "BOOLEAN")), - field("COLUMNN", true, Types.MinorType.INT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "COLUMNN", - "SQL_TYPE", "INTEGER", - "comment", "Informative description of columnN")) - ), metadata("comment", "This is super special table with valuable data")); + Schema expectedSchema = + new Schema( + Arrays.asList( + field( + "ID", + false, + Types.MinorType.BIGINT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "ID", + "SQL_TYPE", "BIGINT", + "comment", "Record identifier")), + field( + "NAME", + true, + Types.MinorType.VARCHAR.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "NAME", + "SQL_TYPE", "CHARACTER VARYING", + "comment", "Name of record")), + field( + "COLUMN1", + true, + Types.MinorType.BIT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", 
"PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "COLUMN1", + "SQL_TYPE", "BOOLEAN")), + field( + "COLUMNN", + true, + Types.MinorType.INT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "COLUMNN", + "SQL_TYPE", "INTEGER", + "comment", "Informative description of columnN"))), + metadata("comment", "This is super special table with valuable data")); assertThat(schema).isEqualTo(expectedSchema); /* corresponding Apache Spark DDL after conversion: - ID BIGINT NOT NULL COMMENT 'Record identifier', - NAME STRING COMMENT 'Name of record', - COLUMN1 BOOLEAN, - COLUMNN INT COMMENT 'Informative description of columnN' - */ + ID BIGINT NOT NULL COMMENT 'Record identifier', + NAME STRING COMMENT 'Name of record', + COLUMN1 BOOLEAN, + COLUMNN INT COMMENT 'Informative description of columnN' + */ assertThat(schema).isEqualTo(expectedSchema); } @@ -156,19 +180,25 @@ private Schema getSchemaWithCommentFromQuery(boolean includeMetadata) throws SQL try (Statement statement = conn.createStatement()) { try (ResultSet resultSet = statement.executeQuery("select * from table1")) { ResultSetMetaData resultSetMetaData = resultSet.getMetaData(); - Map> columnCommentByColumnIndex = getColumnComments(metaData, resultSetMetaData); + Map> columnCommentByColumnIndex = + getColumnComments(metaData, resultSetMetaData); String tableName = getTableNameFromResultSetMetaData(resultSetMetaData); String tableComment = getTableComment(metaData, tableName); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder() - .setAllocator(new RootAllocator()).setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) - .setColumnMetadataByColumnIndex(columnCommentByColumnIndex).setIncludeMetadata(includeMetadata).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder() + .setAllocator(new RootAllocator()) + .setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) + .setColumnMetadataByColumnIndex(columnCommentByColumnIndex) + .setIncludeMetadata(includeMetadata) + .build(); return JdbcToArrowUtils.jdbcToArrowSchema(resultSetMetaData, config); } } } - private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) throws SQLException { + private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) + throws SQLException { Set tablesFromQuery = new HashSet<>(); for (int idx = 1, columnCount = resultSetMetaData.getColumnCount(); idx <= columnCount; idx++) { String tableName = resultSetMetaData.getTableName(idx); @@ -182,11 +212,16 @@ private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMeta throw new RuntimeException("Table metadata is absent or ambiguous"); } - private Map> getColumnComments(DatabaseMetaData metaData, - ResultSetMetaData resultSetMetaData) throws SQLException { + private Map> getColumnComments( + DatabaseMetaData metaData, ResultSetMetaData resultSetMetaData) throws SQLException { Map> columnCommentByColumnIndex = new HashMap<>(); - for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); columnIdx <= columnCount; columnIdx++) { - String columnComment = getColumnComment(metaData, resultSetMetaData.getTableName(columnIdx), + for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); + columnIdx <= columnCount; + columnIdx++) { + String columnComment = + getColumnComment( + metaData, + resultSetMetaData.getTableName(columnIdx), 
resultSetMetaData.getColumnName(columnIdx)); if (columnComment != null && !columnComment.isEmpty()) { columnCommentByColumnIndex.put(columnIdx, Collections.singletonMap(COMMENT, columnComment)); @@ -216,7 +251,8 @@ private String getTableComment(DatabaseMetaData metaData, String tableName) thro throw new RuntimeException("Table comment not found"); } - private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) throws SQLException { + private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) + throws SQLException { try (ResultSet tableMetadata = metaData.getColumns(null, null, tableName, columnName)) { if (tableMetadata.next()) { return tableMetadata.getString("REMARKS"); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java index d4fb7c32997a7..85d6d89d036ff 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -28,7 +27,6 @@ import java.util.HashMap; import java.util.Locale; import java.util.TimeZone; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.junit.Test; @@ -36,7 +34,8 @@ public class JdbcToArrowConfigTest { private static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); - private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + private static final Calendar calendar = + Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); @Test(expected = NullPointerException.class) public void testConfigNullArguments() { @@ -116,13 +115,29 @@ public void testIncludeMetadata() { config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build(); assertTrue(config.shouldIncludeMetadata()); - config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ true, - /* reuse vector schema root */ true, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null); + config = + new JdbcToArrowConfig( + allocator, + calendar, /* include metadata */ + true, + /* reuse vector schema root */ true, + null, + null, + JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, + null); assertTrue(config.shouldIncludeMetadata()); assertTrue(config.isReuseVectorSchemaRoot()); - config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ false, - /* reuse vector schema root */ false, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null); + config = + new JdbcToArrowConfig( + allocator, + calendar, /* include metadata */ + false, + /* reuse vector schema root */ false, + null, + null, + JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, + null); assertFalse(config.shouldIncludeMetadata()); assertFalse(config.isReuseVectorSchemaRoot()); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java index 7dd881b3f7cec..375463d6fd5d4 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java +++ 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertArrayEquals; @@ -22,6 +21,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.math.BigDecimal; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -32,7 +34,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -58,12 +59,9 @@ import org.apache.arrow.vector.util.ObjectMapperFactory; import org.apache.arrow.vector.util.Text; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - /** - * This is a Helper class which has functionalities to read and assert the values from the given FieldVector object. + * This is a Helper class which has functionalities to read and assert the values from the given + * FieldVector object. */ public class JdbcToArrowTestHelper { @@ -79,7 +77,8 @@ public static void assertIntVectorValues(IntVector intVector, int rowCount, Inte } } - public static void assertBooleanVectorValues(BitVector bitVector, int rowCount, Boolean[] values) { + public static void assertBooleanVectorValues( + BitVector bitVector, int rowCount, Boolean[] values) { assertEquals(rowCount, bitVector.getValueCount()); for (int j = 0; j < bitVector.getValueCount(); j++) { @@ -103,7 +102,8 @@ public static void assertBitVectorValues(BitVector bitVector, int rowCount, Inte } } - public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int rowCount, Integer[] values) { + public static void assertTinyIntVectorValues( + TinyIntVector tinyIntVector, int rowCount, Integer[] values) { assertEquals(rowCount, tinyIntVector.getValueCount()); for (int j = 0; j < tinyIntVector.getValueCount(); j++) { @@ -115,7 +115,8 @@ public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int ro } } - public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int rowCount, Integer[] values) { + public static void assertSmallIntVectorValues( + SmallIntVector smallIntVector, int rowCount, Integer[] values) { assertEquals(rowCount, smallIntVector.getValueCount()); for (int j = 0; j < smallIntVector.getValueCount(); j++) { @@ -127,7 +128,8 @@ public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int } } - public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCount, Long[] values) { + public static void assertBigIntVectorValues( + BigIntVector bigIntVector, int rowCount, Long[] values) { assertEquals(rowCount, bigIntVector.getValueCount()); for (int j = 0; j < bigIntVector.getValueCount(); j++) { @@ -139,7 +141,8 @@ public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCo } } - public static void assertDecimalVectorValues(DecimalVector decimalVector, int rowCount, BigDecimal[] values) { + public static void assertDecimalVectorValues( + DecimalVector decimalVector, int rowCount, BigDecimal[] values) { 
assertEquals(rowCount, decimalVector.getValueCount()); for (int j = 0; j < decimalVector.getValueCount(); j++) { @@ -151,7 +154,8 @@ public static void assertDecimalVectorValues(DecimalVector decimalVector, int ro } } - public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCount, Double[] values) { + public static void assertFloat8VectorValues( + Float8Vector float8Vector, int rowCount, Double[] values) { assertEquals(rowCount, float8Vector.getValueCount()); for (int j = 0; j < float8Vector.getValueCount(); j++) { @@ -163,7 +167,8 @@ public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCo } } - public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCount, Float[] values) { + public static void assertFloat4VectorValues( + Float4Vector float4Vector, int rowCount, Float[] values) { assertEquals(rowCount, float4Vector.getValueCount()); for (int j = 0; j < float4Vector.getValueCount(); j++) { @@ -175,7 +180,8 @@ public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCo } } - public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int rowCount, Long[] values) { + public static void assertTimeVectorValues( + TimeMilliVector timeMilliVector, int rowCount, Long[] values) { assertEquals(rowCount, timeMilliVector.getValueCount()); for (int j = 0; j < timeMilliVector.getValueCount(); j++) { @@ -187,7 +193,8 @@ public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int r } } - public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCount, Integer[] values) { + public static void assertDateVectorValues( + DateDayVector dateDayVector, int rowCount, Integer[] values) { assertEquals(rowCount, dateDayVector.getValueCount()); for (int j = 0; j < dateDayVector.getValueCount(); j++) { @@ -199,7 +206,8 @@ public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCo } } - public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, int rowCount, Long[] values) { + public static void assertTimeStampVectorValues( + TimeStampVector timeStampVector, int rowCount, Long[] values) { assertEquals(rowCount, timeStampVector.getValueCount()); for (int j = 0; j < timeStampVector.getValueCount(); j++) { @@ -211,7 +219,8 @@ public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, } } - public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { + public static void assertVarBinaryVectorValues( + VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { assertEquals(rowCount, varBinaryVector.getValueCount()); for (int j = 0; j < varBinaryVector.getValueCount(); j++) { @@ -223,7 +232,8 @@ public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, } } - public static void assertVarcharVectorValues(VarCharVector varCharVector, int rowCount, byte[][] values) { + public static void assertVarcharVectorValues( + VarCharVector varCharVector, int rowCount, byte[][] values) { assertEquals(rowCount, varCharVector.getValueCount()); for (int j = 0; j < varCharVector.getValueCount(); j++) { @@ -239,7 +249,8 @@ public static void assertNullVectorValues(NullVector vector, int rowCount) { assertEquals(rowCount, vector.getValueCount()); } - public static void assertListVectorValues(ListVector listVector, int rowCount, Integer[][] values) { + public static void assertListVectorValues( + ListVector listVector, int rowCount, Integer[][] values) { 
assertEquals(rowCount, listVector.getValueCount()); for (int j = 0; j < listVector.getValueCount(); j++) { @@ -252,7 +263,8 @@ public static void assertListVectorValues(ListVector listVector, int rowCount, I } } - public static void assertMapVectorValues(MapVector mapVector, int rowCount, Map[] values) { + public static void assertMapVectorValues( + MapVector mapVector, int rowCount, Map[] values) { assertEquals(rowCount, mapVector.getValueCount()); for (int j = 0; j < mapVector.getValueCount(); j++) { @@ -263,10 +275,17 @@ public static void assertMapVectorValues(MapVector mapVector, int rowCount, Map< (JsonStringArrayList>) mapVector.getObject(j); Map actualMap = null; if (actualSource != null && !actualSource.isEmpty()) { - actualMap = actualSource.stream().map(entry -> - new AbstractMap.SimpleEntry<>(entry.get("key").toString(), - entry.get("value") != null ? entry.get("value").toString() : null)) - .collect(HashMap::new, (collector, val) -> collector.put(val.getKey(), val.getValue()), HashMap::putAll); + actualMap = + actualSource.stream() + .map( + entry -> + new AbstractMap.SimpleEntry<>( + entry.get("key").toString(), + entry.get("value") != null ? entry.get("value").toString() : null)) + .collect( + HashMap::new, + (collector, val) -> collector.put(val.getKey(), val.getValue()), + HashMap::putAll); } assertEquals(values[j], actualMap); } @@ -310,8 +329,8 @@ public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) { } } - public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData rsmd, Schema schema) - throws SQLException { + public static void assertFieldMetadataMatchesResultSetMetadata( + ResultSetMetaData rsmd, Schema schema) throws SQLException { assertNotNull(schema); assertNotNull(schema.getFields()); assertNotNull(rsmd); @@ -400,12 +419,14 @@ public static byte[][] getCharArray(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); + valueArr[i++] = + "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } - public static byte[][] getCharArrayWithCharSet(String[] values, String dataType, Charset charSet) { + public static byte[][] getCharArrayWithCharSet( + String[] values, String dataType, Charset charSet) { String[] dataArr = getValues(values, dataType); byte[][] valueArr = new byte[dataArr.length][]; int i = 0; @@ -420,7 +441,8 @@ public static byte[][] getBinaryValues(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); + valueArr[i++] = + "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java index 4478cdfbee6f7..8dfc684e22f24 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import java.io.InputStream; @@ -231,8 +230,7 @@ public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLExceptio } @Override - public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException { - } + public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {} @Override public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException { @@ -242,8 +240,7 @@ public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws S } @Override - public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException { - } + public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {} @Override public void setURL(int parameterIndex, URL x) throws SQLException { @@ -261,80 +258,62 @@ public void setRowId(int parameterIndex, RowId x) throws SQLException { } @Override - public void setNString(int parameterIndex, String value) throws SQLException { - } + public void setNString(int parameterIndex, String value) throws SQLException {} @Override public void setNCharacterStream(int parameterIndex, Reader value, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setNClob(int parameterIndex, NClob value) throws SQLException { - } + public void setNClob(int parameterIndex, NClob value) throws SQLException {} @Override - public void setClob(int parameterIndex, Reader reader, long length) throws SQLException { - } + public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {} @Override public void setBlob(int parameterIndex, InputStream inputStream, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException { - } + public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {} @Override - public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException { - } + public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {} @Override public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength) - throws SQLException { - } + throws SQLException {} @Override - public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException { - } + public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException {} @Override - public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException { - } + public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {} @Override public void setCharacterStream(int parameterIndex, Reader reader, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException { - } + public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {} @Override - public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException { - } + public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {} @Override - public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException { - } + public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {} @Override - public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException { 
- } + public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {} @Override - public void setClob(int parameterIndex, Reader reader) throws SQLException { - } + public void setClob(int parameterIndex, Reader reader) throws SQLException {} @Override - public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException { - } + public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {} @Override - public void setNClob(int parameterIndex, Reader reader) throws SQLException { - } + public void setNClob(int parameterIndex, Reader reader) throws SQLException {} @Override public ResultSet executeQuery(String sql) throws SQLException { @@ -347,8 +326,7 @@ public int executeUpdate(String sql) throws SQLException { } @Override - public void close() throws SQLException { - } + public void close() throws SQLException {} @Override public int getMaxFieldSize() throws SQLException { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java index b05a59a9a04d8..5f5f6dcb98d43 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.io.InputStream; @@ -48,13 +47,11 @@ public class ResultSetUtility { public static ResultSet generateEmptyResultSet() throws SQLException { MockDataElement element = new MockDataElement("string_example"); MockResultSetMetaData.MockColumnMetaData columnMetaData = - MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1); + MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1); ArrayList cols = new ArrayList<>(); cols.add(columnMetaData); ResultSetMetaData metadata = new MockResultSetMetaData(cols); - return MockResultSet.builder() - .setMetaData(metadata) - .build(); + return MockResultSet.builder().setMetaData(metadata).build(); } public static MockResultSet generateBasicResultSet(int rows) throws SQLException { @@ -319,15 +316,19 @@ public String getColumnTypeName(int column) throws SQLException { } public static MockResultSetMetaData fromRows(List rows) throws SQLException { - // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a given result set. - // If there are now rows, or the result set contains no columns, this cannot be dynamically generated and + // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a + // given result set. + // If there are now rows, or the result set contains no columns, this cannot be dynamically + // generated and // an exception will be thrown. 
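
The fromRows comment above describes the fallback path: metadata is inferred from the first row, and an SQLException is thrown for empty input. Tests that need deterministic column types instead pass metadata explicitly through the builder. A condensed sketch of that explicit path, relying only on the ResultSetUtility members that appear elsewhere in this patch (MockDataElement, MockColumnMetaData.fromDataElement, MockResultSet.builder); it is meant to live alongside those test utilities rather than stand alone:

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.Collections;

import org.apache.arrow.adapter.jdbc.ResultSetUtility;

class ExplicitMetadataSketch {
  static ResultSet singleVarcharColumn() throws SQLException {
    ResultSetUtility.MockDataElement element =
        new ResultSetUtility.MockDataElement("string_example");
    // Supplying metadata up front avoids the SQLException that fromRows(...) throws
    // when the row list, or the first row's column list, is empty.
    ResultSetMetaData metadata =
        new ResultSetUtility.MockResultSetMetaData(
            Collections.singletonList(
                ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.fromDataElement(
                    element, 1)));
    return ResultSetUtility.MockResultSet.builder()
        .setMetaData(metadata)
        .addDataElement(element)
        .finishRow()
        .build();
  }
}
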
if (rows.size() == 0) { - throw new SQLException("Unable to dynamically generate ResultSetMetaData because row count is zero!"); + throw new SQLException( + "Unable to dynamically generate ResultSetMetaData because row count is zero!"); } MockRow firstRow = rows.get(0); if (firstRow.dataElements.size() == 0) { - throw new SQLException("Unable to dynamically generate ResultSetMetaData because column count is zero!"); + throw new SQLException( + "Unable to dynamically generate ResultSetMetaData because column count is zero!"); } ArrayList columns = new ArrayList<>(); for (int i = 0; i < firstRow.dataElements.size(); i++) { @@ -346,9 +347,7 @@ public static class MockColumnMetaData { private String typeName; private int displaySize; - - private MockColumnMetaData() { - } + private MockColumnMetaData() {} private String getLabel() { return label; @@ -382,16 +381,17 @@ private int getDisplaySize() { return displaySize; } - public static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException { + public static MockColumnMetaData fromDataElement(MockDataElement element, int i) + throws SQLException { return MockColumnMetaData.builder() - .sqlType(element.getSqlType()) - .precision(element.getPrecision()) - .scale(element.getScale()) - .nullable(element.isNullable()) - .setTypeName("TYPE") - .setDisplaySize(420) - .label("col_" + i) - .build(); + .sqlType(element.getSqlType()) + .precision(element.getPrecision()) + .scale(element.getScale()) + .nullable(element.isNullable()) + .setTypeName("TYPE") + .setDisplaySize(420) + .label("col_" + i) + .build(); } public static Builder builder() { @@ -440,9 +440,7 @@ public MockColumnMetaData build() { return this.columnMetaData; } } - } - } public static class MockRow { @@ -635,7 +633,6 @@ public short getShort() throws SQLException { } } - public static class ThrowingResultSet implements ResultSet { @Override @@ -1139,17 +1136,20 @@ public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException } @Override - public void updateAsciiStream(String columnLabel, InputStream x, int length) throws SQLException { + public void updateAsciiStream(String columnLabel, InputStream x, int length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBinaryStream(String columnLabel, InputStream x, int length) throws SQLException { + public void updateBinaryStream(String columnLabel, InputStream x, int length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateCharacterStream(String columnLabel, Reader reader, int length) throws SQLException { + public void updateCharacterStream(String columnLabel, Reader reader, int length) + throws SQLException { throw getExceptionToThrow(); } @@ -1439,7 +1439,8 @@ public void updateNCharacterStream(int columnIndex, Reader x, long length) throw } @Override - public void updateNCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + public void updateNCharacterStream(String columnLabel, Reader reader, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1449,7 +1450,8 @@ public void updateAsciiStream(int columnIndex, InputStream x, long length) throw } @Override - public void updateBinaryStream(int columnIndex, InputStream x, long length) throws SQLException { + public void updateBinaryStream(int columnIndex, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1459,27 +1461,32 @@ public void updateCharacterStream(int columnIndex, Reader x, 
long length) throws } @Override - public void updateAsciiStream(String columnLabel, InputStream x, long length) throws SQLException { + public void updateAsciiStream(String columnLabel, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBinaryStream(String columnLabel, InputStream x, long length) throws SQLException { + public void updateBinaryStream(String columnLabel, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + public void updateCharacterStream(String columnLabel, Reader reader, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBlob(int columnIndex, InputStream inputStream, long length) throws SQLException { + public void updateBlob(int columnIndex, InputStream inputStream, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBlob(String columnLabel, InputStream inputStream, long length) throws SQLException { + public void updateBlob(String columnLabel, InputStream inputStream, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1584,13 +1591,14 @@ public T getObject(String columnLabel, Class type) throws SQLException { } @Override - public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) throws SQLException { + public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) + throws SQLException { throw getExceptionToThrow(); } @Override public void updateObject(String columnLabel, Object x, SQLType targetSqlType, int scaleOrLength) - throws SQLException { + throws SQLException { throw getExceptionToThrow(); } @@ -1600,7 +1608,8 @@ public void updateObject(int columnIndex, Object x, SQLType targetSqlType) throw } @Override - public void updateObject(String columnLabel, Object x, SQLType targetSqlType) throws SQLException { + public void updateObject(String columnLabel, Object x, SQLType targetSqlType) + throws SQLException { throw getExceptionToThrow(); } @@ -1623,7 +1632,6 @@ private static SQLException getExceptionToThrow(String message) { return new SQLException(message); } - public static class ThrowingResultSetMetaData implements ResultSetMetaData { @Override public int getColumnCount() throws SQLException { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java index 2424ed625248d..8af2c06f4de54 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VectorSchemaRoot; @@ -36,20 +34,24 @@ public class ResultSetUtilityTest { @Test public void testZeroRowResultSet() throws Exception { - for (boolean reuseVectorSchemaRoot : new boolean[]{false, true}) { + for (boolean reuseVectorSchemaRoot : new boolean[] {false, true}) { try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { ResultSet rs = ResultSetUtility.generateEmptyResultSet(); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config); assertTrue("Iterator on zero row ResultSet should haveNext() before use", iter.hasNext()); VectorSchemaRoot root = iter.next(); assertNotNull("VectorSchemaRoot from first next() result should never be null", root); - assertEquals("VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount()); - assertFalse("hasNext() should return false on empty ResultSets after initial next() call", iter.hasNext()); + assertEquals( + "VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount()); + assertFalse( + "hasNext() should return false on empty ResultSets after initial next() call", + iter.hasNext()); } } } @@ -99,7 +101,8 @@ public void testBasicResultSet() throws Exception { @Test public void testMockDataTypes() throws SQLException { - ResultSetUtility.MockDataElement element = new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); + ResultSetUtility.MockDataElement element = + new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); assertEquals(1L, element.getLong()); assertEquals(1, element.getInt()); assertEquals("1", element.getString()); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java index 50c4fe6db2a14..7fa8188a99158 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -/** - * POJO to handle the YAML data from the test YAML file. - */ +/** POJO to handle the YAML data from the test YAML file. 
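
The testZeroRowResultSet case above walks the conversion loop that most of these suites share: build a JdbcToArrowConfig, hand the ResultSet to JdbcToArrow.sqlToArrowVectorIterator, then drain the ArrowVectorIterator. A condensed sketch of that loop using only calls visible in this patch; the ResultSet argument and what is done with each batch are assumed to come from the caller:

import java.sql.ResultSet;

import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

class SqlToArrowSketch {
  static void convert(ResultSet rs) throws Exception {
    try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
      JdbcToArrowConfig config =
          new JdbcToArrowConfigBuilder(
                  allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
              .setReuseVectorSchemaRoot(true)
              .build();
      try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
        while (iter.hasNext()) {
          VectorSchemaRoot root = iter.next();
          // Each batch of rows arrives as a VectorSchemaRoot; consume it here.
          System.out.println(root.getRowCount());
        }
      }
    }
  }
}

Both the iterator and the allocator are closeable, which is why the tests wrap them in try-with-resources.
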
*/ @JsonIgnoreProperties(ignoreUnknown = true) public class Table { private String name; @@ -39,8 +35,7 @@ public class Table { private String[] vectors; private int rowCount; - public Table() { - } + public Table() {} public String getName() { return name; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java index 053604073fd66..93ba028e39629 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -34,7 +33,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -48,9 +46,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. - */ +/** Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. */ @RunWith(Parameterized.class) public class UnreliableMetaDataTest { private final boolean reuseVectorSchemaRoot; @@ -72,7 +68,7 @@ public void afterEach() { @Parameterized.Parameters(name = "reuseVectorSchemaRoot = {0}") public static Collection getTestData() { - return Arrays.asList(new Object[][] { {false}, {true} }); + return Arrays.asList(new Object[][] {{false}, {true}}); } @Test @@ -91,13 +87,15 @@ public void testUnreliableMetaDataPrecisionAndScale() throws Exception { // reset the ResultSet: rs.beforeFirst(); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); - assertThrows(RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); + assertThrows( + RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); } // reset the ResultSet: @@ -105,11 +103,12 @@ public void testUnreliableMetaDataPrecisionAndScale() throws Exception { JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 18, 2); Map explicitMapping = new HashMap<>(); explicitMapping.put(1, explicitMappingField); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { @@ -133,7 +132,8 @@ public void 
testInconsistentPrecisionAndScale() throws Exception { assertEquals("Value precision should be 18", 18, bd1.precision()); rs.next(); BigDecimal bd2 = rs.getBigDecimal(1); - assertEquals("Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2); + assertEquals( + "Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2); assertEquals("Value scale should be 7", 7, bd2.scale()); assertEquals("Value precision should be 20", 20, bd2.precision()); rs.beforeFirst(); @@ -141,23 +141,27 @@ public void testInconsistentPrecisionAndScale() throws Exception { Map explicitMapping = new HashMap<>(); explicitMapping.put(1, explicitMappingField); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); - assertThrows(RuntimeException.class, iter::next, + assertThrows( + RuntimeException.class, + iter::next, "This is expected to fail due to inconsistent BigDecimal scales, while strict matching is enabled."); } // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce BigDecimal scale as needed: - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { VectorSchemaRoot root = iter.next(); @@ -174,23 +178,29 @@ public void testIncorrectNullability() throws Exception { .sqlType(Types.INTEGER) .nullable(ResultSetMetaData.columnNoNulls) .build(); - ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); - final ResultSetUtility.MockResultSet.Builder resultSetBuilder = ResultSetUtility.MockResultSet.builder() - .setMetaData(metadata) - .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) - .finishRow() - .addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) - .finishRow(); - final Schema notNullSchema = new Schema( - Collections.singletonList(Field.notNullable(/*name=*/null, new ArrowType.Int(32, true)))); - final Schema nullSchema = new Schema( - Collections.singletonList(Field.nullable(/*name=*/null, new ArrowType.Int(32, true)))); + ResultSetMetaData metadata = + new ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); + final ResultSetUtility.MockResultSet.Builder resultSetBuilder = + ResultSetUtility.MockResultSet.builder() + .setMetaData(metadata) + .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) + .finishRow() + 
.addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) + .finishRow(); + final Schema notNullSchema = + new Schema( + Collections.singletonList( + Field.notNullable(/*name=*/ null, new ArrowType.Int(32, true)))); + final Schema nullSchema = + new Schema( + Collections.singletonList(Field.nullable(/*name=*/ null, new ArrowType.Int(32, true)))); try (final ResultSet rs = resultSetBuilder.build()) { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -208,14 +218,16 @@ public void testIncorrectNullability() throws Exception { // Override the nullability to get the correct result final Map typeMapping = new HashMap<>(); - JdbcFieldInfo realFieldInfo = new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/0, /*scale*/0); + JdbcFieldInfo realFieldInfo = + new JdbcFieldInfo( + Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/ 0, /*scale*/ 0); typeMapping.put(1, realFieldInfo); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(typeMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -231,14 +243,16 @@ public void testIncorrectNullability() throws Exception { rs.beforeFirst(); // columnNullableUnknown won't override the metadata - realFieldInfo = new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/0, /*scale*/0); + realFieldInfo = + new JdbcFieldInfo( + Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/ 0, /*scale*/ 0); typeMapping.put(1, realFieldInfo); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(typeMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -266,8 +280,8 @@ private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLE return ResultSetUtility.MockResultSet.builder() .setMetaData(metadata) .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000000000.01"), Types.DECIMAL)) .finishRow() .build(); } @@ -285,12 +299,12 @@ private 
ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException { return ResultSetUtility.MockResultSet.builder() .setMetaData(metadata) .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000000000.01"), Types.DECIMAL)) .finishRow() .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000300.0000001"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000300.0000001"), Types.DECIMAL)) .finishRow() .build(); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java index 96bac42214cef..6a25c58fbde7e 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import org.apache.arrow.memory.BufferAllocator; @@ -35,5 +34,4 @@ public void setUp() { public void tearDown() { allocator.close(); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java index a368023d49005..255770ecdbf6d 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
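
The rows assembled in buildVaryingPrecisionAndScaleResultSet carry different scales (2 and 7), which is why testInconsistentPrecisionAndScale fails under strict matching and only passes once setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) lets the values be coerced to a single scale. A plain-Java sketch of the underlying BigDecimal behaviour, independent of the adapter code; the class name is illustrative:

import java.math.BigDecimal;
import java.math.RoundingMode;

public class ScaleCoercionSketch {
  public static void main(String[] args) {
    BigDecimal lowScale = new BigDecimal("1000000000000000.01");    // scale 2
    BigDecimal highScale = new BigDecimal("1000000000300.0000001"); // scale 7

    // Raising the scale only pads zeros, so UNNECESSARY is safe:
    System.out.println(lowScale.setScale(7, RoundingMode.UNNECESSARY)); // 1000000000000000.0100000

    // Lowering the scale would drop non-zero digits, so UNNECESSARY throws:
    try {
      highScale.setScale(2, RoundingMode.UNNECESSARY);
    } catch (ArithmeticException e) {
      System.out.println("rounding required: " + e.getMessage());
    }
  }
}
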
*/ - package org.apache.arrow.adapter.jdbc.consumer; import static org.junit.Assert.assertArrayEquals; @@ -23,7 +22,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; - import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.VarBinaryVector; import org.junit.Test; @@ -37,7 +35,8 @@ interface InputStreamConsumer { void consume(BinaryConsumer consumer) throws IOException; } - protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) throws IOException { + protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) + throws IOException { try (final VarBinaryVector vector = new VarBinaryVector("binary", allocator)) { BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, nullable); dataConsumer.consume(consumer); @@ -61,51 +60,59 @@ private byte[] createBytes(int length) { return bytes; } - public void testConsumeInputStream(byte[][] values, boolean nullable) throws IOException { - assertConsume(nullable, binaryConsumer -> { - for (byte[] value : values) { - binaryConsumer.consume(new ByteArrayInputStream(value)); - binaryConsumer.moveWriterPosition(); - } - }, values); + assertConsume( + nullable, + binaryConsumer -> { + for (byte[] value : values) { + binaryConsumer.consume(new ByteArrayInputStream(value)); + binaryConsumer.moveWriterPosition(); + } + }, + values); } @Test public void testConsumeInputStream() throws IOException { - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); + testConsumeInputStream(new byte[][] {createBytes(DEFAULT_RECORD_BYTE_COUNT)}, false); + + testConsumeInputStream( + new byte[][] { + createBytes(DEFAULT_RECORD_BYTE_COUNT), createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), + createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] {createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)}, false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + 
createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, + false); byte[][] testRecords = new byte[INITIAL_VALUE_ALLOCATION * 2][]; for (int i = 0; i < testRecords.length; i++) { @@ -113,5 +120,4 @@ public void testConsumeInputStream() throws IOException { } testConsumeInputStream(testRecords, false); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java index d32c2bbab91a8..e22686e890580 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; @@ -28,7 +27,6 @@ import java.sql.SQLException; import java.sql.Statement; import java.util.List; - import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Field; @@ -40,10 +38,8 @@ public class JdbcAliasToArrowTest { private Connection conn = null; - private static final String CREATE_STATEMENT = - "CREATE TABLE example_table (id INTEGER);"; - private static final String INSERT_STATEMENT = - "INSERT INTO example_table (id) VALUES (?);"; + private static final String CREATE_STATEMENT = "CREATE TABLE example_table (id INTEGER);"; + private static final String INSERT_STATEMENT = "INSERT INTO example_table (id) VALUES (?);"; private static final String QUERY = "SELECT id as a, id as b FROM example_table;"; private static final String DROP_STATEMENT = "DROP TABLE example_table;"; private static final String ORIGINAL_COLUMN_NAME = "ID"; @@ -62,10 +58,9 @@ public void setUp() throws Exception { } /** - * Test h2 database query with alias for column name and column label. - * To verify reading field alias from an H2 database works as expected. - * If this test fails, something is either wrong with the setup, - * or the H2 SQL behavior changed. + * Test h2 database query with alias for column name and column label. To verify reading field + * alias from an H2 database works as expected. If this test fails, something is either wrong with + * the setup, or the H2 SQL behavior changed. */ @Test public void testReadH2Alias() throws Exception { @@ -96,8 +91,8 @@ public void testReadH2Alias() throws Exception { } /** - * Test jdbc query results with alias to arrow works expected. - * Arrow result schema name should be field alias name. + * Test jdbc query results with alias to arrow works expected. Arrow result schema name should be + * field alias name. 
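
The BinaryConsumer cases a little earlier stream each value into a VarBinaryVector and then advance the writer. A condensed sketch of that consume/moveWriterPosition pattern, using the calls shown in those tests (createConsumer, consume, moveWriterPosition); the sample strings are illustrative, the closing setValueCount call is an assumption of this sketch rather than something shown in the test, and the consumer class is used here the way the same-package test uses it:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarBinaryVector;

class BinaryConsumerSketch {
  static void consumeTwoValues() throws IOException {
    try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
        VarBinaryVector vector = new VarBinaryVector("binary", allocator)) {
      // nullable = false, starting at index 0, as in the tests above.
      BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, false);
      for (String value : new String[] {"first", "second"}) {
        consumer.consume(new ByteArrayInputStream(value.getBytes(StandardCharsets.UTF_8)));
        consumer.moveWriterPosition(); // advance to the next slot after each stream
      }
      vector.setValueCount(2);
    }
  }
}
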
*/ @Test public void testJdbcAliasToArrow() throws Exception { @@ -105,8 +100,7 @@ public void testJdbcAliasToArrow() throws Exception { insertRows(rowCount); try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - final VectorSchemaRoot vector = - sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); + final VectorSchemaRoot vector = sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); assertEquals(rowCount, vector.getRowCount()); Schema vectorSchema = vector.getSchema(); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java index eabbdc5a25e5d..895dab52ca534 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; @@ -34,7 +33,6 @@ import java.sql.Types; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -54,11 +52,12 @@ public class JdbcToArrowArrayTest { private Connection conn = null; private static final String CREATE_STATEMENT = - "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + - "string_array VARCHAR ARRAY);"; + "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + + "string_array VARCHAR ARRAY);"; private static final String INSERT_STATEMENT = "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);"; - private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; + private static final String QUERY = + "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; private static final String DROP_STATEMENT = "DROP TABLE array_table;"; private static Map arrayFieldMapping; @@ -158,7 +157,8 @@ public void testJdbcToArrow() throws Exception { insertRows(rowCount, intArrays, floatArrays, strArrays); final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); final JdbcToArrowConfig config = builder.build(); @@ -168,9 +168,12 @@ public void testJdbcToArrow() throws Exception { assertEquals(rowCount, vector.getRowCount()); - assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); + assertIntegerVectorEquals( + (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals( + (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals( + (ListVector) 
vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); } } @@ -179,30 +182,22 @@ public void testJdbcToArrowWithNulls() throws Exception { int rowCount = 4; Integer[][] intArrays = { - null, - {0}, - {1}, - {}, + null, {0}, {1}, {}, }; Float[][] floatArrays = { - { 2.0f }, - null, - { 3.0f }, - {}, + {2.0f}, null, {3.0f}, {}, }; String[][] stringArrays = { - {"4"}, - null, - {"5"}, - {}, + {"4"}, null, {"5"}, {}, }; insertRows(rowCount, intArrays, floatArrays, stringArrays); final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); final JdbcToArrowConfig config = builder.build(); @@ -212,13 +207,17 @@ public void testJdbcToArrowWithNulls() throws Exception { assertEquals(rowCount, vector.getRowCount()); - assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); + assertIntegerVectorEquals( + (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals( + (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals( + (ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); } } - private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) { + private void assertIntegerVectorEquals( + ListVector listVector, int rowCount, Integer[][] expectedValues) { IntVector vector = (IntVector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -243,7 +242,8 @@ private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Inte } } - private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) { + private void assertFloatVectorEquals( + ListVector listVector, int rowCount, Float[][] expectedValues) { Float4Vector vector = (Float4Vector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -268,7 +268,8 @@ private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[ } } - private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) { + private void assertStringVectorEquals( + ListVector listVector, int rowCount, String[][] expectedValues) { VarCharVector vector = (VarCharVector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -285,7 +286,8 @@ private void assertStringVectorEquals(ListVector listVector, int rowCount, Strin assertEquals(1, listVector.isSet(row)); assertEquals(expectedValues[row].length, offset - prevOffset); for (int i = prevOffset; i < offset; ++i) { - assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); + assertArrayEquals( + expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); } prevOffset = offset; @@ -309,7 +311,7 @@ private Integer[][] generateIntegerArrayField(int numRows) { for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new Integer[]{val, val + 1, val + 2, val + 3}; + result[i] = new Integer[] 
{val, val + 1, val + 2, val + 3}; } return result; @@ -317,10 +319,10 @@ private Integer[][] generateIntegerArrayField(int numRows) { private Float[][] generateFloatArrayField(int numRows) { Float[][] result = new Float[numRows][]; - + for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; + result[i] = new Float[] {(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; } return result; @@ -331,22 +333,21 @@ private String[][] generateStringArrayField(int numRows) { for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new String[]{ - String.valueOf(val), - String.valueOf(val + 1), - String.valueOf(val + 2), - String.valueOf(val + 3) }; + result[i] = + new String[] { + String.valueOf(val), + String.valueOf(val + 1), + String.valueOf(val + 2), + String.valueOf(val + 3) + }; } return result; } private void insertRows( - int numRows, - Integer[][] integerArrays, - Float[][] floatArrays, - String[][] strArrays) - throws SQLException { + int numRows, Integer[][] integerArrays, Float[][] floatArrays, String[][] strArrays) + throws SQLException { // Insert 4 Rows try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java index ab1b4b7fc2fea..14de2d6dc8f3c 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; @@ -29,7 +28,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -47,8 +45,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset, - * including the multi-byte CJK characters for H2 database. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * UTF-8 Charset, including the multi-byte CJK characters for H2 database. 
*/ @RunWith(Parameterized.class) public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest { @@ -82,7 +80,7 @@ public void setUp() throws SQLException, ClassNotFoundException { String driver = "org.h2.Driver"; Class.forName(driver); conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement();) { + try (Statement stmt = conn.createStatement(); ) { stmt.executeUpdate(table.getCreate()); for (String insert : table.getData()) { stmt.executeUpdate(insert); @@ -99,39 +97,59 @@ public void setUp() throws SQLException, ClassNotFoundException { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowCharSetTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 Charset, including - * the multi-byte CJK characters. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 + * Charset, including the multi-byte CJK characters. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance()), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new 
RootAllocator(0), Calendar.getInstance(), true).build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -141,20 +159,26 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8)); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java index 54e7d5ffb27ed..d7c4be03b3542 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; @@ -40,7 +39,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -71,8 +69,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types - * for H2 database using multiple test data files. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * various data types for H2 database using multiple test data files. 
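
The testJdbcSchemaMetadata cases in these suites derive an Arrow Schema straight from JDBC metadata without reading any rows. A minimal sketch of that call sequence, using only API names visible in this patch (JdbcToArrowConfigBuilder, JdbcToArrowUtils.jdbcToArrowSchema); the Connection and query string are assumed to come from the caller:

import java.sql.Connection;
import java.sql.ResultSetMetaData;
import java.util.Calendar;

import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.types.pojo.Schema;

class SchemaFromMetadataSketch {
  static Schema schemaFor(Connection conn, String query) throws Exception {
    // includeMetadata = true, mirroring the testJdbcSchemaMetadata cases above.
    JdbcToArrowConfig config =
        new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build();
    ResultSetMetaData rsmd = conn.createStatement().executeQuery(query).getMetaData();
    return JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
  }
}
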
*/ @RunWith(Parameterized.class) public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest { @@ -137,43 +135,60 @@ public JdbcToArrowDataTypesTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowDataTypesTest.class)); } - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. - */ + /** Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ResultSetMetaData rsmd = 
conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -183,8 +198,8 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -192,69 +207,99 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { switch (table.getType()) { case BIGINT: - assertBigIntVectorValues((BigIntVector) root.getVector(table.getVector()), table.getValues().length, + assertBigIntVectorValues( + (BigIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case BINARY: case BLOB: - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(table.getVector()), table.getValues().length, + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(table.getVector()), + table.getValues().length, table.getBinaryValues()); break; case BIT: - assertBitVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + assertBitVectorValues( + (BitVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case BOOL: - assertBooleanVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + assertBooleanVectorValues( + (BitVector) root.getVector(table.getVector()), + table.getValues().length, table.getBoolValues()); break; case CHAR: case VARCHAR: case CLOB: - assertVarcharVectorValues((VarCharVector) root.getVector(table.getVector()), table.getValues().length, + assertVarcharVectorValues( + (VarCharVector) root.getVector(table.getVector()), + table.getValues().length, table.getCharValues()); break; case DATE: - assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + assertDateVectorValues( + (DateDayVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case TIME: - assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeVectorValues( + (TimeMilliVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case TIMESTAMP: - assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case DECIMAL: - assertDecimalVectorValues((DecimalVector) root.getVector(table.getVector()), table.getValues().length, + assertDecimalVectorValues( + (DecimalVector) root.getVector(table.getVector()), + table.getValues().length, table.getBigDecimalValues()); break; case DOUBLE: - assertFloat8VectorValues((Float8Vector) root.getVector(table.getVector()), table.getValues().length, + assertFloat8VectorValues( + (Float8Vector) root.getVector(table.getVector()), + table.getValues().length, table.getDoubleValues()); break; case INT: - assertIntVectorValues((IntVector) 
root.getVector(table.getVector()), table.getValues().length, + assertIntVectorValues( + (IntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case SMALLINT: - assertSmallIntVectorValues((SmallIntVector) root.getVector(table.getVector()), table.getValues().length, + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case TINYINT: - assertTinyIntVectorValues((TinyIntVector) root.getVector(table.getVector()), table.getValues().length, + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case REAL: - assertFloat4VectorValues((Float4Vector) root.getVector(table.getVector()), table.getValues().length, + assertFloat4VectorValues( + (Float4Vector) root.getVector(table.getVector()), + table.getValues().length, table.getFloatValues()); break; case NULL: assertNullVectorValues((NullVector) root.getVector(table.getVector()), table.getRowCount()); break; case LIST: - assertListVectorValues((ListVector) root.getVector(table.getVector()), table.getValues().length, + assertListVectorValues( + (ListVector) root.getVector(table.getVector()), + table.getValues().length, table.getListValues()); break; default: @@ -263,4 +308,3 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { } } } - diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java index a5d1ffa3f64de..8bb3812637acb 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertMapVectorValues; @@ -24,7 +23,6 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Calendar; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.memory.RootAllocator; @@ -32,46 +30,48 @@ import org.apache.arrow.vector.complex.MapVector; import org.junit.Test; -/** - * Test MapConsumer with OTHER jdbc type. - */ +/** Test MapConsumer with OTHER jdbc type. 
*/ public class JdbcToArrowMapDataTypeTest extends AbstractJdbcToArrowTest { public JdbcToArrowMapDataTypeTest() throws IOException { this.table = getTable("h2/test1_map_h2.yml", JdbcToArrowMapDataTypeTest.class); } - /** - * Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column - */ + /** Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( + testDataSets( + sqlToArrow( conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( conn, table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } /** * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), table.getRowCount(), getMapValues(table.getValues(), MAP)); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java index 31d32bd648906..51394764e385c 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; @@ -51,7 +50,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -82,8 +80,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with null values for - * H2 database. 
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * null values for H2 database. */ @RunWith(Parameterized.class) public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest { @@ -116,47 +114,67 @@ public JdbcToArrowNullTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowNullTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null values. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null + * values. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + 
.setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -166,8 +184,8 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -178,7 +196,8 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount()); break; case SELECTED_NULL_COLUMN: - sqlToArrowTestSelectedNullColumnsValues(table.getVectors(), root, table.getRowCount(), isIncludeMapVector); + sqlToArrowTestSelectedNullColumnsValues( + table.getVectors(), root, table.getRowCount(), isIncludeMapVector); break; case SELECTED_NULL_ROW: testAllVectorValues(root, isIncludeMapVector); @@ -192,62 +211,96 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { private void testAllVectorValues(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + assertBigIntVectorValues( + (BigIntVector) root.getVector(BIGINT), + table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(TINYINT), + table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(SMALLINT), + table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BINARY), + table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BLOB), + table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), 
table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), - getIntValues(table.getValues(), INT)); + assertIntVectorValues( + (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), - getIntValues(table.getValues(), BIT)); + assertBitVectorValues( + (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + assertBooleanVectorValues( + (BitVector) root.getVector(BOOL), + table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + assertDateVectorValues( + (DateDayVector) root.getVector(DATE), + table.getRowCount(), getIntValues(table.getValues(), DATE)); - assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(), + assertTimeVectorValues( + (TimeMilliVector) root.getVector(TIME), + table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(TIMESTAMP), + table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + assertDecimalVectorValues( + (DecimalVector) root.getVector(DECIMAL), + table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + assertFloat8VectorValues( + (Float8Vector) root.getVector(DOUBLE), + table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + assertFloat4VectorValues( + (Float4Vector) root.getVector(REAL), + table.getRowCount(), getFloatValues(table.getValues(), REAL)); - assertListVectorValues((ListVector) root.getVector(LIST), table.getRowCount(), + assertListVectorValues( + (ListVector) root.getVector(LIST), + table.getRowCount(), getListValues(table.getValues(), LIST)); if (isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), + table.getRowCount(), + getMapValues(table.getValues(), MAP)); } } @@ -285,11 +338,11 @@ public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, in * @param vectors Vectors to test * @param root VectorSchemaRoot for test * @param rowCount number of rows - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. 
Jdbc type to 'map' + * mapping declared in configuration only manually */ - public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSchemaRoot root, int rowCount, - boolean isIncludeMapVector) { + public void sqlToArrowTestSelectedNullColumnsValues( + String[] vectors, VectorSchemaRoot root, int rowCount, boolean isIncludeMapVector) { assertNullValues((BigIntVector) root.getVector(vectors[0]), rowCount); assertNullValues((DecimalVector) root.getVector(vectors[1]), rowCount); assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount); @@ -308,5 +361,4 @@ public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSche assertNullValues((MapVector) root.getVector(vectors[14]), rowCount); } } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java index 4d0bbfc7a993c..47713d9099da6 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static junit.framework.TestCase.assertTrue; @@ -24,7 +23,6 @@ import java.sql.SQLException; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; import org.apache.arrow.adapter.jdbc.Table; @@ -40,9 +38,7 @@ */ @RunWith(Parameterized.class) public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest { - private static final String[] testFiles = { - "h2/test1_null_and_notnull.yml" - }; + private static final String[] testFiles = {"h2/test1_null_and_notnull.yml"}; /** * Constructor which populates the table object for each test iteration. @@ -57,17 +53,19 @@ public JdbcToArrowOptionalColumnsTest(Table table) { * Get the test data as a collection of Table objects for each test iteration. * * @return Collection of Table objects - * @throws SQLException on error + * @throws SQLException on error * @throws ClassNotFoundException on error - * @throws IOException on error + * @throws IOException on error */ @Parameterized.Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns. + * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable + * columns. */ @Test @Override @@ -76,12 +74,13 @@ public void testJdbcToArrowValues() throws SQLException, IOException { } /** - * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column becomes - * nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes non-nullable. + * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column + * becomes nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes + * non-nullable. 
* * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -90,5 +89,4 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { assertTrue(root.getSchema().getFields().get(0).isNullable()); assertFalse(root.getSchema().getFields().get(1).isNullable()); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java index a925dd7ee32a8..d290b9bf08960 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.*; @@ -29,7 +28,6 @@ import java.util.Collection; import java.util.stream.Collectors; import java.util.stream.Stream; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; import org.apache.arrow.adapter.jdbc.JdbcToArrow; @@ -64,8 +62,8 @@ import org.junit.runners.Parameterized; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types - * for H2 database using single test data file. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * various data types for H2 database using single test data file. */ @RunWith(Parameterized.class) public class JdbcToArrowTest extends AbstractJdbcToArrowTest { @@ -92,53 +90,72 @@ public JdbcToArrowTest(Table table, boolean reuseVectorSchemaRoot) { * @throws IOException on error */ @Parameterized.Parameters(name = "table = {0}, reuse batch = {1}") - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)).flatMap(row -> - Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})).collect(Collectors.toList()); + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { + return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)) + .flatMap(row -> Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})) + .collect(Collectors.toList()); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one test data file. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one + * test data file. 
*/ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } @Test public void testJdbcSchemaMetadata() throws SQLException { Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); Schema schema = 
JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); } @@ -147,71 +164,105 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + assertBigIntVectorValues( + (BigIntVector) root.getVector(BIGINT), + table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(TINYINT), + table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(SMALLINT), + table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BINARY), + table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BLOB), + table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), - getIntValues(table.getValues(), INT)); + assertIntVectorValues( + (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), - getIntValues(table.getValues(), BIT)); + assertBitVectorValues( + (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + assertBooleanVectorValues( + (BitVector) root.getVector(BOOL), + table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + assertDateVectorValues( + (DateDayVector) root.getVector(DATE), + table.getRowCount(), getIntValues(table.getValues(), DATE)); - assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), 
table.getRowCount(), + assertTimeVectorValues( + (TimeMilliVector) root.getVector(TIME), + table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(TIMESTAMP), + table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + assertDecimalVectorValues( + (DecimalVector) root.getVector(DECIMAL), + table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + assertFloat8VectorValues( + (Float8Vector) root.getVector(DOUBLE), + table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + assertFloat4VectorValues( + (Float4Vector) root.getVector(REAL), + table.getRowCount(), getFloatValues(table.getValues(), REAL)); assertNullVectorValues((NullVector) root.getVector(NULL), table.getRowCount()); - assertListVectorValues((ListVector) root.getVector(LIST), table.getRowCount(), + assertListVectorValues( + (ListVector) root.getVector(LIST), + table.getRowCount(), getListValues(table.getValues(), LIST)); if (isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), + table.getRowCount(), + getMapValues(table.getValues(), MAP)); } } @@ -221,11 +272,12 @@ public void runLargeNumberOfRows() throws IOException, SQLException { int x = 0; final int targetRows = 600000; ResultSet rs = ResultSetUtility.generateBasicResultSet(targetRows); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java index fe08db161c8ac..c4930c3ab6017 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; @@ -28,7 +27,6 @@ import java.util.Calendar; import java.util.Collection; import java.util.TimeZone; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -47,10 +45,9 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with TimeZone based Date, - * Time and Timestamp datatypes for H2 database. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * TimeZone based Date, Time and Timestamp datatypes for H2 database. */ - @RunWith(Parameterized.class) public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest { @@ -94,40 +91,60 @@ public JdbcToArrowTimeZoneTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowTimeZoneTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone based Date, - * Time and Timestamp datatype. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone + * based Date, Time and Timestamp datatype. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( + testDataSets( + sqlToArrow( + conn, + table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()), false); + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + 
Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -137,8 +154,8 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -148,19 +165,25 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { case EST_DATE: case GMT_DATE: case PST_DATE: - assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + assertDateVectorValues( + (DateDayVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case EST_TIME: case GMT_TIME: case PST_TIME: - assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeVectorValues( + (TimeMilliVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case EST_TIMESTAMP: case GMT_TIMESTAMP: case PST_TIMESTAMP: - assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; default: @@ -168,5 +191,4 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { break; } } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java index 1d7e2760f843e..caa1c1d971adb 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; @@ -42,7 +41,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.List; - import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; import org.apache.arrow.adapter.jdbc.JdbcToArrow; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; @@ -91,14 +89,15 @@ public JdbcToArrowVectorIteratorTest(Table table, boolean reuseVectorSchemaRoot) @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); validate(iterator); } @@ -106,27 +105,28 @@ public void testJdbcToArrowValues() throws SQLException, IOException { @Test public void testVectorSchemaRootReuse() throws SQLException, IOException { Integer[][] intValues = { - {101, 102, 103}, - {104, null, null}, - {107, 108, 109}, - {110} + {101, 102, 103}, + {104, null, null}, + {107, 108, 109}, + {110} }; Integer[][][] listValues = { - {{1, 2, 3}, {1, 2}, {1}}, - {{2, 3, 4}, {2, 3}, {2}}, - {{3, 4, 5}, {3, 4}, {3}}, - {{}} + {{1, 2, 3}, {1, 2}, {1}}, + {{2, 3, 4}, {2, 3}, {2}}, + {{3, 4, 5}, {3, 4}, {3}}, + {{}} }; - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); int batchCount = 0; VectorSchemaRoot prev = null; @@ -178,14 +178,15 @@ public void testVectorSchemaRootReuse() throws SQLException, IOException { @Test public void testJdbcToArrowValuesNoLimit() throws SQLException, IOException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + 
JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); validate(iterator); } @@ -195,12 +196,12 @@ public void testTimeStampConsumer() throws SQLException, IOException { final String sql = "select timestamp_field11 from table1"; // first experiment, with calendar and time zone. - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); assertNotNull(config.getCalendar()); try (ArrowVectorIterator iterator = @@ -213,16 +214,16 @@ public void testTimeStampConsumer() throws SQLException, IOException { } // second experiment, without calendar and time zone. - config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - null) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), null) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); assertNull(config.getCalendar()); try (ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { VectorSchemaRoot root = iterator.next(); assertEquals(1, root.getFieldVectors().size()); @@ -278,24 +279,40 @@ private void validate(ArrowVectorIterator iterator) throws SQLException, IOExcep float8Vectors.add((Float8Vector) root.getVector(DOUBLE)); listVectors.add((ListVector) root.getVector(LIST)); } - assertBigIntVectorValues(bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues(tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); + assertBigIntVectorValues( + bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); + assertTinyIntVectorValues( + tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); assertIntVectorValues(intVectors, table.getRowCount(), getIntValues(table.getValues(), INT)); - assertSmallIntVectorValues(smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertBinaryVectorValues(vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertBinaryVectorValues(vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarCharVectorValues(vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarCharVectorValues(vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarCharVectorValues(vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); + assertSmallIntVectorValues( + smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); + assertBinaryVectorValues( + vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), 
BINARY)); + assertBinaryVectorValues( + vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); + assertVarCharVectorValues( + vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); + assertVarCharVectorValues( + vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); + assertVarCharVectorValues( + vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues(vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateDayVectorValues(dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); - assertTimeMilliVectorValues(timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues(timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues(decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat4VectorValues(float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); - assertFloat8VectorValues(float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertListVectorValues(listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); + assertBooleanVectorValues( + vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); + assertDateDayVectorValues( + dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); + assertTimeMilliVectorValues( + timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); + assertTimeStampVectorValues( + timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); + assertDecimalVectorValues( + decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); + assertFloat4VectorValues( + float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); + assertFloat8VectorValues( + float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); + assertListVectorValues( + listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); roots.forEach(root -> root.close()); } @@ -324,7 +341,8 @@ private void assertFloat4VectorValues(List vectors, int rowCount, } } - private void assertDecimalVectorValues(List vectors, int rowCount, BigDecimal[] values) { + private void assertDecimalVectorValues( + List vectors, int rowCount, BigDecimal[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -337,7 +355,8 @@ private void assertDecimalVectorValues(List vectors, int rowCount } } - private void assertTimeStampVectorValues(List vectors, int rowCount, Long[] values) { + private void assertTimeStampVectorValues( + List vectors, int rowCount, Long[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -349,7 +368,8 @@ private void assertTimeStampVectorValues(List vectors, int rowC } } - private void assertTimeMilliVectorValues(List vectors, int rowCount, Long[] values) { + private void assertTimeMilliVectorValues( + List vectors, int rowCount, Long[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -397,7 +417,8 @@ private void 
assertBooleanVectorValues(List vectors, int rowCount, Bo } } - private void assertVarCharVectorValues(List vectors, int rowCount, byte[][] values) { + private void assertVarCharVectorValues( + List vectors, int rowCount, byte[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -409,7 +430,8 @@ private void assertVarCharVectorValues(List vectors, int rowCount } } - private void assertBinaryVectorValues(List vectors, int rowCount, byte[][] values) { + private void assertBinaryVectorValues( + List vectors, int rowCount, byte[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -421,7 +443,8 @@ private void assertBinaryVectorValues(List vectors, int rowCoun } } - private void assertSmallIntVectorValues(List vectors, int rowCount, Integer[] values) { + private void assertSmallIntVectorValues( + List vectors, int rowCount, Integer[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -433,7 +456,8 @@ private void assertSmallIntVectorValues(List vectors, int rowCou } } - private void assertTinyIntVectorValues(List vectors, int rowCount, Integer[] values) { + private void assertTinyIntVectorValues( + List vectors, int rowCount, Integer[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -474,7 +498,8 @@ private void assertIntVectorValues(List vectors, int rowCount, Intege } } - public static void assertListVectorValues(List vectors, int rowCount, Integer[][] values) { + public static void assertListVectorValues( + List vectors, int rowCount, Integer[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -492,12 +517,11 @@ public static void assertListVectorValues(List vectors, int rowCount } } - /** - * Runs a simple query, and encapsulates the result into a field vector. - */ + /** Runs a simple query, and encapsulates the result into a field vector. 
*/ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException, IOException { - ArrowVectorIterator iterator = JdbcToArrow.sqlToArrowVectorIterator( - conn.createStatement().executeQuery("select real_field8 from table1"), config); + ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery("select real_field8 from table1"), config); VectorSchemaRoot root = iterator.next(); @@ -513,10 +537,11 @@ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException @Test public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOException { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); + JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); // first experiment, using default type converter JdbcToArrowConfig config = builder.build(); @@ -527,15 +552,16 @@ public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOExcepti } // second experiment, using customized type converter - builder.setJdbcToArrowTypeConverter((fieldInfo) -> { - switch (fieldInfo.getJdbcType()) { - case Types.REAL: - // this is different from the default type converter - return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - default: - return null; - } - }); + builder.setJdbcToArrowTypeConverter( + (fieldInfo) -> { + switch (fieldInfo.getJdbcType()) { + case Types.REAL: + // this is different from the default type converter + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + default: + return null; + } + }); config = builder.build(); try (FieldVector vector = getQueryResult(config)) { diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index bc89c4698eecf..7df08e1a98b36 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -24,9 +24,13 @@ jar Arrow Orc Adapter (Experimental/Contrib)A JNI wrapper for the C++ ORC reader implementation. + ../../../cpp/release-build/ + dev/checkstyle/checkstyle-spotless.xml + none + org.apache.arrow diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java index 716a13876608c..faf48e19445ae 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java @@ -14,12 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * Metadata about Vectors/Arrays that is passed via JNI interface. - */ +/** Metadata about Vectors/Arrays that is passed via JNI interface. */ class OrcFieldNode { private final int length; @@ -27,6 +24,7 @@ class OrcFieldNode { /** * Construct a new instance. + * * @param length the number of values written. * @param nullCount the number of null values. 
*/ diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java index d61799e990f77..692b0c061839c 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.File; @@ -25,24 +24,21 @@ import java.nio.file.StandardCopyOption; import java.util.Locale; -/** - * Helper class for JNI related operations. - */ +/** Helper class for JNI related operations. */ class OrcJniUtils { private static final String LIBRARY_NAME = "arrow_orc_jni"; private static boolean isLoaded = false; - private OrcJniUtils() { - } + private OrcJniUtils() {} - static void loadOrcAdapterLibraryFromJar() - throws IOException, IllegalAccessException { + static void loadOrcAdapterLibraryFromJar() throws IOException, IllegalAccessException { synchronized (OrcJniUtils.class) { if (!isLoaded) { final String libraryToLoad = LIBRARY_NAME + "/" + getNormalizedArch() + "/" + System.mapLibraryName(LIBRARY_NAME); final File libraryFile = - moveFileFromJarToTemp(System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); + moveFileFromJarToTemp( + System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); System.load(libraryFile.getAbsolutePath()); isLoaded = true; } @@ -64,11 +60,11 @@ private static String getNormalizedArch() { return arch; } - private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad, String libraryName) - throws IOException { + private static File moveFileFromJarToTemp( + final String tmpDir, String libraryToLoad, String libraryName) throws IOException { final File temp = File.createTempFile(tmpDir, libraryName); - try (final InputStream is = OrcReaderJniWrapper.class.getClassLoader() - .getResourceAsStream(libraryToLoad)) { + try (final InputStream is = + OrcReaderJniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { if (is == null) { throw new FileNotFoundException(libraryToLoad); } else { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java index 473e8314243b1..70f2a655654c6 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java @@ -14,12 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * Wrapper for orc memory allocated by native code. - */ +/** Wrapper for orc memory allocated by native code. */ class OrcMemoryJniWrapper implements AutoCloseable { private final long nativeInstanceId; @@ -32,6 +29,7 @@ class OrcMemoryJniWrapper implements AutoCloseable { /** * Construct a new instance. + * * @param nativeInstanceId unique id of the underlying memory. * @param memoryAddress starting memory address of the underlying memory. * @param size size of the valid data. @@ -46,6 +44,7 @@ class OrcMemoryJniWrapper implements AutoCloseable { /** * Return the size of underlying chunk of memory that has valid data. 
+ * * @return valid data size */ long getSize() { @@ -54,6 +53,7 @@ long getSize() { /** * Return the size of underlying chunk of memory managed by this OrcMemoryJniWrapper. + * * @return underlying memory size */ long getCapacity() { @@ -62,6 +62,7 @@ long getCapacity() { /** * Return the memory address of underlying chunk of memory. + * * @return memory address */ long getMemoryAddress() { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java index 648e17e9c374c..ca9b44e7e8123 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java @@ -14,44 +14,42 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.ArrowReader; /** - * Orc Reader that allow accessing orc stripes in Orc file. - * This orc reader basically acts like an ArrowReader iterator that - * iterate over orc stripes. Each stripe will be accessed via an - * ArrowReader. + * Orc Reader that allow accessing orc stripes in Orc file. This orc reader basically acts like an + * ArrowReader iterator that iterate over orc stripes. Each stripe will be accessed via an + * ArrowReader. */ public class OrcReader implements AutoCloseable { private final OrcReaderJniWrapper jniWrapper; private BufferAllocator allocator; - /** - * reference to native reader instance. - */ + /** reference to native reader instance. */ private final long nativeInstanceId; /** * Create an OrcReader that iterate over orc stripes. + * * @param filePath file path to target file, currently only support local file. * @param allocator allocator provided to ArrowReader. * @throws IOException throws exception in case of file not found */ - public OrcReader(String filePath, BufferAllocator allocator) throws IOException, IllegalAccessException { + public OrcReader(String filePath, BufferAllocator allocator) + throws IOException, IllegalAccessException { this.allocator = allocator; this.jniWrapper = OrcReaderJniWrapper.getInstance(); this.nativeInstanceId = jniWrapper.open(filePath); } /** - * Seek to designated row. Invoke NextStripeReader() after seek - * will return stripe reader starting from designated row. + * Seek to designated row. Invoke NextStripeReader() after seek will return stripe reader starting + * from designated row. + * * @param rowNumber the rows number to seek * @return true if seek operation is succeeded */ diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java index ff449c343c4e7..be57485005fbf 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java @@ -14,14 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; -/** - * JNI wrapper for Orc reader. - */ +/** JNI wrapper for Orc reader. 
*/ class OrcReaderJniWrapper { private static volatile OrcReaderJniWrapper INSTANCE; @@ -41,21 +38,24 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * Construct a orc file reader over the target file. + * * @param fileName absolute file path of target file - * @return id of the orc reader instance if file opened successfully, - * otherwise return error code * -1. + * @return id of the orc reader instance if file opened successfully, otherwise return error code + * * -1. */ native long open(String fileName); /** * Release resources associated with designated reader instance. + * * @param readerId id of the reader instance. */ native void close(long readerId); /** - * Seek to designated row. Invoke nextStripeReader() after seek - * will return id of stripe reader starting from designated row. + * Seek to designated row. Invoke nextStripeReader() after seek will return id of stripe reader + * starting from designated row. + * * @param readerId id of the reader instance * @param rowNumber the rows number to seek * @return true if seek operation is succeeded @@ -64,6 +64,7 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * The number of stripes in the file. + * * @param readerId id of the reader instance * @return number of stripes */ @@ -71,6 +72,7 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * Get a stripe level ArrowReader with specified batchSize in each record batch. + * * @param readerId id of the reader instance * @param batchSize the number of rows loaded on each iteration * @return id of the stripe reader instance. diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java index a006cacab98f2..f78898df2205d 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.util.Arrays; import java.util.List; -/** - * Wrapper for record batch meta and native memory. - */ +/** Wrapper for record batch meta and native memory. */ class OrcRecordBatch { final int length; - /** - * Nodes correspond to the pre-ordered flattened logical schema. - */ + /** Nodes correspond to the pre-ordered flattened logical schema. */ final List nodes; final List buffers; /** * Construct a new instance. + * * @param length number of records included in current batch * @param nodes meta data for each fields * @param buffers buffers for underlying data diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java index fdec337e85d39..38233a0493bef 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.orc; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OwnershipTransferResult; @@ -26,8 +24,8 @@ import org.apache.arrow.util.Preconditions; /** - * A simple reference manager implementation for memory allocated by native code. - * The underlying memory will be released when reference count reach zero. + * A simple reference manager implementation for memory allocated by native code. The underlying + * memory will be released when reference count reach zero. */ public class OrcReferenceManager implements ReferenceManager { private final AtomicInteger bufRefCnt = new AtomicInteger(0); @@ -50,8 +48,8 @@ public boolean release() { @Override public boolean release(int decrement) { - Preconditions.checkState(decrement >= 1, - "ref count decrement should be greater than or equal to 1"); + Preconditions.checkState( + decrement >= 1, "ref count decrement should be greater than or equal to 1"); // decrement the ref count final int refCnt; synchronized (this) { @@ -89,18 +87,21 @@ public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; // create new ArrowBuf - final ArrowBuf derivedBuf = new ArrowBuf( + final ArrowBuf derivedBuf = + new ArrowBuf( this, null, length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf - derivedBufferAddress // starting byte address in the underlying memory for this new ArrowBuf, + derivedBufferAddress // starting byte address in the underlying memory for this new + // ArrowBuf, ); return derivedBuf; } @Override - public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { + public OwnershipTransferResult transferOwnership( + ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { throw new UnsupportedOperationException(); } diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java index 484296d92e039..52f5cf429a48d 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; import java.util.ArrayList; import java.util.stream.Collectors; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.ArrowReader; @@ -33,19 +31,16 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -/** - * Orc stripe that load data into ArrowRecordBatch. - */ +/** Orc stripe that load data into ArrowRecordBatch. */ public class OrcStripeReader extends ArrowReader { - /** - * reference to native stripe reader instance. - */ + /** reference to native stripe reader instance. */ private final long nativeInstanceId; /** * Construct a new instance. 
- * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by - * calling nextStripeReader from OrcReaderJniWrapper + * + * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by calling + * nextStripeReader from OrcReaderJniWrapper * @param allocator memory allocator for accounting. */ OrcStripeReader(long nativeInstanceId, BufferAllocator allocator) { @@ -62,18 +57,20 @@ public boolean loadNextBatch() throws IOException { ArrayList buffers = new ArrayList<>(); for (OrcMemoryJniWrapper buffer : recordBatch.buffers) { - buffers.add(new ArrowBuf( + buffers.add( + new ArrowBuf( new OrcReferenceManager(buffer), null, (int) buffer.getSize(), buffer.getMemoryAddress())); } - loadRecordBatch(new ArrowRecordBatch( + loadRecordBatch( + new ArrowRecordBatch( recordBatch.length, recordBatch.nodes.stream() - .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) - .collect(Collectors.toList()), + .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) + .collect(Collectors.toList()), buffers)); return true; } @@ -83,7 +80,6 @@ public long bytesRead() { return 0; } - @Override protected void closeReadSource() throws IOException { OrcStripeReaderJniWrapper.close(nativeInstanceId); @@ -94,9 +90,8 @@ protected Schema readSchema() throws IOException { byte[] schemaBytes = OrcStripeReaderJniWrapper.getSchema(nativeInstanceId); try (MessageChannelReader schemaReader = - new MessageChannelReader( - new ReadChannel( - new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { + new MessageChannelReader( + new ReadChannel(new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { MessageResult result = schemaReader.readNext(); if (result == null) { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java index 1dd96986108b4..e7b691087fb96 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java @@ -14,16 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * JNI wrapper for orc stripe reader. - */ +/** JNI wrapper for orc stripe reader. */ class OrcStripeReaderJniWrapper { /** * Get the schema of current stripe. + * * @param readerId id of the stripe reader instance. * @return serialized schema. */ @@ -31,14 +29,15 @@ class OrcStripeReaderJniWrapper { /** * Load next record batch. + * * @param readerId id of the stripe reader instance. - * @return loaded record batch, return null when reached - * the end of current stripe. + * @return loaded record batch, return null when reached the end of current stripe. */ static native OrcRecordBatch next(long readerId); /** * Release resources of underlying reader. + * * @param readerId id of the stripe reader instance. 
*/ static native void close(long readerId); diff --git a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java index 4153a35a61c67..17098806be72a 100644 --- a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java +++ b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import static org.junit.Assert.assertEquals; @@ -24,8 +23,6 @@ import java.io.File; import java.nio.charset.StandardCharsets; import java.util.List; - - import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.IntVector; @@ -45,11 +42,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; - public class OrcReaderTest { - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); + @Rule public TemporaryFolder testFolder = new TemporaryFolder(); private static final int MAX_ALLOCATION = 8 * 1024; private static RootAllocator allocator; @@ -64,8 +59,10 @@ public void testOrcJniReader() throws Exception { TypeDescription schema = TypeDescription.fromString("struct"); File testFile = new File(testFolder.getRoot(), "test-orc"); - Writer writer = OrcFile.createWriter(new Path(testFile.getAbsolutePath()), - OrcFile.writerOptions(new Configuration()).setSchema(schema)); + Writer writer = + OrcFile.createWriter( + new Path(testFile.getAbsolutePath()), + OrcFile.writerOptions(new Configuration()).setSchema(schema)); VectorizedRowBatch batch = schema.createRowBatch(); LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0]; BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[1]; diff --git a/java/dev/checkstyle/checkstyle-spotless.xml b/java/dev/checkstyle/checkstyle-spotless.xml index cbaec1a39bf2c..a2e9a60b12c72 100644 --- a/java/dev/checkstyle/checkstyle-spotless.xml +++ b/java/dev/checkstyle/checkstyle-spotless.xml @@ -89,7 +89,7 @@ - --> + @@ -99,7 +99,7 @@ value="WhitespaceAround: ''{0}'' is not followed by whitespace. Empty blocks may only be represented as '{}' when not part of a multi-block statement (4.1.3)"/> - + --> From 9ea7f6f0c66af537ca6511de2df2a77078000158 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Mon, 10 Jun 2024 05:34:43 -0700 Subject: [PATCH 26/59] GH-40828: [Java] Format arrow-maven-plugins modules (#42054) ### Rationale for this change Adding code style and formatting options for `apache-maven-plugin` module and submodules ### What changes are included in this PR? Java code formatting spotless plugin has been added. ### Are these changes tested? Yes, but doesn't involve test cases, the plugin itself corrects. ### Are there any user-facing changes? 
No * GitHub Issue: #40828 Authored-by: Laurent Goujon Signed-off-by: David Li --- .../plugins/BaseModuleInfoCompilerPlugin.java | 26 ++++++++----------- .../plugins/ModuleInfoCompilerPlugin.java | 10 +++---- .../plugins/ModuleInfoTestCompilerPlugin.java | 6 +---- java/maven/pom.xml | 12 ++++++++- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java index 37cbf5d7e772b..4fc8fc46e6bcc 100644 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java +++ b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/BaseModuleInfoCompilerPlugin.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.maven.plugins; import java.io.File; @@ -28,14 +27,11 @@ import java.nio.file.Paths; import java.util.List; import java.util.Optional; - import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.MojoExecutionException; import org.glavo.mic.ModuleInfoCompiler; -/** - * Compiles the first module-info.java file in the project purely syntactically. - */ +/** Compiles the first module-info.java file in the project purely syntactically. */ public abstract class BaseModuleInfoCompilerPlugin extends AbstractMojo { protected abstract List getSourceRoots(); @@ -60,9 +56,10 @@ public void execute() throws MojoExecutionException { // Invoke the compiler, ModuleInfoCompiler compiler = new ModuleInfoCompiler(); - try (Reader reader = new InputStreamReader(Files.newInputStream(moduleInfoFile.get().toPath()), - StandardCharsets.UTF_8); - OutputStream output = Files.newOutputStream(targetPath)) { + try (Reader reader = + new InputStreamReader( + Files.newInputStream(moduleInfoFile.get().toPath()), StandardCharsets.UTF_8); + OutputStream output = Files.newOutputStream(targetPath)) { compiler.compile(reader, output); getLog().info("Successfully wrote module-info.class file."); } catch (IOException ex) { @@ -73,18 +70,17 @@ public void execute() throws MojoExecutionException { } } - /** - * Finds the first module-info.java file in the set of source directories. - */ + /** Finds the first module-info.java file in the set of source directories. 
*/ private Optional findFirstModuleInfo(List sourceDirectories) { if (sourceDirectories == null) { return Optional.empty(); } - return sourceDirectories.stream().map(Paths::get) - .map(sourcePath -> - sourcePath.toFile().listFiles(file -> - file.getName().equals("module-info.java"))) + return sourceDirectories.stream() + .map(Paths::get) + .map( + sourcePath -> + sourcePath.toFile().listFiles(file -> file.getName().equals("module-info.java"))) .filter(matchingFiles -> matchingFiles != null && matchingFiles.length != 0) .map(matchingFiles -> matchingFiles[0]) .findAny(); diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java index 31df6372925cc..e66a475dbf8be 100644 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java +++ b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoCompilerPlugin.java @@ -14,24 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.maven.plugins; import java.util.ArrayList; import java.util.List; - import org.apache.maven.plugins.annotations.LifecyclePhase; import org.apache.maven.plugins.annotations.Mojo; import org.apache.maven.plugins.annotations.Parameter; import org.apache.maven.project.MavenProject; -/** - * A maven plugin for compiler module-info files in main code with JDK8. - */ +/** A maven plugin for compiler module-info files in main code with JDK8. */ @Mojo(name = "compile", defaultPhase = LifecyclePhase.COMPILE) public class ModuleInfoCompilerPlugin extends BaseModuleInfoCompilerPlugin { - @Parameter(defaultValue = "${project.compileSourceRoots}", property = "compileSourceRoots", + @Parameter( + defaultValue = "${project.compileSourceRoots}", + property = "compileSourceRoots", required = true) private final List compileSourceRoots = new ArrayList<>(); diff --git a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java index 4705506ac5346..f18ac9faac735 100644 --- a/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java +++ b/java/maven/module-info-compiler-maven-plugin/src/main/java/org/apache/arrow/maven/plugins/ModuleInfoTestCompilerPlugin.java @@ -14,19 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.maven.plugins; import java.util.List; - import org.apache.maven.plugins.annotations.LifecyclePhase; import org.apache.maven.plugins.annotations.Mojo; import org.apache.maven.plugins.annotations.Parameter; import org.apache.maven.project.MavenProject; -/** - * A maven plugin for compiler module-info files in unit tests with JDK8. - */ +/** A maven plugin for compiler module-info files in unit tests with JDK8. 
*/ @Mojo(name = "testCompile", defaultPhase = LifecyclePhase.TEST_COMPILE) public class ModuleInfoTestCompilerPlugin extends BaseModuleInfoCompilerPlugin { diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 8a4043016e770..e712b6fa6856d 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -236,7 +236,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - ../dev/checkstyle/checkstyle.xml + ../dev/checkstyle/checkstyle-spotless.xml ../dev/license/asf-java.license ../dev/checkstyle/suppressions.xml true @@ -298,6 +298,16 @@ + + + 1.7 + + + + ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license + package + + From 0ff17bff3ba9f4eb666e68fda4f5e04e4f009ef3 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Mon, 10 Jun 2024 05:37:01 -0700 Subject: [PATCH 27/59] GH-41929: [Java] pom.xml license formatting (#42049) ### What changes are included in this PR? Reformat the XML license to match style defined at https://www.apache.org/legal/src-headers.html Include license formatting to all Maven `pom.xml` files via spotless plugin. ### Are these changes tested? Via CI. ### Are there any user-facing changes? No * GitHub Issue: #41929 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/adapter/avro/pom.xml | 28 ++++++++++------ java/adapter/jdbc/pom.xml | 28 ++++++++++------ java/adapter/orc/pom.xml | 28 ++++++++++------ java/algorithm/pom.xml | 28 ++++++++++------ java/bom/pom.xml | 32 +++++++++++++------ java/c/pom.xml | 28 ++++++++++------ java/compression/pom.xml | 28 ++++++++++------ java/dataset/pom.xml | 28 ++++++++++------ java/dev/license/asf-xml.license | 28 ++++++++++------ java/flight/flight-core/pom.xml | 28 ++++++++++------ java/flight/flight-integration-tests/pom.xml | 28 ++++++++++------ java/flight/flight-sql-jdbc-core/pom.xml | 28 ++++++++++------ java/flight/flight-sql-jdbc-driver/pom.xml | 28 ++++++++++------ java/flight/flight-sql/pom.xml | 28 ++++++++++------ java/flight/pom.xml | 28 ++++++++++------ java/format/pom.xml | 28 ++++++++++------ java/gandiva/pom.xml | 28 ++++++++++------ .../module-info-compiler-maven-plugin/pom.xml | 28 ++++++++++------ java/maven/pom.xml | 32 +++++++++++++------ java/memory/memory-core/pom.xml | 28 ++++++++++------ java/memory/memory-netty-buffer-patch/pom.xml | 28 ++++++++++------ java/memory/memory-netty/pom.xml | 28 ++++++++++------ java/memory/memory-unsafe/pom.xml | 28 ++++++++++------ java/memory/pom.xml | 28 ++++++++++------ java/performance/pom.xml | 28 ++++++++++------ java/pom.xml | 32 +++++++++++++------ java/tools/pom.xml | 28 ++++++++++------ java/vector/pom.xml | 28 ++++++++++------ 28 files changed, 516 insertions(+), 280 deletions(-) diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 0af1641aa1041..88aea71862957 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index b444eff56277d..fc0f7e0927a89 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index 7df08e1a98b36..6c79ba8da4589 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 5984cce766d9e..57e8fdc81bbc7 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 77aed2d0f6a37..5fafbf38c7cdf 100644 --- 
a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 @@ -177,6 +185,10 @@ spotless-maven-plugin + + ${maven.multiModuleProjectDirectory}/dev/license/asf-xml.license + (<configuration|<project) + diff --git a/java/c/pom.xml b/java/c/pom.xml index afb6e0cd8b890..8a7e5d47ad3f9 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/compression/pom.xml b/java/compression/pom.xml index a3d6c0ac558dd..561877bd5cd36 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 3dea16204a4db..d016d49048013 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/dev/license/asf-xml.license b/java/dev/license/asf-xml.license index a43b97bca8f0f..b1abef4bf4b64 100644 --- a/java/dev/license/asf-xml.license +++ b/java/dev/license/asf-xml.license @@ -1,11 +1,19 @@ - \ No newline at end of file + diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index f2070d4ff7cba..437970bb93f8f 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index cd2c28ba8959f..7b9cf2dd8aaba 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 50d7b2617a5a9..7fe4e7f18ca28 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 4456270e7b347..23b2c9c7cbef4 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 14fde34c3b4f3..2719f82e37578 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/flight/pom.xml b/java/flight/pom.xml index ad2e5f04f4faf..38495fa356e51 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/format/pom.xml b/java/format/pom.xml index 4483047e20960..44ff213fee703 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 1c17023e5c8ad..2a1e83f3e21fc 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 9f0cd7b1039dd..b00c03a014980 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/maven/pom.xml b/java/maven/pom.xml index e712b6fa6856d..757dab2372608 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 + 4.0.0 diff --git a/java/memory/memory-netty-buffer-patch/pom.xml b/java/memory/memory-netty-buffer-patch/pom.xml index beae12c22d2bf..1f645472a3398 100644 --- a/java/memory/memory-netty-buffer-patch/pom.xml +++ b/java/memory/memory-netty-buffer-patch/pom.xml @@ -1,14 +1,22 @@ - + 
4.0.0 diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index d815276b09e50..f66899151128a 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index f1aa8fde1faa1..c1a42e3ff0081 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/memory/pom.xml b/java/memory/pom.xml index 867f4c2400d0a..e0dc467fe245e 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 07ca8d1e61d48..83b0a88da063b 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/pom.xml b/java/pom.xml index 085546573596a..1010b43c992bd 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 @@ -804,6 +812,10 @@ spotless-maven-plugin + + ${maven.multiModuleProjectDirectory}/dev/license/asf-xml.license + (<configuration|<project) + diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 53dcd51771054..7633b06274f57 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 6ff869ee21aff..76cb11be89f5a 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -1,14 +1,22 @@ - + 4.0.0 From 53c1fc9f19709aa63d87cd5424950f675ca93dc9 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 10 Jun 2024 11:46:30 -0300 Subject: [PATCH 28/59] GH-41970: [C++] Misc changes making code around list-like types and list-view types behave the same way (#41971) ### Rationale for this change These changes facilitate the refactoring of compute kernels and tests to support not only the list-like types but also list-views. ### What changes are included in this PR? - Inclusion of `FixedSizeListBuilder::TypeClass` to make it in line with `VarLengthListLikeBuilder` classes - Small documentation updates related to list-view types - Change `list`/`large_list`/`fixed_size_list` factories to take `shared_ptr` by value instead of `const &` - Change `ListType`/`LargeListType`/`FixedSizeList` constructors take `shared_ptr` by value instead of `const &` ### Are these changes tested? Yes. By existing tests. ### Are there any user-facing changes? - The `list`/`large_list`/`fixed_size_list` type factories now take the `shared_ptr` parameter value instead of `const &`. 
- The `ListType`/`LargeListType`/`FixedSizeList` constructors now take the `shared_ptr` parameter by value instead of `const &` * GitHub Issue: #41970 Lead-authored-by: Felipe Oliveira Carvalho Co-authored-by: Antoine Pitrou Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/array/builder_nested.h | 2 ++ .../arrow/compute/kernels/scalar_cast_test.cc | 2 +- cpp/src/arrow/compute/kernels/vector_hash.cc | 13 +++++----- cpp/src/arrow/type.cc | 24 ++++++++--------- cpp/src/arrow/type.h | 26 +++++++++---------- cpp/src/arrow/type_fwd.h | 12 ++++----- docs/source/cpp/compute.rst | 25 ++++++++++++------ docs/source/format/Columnar.rst | 1 + 8 files changed, 58 insertions(+), 47 deletions(-) diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 6089cf04d421f..1851ef9122274 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -642,6 +642,8 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder { /// \brief Builder class for fixed-length list array value types class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder { public: + using TypeClass = FixedSizeListType; + /// Use this constructor to define the built array's type explicitly. If value_builder /// has indeterminate type, this builder will also. FixedSizeListBuilder(MemoryPool* pool, diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index a6d7f6097b59b..f60d8f2e19e98 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2281,7 +2281,7 @@ TEST(Cast, ListToPrimitive) { Cast(*ArrayFromJSON(list(binary()), R"([["1", "2"], ["3", "4"]])"), utf8())); } -using make_list_t = std::shared_ptr(const std::shared_ptr&); +using make_list_t = std::shared_ptr(std::shared_ptr); static const auto list_factories = std::vector{&list, &large_list}; diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 44bb7372c3f68..5067298858132 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -698,13 +698,12 @@ void AddHashKernels(VectorFunction* func, VectorKernel base, OutputType out_ty) DCHECK_OK(func->AddKernel(base)); } - // Example parametric types that we want to match only on Type::type - auto parametric_types = {time32(TimeUnit::SECOND), time64(TimeUnit::MICRO), - timestamp(TimeUnit::SECOND), duration(TimeUnit::SECOND), - fixed_size_binary(0)}; - for (const auto& ty : parametric_types) { - base.init = GetHashInit(ty->id()); - base.signature = KernelSignature::Make({ty->id()}, out_ty); + // Parametric types that we want matching to be dependent only on type id + auto parametric_types = {Type::TIME32, Type::TIME64, Type::TIMESTAMP, Type::DURATION, + Type::FIXED_SIZE_BINARY}; + for (const auto& type_id : parametric_types) { + base.init = GetHashInit(type_id); + base.signature = KernelSignature::Make({type_id}, out_ty); DCHECK_OK(func->AddKernel(base)); } diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index 8ce03a91c70ae..f0cd82e803121 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -3149,20 +3149,20 @@ std::shared_ptr time64(TimeUnit::type unit) { return std::make_shared(unit); } -std::shared_ptr list(const std::shared_ptr& value_type) { - return std::make_shared(value_type); +std::shared_ptr list(std::shared_ptr value_type) { + return std::make_shared(std::move(value_type)); } -std::shared_ptr 
list(const std::shared_ptr& value_field) { - return std::make_shared(value_field); +std::shared_ptr list(std::shared_ptr value_field) { + return std::make_shared(std::move(value_field)); } -std::shared_ptr large_list(const std::shared_ptr& value_type) { - return std::make_shared(value_type); +std::shared_ptr large_list(std::shared_ptr value_type) { + return std::make_shared(std::move(value_type)); } -std::shared_ptr large_list(const std::shared_ptr& value_field) { - return std::make_shared(value_field); +std::shared_ptr large_list(std::shared_ptr value_field) { + return std::make_shared(std::move(value_field)); } std::shared_ptr map(std::shared_ptr key_type, @@ -3183,14 +3183,14 @@ std::shared_ptr map(std::shared_ptr key_field, keys_sorted); } -std::shared_ptr fixed_size_list(const std::shared_ptr& value_type, +std::shared_ptr fixed_size_list(std::shared_ptr value_type, int32_t list_size) { - return std::make_shared(value_type, list_size); + return std::make_shared(std::move(value_type), list_size); } -std::shared_ptr fixed_size_list(const std::shared_ptr& value_field, +std::shared_ptr fixed_size_list(std::shared_ptr value_field, int32_t list_size) { - return std::make_shared(value_field, list_size); + return std::make_shared(std::move(value_field), list_size); } std::shared_ptr list_view(std::shared_ptr value_type) { diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index bb05e6efdb987..e087c8ca1c387 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -1122,11 +1122,11 @@ class ARROW_EXPORT ListType : public BaseListType { static constexpr const char* type_name() { return "list"; } // List can contain any other logical value type - explicit ListType(const std::shared_ptr& value_type) - : ListType(std::make_shared("item", value_type)) {} + explicit ListType(std::shared_ptr value_type) + : ListType(std::make_shared("item", std::move(value_type))) {} - explicit ListType(const std::shared_ptr& value_field) : BaseListType(type_id) { - children_ = {value_field}; + explicit ListType(std::shared_ptr value_field) : BaseListType(type_id) { + children_ = {std::move(value_field)}; } DataTypeLayout layout() const override { @@ -1153,12 +1153,11 @@ class ARROW_EXPORT LargeListType : public BaseListType { static constexpr const char* type_name() { return "large_list"; } // List can contain any other logical value type - explicit LargeListType(const std::shared_ptr& value_type) - : LargeListType(std::make_shared("item", value_type)) {} + explicit LargeListType(std::shared_ptr value_type) + : LargeListType(std::make_shared("item", std::move(value_type))) {} - explicit LargeListType(const std::shared_ptr& value_field) - : BaseListType(type_id) { - children_ = {value_field}; + explicit LargeListType(std::shared_ptr value_field) : BaseListType(type_id) { + children_ = {std::move(value_field)}; } DataTypeLayout layout() const override { @@ -1296,12 +1295,13 @@ class ARROW_EXPORT FixedSizeListType : public BaseListType { static constexpr const char* type_name() { return "fixed_size_list"; } // List can contain any other logical value type - FixedSizeListType(const std::shared_ptr& value_type, int32_t list_size) - : FixedSizeListType(std::make_shared("item", value_type), list_size) {} + FixedSizeListType(std::shared_ptr value_type, int32_t list_size) + : FixedSizeListType(std::make_shared("item", std::move(value_type)), + list_size) {} - FixedSizeListType(const std::shared_ptr& value_field, int32_t list_size) + FixedSizeListType(std::shared_ptr value_field, int32_t list_size) : 
BaseListType(type_id), list_size_(list_size) { - children_ = {value_field}; + children_ = {std::move(value_field)}; } DataTypeLayout layout() const override { diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h index 63eec10bf723b..08777d247edbf 100644 --- a/cpp/src/arrow/type_fwd.h +++ b/cpp/src/arrow/type_fwd.h @@ -525,19 +525,19 @@ std::shared_ptr decimal256(int32_t precision, int32_t scale); /// \brief Create a ListType instance from its child Field type ARROW_EXPORT -std::shared_ptr list(const std::shared_ptr& value_type); +std::shared_ptr list(std::shared_ptr value_type); /// \brief Create a ListType instance from its child DataType ARROW_EXPORT -std::shared_ptr list(const std::shared_ptr& value_type); +std::shared_ptr list(std::shared_ptr value_type); /// \brief Create a LargeListType instance from its child Field type ARROW_EXPORT -std::shared_ptr large_list(const std::shared_ptr& value_type); +std::shared_ptr large_list(std::shared_ptr value_type); /// \brief Create a LargeListType instance from its child DataType ARROW_EXPORT -std::shared_ptr large_list(const std::shared_ptr& value_type); +std::shared_ptr large_list(std::shared_ptr value_type); /// \brief Create a ListViewType instance ARROW_EXPORT std::shared_ptr list_view(std::shared_ptr value_type); @@ -568,12 +568,12 @@ std::shared_ptr map(std::shared_ptr key_type, /// \brief Create a FixedSizeListType instance from its child Field type ARROW_EXPORT -std::shared_ptr fixed_size_list(const std::shared_ptr& value_type, +std::shared_ptr fixed_size_list(std::shared_ptr value_type, int32_t list_size); /// \brief Create a FixedSizeListType instance from its child DataType ARROW_EXPORT -std::shared_ptr fixed_size_list(const std::shared_ptr& value_type, +std::shared_ptr fixed_size_list(std::shared_ptr value_type, int32_t list_size); /// \brief Return a Duration instance (naming use _type to avoid namespace conflict with /// built in time classes). diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 701c7d573ac0e..2c403e9880f5c 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1298,8 +1298,8 @@ Structural transforms +---------------------+------------+-------------+------------------+------------------------------+--------+ * \(1) Each output element is the length of the corresponding input element - (null if input is null). Output type is Int32 for List and FixedSizeList, - Int64 for LargeList. + (null if input is null). Output type is Int32 for List, ListView, and + FixedSizeList, Int64 for LargeList and LargeListView. * \(2) The output struct's field types are the types of its arguments. The field names are specified using an instance of :struct:`MakeStructOptions`. @@ -1413,13 +1413,15 @@ null input value is converted into a null output value. 
+-----------------------------+------------------------------------+---------+ | Struct | Struct | \(2) | +-----------------------------+------------------------------------+---------+ -| List-like | List-like | \(3) | +| List-like | List-like or (Large)ListView | \(3) | +-----------------------------+------------------------------------+---------+ -| Map | Map or List of two-field struct | \(4) | +| (Large)ListView | List-like or (Large)ListView | \(4) | ++-----------------------------+------------------------------------+---------+ +| Map | Map or List of two-field struct | \(5) | +-----------------------------+------------------------------------+---------+ | Null | Any | | +-----------------------------+------------------------------------+---------+ -| Any | Extension | \(5) | +| Any | Extension | \(6) | +-----------------------------+------------------------------------+---------+ * \(1) The dictionary indices are unchanged, the dictionary values are @@ -1433,14 +1435,21 @@ null input value is converted into a null output value. * \(3) The list offsets are unchanged, the list values are cast from the input value type to the output value type (if a conversion is - available). + available). If the output type is (Large)ListView, then sizes are + derived from the offsets. + +* \(4) If output type is list-like, offsets (consequently, the values array) + might have to be rebuilt to be sorted and spaced adequately. If output type is + a list-view type, the offsets and sizes are unchanged. In any case, the list + values are cast from the input value type to the output value type (if a + conversion is available). -* \(4) Offsets are unchanged, the keys and values are cast from respective input +* \(5) Offsets are unchanged, the keys and values are cast from respective input to output types (if a conversion is available). If output type is a list of struct, the key field is output as the first field and the value field the second field, regardless of field names chosen. -* \(5) Any input type that can be cast to the resulting extension's storage type. +* \(6) Any input type that can be cast to the resulting extension's storage type. This excludes extension types, unless being cast to the same extension type. Temporal component extraction diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index c4dc772808af4..7ae0c2b4bdbd8 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -1172,6 +1172,7 @@ of memory buffers for each layout. "Variable Binary",validity,offsets,data, "Variable Binary View",validity,views,,data "List",validity,offsets,, + "List View",validity,offsets,sizes, "Fixed-size List",validity,,, "Struct",validity,,, "Sparse Union",type ids,,, From 036fca0ae5c8956c83b69478d413c24f32398f8c Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 10 Jun 2024 16:47:56 +0200 Subject: [PATCH 29/59] GH-41862: [C++][S3] Fix potential deadlock when closing output stream (#41876) ### Rationale for this change When the Future returned by `OutputStream::CloseAsync` finishes, it can invoke a user-supplied callback. That callback may well destroy the stream as a side effect. If the stream is a S3 output stream, this might lead to a deadlock involving the mutex in the output stream's `UploadState` structure, since the callback is called with that mutex locked. ### What changes are included in this PR? Unlock the `UploadState` mutex before marking the Future finished, to avoid deadlocking. ### Are these changes tested? No. 
Unfortunately, I wasn't able to write a test that would trigger the original condition. Additional preconditions seem to be required to reproduce the deadlock. For example, it might require a mutex implementation that hangs if destroyed while locked. ### Are there any user-facing changes? No. * GitHub Issue: #41862 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/filesystem/s3fs.cc | 18 +++++++++++++--- cpp/src/arrow/filesystem/s3fs_test.cc | 31 +++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index c456be2d0d3cd..99cee19ed1e78 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -1606,6 +1606,10 @@ class ObjectOutputStream final : public io::OutputStream { io::internal::CloseFromDestructor(this); } + std::shared_ptr Self() { + return std::dynamic_pointer_cast(shared_from_this()); + } + Status Init() { ARROW_ASSIGN_OR_RAISE(auto client_lock, holder_->Lock()); @@ -1724,9 +1728,9 @@ class ObjectOutputStream final : public io::OutputStream { RETURN_NOT_OK(EnsureReadyToFlushFromClose()); - auto self = std::dynamic_pointer_cast(shared_from_this()); // Wait for in-progress uploads to finish (if async writes are enabled) - return FlushAsync().Then([self]() { return self->FinishPartUploadAfterFlush(); }); + return FlushAsync().Then( + [self = Self()]() { return self->FinishPartUploadAfterFlush(); }); } bool closed() const override { return closed_; } @@ -1892,7 +1896,15 @@ class ObjectOutputStream final : public io::OutputStream { } // Notify completion if (--state->parts_in_progress == 0) { - state->pending_parts_completed.MarkFinished(state->status); + // GH-41862: avoid potential deadlock if the Future's callback is called + // with the mutex taken. + auto fut = state->pending_parts_completed; + lock.unlock(); + // State could be mutated concurrently if another thread writes to the + // stream, but in this case the Flush() call is only advisory anyway. + // Besides, it's not generally sound to write to an OutputStream from + // several threads at once. + fut.MarkFinished(state->status); } } diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index 7bfa120eda678..5a160a78ceea0 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -614,9 +614,26 @@ class TestS3FS : public S3TestMixin { // after CloseAsync or synchronously after stream.reset() since we're just // checking that `closeAsyncFut` keeps the stream alive until completion // rather than segfaulting on a dangling stream - auto closeAsyncFut = stream->CloseAsync(); + auto close_fut = stream->CloseAsync(); stream.reset(); - ASSERT_OK(closeAsyncFut.MoveResult()); + ASSERT_OK(close_fut.MoveResult()); + AssertObjectContents(client_.get(), "bucket", "somefile", "new data"); + } + + void TestOpenOutputStreamCloseAsyncFutureDeadlock() { + // This is inspired by GH-41862, though it fails to reproduce the actual + // issue reported there (actual preconditions might be required). + // Here we lose our reference to an output stream from its CloseAsync callback. + // This should not deadlock. 
+ std::shared_ptr stream; + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenOutputStream("bucket/somefile")); + ASSERT_OK(stream->Write("new data")); + auto close_fut = stream->CloseAsync(); + close_fut.AddCallback([stream = std::move(stream)](Status st) mutable { + // Trigger stream destruction from callback + stream.reset(); + }); + ASSERT_OK(close_fut.MoveResult()); AssertObjectContents(client_.get(), "bucket", "somefile", "new data"); } @@ -1254,6 +1271,16 @@ TEST_F(TestS3FS, OpenOutputStreamAsyncDestructorSyncWrite) { TestOpenOutputStreamCloseAsyncDestructor(); } +TEST_F(TestS3FS, OpenOutputStreamCloseAsyncFutureDeadlockBackgroundWrites) { + TestOpenOutputStreamCloseAsyncFutureDeadlock(); +} + +TEST_F(TestS3FS, OpenOutputStreamCloseAsyncFutureDeadlockSyncWrite) { + options_.background_writes = false; + MakeFileSystem(); + TestOpenOutputStreamCloseAsyncFutureDeadlock(); +} + TEST_F(TestS3FS, OpenOutputStreamMetadata) { std::shared_ptr stream; From 4f890977650a36abaaec74ad2eaac31c04b5bf76 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 10 Jun 2024 12:32:46 -0300 Subject: [PATCH 30/59] GH-41301: [C++] Extract the kernel loops used for PrimitiveTakeExec and generalize to any fixed-width type (#41373) ### Rationale for this change I want to instantiate this primitive operation in other scenarios (e.g. the optimized version of `Take` that handles chunked arrays) and extend the sub-classes of `GatherCRTP` with different member functions that re-use the `WriteValue` function generically (any fixed-width type and even bit-wide booleans). When taking these improvements to `Filter` I will also re-use the "gather" concept and parameterize it by bitmaps/boolean-arrays instead of selection vectors (`indices`) like `take` does. So gather is not a "renaming of take" but rather a generalization of `take` and `filter` do in Arrow with different representations of what should be gathered from the `values` array. ### What changes are included in this PR? - Introduce the Gather class helper to delegate fixed-width memory gathering: both static and dynamically sized (size known at compile time or size known at runtime) - Specialized `Take` implementation for values/indices without nulls - Fold the Boolean, Primitives, and Fixed-Width Binary implementation of `Take` into a single one - Skip validity bitmap allocation when inputs (values and indices) have no nulls ### Are these changes tested? - Existing tests - New test assertions that check that `Take` guarantees null values are zeroed out * GitHub Issue: #41301 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- .../arrow/compute/kernels/gather_internal.h | 306 ++++++++++++++ .../kernels/vector_selection_internal.cc | 68 +--- .../kernels/vector_selection_internal.h | 3 +- .../kernels/vector_selection_take_internal.cc | 375 ++++++------------ .../compute/kernels/vector_selection_test.cc | 17 +- 5 files changed, 435 insertions(+), 334 deletions(-) create mode 100644 cpp/src/arrow/compute/kernels/gather_internal.h diff --git a/cpp/src/arrow/compute/kernels/gather_internal.h b/cpp/src/arrow/compute/kernels/gather_internal.h new file mode 100644 index 0000000000000..4c161533a7277 --- /dev/null +++ b/cpp/src/arrow/compute/kernels/gather_internal.h @@ -0,0 +1,306 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/array/data.h" +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/bit_run_reader.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" +#include "arrow/util/macros.h" + +// Implementation helpers for kernels that need to load/gather fixed-width +// data from multiple, arbitrary indices. +// +// https://en.wikipedia.org/wiki/Gather/scatter_(vector_addressing) + +namespace arrow::internal { + +// CRTP [1] base class for Gather that provides a gathering loop in terms of +// Write*() methods that must be implemented by the derived class. +// +// [1] https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern +template +class GatherBaseCRTP { + public: + // Output offset is not supported by Gather and idx is supposed to have offset + // pre-applied. idx_validity parameters on functions can use the offset they + // carry to read the validity bitmap as bitmaps can't have pre-applied offsets + // (they might not align to byte boundaries). + + GatherBaseCRTP() = default; + ARROW_DISALLOW_COPY_AND_ASSIGN(GatherBaseCRTP); + ARROW_DEFAULT_MOVE_AND_ASSIGN(GatherBaseCRTP); + + protected: + ARROW_FORCE_INLINE int64_t ExecuteNoNulls(int64_t idx_length) { + auto* self = static_cast(this); + for (int64_t position = 0; position < idx_length; position++) { + self->WriteValue(position); + } + return idx_length; + } + + // See derived Gather classes below for the meaning of the parameters, pre and + // post-conditions. + // + // src_validity is not necessarily the source of the values that are being + // gathered (e.g. the source could be a nested fixed-size list array and the + // values being gathered are from the innermost buffer), so the ArraySpan is + // used solely to check for nulls in the source values and nothing else. + // + // idx_length is the number of elements in idx and consequently the number of + // bits that might be written to out_is_valid. Member `Write*()` functions will be + // called with positions from 0 to idx_length - 1. + // + // If `kOutputIsZeroInitialized` is true, then `WriteZero()` or `WriteZeroSegment()` + // doesn't have to be called for resulting null positions. A position is + // considered null if either the index or the source value is null at that + // position. 
+ template + ARROW_FORCE_INLINE int64_t ExecuteWithNulls(const ArraySpan& src_validity, + int64_t idx_length, const IndexCType* idx, + const ArraySpan& idx_validity, + uint8_t* out_is_valid) { + auto* self = static_cast(this); + OptionalBitBlockCounter indices_bit_counter(idx_validity.buffers[0].data, + idx_validity.offset, idx_length); + int64_t position = 0; + int64_t valid_count = 0; + while (position < idx_length) { + BitBlockCount block = indices_bit_counter.NextBlock(); + if (!src_validity.MayHaveNulls()) { + // Source values are never null, so things are easier + valid_count += block.popcount; + if (block.popcount == block.length) { + // Fastest path: neither source values nor index nulls + bit_util::SetBitsTo(out_is_valid, position, block.length, true); + for (int64_t i = 0; i < block.length; ++i) { + self->WriteValue(position); + ++position; + } + } else if (block.popcount > 0) { + // Slow path: some indices but not all are null + for (int64_t i = 0; i < block.length; ++i) { + ARROW_COMPILER_ASSUME(idx_validity.buffers[0].data != nullptr); + if (idx_validity.IsValid(position)) { + // index is not null + bit_util::SetBit(out_is_valid, position); + self->WriteValue(position); + } else if constexpr (!kOutputIsZeroInitialized) { + self->WriteZero(position); + } + ++position; + } + } else { + self->WriteZeroSegment(position, block.length); + position += block.length; + } + } else { + // Source values may be null, so we must do random access into src_validity + if (block.popcount == block.length) { + // Faster path: indices are not null but source values may be + for (int64_t i = 0; i < block.length; ++i) { + ARROW_COMPILER_ASSUME(src_validity.buffers[0].data != nullptr); + if (src_validity.IsValid(idx[position])) { + // value is not null + self->WriteValue(position); + bit_util::SetBit(out_is_valid, position); + ++valid_count; + } else if constexpr (!kOutputIsZeroInitialized) { + self->WriteZero(position); + } + ++position; + } + } else if (block.popcount > 0) { + // Slow path: some but not all indices are null. Since we are doing + // random access in general we have to check the value nullness one by + // one. + for (int64_t i = 0; i < block.length; ++i) { + ARROW_COMPILER_ASSUME(src_validity.buffers[0].data != nullptr); + ARROW_COMPILER_ASSUME(idx_validity.buffers[0].data != nullptr); + if (idx_validity.IsValid(position) && src_validity.IsValid(idx[position])) { + // index is not null && value is not null + self->WriteValue(position); + bit_util::SetBit(out_is_valid, position); + ++valid_count; + } else if constexpr (!kOutputIsZeroInitialized) { + self->WriteZero(position); + } + ++position; + } + } else { + if constexpr (!kOutputIsZeroInitialized) { + self->WriteZeroSegment(position, block.length); + } + position += block.length; + } + } + } + return valid_count; + } +}; + +// A gather primitive for primitive fixed-width types with a integral byte width. If +// `kWithFactor` is true, the actual width is a runtime multiple of `kValueWidthInbits` +// (this can be useful for fixed-size list inputs and other input types with unusual byte +// widths that don't deserve value specialization). 
+template <int kValueWidthInBits, typename IndexCType, bool kWithFactor>
+class Gather : public GatherBaseCRTP<Gather<kValueWidthInBits, IndexCType, kWithFactor>> {
+ public:
+  static_assert(kValueWidthInBits >= 0 && kValueWidthInBits % 8 == 0);
+  static constexpr int kValueWidth = kValueWidthInBits / 8;
+
+ private:
+  const int64_t src_length_;  // number of elements of kValueWidth bytes in src_
+  const uint8_t* src_;
+  const int64_t idx_length_;  // number of IndexCType elements in idx_
+  const IndexCType* idx_;
+  uint8_t* out_;
+  int64_t factor_;
+
+ public:
+  void WriteValue(int64_t position) {
+    if constexpr (kWithFactor) {
+      const int64_t scaled_factor = kValueWidth * factor_;
+      memcpy(out_ + position * scaled_factor, src_ + idx_[position] * scaled_factor,
+             scaled_factor);
+    } else {
+      memcpy(out_ + position * kValueWidth, src_ + idx_[position] * kValueWidth,
+             kValueWidth);
+    }
+  }
+
+  void WriteZero(int64_t position) {
+    if constexpr (kWithFactor) {
+      const int64_t scaled_factor = kValueWidth * factor_;
+      memset(out_ + position * scaled_factor, 0, scaled_factor);
+    } else {
+      memset(out_ + position * kValueWidth, 0, kValueWidth);
+    }
+  }
+
+  void WriteZeroSegment(int64_t position, int64_t length) {
+    if constexpr (kWithFactor) {
+      const int64_t scaled_factor = kValueWidth * factor_;
+      memset(out_ + position * scaled_factor, 0, length * scaled_factor);
+    } else {
+      memset(out_ + position * kValueWidth, 0, length * kValueWidth);
+    }
+  }
+
+ public:
+  Gather(int64_t src_length, const uint8_t* src, int64_t zero_src_offset,
+         int64_t idx_length, const IndexCType* idx, uint8_t* out, int64_t factor)
+      : src_length_(src_length),
+        src_(src),
+        idx_length_(idx_length),
+        idx_(idx),
+        out_(out),
+        factor_(factor) {
+    assert(zero_src_offset == 0);
+    assert(src && idx && out);
+    assert((kWithFactor || factor == 1) &&
+           "When kWithFactor is false, the factor is assumed to be 1 at compile time");
+  }
+
+  ARROW_FORCE_INLINE int64_t Execute() { return this->ExecuteNoNulls(idx_length_); }
+
+  /// \pre If kOutputIsZeroInitialized, then this->out_ has to be zero initialized.
+  /// \pre Bits in out_is_valid have to always be zero initialized.
+  /// \post The bits for the valid elements (and only those) are set in out_is_valid.
+  /// \post If !kOutputIsZeroInitialized, then positions in this->out_ containing null
+  ///       elements have 0s written to them. This might be less efficient than
+  ///       zero-initializing first and calling this->Execute() afterwards.
+  /// \return The number of valid elements in out.
+  template <bool kOutputIsZeroInitialized>
+  ARROW_FORCE_INLINE int64_t Execute(const ArraySpan& src_validity,
+                                     const ArraySpan& idx_validity,
+                                     uint8_t* out_is_valid) {
+    assert(src_length_ == src_validity.length);
+    assert(idx_length_ == idx_validity.length);
+    assert(out_is_valid);
+    return this->template ExecuteWithNulls<kOutputIsZeroInitialized>(
+        src_validity, idx_length_, idx_, idx_validity, out_is_valid);
+  }
+};
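As an aside on the kWithFactor contract above (this helper is not part of the patch, and the name PickGatherParams is made up purely for illustration): a caller with an arbitrary fixed byte width would presumably keep factor == 1 for the widths that have dedicated instantiations and fall back to the 1-byte specialization with a runtime factor otherwise, so that (kValueWidthInBits / 8) * factor always equals the real byte width.

#include <cstdint>
#include <utility>

// Illustrative sketch only: map a byte width onto (kValueWidthInBits, factor)
// such that (kValueWidthInBits / 8) * factor == byte_width.
std::pair<int, int64_t> PickGatherParams(int64_t byte_width) {
  switch (byte_width) {
    case 1:
    case 2:
    case 4:
    case 8:
    case 16:
    case 32:
      // Widths with a dedicated Gather<byte_width * 8, ...> instantiation.
      return {static_cast<int>(byte_width * 8), 1};
    default:
      // Anything else, e.g. a 12-byte fixed_size_list<int32, 3>, uses the
      // byte-wise specialization with kWithFactor=true and factor == 12.
      return {8, byte_width};
  }
}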
+
+// A gather primitive for boolean inputs. Unlike its counterpart above, this
+// does not support passing a non-trivial factor parameter.
+template <typename IndexCType>
+class Gather</*kValueWidthInBits=*/1, IndexCType, /*kWithFactor=*/false>
+    : public GatherBaseCRTP<Gather<1, IndexCType, false>> {
+ private:
+  const int64_t src_length_;  // number of elements (bits) in src_ after offset
+  const uint8_t* src_;        // the boolean array data buffer in bits
+  const int64_t src_offset_;  // offset in bits
+  const int64_t idx_length_;  // number of IndexCType elements in idx_
+  const IndexCType* idx_;
+  uint8_t* out_;  // output boolean array data buffer in bits
+
+ public:
+  Gather(int64_t src_length, const uint8_t* src, int64_t src_offset, int64_t idx_length,
+         const IndexCType* idx, uint8_t* out, int64_t factor)
+      : src_length_(src_length),
+        src_(src),
+        src_offset_(src_offset),
+        idx_length_(idx_length),
+        idx_(idx),
+        out_(out) {
+    assert(src && idx && out);
+    assert(factor == 1 &&
+           "factor != 1 is not supported when Gather is used to gather bits/booleans");
+  }
+
+  void WriteValue(int64_t position) {
+    bit_util::SetBitTo(out_, position,
+                       bit_util::GetBit(src_, src_offset_ + idx_[position]));
+  }
+
+  void WriteZero(int64_t position) { bit_util::ClearBit(out_, position); }
+
+  void WriteZeroSegment(int64_t position, int64_t block_length) {
+    bit_util::SetBitsTo(out_, position, block_length, false);
+  }
+
+  ARROW_FORCE_INLINE int64_t Execute() { return this->ExecuteNoNulls(idx_length_); }
+
+  /// \pre If kOutputIsZeroInitialized, then this->out_ has to be zero initialized.
+  /// \pre Bits in out_is_valid have to always be zero initialized.
+  /// \post The bits for the valid elements (and only those) are set in out_is_valid.
+  /// \post If !kOutputIsZeroInitialized, then positions in this->out_ containing null
+  ///       elements have 0s written to them. This might be less efficient than
+  ///       zero-initializing first and calling this->Execute() afterwards.
+  /// \return The number of valid elements in out.
+  template <bool kOutputIsZeroInitialized>
+  ARROW_FORCE_INLINE int64_t Execute(const ArraySpan& src_validity,
+                                     const ArraySpan& idx_validity,
+                                     uint8_t* out_is_valid) {
+    assert(src_length_ == src_validity.length);
+    assert(idx_length_ == idx_validity.length);
+    assert(out_is_valid);
+    return this->template ExecuteWithNulls<kOutputIsZeroInitialized>(
+        src_validity, idx_length_, idx_, idx_validity, out_is_valid);
+  }
+};
+
+}  // namespace arrow::internal
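A minimal usage sketch of the primitive above (not part of the patch): gathering int32 values with uint32 indices and no validity bitmaps, so the nulls-free Execute() overload is enough. It assumes the internal gather_internal.h header is reachable, e.g. from within the compute/kernels source tree.

#include <cstdint>
#include <vector>

#include "arrow/compute/kernels/gather_internal.h"

int64_t GatherInt32Example() {
  // Source values: 32-bit fixed-width data.
  const std::vector<int32_t> src = {10, 20, 30, 40, 50};
  // Indices are assumed to be already boundschecked, with no offset.
  const std::vector<uint32_t> idx = {4, 0, 2};
  std::vector<int32_t> out(idx.size());

  arrow::internal::Gather</*kValueWidthInBits=*/32, uint32_t, /*kWithFactor=*/false>
      gather{/*src_length=*/static_cast<int64_t>(src.size()),
             reinterpret_cast<const uint8_t*>(src.data()),
             /*zero_src_offset=*/0,
             /*idx_length=*/static_cast<int64_t>(idx.size()),
             idx.data(),
             reinterpret_cast<uint8_t*>(out.data()),
             /*factor=*/1};
  // out becomes {50, 10, 30}; the return value is the number of elements
  // written, which is simply idx.size() since there are no nulls involved.
  return gather.Execute();
}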
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc
index 2ba660e49ac38..1009bea5e7b1b 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc
@@ -547,39 +547,6 @@ struct VarBinarySelectionImpl : public Selection<VarBinarySelectionImpl<Type>, T
   }
 };
 
-struct FSBSelectionImpl : public Selection<FSBSelectionImpl, FixedSizeBinaryType> {
-  using Base = Selection<FSBSelectionImpl, FixedSizeBinaryType>;
-  LIFT_BASE_MEMBERS();
-
-  TypedBufferBuilder<uint8_t> data_builder;
-
-  FSBSelectionImpl(KernelContext* ctx, const ExecSpan& batch, int64_t output_length,
-                   ExecResult* out)
-      : Base(ctx, batch, output_length, out), data_builder(ctx->memory_pool()) {}
-
-  template <typename Adapter>
-  Status GenerateOutput() {
-    FixedSizeBinaryArray typed_values(this->values.ToArrayData());
-    int32_t value_size = typed_values.byte_width();
-
-    RETURN_NOT_OK(data_builder.Reserve(value_size * output_length));
-    Adapter adapter(this);
-    return adapter.Generate(
-        [&](int64_t index) {
-          auto val = typed_values.GetView(index);
-          data_builder.UnsafeAppend(reinterpret_cast<const uint8_t*>(val.data()),
-                                    value_size);
-          return Status::OK();
-        },
-        [&]() {
-          data_builder.UnsafeAppend(value_size, static_cast<uint8_t>(0x00));
-          return Status::OK();
-        });
-  }
-
-  Status Finish() override { return data_builder.Finish(&out->buffers[1]); }
-};
-
 template <typename Type>
 struct ListSelectionImpl : public Selection<ListSelectionImpl<Type>, Type> {
   using offset_type = typename Type::offset_type;
@@ -939,23 +906,6 @@ Status LargeVarBinaryTakeExec(KernelContext* ctx, const ExecSpan& batch,
   return TakeExec<VarBinarySelectionImpl<LargeBinaryType>>(ctx, batch, out);
 }
 
-Status FSBTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
-  const ArraySpan& values = batch[0].array;
-  const auto byte_width = values.type->byte_width();
-  // Use primitive Take implementation (presumably faster) for some byte widths
-  switch (byte_width) {
-    case 1:
-    case 2:
-    case 4:
-    case 8:
-    case 16:
-    case 32:
-      return PrimitiveTakeExec(ctx, batch, out);
-    default:
-      return TakeExec<FSBSelectionImpl>(ctx, batch, out);
-  }
-}
-
 Status ListTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
   return TakeExec<ListSelectionImpl<ListType>>(ctx, batch, out);
 }
@@ -968,26 +918,12 @@ Status FSLTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
   const ArraySpan& values = batch[0].array;
 
   // If a FixedSizeList wraps a fixed-width type we can, in some cases, use
-  // PrimitiveTakeExec for a fixed-size list array.
+  // FixedWidthTakeExec for a fixed-size list array.
   if (util::IsFixedWidthLike(values,
                              /*force_null_count=*/true,
                              /*exclude_bool_and_dictionary=*/true)) {
-    const auto byte_width = util::FixedWidthInBytes(*values.type);
-    // Additionally, PrimitiveTakeExec is only implemented for specific byte widths.
-    // TODO(GH-41301): Extend PrimitiveTakeExec for any fixed-width type.
-    switch (byte_width) {
-      case 1:
-      case 2:
-      case 4:
-      case 8:
-      case 16:
-      case 32:
-        return PrimitiveTakeExec(ctx, batch, out);
-      default:
-        break;  // fallback to TakeExec
-    }
+    return FixedWidthTakeExec(ctx, batch, out);
   }
-
   return TakeExec<FSLSelectionImpl>(ctx, batch, out);
 }
 
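For context, and only as a hedged illustration rather than anything added by this patch: the user-visible entry point that previously ended up in FSBTakeExec is the public Take compute function, and with FSBTakeExec gone a fixed_size_binary column of any byte width is presumably routed through the new FixedWidthTakeExec.

#include <memory>

#include "arrow/api.h"
#include "arrow/compute/api_vector.h"

// Illustrative only: take on a fixed_size_binary array through the public API.
arrow::Result<arrow::Datum> TakeFixedSizeBinary(
    const std::shared_ptr<arrow::Array>& fsb_values,
    const std::shared_ptr<arrow::Array>& indices) {
  // The byte width no longer selects between two different kernels; the
  // fixed-width implementation now covers all widths.
  return arrow::compute::Take(fsb_values, indices);
}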
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.h b/cpp/src/arrow/compute/kernels/vector_selection_internal.h
index a169f4b38a2b8..c5075d6dfe87b 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_internal.h
+++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.h
@@ -73,8 +73,7 @@ Status MapFilterExec(KernelContext*, const ExecSpan&, ExecResult*);
 
 Status VarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
 Status LargeVarBinaryTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
-Status PrimitiveTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
-Status FSBTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
+Status FixedWidthTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
 Status ListTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
 Status LargeListTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
 Status FSLTakeExec(KernelContext*, const ExecSpan&, ExecResult*);
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
index 1a9af0efcd700..dee80e9d258fb 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #include "arrow/array/builder_primitive.h"
@@ -27,6 +28,7 @@
 #include "arrow/chunked_array.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/kernels/gather_internal.h"
 #include "arrow/compute/kernels/vector_selection_internal.h"
 #include "arrow/compute/kernels/vector_selection_take_internal.h"
 #include "arrow/memory_pool.h"
@@ -324,238 +326,79 @@ namespace {
 using TakeState = OptionsWrapper<TakeOptions>;
 
 // ----------------------------------------------------------------------
-// Implement optimized take for primitive types from boolean to 1/2/4/8/16/32-byte
-// C-type based types. Use common implementation for every byte width and only
-// generate code for unsigned integer indices, since after boundschecking to
-// check for negative numbers in the indices we can safely reinterpret_cast
-// signed integers as unsigned.
-
-/// \brief The Take implementation for primitive (fixed-width) types does not
-/// use the logical Arrow type but rather the physical C type. This way we
-/// only generate one take function for each byte width.
+// Implement optimized take for primitive types from boolean to
+// 1/2/4/8/16/32-byte C-type based types and fixed-size binary (0 or more
+// bytes).
+//
+// Use one specialization for each of these primitive byte-widths so the
+// compiler can specialize the memcpy to dedicated CPU instructions, and for
+// fixed-width binary use the 1-byte specialization but pass WithFactor=true,
+// which makes the kernel consider the factor parameter provided at runtime.
+//
+// Only unsigned index types need to be instantiated since after
+// boundschecking to check for negative numbers in the indices we can safely
+// reinterpret_cast signed integers as unsigned.
+
+/// \brief The Take implementation for primitive types and fixed-width binary.
 ///
 /// Also note that this function can also handle fixed-size-list arrays if
 /// they fit the criteria described in fixed_width_internal.h, so use the
 /// function defined in that file to access values and destination pointers
 /// and DO NOT ASSUME `values.type()` is a primitive type.
 ///
+/// NOTE: Template parameters are types instead of values to let
+/// `TakeIndexDispatch<>` forward `typename... Args` after the index type.
+///
 /// \pre the indices have been boundschecked
-template <typename ValueWidthConstant, typename IndexCType>
-struct PrimitiveTakeImpl {
-  static constexpr int kValueWidth = ValueWidthConstant::value;
-
-  static void Exec(const ArraySpan& values, const ArraySpan& indices,
-                   ArrayData* out_arr) {
-    DCHECK_EQ(util::FixedWidthInBytes(*values.type), kValueWidth);
-    const auto* values_data = util::OffsetPointerOfFixedByteWidthValues(values);
-    const uint8_t* values_is_valid = values.buffers[0].data;
-    auto values_offset = values.offset;
-
-    const auto* indices_data = indices.GetValues<IndexCType>(1);
-    const uint8_t* indices_is_valid = indices.buffers[0].data;
-    auto indices_offset = indices.offset;
-
-    DCHECK_EQ(out_arr->offset, 0);
-    auto* out = util::MutableFixedWidthValuesPointer(out_arr);
-    auto out_is_valid = out_arr->buffers[0]->mutable_data();
-
-    // If either the values or indices have nulls, we preemptively zero out the
-    // out validity bitmap so that we don't have to use ClearBit in each
-    // iteration for nulls.
-    if (values.null_count != 0 || indices.null_count != 0) {
-      bit_util::SetBitsTo(out_is_valid, 0, indices.length, false);
-    }
-
-    auto WriteValue = [&](int64_t position) {
-      memcpy(out + position * kValueWidth,
-             values_data + indices_data[position] * kValueWidth, kValueWidth);
-    };
-
-    auto WriteZero = [&](int64_t position) {
-      memset(out + position * kValueWidth, 0, kValueWidth);
-    };
-
-    auto WriteZeroSegment = [&](int64_t position, int64_t length) {
-      memset(out + position * kValueWidth, 0, kValueWidth * length);
-    };
-
-    OptionalBitBlockCounter indices_bit_counter(indices_is_valid, indices_offset,
-                                                indices.length);
-    int64_t position = 0;
+template <typename ValueBitWidthConstant, typename IndexCType, typename WithFactor>
+struct FixedWidthTakeImpl {
+  static constexpr int kValueWidthInBits = ValueBitWidthConstant::value;
+
+  static Status Exec(KernelContext* ctx, const ArraySpan& values,
+                     const ArraySpan& indices, ArrayData* out_arr, int64_t factor) {
+#ifndef NDEBUG
+    int64_t bit_width = util::FixedWidthInBits(*values.type);
+    DCHECK(WithFactor::value || (kValueWidthInBits == bit_width && factor == 1));
+    DCHECK(!WithFactor::value ||
+           (factor > 0 && kValueWidthInBits == 8 &&  // factors are used with bytes
+            static_cast<int64_t>(factor * kValueWidthInBits) == bit_width));
+#endif
+    const bool out_has_validity = values.MayHaveNulls() || indices.MayHaveNulls();
+
+    const uint8_t* src;
+    int64_t src_offset;
+    std::tie(src_offset, src) = util::OffsetPointerOfFixedBitWidthValues(values);
+    uint8_t* out = util::MutableFixedWidthValuesPointer(out_arr);
     int64_t valid_count = 0;
-    while (position < indices.length) {
-      BitBlockCount block = indices_bit_counter.NextBlock();
-      if (values.null_count == 0) {
-        // Values are never null, so things are easier
-        valid_count += block.popcount;
-        if (block.popcount == block.length) {
-          // Fastest path: neither values nor index nulls
-          bit_util::SetBitsTo(out_is_valid, position, block.length, true);
-          for (int64_t i = 0; i < block.length; ++i) {
-            WriteValue(position);
-            ++position;
-          }
-        } else if (block.popcount > 0) {
-          // Slow path: some indices but not all are null
-          for (int64_t i = 0; i < block.length; ++i) {
-            if (bit_util::GetBit(indices_is_valid, indices_offset + position)) {
-              // index is not null
-              bit_util::SetBit(out_is_valid, position);
-              WriteValue(position);
-            } else {
-              WriteZero(position);
-            }
-            ++position;
-          }
-        } else {
-          WriteZeroSegment(position, block.length);
-          position += block.length;
-        }
-      } else {
-        // Values have nulls, so we must do random access into the values bitmap
-        if (block.popcount == block.length) {
-          // Faster path: indices are not null but values may be
-          for (int64_t i = 0; i < block.length; ++i) {
-            if (bit_util::GetBit(values_is_valid,
-                                 values_offset + indices_data[position])) {
-              // value is not null
-              WriteValue(position);
-              bit_util::SetBit(out_is_valid, position);
-              ++valid_count;
-            } else {
-              WriteZero(position);
-            }
-            ++position;
-          }
-        } else if (block.popcount > 0) {
-          // Slow path: some but not all indices are null. Since we are doing
-          // random access in general we have to check the value nullness one by
-          // one.
-          for (int64_t i = 0; i < block.length; ++i) {
-            if (bit_util::GetBit(indices_is_valid, indices_offset + position) &&
-                bit_util::GetBit(values_is_valid,
-                                 values_offset + indices_data[position])) {
-              // index is not null && value is not null
-              WriteValue(position);
-              bit_util::SetBit(out_is_valid, position);
-              ++valid_count;
-            } else {
-              WriteZero(position);
-            }
-            ++position;
-          }
-        } else {
-          WriteZeroSegment(position, block.length);
-          position += block.length;
-        }
-      }
-    }
-    out_arr->null_count = out_arr->length - valid_count;
-  }
-};
-
-template <typename IndexCType>
-struct BooleanTakeImpl {
-  static void Exec(const ArraySpan& values, const ArraySpan& indices,
-                   ArrayData* out_arr) {
-    const uint8_t* values_data = values.buffers[1].data;
-    const uint8_t* values_is_valid = values.buffers[0].data;
-    auto values_offset = values.offset;
-
-    const auto* indices_data = indices.GetValues<IndexCType>(1);
-    const uint8_t* indices_is_valid = indices.buffers[0].data;
-    auto indices_offset = indices.offset;
-
-    auto out = out_arr->buffers[1]->mutable_data();
-    auto out_is_valid = out_arr->buffers[0]->mutable_data();
-    auto out_offset = out_arr->offset;
-
-    // If either the values or indices have nulls, we preemptively zero out the
-    // out validity bitmap so that we don't have to use ClearBit in each
-    // iteration for nulls.
-    if (values.null_count != 0 || indices.null_count != 0) {
-      bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false);
-    }
-    // Avoid uninitialized data in values array
-    bit_util::SetBitsTo(out, out_offset, indices.length, false);
-
-    auto PlaceDataBit = [&](int64_t loc, IndexCType index) {
-      bit_util::SetBitTo(out, out_offset + loc,
-                         bit_util::GetBit(values_data, values_offset + index));
-    };
-
-    OptionalBitBlockCounter indices_bit_counter(indices_is_valid, indices_offset,
-                                                indices.length);
-    int64_t position = 0;
-    int64_t valid_count = 0;
-    while (position < indices.length) {
-      BitBlockCount block = indices_bit_counter.NextBlock();
-      if (values.null_count == 0) {
-        // Values are never null, so things are easier
-        valid_count += block.popcount;
-        if (block.popcount == block.length) {
-          // Fastest path: neither values nor index nulls
-          bit_util::SetBitsTo(out_is_valid, out_offset + position, block.length, true);
-          for (int64_t i = 0; i < block.length; ++i) {
-            PlaceDataBit(position, indices_data[position]);
-            ++position;
-          }
-        } else if (block.popcount > 0) {
-          // Slow path: some but not all indices are null
-          for (int64_t i = 0; i < block.length; ++i) {
-            if (bit_util::GetBit(indices_is_valid, indices_offset + position)) {
-              // index is not null
-              bit_util::SetBit(out_is_valid, out_offset + position);
-              PlaceDataBit(position, indices_data[position]);
-            }
-            ++position;
-          }
-        } else {
-          position += block.length;
-        }
-      } else {
-        // Values have nulls, so we must do random access into the values bitmap
-        if (block.popcount == block.length) {
-          // Faster path: indices are not null but values may be
-          for (int64_t i = 0; i < block.length; ++i) {
-            if (bit_util::GetBit(values_is_valid,
-                                 values_offset + indices_data[position])) {
-              // value is not null
-              bit_util::SetBit(out_is_valid, out_offset + position);
-              PlaceDataBit(position, indices_data[position]);
-              ++valid_count;
-            }
-            ++position;
-          }
-        } else if (block.popcount > 0) {
-          // Slow path: some but not all indices are null. Since we are doing
-          // random access in general we have to check the value nullness one by
-          // one.
-          for (int64_t i = 0; i < block.length; ++i) {
-            if (bit_util::GetBit(indices_is_valid, indices_offset + position)) {
-              // index is not null
-              if (bit_util::GetBit(values_is_valid,
-                                   values_offset + indices_data[position])) {
-                // value is not null
-                PlaceDataBit(position, indices_data[position]);
-                bit_util::SetBit(out_is_valid, out_offset + position);
-                ++valid_count;
-              }
-            }
-            ++position;
-          }
-        } else {
-          position += block.length;
-        }
-      }
-    }
+    arrow::internal::Gather<kValueWidthInBits, IndexCType, WithFactor::value> gather{
+        /*src_length=*/values.length,
+        src,
+        src_offset,
+        /*idx_length=*/indices.length,
+        /*idx=*/indices.GetValues<IndexCType>(1),
+        out,
+        factor};
+    if (out_has_validity) {
+      DCHECK_EQ(out_arr->offset, 0);
+      // out_is_valid must be zero-initialized, because Gather::Execute
+      // saves time by not having to ClearBit on every null element.
+      auto out_is_valid = out_arr->GetMutableValues<uint8_t>(0);
+      memset(out_is_valid, 0, bit_util::BytesForBits(out_arr->length));
+      valid_count = gather.template Execute</*kOutputIsZeroInitialized=*/false>(
+          /*src_validity=*/values, /*idx_validity=*/indices, out_is_valid);
+    } else {
+      valid_count = gather.Execute();
+    }
     out_arr->null_count = out_arr->length - valid_count;
+    return Status::OK();
   }
 };
 
 template