Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into pandas-metadata-field-name
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Dec 9, 2024
2 parents 251cd97 + dfb6d0e commit 9db7b0b
Show file tree
Hide file tree
Showing 20 changed files with 173 additions and 45 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/dev_pr/helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const https = require('https');
/**
* Given the title of a PullRequest return the Issue
*
* @param {String} title
* @param {String} title
* @returns {Issue} or null if no issue detected.
*
* @typedef {Object} Issue
Expand Down Expand Up @@ -62,6 +62,5 @@ function detectIssue(title) {

module.exports = {
detectIssue,
getJiraInfo,
getGitHubInfo
};
13 changes: 9 additions & 4 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2061,10 +2061,14 @@ macro(build_substrait)

# Missing dll-interface:
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "/wd4251")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
"Clang")
# Protobuf generated files trigger some errors on CLANG TSAN builds
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32")
else()
# GH-44954: silence [[deprecated]] declarations in protobuf-generated code
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-deprecated")
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
"Clang")
# Protobuf generated files trigger some errors on CLANG TSAN builds
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32")
endif()
endif()

set(SUBSTRAIT_SOURCES)
Expand Down Expand Up @@ -2116,6 +2120,7 @@ macro(build_substrait)

add_library(substrait STATIC ${SUBSTRAIT_SOURCES})
set_target_properties(substrait PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_options(substrait PRIVATE "${SUBSTRAIT_SUPPRESSED_FLAGS}")
target_include_directories(substrait PUBLIC ${SUBSTRAIT_INCLUDES})
target_link_libraries(substrait PUBLIC ${ARROW_PROTOBUF_LIBPROTOBUF})
add_dependencies(substrait substrait_gen)
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/engine/substrait/expression_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@
#include "arrow/engine/substrait/type_fwd.h"
#include "arrow/engine/substrait/visibility.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"

// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "substrait/algebra.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING

namespace arrow {
namespace engine {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/engine/substrait/extended_expression_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
#include "arrow/engine/substrait/visibility.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/macros.h"

// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "substrait/extended_expression.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING

namespace arrow {
namespace engine {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/engine/substrait/plan_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,12 @@
#include "arrow/engine/substrait/visibility.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/macros.h"

// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "substrait/plan.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING

namespace arrow {
namespace engine {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/engine/substrait/relation_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
#include "arrow/engine/substrait/type_fwd.h"
#include "arrow/engine/substrait/visibility.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"

// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "substrait/algebra.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING

namespace arrow {
namespace engine {
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/engine/substrait/test_plan_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,12 @@
#include "arrow/status.h"
#include "arrow/table.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"

#include "substrait/algebra.pb.h"
// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "substrait/algebra.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING

namespace arrow {
namespace engine {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/engine/substrait/util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@
#include "arrow/engine/substrait/visibility.h"
#include "arrow/result.h"
#include "arrow/util/hashing.h"
#include "arrow/util/macros.h"
#include "arrow/util/unreachable.h"

// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "substrait/algebra.pb.h" // IWYU pragma: export
#include "substrait/plan.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING

namespace arrow {
namespace engine {
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/flight/sql/client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "arrow/ipc/reader.h"
#include "arrow/result.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"

namespace flight_sql_pb = arrow::flight::protocol::sql;

Expand Down Expand Up @@ -829,6 +830,8 @@ Status FlightSqlClient::Rollback(const FlightCallOptions& options,
return results->Drain();
}

// ActionCancelQuery{Request,Result} are deprecated
ARROW_SUPPRESS_DEPRECATION_WARNING
::arrow::Result<CancelResult> FlightSqlClient::CancelQuery(
const FlightCallOptions& options, const FlightInfo& info) {
flight_sql_pb::ActionCancelQueryRequest cancel_query;
Expand All @@ -855,6 +858,7 @@ ::arrow::Result<CancelResult> FlightSqlClient::CancelQuery(
}
return Status::IOError("Server returned unknown result ", result.result());
}
ARROW_UNSUPPRESS_DEPRECATION_WARNING

Status FlightSqlClient::Close() { return impl_->Close(); }

Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/flight/sql/protocol_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations

#include "arrow/util/macros.h"

// GH-44954: silence [[deprecated]] declarations in protobuf-generated code
ARROW_SUPPRESS_DEPRECATION_WARNING
#include "arrow/flight/sql/protocol_internal.h"

// NOTE(lidavidm): Normally this is forbidden, but on Windows to get
// the dllexport/dllimport macro in the right places, we need to
// ensure our header gets included (and Protobuf will not insert the
// include for you)
#include "arrow/flight/sql/FlightSql.pb.cc" // NOLINT
ARROW_UNSUPPRESS_DEPRECATION_WARNING
3 changes: 3 additions & 0 deletions cpp/src/arrow/flight/sql/protocol_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@

// This addresses platform-specific defines, e.g. on Windows
#include "arrow/flight/platform.h" // IWYU pragma: keep
#include "arrow/util/macros.h"

// This header holds the Flight SQL definitions.

#include "arrow/flight/sql/visibility.h"

ARROW_SUPPRESS_DEPRECATION_WARNING
#include "arrow/flight/sql/FlightSql.pb.h" // IWYU pragma: export
ARROW_UNSUPPRESS_DEPRECATION_WARNING
7 changes: 7 additions & 0 deletions cpp/src/arrow/flight/sql/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "arrow/flight/sql/sql_info_internal.h"
#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/macros.h"

#define PROPERTY_TO_OPTIONAL(COMMAND, PROPERTY) \
COMMAND.has_##PROPERTY() ? std::make_optional(COMMAND.PROPERTY()) : std::nullopt
Expand Down Expand Up @@ -337,6 +338,8 @@ arrow::Result<ActionBeginTransactionRequest> ParseActionBeginTransactionRequest(
return result;
}

// ActionCancelQueryRequest is deprecated
ARROW_SUPPRESS_DEPRECATION_WARNING
arrow::Result<ActionCancelQueryRequest> ParseActionCancelQueryRequest(
const Action& action) {
pb::sql::ActionCancelQueryRequest command;
Expand All @@ -346,6 +349,7 @@ arrow::Result<ActionCancelQueryRequest> ParseActionCancelQueryRequest(
ARROW_ASSIGN_OR_RAISE(result.info, FlightInfo::Deserialize(command.info()));
return result;
}
ARROW_UNSUPPRESS_DEPRECATION_WARNING

arrow::Result<ActionCreatePreparedStatementRequest>
ParseActionCreatePreparedStatementRequest(const Action& action) {
Expand Down Expand Up @@ -468,6 +472,8 @@ arrow::Result<Result> PackActionResult(const FlightEndpoint& endpoint) {
return endpoint.SerializeToBuffer();
}

// ActionCancelQueryResult is deprecated
ARROW_SUPPRESS_DEPRECATION_WARNING
arrow::Result<Result> PackActionResult(CancelResult result) {
pb::sql::ActionCancelQueryResult pb_result;
switch (result) {
Expand All @@ -487,6 +493,7 @@ arrow::Result<Result> PackActionResult(CancelResult result) {
}
return PackActionResult(pb_result);
}
ARROW_UNSUPPRESS_DEPRECATION_WARNING

arrow::Result<Result> PackActionResult(ActionCreatePreparedStatementResult result) {
pb::sql::ActionCreatePreparedStatementResult pb_result;
Expand Down
10 changes: 10 additions & 0 deletions docs/source/python/api/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ may expose data type-specific methods or properties.
DurationArray
MonthDayNanoIntervalArray
Decimal128Array
Decimal256Array
DictionaryArray
ListArray
FixedSizeListArray
Expand All @@ -86,6 +87,9 @@ may expose data type-specific methods or properties.
ExtensionArray
FixedShapeTensorArray
OpaqueArray
JsonArray
UuidArray
Bool8Array

.. _api.scalar:

Expand All @@ -112,6 +116,7 @@ classes may expose data type-specific methods or properties.
Int16Scalar
Int32Scalar
Int64Scalar
NullScalar
UInt8Scalar
UInt16Scalar
UInt32Scalar
Expand All @@ -134,9 +139,11 @@ classes may expose data type-specific methods or properties.
DurationScalar
MonthDayNanoIntervalScalar
Decimal128Scalar
Decimal256Scalar
DictionaryScalar
RunEndEncodedScalar
ListScalar
FixedSizeListScalar
LargeListScalar
ListViewScalar
LargeListViewScalar
Expand All @@ -146,3 +153,6 @@ classes may expose data type-specific methods or properties.
ExtensionScalar
FixedShapeTensorScalar
OpaqueScalar
JsonScalar
UuidScalar
Bool8Scalar
17 changes: 17 additions & 0 deletions docs/source/python/api/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,13 @@ These should be used to create Arrow data types and schemas.
dictionary
run_end_encoded
fixed_shape_tensor
union
dense_union
sparse_union
opaque
bool8
uuid
json_
field
schema
from_numpy_dtype
Expand Down Expand Up @@ -96,13 +102,19 @@ functions above.
DataType
DictionaryType
ListType
ListViewType
FixedSizeListType
LargeListType
LargeListViewType
MapType
StructType
UnionType
DenseUnionType
SparseUnionType
TimestampType
Time32Type
Time64Type
DurationType
FixedSizeBinaryType
Decimal128Type
Decimal256Type
Expand All @@ -115,8 +127,10 @@ Specific classes and functions for extension types.
.. autosummary::
:toctree: ../generated/

BaseExtensionType
ExtensionType
PyExtensionType
UnknownExtensionType
register_extension_type
unregister_extension_type

Expand All @@ -128,6 +142,9 @@ implemented by PyArrow.

FixedShapeTensorType
OpaqueType
JsonType
UuidType
Bool8Type

.. _api.types.checking:
.. currentmodule:: pyarrow.types
Expand Down
5 changes: 4 additions & 1 deletion r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ supported_dplyr_methods <- list(
relocate = NULL,
compute = NULL,
collapse = NULL,
distinct = "`.keep_all = TRUE` not supported",
distinct = c(
"`.keep_all = TRUE` returns a non-missing value if present,",
"only returning missing values if all are missing."
),
left_join = "the `copy` argument is ignored",
right_join = "the `copy` argument is ignored",
inner_join = "the `copy` argument is ignored",
Expand Down
25 changes: 18 additions & 7 deletions r/R/dplyr-distinct.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@
# The following S3 methods are registered on load if dplyr is present

distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) {
if (.keep_all == TRUE) {
# TODO(ARROW-14045): the function is called "hash_one" (from ARROW-13993)
# May need to call it: `summarize(x = one(x), ...)` for x in non-group cols
arrow_not_supported("`distinct()` with `.keep_all = TRUE`")
}

original_gv <- dplyr::group_vars(.data)
if (length(quos(...))) {
# group_by() calls mutate() if there are any expressions in ...
Expand All @@ -33,11 +27,28 @@ distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) {
.data <- dplyr::group_by(.data, !!!syms(names(.data)))
}

out <- dplyr::summarize(.data, .groups = "drop")
if (isTRUE(.keep_all)) {
# Note: in regular dplyr, `.keep_all = TRUE` returns the first row's value.
# However, Acero's `hash_one` function prefers returning non-null values.
# So, you'll get the same shape of data, but the values may differ.
keeps <- names(.data)[!(names(.data) %in% .data$group_by_vars)]
exprs <- lapply(keeps, function(x) call2("one", sym(x)))
names(exprs) <- keeps
} else {
exprs <- list()
}

out <- dplyr::summarize(.data, !!!exprs, .groups = "drop")

# distinct() doesn't modify group by vars, so restore the original ones
if (length(original_gv)) {
out$group_by_vars <- original_gv
}
if (isTRUE(.keep_all)) {
# Also ensure the column order matches the original
# summarize() will put the group_by_vars first
out <- dplyr::select(out, !!!syms(names(.data)))
}
out
}

Expand Down
7 changes: 7 additions & 0 deletions r/R/dplyr-funcs-agg.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ register_bindings_aggregate <- function() {
options = list(skip_nulls = na.rm, min_count = 0L)
)
})
# Register a binding named "arrow::one". Calling it forwards to set_agg() with
# the aggregate function name "one", the supplied argument as data, and an
# empty options list.
register_binding("arrow::one", function(...) {
set_agg(
fun = "one",
# The arguments are collected with list2() and passed through
# ensure_one_arg() together with the name "one" -- presumably to enforce a
# single argument and label any error; confirm against its definition.
data = ensure_one_arg(list2(...), "one"),
options = list()
)
})
}

set_agg <- function(...) {
Expand Down
Loading

0 comments on commit 9db7b0b

Please sign in to comment.