Closed
Description
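While reviewing apache/arrow#45346, I noticed this behavior in data.table. I am not sure whether it is intended: after a Parquet round trip and a call to `setDT()`, `dt_out` prints the same `.internal.selfref` pointer as `dt_in`, yet the two are treated as different objects (adding a column to `dt_in` by reference does not change `dt_out`).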
# Reprex: compare the attributes of a data.table before and after a Parquet
# round trip, then check whether the two objects share state.

# Step 1: create the original data.table; its attributes include
# .internal.selfref.
dt_in <- data.table::data.table(x = 1:10)
attributes(dt_in)
#> $names
#> [1] "x"
#>
#> $row.names
#> [1] 1 2 3 4 5 6 7 8 9 10
#>
#> $class
#> [1] "data.table" "data.frame"
#>
#> $.internal.selfref
#> <pointer: 0x141813ee0>
# Step 2: write to Parquet and read back. The result keeps the
# "data.table" class, but the output below shows no .internal.selfref.
arrow::write_parquet(dt_in, "test.parquet")
dt_out <- arrow::read_parquet("test.parquet")
attributes(dt_out)
#> $names
#> [1] "x"
#>
#> $row.names
#> [1] 1 2 3 4 5 6 7 8 9 10
#>
#> $class
#> [1] "data.table" "data.frame"
# Step 3: after setDT(), .internal.selfref is present again and prints the
# same pointer value as dt_in's.
# NOTE(review): whether this printed address is meant to identify an individual
# table is not shown here -- confirm against the data.table documentation.
data.table::setDT(dt_out)
attributes(dt_out)
#> $names
#> [1] "x"
#>
#> $row.names
#> [1] 1 2 3 4 5 6 7 8 9 10
#>
#> $class
#> [1] "data.table" "data.frame"
#>
#> $.internal.selfref
#> <pointer: 0x141813ee0>
# Step 4: add column y to dt_in with `:=`, then inspect both objects.
dt_in[, y := 10:1]
attributes(dt_in)
#> $names
#> [1] "x" "y"
#>
#> $row.names
#> [1] 1 2 3 4 5 6 7 8 9 10
#>
#> $class
#> [1] "data.table" "data.frame"
#>
#> $.internal.selfref
#> <pointer: 0x141813ee0>
# dt_out still has only column x, even though the printed pointer matches.
attributes(dt_out)
#> $names
#> [1] "x"
#>
#> $row.names
#> [1] 1 2 3 4 5 6 7 8 9 10
#>
#> $class
#> [1] "data.table" "data.frame"
#>
#> $.internal.selfref
#> <pointer: 0x141813ee0>
# Step 5: printing confirms that dt_in gained y while dt_out is unchanged.
print(dt_in)
#> x y
#> <int> <int>
#> 1: 1 10
#> 2: 2 9
#> 3: 3 8
#> 4: 4 7
#> 5: 5 6
#> 6: 6 5
#> 7: 7 4
#> 8: 8 3
#> 9: 9 2
#> 10: 10 1
print(dt_out)
#> x
#> <int>
#> 1: 1
#> 2: 2
#> 3: 3
#> 4: 4
#> 5: 5
#> 6: 6
#> 7: 7
#> 8: 8
#> 9: 9
#> 10: 10
Created on 2025-01-24 with reprex v2.1.1