This tutorial shows how to use TileDB-R’s fromDataFrame()
function to create a TileDB array from a data.frame
object. This applies to both dense and sparse arrays.
For the complete API reference of this function, visit fromDataFrame()
in the TileDB-R API docs.
First, import the necessary libraries and set the array URI (that is, its path, which in this tutorial will be on local storage).
# Import necessary libraries
library (tiledb)
# Set array URIs: unique temporary paths on local storage.
# Wrapping each assignment in parentheses also prints the assigned value.
(sparse_array_uri <- tempfile(pattern = "fromDataFrame_sparse_r"))
(dense_array_uri <- tempfile(pattern = "fromDataFrame_dense_r"))
Define the dataframes you’ll use in this tutorial. You’ll use the coords_sparse
dataframe to create sparse arrays with fromDataFrame()
and the coords_dense
dataframe to create dense arrays with fromDataFrame()
.
# Create dense coordinates: a 4 x 4 integer matrix holding 1..16,
# filled column by column.
(mat <- matrix(1L:16L, nrow = 4L))
# Reshape the matrix into long format: one row per cell, with the row and
# column indices in `d1` and `d2` and the cell value in `a`.
# `value.name` is an argument of melt() itself, so it must sit outside the
# c() call that builds `varnames`.
(coords_dense <- reshape2::melt(
  mat,
  varnames = c("d1", "d2"),
  value.name = "a"
))
# Create sparse data frame.
# Build a sparse matrix in triplet form (repr = "T") from 1-based row
# indices `i`, column indices `j`, and cell values `x`.
(mat2 <- Matrix::sparseMatrix(
  i = c(3L, 1L, 4L, 3L, 1L, 2L),
  j = c(1L, 2L, 2L, 3L, 4L, 4L),
  x = c(4L, 1L, 6L, 5L, 2L, 3L),
  repr = "T"
))
# Read the triplet slots back into a data frame. The `i` and `j` slots are
# stored 0-based, so the resulting coordinates run from 0 to 3.
(coords_sparse <- data.frame(d1 = mat2@i, d2 = mat2@j, a = mat2@x))
Use the fromDataFrame()
function to create a sparse array from the coords_sparse
dataframe. Since dataframes in R have no concept of sparsity, fromDataFrame()
creates a sparse TileDB array by default from the dataframe you pass as an argument.
At a minimum, fromDataFrame()
needs the data.frame
object and the array URI.
# Create a sparse array from the `coords_sparse` dataframe.
# Only the dataframe and the array URI are required; `col_index` is
# optional and names the columns that become the array dimensions.
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2")
)
Now that you created the array, read its schema:
# Open the array in read mode; query results come back as a data.frame
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
# Print the schema of the array
schema(arr)
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
Now read its data.
# Read the data from the array: empty brackets select every cell
arr[]
A data.frame: 6 x 3
d1 d2 a
<int> <int> <int>
2 0 4
0 1 1
3 1 6
2 2 5
0 3 2
1 3 3
By default, sparse arrays created through fromDataFrame()
allow duplicates, that is, more than one cell at the same coordinates. Try writing a second value to coordinates [3, 1], which already hold a cell
:
# Close the array, then reopen the same handle in write mode
arr <- tiledb_array_open(tiledb_array_close(arr), type = "WRITE")
# Write a second value to coordinates [3, 1], which already hold a cell
arr[3, 1] <- 2
# Close again and reopen in read mode to inspect the result of the write
arr <- tiledb_array_open(tiledb_array_close(arr), type = "READ")
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 2 0 4
2 0 1 1
3 3 1 6
4 2 2 5
5 0 3 2
6 1 3 3
7 3 1 2
The array now returns 7 rows instead of 6. You can also set the allows_dups
argument to FALSE
to prevent TileDB from adding duplicates during writes.
When you disable duplicates, writing to a cell in an array that already has a value will overwrite the existing cell value.
# Start over: close the handle and delete the existing array from disk
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array from the same dataframe, with duplicates disabled
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  allows_dups = FALSE
)
# Open in write mode and write to coordinates [3, 1] again
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "WRITE",
  return_as = "data.frame"
)
arr[3, 1] <- 2
# Switch the handle to read mode and show the schema and the data
arr <- tiledb_array_open(tiledb_array_close(arr), type = "READ")
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=FALSE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 2 0 4
2 0 1 1
3 3 1 2
4 2 2 5
5 0 3 2
6 1 3 3
You can specify the cell and tile order of an array by using the cell_order
and tile_order
arguments. The default is COL_MAJOR
order for both.
# Close the handle and delete the existing array before recreating it
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array with row-major cell order and column-major tile order.
# There is deliberately no trailing comma after the last argument: a
# trailing comma would pass an extra, empty argument to fromDataFrame().
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  cell_order = "ROW_MAJOR",
  tile_order = "COL_MAJOR"
)
# Open the new array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="ROW_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 0 1 1
2 0 3 2
3 1 3 3
4 2 0 4
5 2 2 5
6 3 1 6
You can apply filters or compression to the array by using the filter
argument. The filters you can apply depend on the data type of the attribute.
# Close the handle and delete the existing array before recreating it
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array, applying the ZSTD and GZIP filters via `filter`
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  filter = c("ZSTD", "GZIP")
)
# Open the new array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1), tiledb_filter_set_option(tiledb_filter("GZIP"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 2 0 4
2 0 1 1
3 3 1 6
4 2 2 5
5 0 3 2
6 1 3 3
The default capacity of arrays you create with fromDataFrame()
is 10,000 cells. You can change this with the capacity
argument:
# Close the handle and delete the existing array before recreating it
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array with a capacity of 3 instead of the default.
# `3L` is an integer literal; no trailing comma follows the last argument.
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  capacity = 3L
)
# Open the new array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=3, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 2 0 4
2 0 1 1
3 3 1 6
4 2 2 5
5 0 3 2
6 1 3 3
You can set the domain of each dimension by using the tile_domain
argument, a named list with one (minimum, maximum) pair per dimension. The default is the minimum and maximum values of each dimension column in the dataframe.
# Close the handle and delete the existing array before recreating it
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array with an explicit domain per dimension:
# each list entry is named after a dimension and holds c(minimum, maximum)
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  tile_domain = list(
    d1 = c(0L, 4L),
    d2 = c(0L, 5L)
  )
)
# Open the new array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,4L), tile=5L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,5L), tile=6L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 2 0 4
2 0 1 1
3 3 1 6
4 2 2 5
5 0 3 2
6 1 3 3
The tile_extent
argument sets the tile extent used for the array's dimensions. In the example below, both d1 and d2 end up with a tile extent of 2.
# Close the handle and delete the existing array before recreating it
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array with a tile extent of 2 (`2L` is an integer literal)
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  tile_extent = 2L
)
# Open the new array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=2L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=2L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 0 1 1
2 2 0 4
3 3 1 6
4 0 3 2
5 1 3 3
6 2 2 5
Now, create a dense array from the coords_dense
dataframe. Here, you’ll set mode
to "schema_only"
to create the array schema without writing any data. This is useful when you want to create an empty array and write data to it later.
Recall from earlier that dataframes in R have no notion of sparsity, so you must set sparse = FALSE
in fromDataFrame()
to create a dense array instead of a sparse array.
# Create a dense array from the `coords_dense` dataframe
arr <- tiledb_array_close(arr)
if (file.exists(dense_array_uri)) {
  unlink(dense_array_uri, recursive = TRUE)
}
# `sparse = FALSE` requests a dense array; `mode = "schema_only"` creates
# the array schema without writing any data
fromDataFrame(
  coords_dense,
  dense_array_uri,
  col_index = c("d1", "d2"),
  sparse = FALSE,
  mode = "schema_only"
)
# Open the still-empty array in write mode
arr <- tiledb_array(
  dense_array_uri,
  query_type = "WRITE",
  return_as = "data.frame"
)
# Values to write: the integers 1..16, the same values `mat` holds.
# Defined here because nothing earlier in the tutorial creates `a_dense`.
a_dense <- 1L:16L
# Arrange the values as a 4 x 4 matrix, transpose it, and write every cell
arr[] <- t(array(a_dense, dim = c(4, 4)))
# Switch the handle to read mode, show the schema and data, then close it
arr <- tiledb_array_close(arr)
arr <- tiledb_array_open(arr, type = "READ")
print(schema(arr))
print(arr[])
arr <- tiledb_array_close(arr)
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(1L,4L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(1L,4L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=FALSE, allows_dups=FALSE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 1 1 1
2 2 1 5
3 3 1 9
4 4 1 13
5 1 2 2
6 2 2 6
7 3 2 10
8 4 2 14
9 1 3 3
10 2 3 7
11 3 3 11
12 4 3 15
13 1 4 4
14 2 4 8
15 3 4 12
16 4 4 16
You can also use the "append"
mode with fromDataFrame()
to append data to an existing array. This is useful when you want to add new data to an already populated array. Try appending data to the sparse array.
Using the "append"
mode with fromDataFrame()
is supported only for sparse arrays.
# One new cell to append: coordinates (1, 1) with attribute value 2.
# The `L` suffix makes each literal an integer.
append_df <- data.frame(
  d1 = 1L,
  d2 = 1L,
  a = 2L
)
# Append the new cell to the existing sparse array instead of recreating it
fromDataFrame(
  append_df,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  mode = "append"
)
# Open the array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=2L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=2L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 0 1 1
2 2 0 4
3 3 1 6
4 0 3 2
5 1 3 3
6 2 2 5
7 1 1 2
You can set filters and compression for specific dimensions and attributes with the filter_list
argument:
# Close the handle and delete the existing array before recreating it
arr <- tiledb_array_close(arr)
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
# Recreate the array with one filter per column: each `filter_list` entry
# is named after a dimension or attribute and holds the filter to apply
fromDataFrame(
  coords_sparse,
  sparse_array_uri,
  col_index = c("d1", "d2"),
  filter_list = list(
    d1 = "GZIP",
    d2 = "ZSTD",
    a = "GZIP"
  )
)
# Open the new array in read mode and show its schema and data
arr <- tiledb_array(
  sparse_array_uri,
  query_type = "READ",
  return_as = "data.frame"
)
print(schema(arr))
print(arr[])
tiledb_array_schema(
domain=tiledb_domain(c(
tiledb_dim(name="d1", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("GZIP"),"COMPRESSION_LEVEL",-1)))),
tiledb_dim(name="d2", domain=c(0L,3L), tile=4L, type="INT32", filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))))
)),
attrs=c(
tiledb_attr(name="a", type="INT32", ncells=1, nullable=FALSE, filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("GZIP"),"COMPRESSION_LEVEL",-1))))
),
cell_order="COL_MAJOR", tile_order="COL_MAJOR", capacity=10000, sparse=TRUE, allows_dups=TRUE,
coords_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
offsets_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("ZSTD"),"COMPRESSION_LEVEL",-1))),
validity_filter_list=tiledb_filter_list(c(tiledb_filter_set_option(tiledb_filter("RLE"),"COMPRESSION_LEVEL",-1)))
)
d1 d2 a
1 2 0 4
2 0 1 1
3 3 1 6
4 2 2 5
5 0 3 2
6 1 3 3
Clean up at the end by deleting both arrays.
# Clean up the arrays.
# Close the array handle first, as every earlier section does before
# deleting an array from disk.
arr <- tiledb_array_close(arr)
# Delete each array directory if it exists
if (file.exists(sparse_array_uri)) {
  unlink(sparse_array_uri, recursive = TRUE)
}
if (file.exists(dense_array_uri)) {
  unlink(dense_array_uri, recursive = TRUE)
}