# Import necessary libraries
import os.path
import shutil
import numpy as np
import tiledb
# Set the array URI: a directory on local storage under the user's home.
array_uri = os.path.expanduser("~/incomplete_queries_python")

# Delete the array if it already exists, so the tutorial starts from a
# clean state.
if os.path.exists(array_uri):
    shutil.rmtree(array_uri)
Incomplete Queries
This tutorial highlights TileDB’s incomplete queries functionality. For more information on incomplete queries, visit Key Concepts: Reads.
First, import the necessary libraries, set the array URI (that is, its path, which in this tutorial will be on local storage), and delete any previously created arrays with the same name.
Next, create the array by specifying its schema. This example uses a sparse array, but the described incomplete query functionality is applicable to any array.
# The array has 100 cells along a single int64 dimension "x" with domain
# [0, 99]; the tile extent of 100 spans the whole domain.
dom = tiledb.Domain(
    tiledb.Dim(name="x", domain=(0, 99), tile=100, dtype=np.int64)
)

# The array is sparse (sparse=True) with a single string-typed attribute "a".
schema = tiledb.ArraySchema(
    domain=dom,
    sparse=True,
    attrs=[tiledb.Attr(name="a", dtype=str)],
)

# Create the (empty) array on disk.
tiledb.SparseArray.create(array_uri, schema)
Later, when reading the data back, you will set a buffer of 800 bytes. This forces the query to return as incomplete.
Now that you have created the array, write some data to it. The data you’ll write is the Latin alphabet with varying repeat lengths.
# Open the array for writing and fill every cell of dimension "x".
with tiledb.open(array_uri, mode="w") as A:
    # Inclusive (low, high) bounds of dimension "x".
    extent = A.schema.domain.dim("x").domain
    ncells = extent[1] - extent[0] + 1

    # Data is the Latin alphabet with varying repeat lengths: cell i holds
    # the letter chr(i % 26 + 97) repeated (i % 52) times, so cells where
    # i % 52 == 0 hold the empty string.
    data = [chr(i % 26 + 97) * (i % 52) for i in range(ncells)]

    # Coords are the full dimension range, one coordinate per data value.
    coords = np.arange(extent[0], extent[1] + 1)

    A[coords] = data
Read the results as a dataframe.
# In order to force iteration, restrict the buffer sizes.
# This setting gives at least 3 iterations for the example data.
cfg = tiledb.Config(
    {
        # Size, in bytes, of the buffers allocated for each read.
        "py.init_buffer_bytes": 800,
        # NOTE(review): presumably makes TileDB-Py use exactly the size
        # above rather than its own estimate -- confirm against the
        # TileDB-Py configuration documentation.
        "py.exact_init_buffer_bytes": "true",
    }
)

with tiledb.open(array_uri, config=cfg) as A:
    # Iterate over results as a dataframe: with return_incomplete=True the
    # indexer yields one partial result per iteration instead of a single
    # complete result.
    iterable = A.query(return_incomplete=True).df[:]

    # Consume the partial results while the array is still open.
    for i, result in enumerate(iterable):
        print(f"--- result {i} is a '{type(result)}' with size {len(result)}")
        print(result)
        print("---")

# NOTE: `i` is the zero-based index of the last partial result, so the
# number of results printed above is i + 1.
print(f"Query completed after {i} iterations")
--- result 0 is a '<class 'pandas.core.frame.DataFrame'>' with size 40
x a
0 0
1 1 b
2 2 cc
3 3 ddd
4 4 eeee
5 5 fffff
6 6 gggggg
7 7 hhhhhhh
8 8 iiiiiiii
9 9 jjjjjjjjj
10 10 kkkkkkkkkk
11 11 lllllllllll
12 12 mmmmmmmmmmmm
13 13 nnnnnnnnnnnnn
14 14 oooooooooooooo
15 15 ppppppppppppppp
16 16 qqqqqqqqqqqqqqqq
17 17 rrrrrrrrrrrrrrrrr
18 18 ssssssssssssssssss
19 19 ttttttttttttttttttt
20 20 uuuuuuuuuuuuuuuuuuuu
21 21 vvvvvvvvvvvvvvvvvvvvv
22 22 wwwwwwwwwwwwwwwwwwwwww
23 23 xxxxxxxxxxxxxxxxxxxxxxx
24 24 yyyyyyyyyyyyyyyyyyyyyyyy
25 25 zzzzzzzzzzzzzzzzzzzzzzzzz
26 26 aaaaaaaaaaaaaaaaaaaaaaaaaa
27 27 bbbbbbbbbbbbbbbbbbbbbbbbbbb
28 28 cccccccccccccccccccccccccccc
29 29 ddddddddddddddddddddddddddddd
30 30 eeeeeeeeeeeeeeeeeeeeeeeeeeeeee
31 31 fffffffffffffffffffffffffffffff
32 32 gggggggggggggggggggggggggggggggg
33 33 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
34 34 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
35 35 jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
36 36 kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
37 37 lllllllllllllllllllllllllllllllllllll
38 38 mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
39 39 nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
---
--- result 1 is a '<class 'pandas.core.frame.DataFrame'>' with size 35
x a
0 40 oooooooooooooooooooooooooooooooooooooooo
1 41 ppppppppppppppppppppppppppppppppppppppppp
2 42 qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
3 43 rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
4 44 ssssssssssssssssssssssssssssssssssssssssssss
5 45 ttttttttttttttttttttttttttttttttttttttttttttt
6 46 uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
7 47 vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
8 48 wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
9 49 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
10 50 yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy...
11 51 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz...
12 52
13 53 b
14 54 cc
15 55 ddd
16 56 eeee
17 57 fffff
18 58 gggggg
19 59 hhhhhhh
20 60 iiiiiiii
21 61 jjjjjjjjj
22 62 kkkkkkkkkk
23 63 lllllllllll
24 64 mmmmmmmmmmmm
25 65 nnnnnnnnnnnnn
26 66 oooooooooooooo
27 67 ppppppppppppppp
28 68 qqqqqqqqqqqqqqqq
29 69 rrrrrrrrrrrrrrrrr
30 70 ssssssssssssssssss
31 71 ttttttttttttttttttt
32 72 uuuuuuuuuuuuuuuuuuuu
33 73 vvvvvvvvvvvvvvvvvvvvv
34 74 wwwwwwwwwwwwwwwwwwwwww
---
--- result 2 is a '<class 'pandas.core.frame.DataFrame'>' with size 23
x a
0 75 xxxxxxxxxxxxxxxxxxxxxxx
1 76 yyyyyyyyyyyyyyyyyyyyyyyy
2 77 zzzzzzzzzzzzzzzzzzzzzzzzz
3 78 aaaaaaaaaaaaaaaaaaaaaaaaaa
4 79 bbbbbbbbbbbbbbbbbbbbbbbbbbb
5 80 cccccccccccccccccccccccccccc
6 81 ddddddddddddddddddddddddddddd
7 82 eeeeeeeeeeeeeeeeeeeeeeeeeeeeee
8 83 fffffffffffffffffffffffffffffff
9 84 gggggggggggggggggggggggggggggggg
10 85 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
11 86 iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
12 87 jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj
13 88 kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk
14 89 lllllllllllllllllllllllllllllllllllll
15 90 mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm
16 91 nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
17 92 oooooooooooooooooooooooooooooooooooooooo
18 93 ppppppppppppppppppppppppppppppppppppppppp
19 94 qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq
20 95 rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr
21 96 ssssssssssssssssssssssssssssssssssssssssssss
22 97 ttttttttttttttttttttttttttttttttttttttttttttt
---
--- result 3 is a '<class 'pandas.core.frame.DataFrame'>' with size 2
x a
0 98 uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu
1 99 vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
---
Query completed after 3 iterations
You can also read the results as an OrderedDict.
# You can also iterate results as an OrderedDict.
# `cfg` is the same buffer-restricting configuration that is passed to the
# DataFrame read earlier in this tutorial.
with tiledb.open(array_uri, config=cfg) as A:
    # multi_index yields one OrderedDict per partial result, keyed by the
    # dimension ("x") and attribute ("a") names.
    iterable = A.query(return_incomplete=True).multi_index[:]

    # Consume the partial results while the array is still open.
    for i, result in enumerate(iterable):
        # len(result) here is the number of keys in the OrderedDict, not
        # the number of cells returned.
        print(f"--- result {i} is a '{type(result)}' with size {len(result)}")
        print(result)
        print("---")

# NOTE: `i` is the zero-based index of the last partial result, so the
# number of results printed above is i + 1.
print(f"Query completed after {i} iterations")
--- result 0 is a '<class 'collections.OrderedDict'>' with size 2
OrderedDict([('x', array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39])), ('a', array(['', 'b', 'cc', 'ddd', 'eeee', 'fffff', 'gggggg', 'hhhhhhh',
'iiiiiiii', 'jjjjjjjjj', 'kkkkkkkkkk', 'lllllllllll',
'mmmmmmmmmmmm', 'nnnnnnnnnnnnn', 'oooooooooooooo',
'ppppppppppppppp', 'qqqqqqqqqqqqqqqq', 'rrrrrrrrrrrrrrrrr',
'ssssssssssssssssss', 'ttttttttttttttttttt',
'uuuuuuuuuuuuuuuuuuuu', 'vvvvvvvvvvvvvvvvvvvvv',
'wwwwwwwwwwwwwwwwwwwwww', 'xxxxxxxxxxxxxxxxxxxxxxx',
'yyyyyyyyyyyyyyyyyyyyyyyy', 'zzzzzzzzzzzzzzzzzzzzzzzzz',
'aaaaaaaaaaaaaaaaaaaaaaaaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb',
'cccccccccccccccccccccccccccc', 'ddddddddddddddddddddddddddddd',
'eeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
'fffffffffffffffffffffffffffffff',
'gggggggggggggggggggggggggggggggg',
'hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh',
'iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii',
'jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj',
'kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk',
'lllllllllllllllllllllllllllllllllllll',
'mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm',
'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn'], dtype=object))])
---
--- result 1 is a '<class 'collections.OrderedDict'>' with size 2
OrderedDict([('x', array([40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
74])), ('a', array(['oooooooooooooooooooooooooooooooooooooooo',
'ppppppppppppppppppppppppppppppppppppppppp',
'qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq',
'rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr',
'ssssssssssssssssssssssssssssssssssssssssssss',
'ttttttttttttttttttttttttttttttttttttttttttttt',
'uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu',
'vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv',
'wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww',
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy',
'zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz', '', 'b',
'cc', 'ddd', 'eeee', 'fffff', 'gggggg', 'hhhhhhh', 'iiiiiiii',
'jjjjjjjjj', 'kkkkkkkkkk', 'lllllllllll', 'mmmmmmmmmmmm',
'nnnnnnnnnnnnn', 'oooooooooooooo', 'ppppppppppppppp',
'qqqqqqqqqqqqqqqq', 'rrrrrrrrrrrrrrrrr', 'ssssssssssssssssss',
'ttttttttttttttttttt', 'uuuuuuuuuuuuuuuuuuuu',
'vvvvvvvvvvvvvvvvvvvvv', 'wwwwwwwwwwwwwwwwwwwwww'], dtype=object))])
---
--- result 2 is a '<class 'collections.OrderedDict'>' with size 2
OrderedDict([('x', array([75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97])), ('a', array(['xxxxxxxxxxxxxxxxxxxxxxx', 'yyyyyyyyyyyyyyyyyyyyyyyy',
'zzzzzzzzzzzzzzzzzzzzzzzzz', 'aaaaaaaaaaaaaaaaaaaaaaaaaa',
'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'cccccccccccccccccccccccccccc',
'ddddddddddddddddddddddddddddd', 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
'fffffffffffffffffffffffffffffff',
'gggggggggggggggggggggggggggggggg',
'hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh',
'iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii',
'jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj',
'kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk',
'lllllllllllllllllllllllllllllllllllll',
'mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm',
'nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn',
'oooooooooooooooooooooooooooooooooooooooo',
'ppppppppppppppppppppppppppppppppppppppppp',
'qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq',
'rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr',
'ssssssssssssssssssssssssssssssssssssssssssss',
'ttttttttttttttttttttttttttttttttttttttttttttt'], dtype=object))])
---
--- result 3 is a '<class 'collections.OrderedDict'>' with size 2
OrderedDict([('x', array([98, 99])), ('a', array(['uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu',
'vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv'], dtype=object))])
---
Query completed after 3 iterations
Finally, clean up by deleting the array.