diff --git a/Cargo.lock b/Cargo.lock index e689db1..78691fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,17 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "version_check", +] + [[package]] name = "ahash" version = "0.8.12" @@ -16,7 +27,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -24,9 +35,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -63,9 +74,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -78,9 +89,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" @@ -93,29 +104,29 @@ dependencies = [ [[package]] name = "anstyle-query" -version = 
"1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anyhow" -version = "1.0.98" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "apache-avro" @@ -146,6 +157,24 @@ dependencies = [ "zstd", ] +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -160,53 +189,101 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +dependencies = [ + "arrow-arith 56.2.0", + "arrow-array 56.2.0", + 
"arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-ord 56.2.0", + "arrow-row 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "arrow-string 56.2.0", +] + +[[package]] +name = "arrow" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", + "arrow-arith 57.1.0", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", "arrow-csv", - "arrow-data", + "arrow-data 57.1.0", "arrow-ipc", "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-ord 57.1.0", + "arrow-row 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", + "arrow-string 57.1.0", +] + +[[package]] +name = "arrow-arith" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "num", ] [[package]] name = "arrow-arith" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "chrono", "num-traits", ] [[package]] name = "arrow-array" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" 
+dependencies = [ + "ahash 0.8.12", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "half", + "hashbrown 0.16.1", + "num", +] + +[[package]] +name = "arrow-array" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "ahash 0.8.12", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "num-complex", "num-integer", "num-traits", @@ -214,9 +291,20 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ "bytes", "half", @@ -226,15 +314,37 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-ord 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "atoi", "base64 0.22.1", "chrono", @@ -247,13 +357,13 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-cast 57.1.0", + "arrow-schema 57.1.0", "chrono", "csv", "csv-core", @@ -262,12 +372,24 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +dependencies = [ + "arrow-buffer 56.2.0", + "arrow-schema 56.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 57.1.0", + "arrow-schema 57.1.0", "half", "num-integer", "num-traits", @@ -275,15 +397,15 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" +checksum = 
"3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "flatbuffers", "lz4_flex", "zstd", @@ -291,18 +413,18 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "chrono", "half", - "indexmap 2.12.0", + "indexmap 2.12.1", "itoa", "lexical-core", "memchr", @@ -315,35 +437,70 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", +] + +[[package]] +name = "arrow-ord" +version = "57.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +dependencies = [ + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", +] + +[[package]] +name = "arrow-row" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" 
dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "half", ] [[package]] name = "arrow-row" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "half", ] [[package]] name = "arrow-schema" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +dependencies = [ + "bitflags", +] + +[[package]] +name = "arrow-schema" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" dependencies = [ "serde_core", "serde_json", @@ -351,33 +508,64 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +dependencies = [ + "ahash 0.8.12", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "num", +] + +[[package]] +name = "arrow-select" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ - "ahash", - 
"arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "ahash 0.8.12", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", "num-traits", ] [[package]] name = "arrow-string" -version = "57.0.0" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "arrow-string" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" +checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-data 57.1.0", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "memchr", "num-traits", "regex", - "regex-syntax 0.8.6", + "regex-syntax", ] [[package]] @@ -416,7 +604,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -427,7 +615,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -453,9 +641,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.10" +version = "1.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1856b1b48b65f71a4dd940b1c0931f9a7b646d4a924b9828ffefc1454714668a" +checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5" dependencies = [ "aws-credential-types", 
"aws-runtime", @@ -472,7 +660,7 @@ dependencies = [ "bytes", "fastrand", "hex", - "http 1.3.1", + "http 1.4.0", "ring", "time", "tokio", @@ -483,9 +671,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.9" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86590e57ea40121d47d3f2e131bfd873dea15d78dc2f4604f4734537ad9e56c4" +checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -495,9 +683,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.13.3" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba" +checksum = "6b5ce75405893cd713f9ab8e297d8e438f624dde7d706108285f7e17a25a180f" dependencies = [ "aws-lc-sys", "zeroize", @@ -505,11 +693,10 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.30.0" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff" +checksum = "179c3777a8b5e70e90ea426114ffc565b2c1a9f82f6c4a0c5a34aa6ef5e781b6" dependencies = [ - "bindgen", "cc", "cmake", "dunce", @@ -518,9 +705,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.14" +version = "1.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fe0fd441565b0b318c76e7206c8d1d0b0166b3e986cf30e890b61feb6192045" +checksum = "d81b5b2898f6798ad58f484856768bca817e3cd9de0974c24ae0f1113fe88f1b" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -542,9 +729,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.89.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c1b1af02288f729e95b72bd17988c009aa72e26dcb59b3200f86d7aea726c9" +checksum = 
"8ee6402a36f27b52fe67661c6732d684b2635152b676aa2babbfb5204f99115d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -564,9 +751,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.91.0" +version = "1.93.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e8122301558dc7c6c68e878af918880b82ff41897a60c8c4e18e4dc4d93e9f1" +checksum = "a45a7f750bbd170ee3677671ad782d90b894548f4e4ae168302c57ec9de5cb3e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -586,9 +773,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.92.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c7808adcff8333eaa76a849e6de926c6ac1a1268b9fd6afe32de9c29ef29d2" +checksum = "55542378e419558e6b1f398ca70adb0b2088077e79ad9f14eb09441f2f7b2164" dependencies = [ "aws-credential-types", "aws-runtime", @@ -609,9 +796,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.6" +version = "1.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" +checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -622,7 +809,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "percent-encoding", "sha2", "time", @@ -631,9 +818,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.6" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" +checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c" dependencies = [ "futures-util", "pin-project-lite", @@ -642,9 +829,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.5" +version = "0.62.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -653,7 +840,7 @@ dependencies = [ "futures-core", "futures-util", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "percent-encoding", "pin-project-lite", @@ -663,15 +850,15 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c" +checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", "h2", - "http 1.3.1", + "http 1.4.0", "hyper", "hyper-rustls", "hyper-util", @@ -687,27 +874,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.7" +version = "0.61.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54" +checksum = "a6864c190cbb8e30cf4b77b2c8f3b6dfffa697a09b7218d2f7cd3d4c4065a9f7" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" +checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.8" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" +checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d" dependencies = [ "aws-smithy-types", "urlencoding", @@ -715,9 +902,9 
@@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0" +checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -728,7 +915,7 @@ dependencies = [ "bytes", "fastrand", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "pin-project-lite", @@ -739,15 +926,15 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" +checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2" dependencies = [ "aws-smithy-async", "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "pin-project-lite", "tokio", "tracing", @@ -756,15 +943,15 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.4" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" +checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01" dependencies = [ "base64-simd", "bytes", "bytes-utils", "http 0.2.12", - "http 1.3.1", + "http 1.4.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -779,18 +966,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.12" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eab77cdd036b11056d2a30a7af7b775789fb024bf216acc13884c6c97752ae56" +checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.10" +version = "1.3.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" +checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -810,7 +997,7 @@ dependencies = [ "axum-core", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "itoa", @@ -836,7 +1023,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "mime", @@ -871,9 +1058,9 @@ dependencies = [ [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -883,34 +1070,11 @@ dependencies = [ "serde", ] -[[package]] -name = "bindgen" -version = "0.69.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" -dependencies = [ - "bitflags", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn", - "which", -] - [[package]] name = "bitflags" -version = "2.9.1" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bitvec" @@ -957,9 +1121,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" 
+checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" dependencies = [ "bon-macros", "rustversion", @@ -967,9 +1131,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" +checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ "darling", "ident_case", @@ -977,14 +1141,37 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.111", +] + +[[package]] +name = "borsh" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" +dependencies = [ + "borsh-derive", + "cfg_aliases", +] + +[[package]] +name = "borsh-derive" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" +dependencies = [ + "once_cell", + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 2.0.111", ] [[package]] name = "brotli" -version = "8.0.1" +version = "8.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -1007,6 +1194,28 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" 
+version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -1015,9 +1224,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "bytes-utils" @@ -1057,31 +1266,29 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" -version = "1.2.32" +version = "1.2.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2352e5597e9c544d5e6d9c95190d5d27738ade584fa8db0a16e130e5c2b5296e" +checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", ] -[[package]] -name = "cexpr" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" -dependencies = [ - "nom", -] - [[package]] name = "cfg-if" -version = "1.0.1" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "cfg_aliases" @@ -1098,7 +1305,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ 
-1111,22 +1318,11 @@ dependencies = [ "phf", ] -[[package]] -name = "clang-sys" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" -dependencies = [ - "glob", - "libc", - "libloading", -] - [[package]] name = "clap" -version = "4.5.52" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa8120877db0e5c011242f96806ce3c94e0737ab8108532a76a3300a01db2ab8" +checksum = "c9e340e012a1bf4935f5282ed1436d1489548e8f72308207ea5df0e23d2d03f8" dependencies = [ "clap_builder", "clap_derive", @@ -1134,9 +1330,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.52" +version = "4.5.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02576b399397b659c26064fbc92a75fede9d18ffd5f80ca1cd74ddab167016e1" +checksum = "d76b5d13eaa18c901fd2f7fca939fefe3a0727a953561fefdf3b2922b8569d00" dependencies = [ "anstream", "anstyle", @@ -1153,14 +1349,14 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "clap_lex" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "clipboard-win" @@ -1173,9 +1369,9 @@ dependencies = [ [[package]] name = "cmake" -version = "0.1.54" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +checksum = "b042e5d8a74ae91bb0961acd039822472ec99f8ab0948cbf6d1369588f8be586" dependencies = [ "cc", ] @@ -1319,9 +1515,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1329,21 +1525,21 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] @@ -1369,7 +1565,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.111", ] [[package]] @@ -1380,7 +1576,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1403,8 +1599,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" dependencies = [ - "arrow", - "arrow-schema", + "arrow 57.1.0", + "arrow-schema 57.1.0", "async-trait", "bytes", "bzip2 0.6.1", @@ -1437,7 +1633,7 @@ dependencies = [ "datafusion-sql", "flate2", "futures", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -1460,7 +1656,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "dashmap", 
"datafusion-common", @@ -1472,7 +1668,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -1485,7 +1681,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "datafusion-catalog", "datafusion-common", @@ -1497,7 +1693,7 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "futures", - "itertools 0.14.0", + "itertools", "log", "object_store", "tokio", @@ -1509,7 +1705,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fab982df44f818a749cb5200504ccb919f4608cb9808daf8b3fb98aa7955fd1e" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "aws-config", "aws-credential-types", @@ -1537,15 +1733,15 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ - "ahash", + "ahash 0.8.12", "apache-avro", - "arrow", + "arrow 57.1.0", "arrow-ipc", "chrono", "half", "hashbrown 0.14.5", "hex", - "indexmap 2.12.0", + "indexmap 2.12.1", "libc", "log", "object_store", @@ -1574,7 +1770,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" dependencies = [ - "arrow", + "arrow 57.1.0", "async-compression", "async-trait", "bytes", @@ -1592,7 +1788,7 @@ dependencies = [ "flate2", "futures", "glob", - "itertools 0.14.0", + "itertools", "log", "object_store", "rand 0.9.2", @@ -1609,7 +1805,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" dependencies = [ - "arrow", + "arrow 
57.1.0", "arrow-ipc", "async-trait", "bytes", @@ -1622,7 +1818,7 @@ dependencies = [ "datafusion-physical-plan", "datafusion-session", "futures", - "itertools 0.14.0", + "itertools", "object_store", "tokio", ] @@ -1634,7 +1830,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" dependencies = [ "apache-avro", - "arrow", + "arrow 57.1.0", "async-trait", "bytes", "datafusion-common", @@ -1653,7 +1849,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "bytes", "datafusion-common", @@ -1676,7 +1872,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "bytes", "datafusion-common", @@ -1698,7 +1894,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "bytes", "datafusion-common", @@ -1714,7 +1910,7 @@ dependencies = [ "datafusion-pruning", "datafusion-session", "futures", - "itertools 0.14.0", + "itertools", "log", "object_store", "parking_lot", @@ -1734,7 +1930,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "dashmap", "datafusion-common", @@ -1755,7 +1951,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", 
"chrono", "datafusion-common", @@ -1764,8 +1960,8 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.12.0", - "itertools 0.14.0", + "indexmap 2.12.1", + "itertools", "paste", "recursive", "serde_json", @@ -1778,10 +1974,10 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" dependencies = [ - "arrow", + "arrow 57.1.0", "datafusion-common", - "indexmap 2.12.0", - "itertools 0.14.0", + "indexmap 2.12.1", + "itertools", "paste", ] @@ -1791,8 +1987,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 57.1.0", + "arrow-buffer 57.1.0", "base64 0.22.1", "blake2", "blake3", @@ -1804,7 +2000,7 @@ dependencies = [ "datafusion-expr-common", "datafusion-macros", "hex", - "itertools 0.14.0", + "itertools", "log", "md-5", "num-traits", @@ -1821,8 +2017,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ - "ahash", - "arrow", + "ahash 0.8.12", + "arrow 57.1.0", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1842,8 +2038,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ - "ahash", - "arrow", + "ahash 0.8.12", + "arrow 57.1.0", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", @@ -1855,8 +2051,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" dependencies = [ - "arrow", - 
"arrow-ord", + "arrow 57.1.0", + "arrow-ord 57.1.0", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1867,7 +2063,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-macros", "datafusion-physical-expr-common", - "itertools 0.14.0", + "itertools", "log", "paste", ] @@ -1878,7 +2074,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" dependencies = [ - "arrow", + "arrow 57.1.0", "async-trait", "datafusion-catalog", "datafusion-common", @@ -1894,7 +2090,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" dependencies = [ - "arrow", + "arrow 57.1.0", "datafusion-common", "datafusion-doc", "datafusion-expr", @@ -1924,7 +2120,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -1933,18 +2129,18 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" dependencies = [ - "arrow", + "arrow 57.1.0", "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.12.0", - "itertools 0.14.0", + "indexmap 2.12.1", + "itertools", "log", "recursive", "regex", - "regex-syntax 0.8.6", + "regex-syntax", ] [[package]] @@ -1953,8 +2149,8 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ - "ahash", - "arrow", + "ahash 0.8.12", + "arrow 57.1.0", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1962,8 +2158,8 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - 
"indexmap 2.12.0", - "itertools 0.14.0", + "indexmap 2.12.1", + "itertools", "parking_lot", "paste", "petgraph", @@ -1975,13 +2171,13 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" dependencies = [ - "arrow", + "arrow 57.1.0", "datafusion-common", "datafusion-expr", "datafusion-functions", "datafusion-physical-expr", "datafusion-physical-expr-common", - "itertools 0.14.0", + "itertools", ] [[package]] @@ -1990,12 +2186,12 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ - "ahash", - "arrow", + "ahash 0.8.12", + "arrow 57.1.0", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "itertools 0.14.0", + "itertools", ] [[package]] @@ -2004,7 +2200,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" dependencies = [ - "arrow", + "arrow 57.1.0", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -2013,7 +2209,7 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-pruning", - "itertools 0.14.0", + "itertools", "recursive", ] @@ -2023,10 +2219,10 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ - "ahash", - "arrow", - "arrow-ord", - "arrow-schema", + "ahash 0.8.12", + "arrow 57.1.0", + "arrow-ord 57.1.0", + "arrow-schema 57.1.0", "async-trait", "chrono", "datafusion-common", @@ -2040,8 +2236,8 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.12.0", - "itertools 0.14.0", + "indexmap 2.12.1", + "itertools", "log", "parking_lot", "pin-project-lite", @@ -2054,14 +2250,14 @@ version = 
"51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" dependencies = [ - "arrow", + "arrow 57.1.0", "datafusion-common", "datafusion-datasource", "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", - "itertools 0.14.0", + "itertools", "log", ] @@ -2085,12 +2281,12 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" dependencies = [ - "arrow", + "arrow 57.1.0", "bigdecimal", "chrono", "datafusion-common", "datafusion-expr", - "indexmap 2.12.0", + "indexmap 2.12.1", "log", "recursive", "regex", @@ -2099,13 +2295,24 @@ dependencies = [ [[package]] name = "deranged" -version = "0.4.0" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" dependencies = [ "powerfmt", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.111", +] + [[package]] name = "digest" version = "0.10.7" @@ -2146,7 +2353,24 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", +] + +[[package]] +name = "duckdb" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7eeb487dde618b9f6ab26a451775ad5fac3fabe1ca2b64cbbe90b105f264ccd" +dependencies = [ + "arrow 56.2.0", + "cast", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "num-integer", + "rust_decimal", + 
"strum 0.27.2", ] [[package]] @@ -2169,9 +2393,9 @@ checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" [[package]] name = "env_filter" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" dependencies = [ "log", "regex", @@ -2198,12 +2422,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2212,6 +2436,18 @@ version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fastrand" version = "2.3.0" @@ -2225,10 +2461,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix 1.0.8", + "rustix", "windows-sys 0.59.0", ] +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -2237,9 +2491,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.2.10" +version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" dependencies = [ "bitflags", "rustc_version", @@ -2345,7 +2599,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -2403,29 +2657,29 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasip2", "wasm-bindgen", ] [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" @@ -2438,8 +2692,8 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.3.1", - "indexmap 2.12.0", + 
"http 1.4.0", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", @@ -2463,6 +2717,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.8", +] [[package]] name = "hashbrown" @@ -2470,7 +2727,7 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash", + "ahash 0.8.12", "allocator-api2", ] @@ -2485,9 +2742,18 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "hashlink" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] [[package]] name = "hdrhistogram" @@ -2525,11 +2791,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2545,12 +2811,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -2572,7 +2837,7 
@@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.3.1", + "http 1.4.0", ] [[package]] @@ -2583,7 +2848,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -2602,26 +2867,28 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "humantime" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.6.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ + "atomic-waker", "bytes", "futures-channel", - "futures-util", + "futures-core", "h2", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "httparse", "httpdate", "itoa", "pin-project-lite", + "pin-utils", "smallvec", "tokio", "want", @@ -2633,7 +2900,7 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.3.1", + "http 1.4.0", "hyper", "hyper-util", "rustls", @@ -2642,6 +2909,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -2659,23 +2927,23 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.16" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "hyper", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.0", + "socket2 0.6.1", "tokio", "tower-service", "tracing", @@ -2683,9 +2951,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2707,9 +2975,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -2720,9 +2988,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -2733,11 +3001,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", 
"icu_normalizer_data", "icu_properties", @@ -2748,42 +3015,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -2830,12 +3093,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", ] [[package]] @@ -2852,9 +3115,9 @@ 
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -2862,18 +3125,9 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" - -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -2892,43 +3146,43 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "log", "portable-atomic", "portable-atomic-util", - "serde", + "serde_core", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "jobserver" -version = "0.1.33" +version = "0.1.34" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -2940,17 +3194,11 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "lexical-core" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -2961,53 +3209,46 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" dependencies = [ "lexical-parse-integer", "lexical-util", - "static_assertions", ] [[package]] name = "lexical-parse-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" dependencies = [ "lexical-util", - "static_assertions", ] [[package]] name = "lexical-util" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" -dependencies = [ - "static_assertions", -] +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" [[package]] name = "lexical-write-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" dependencies = [ "lexical-util", "lexical-write-integer", - "static_assertions", ] [[package]] name = "lexical-write-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" dependencies = [ "lexical-util", - "static_assertions", ] [[package]] @@ -3018,18 +3259,25 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.177" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] -name = "libloading" -version = "0.8.8" +name = "libduckdb-sys" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" +checksum = 
"c8c60c2d269e63ae5197e4fe9075efffed35dfda0095a5ac8b41f3c765b18456" dependencies = [ - "cfg-if", - "windows-targets 0.53.3", + "cc", + "flate2", + "pkg-config", + "reqwest", + "serde", + "serde_json", + "tar", + "vcpkg", + "zip", ] [[package]] @@ -3040,9 +3288,9 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libmimalloc-sys" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf88cd67e9de251c1781dbe2f641a1a3ad66eaae831b8a2c38fbdc5ddae16d4d" +checksum = "667f4fec20f29dfc6bc7357c582d91796c169ad7e2fce709468aefeb2c099870" dependencies = [ "cc", "libc", @@ -3050,56 +3298,50 @@ dependencies = [ [[package]] name = "libredox" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ "bitflags", "libc", + "redox_syscall", ] [[package]] name = "libz-rs-sys" -version = "0.5.1" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c" dependencies = [ "zlib-rs", ] [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - -[[package]] -name = "linux-raw-sys" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.27" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -3109,9 +3351,9 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ "twox-hash", ] @@ -3129,11 +3371,11 @@ dependencies = [ [[package]] name = "matchers" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.1.10", + "regex-automata", ] [[package]] @@ -3154,15 +3396,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = 
"f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "mimalloc" -version = "0.1.47" +version = "0.1.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1791cbe101e95af5764f06f20f6760521f7158f69dbf9d6baf941ee1bf6bc40" +checksum = "e1ee66a4b64c74f4ef288bcbb9192ad9c3feaad75193129ac8509af543894fd8" dependencies = [ "libmimalloc-sys", ] @@ -3191,13 +3433,13 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.4" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", + "wasi", + "windows-sys 0.61.2", ] [[package]] @@ -3233,12 +3475,25 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.46.0" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "num" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" dependencies = [ - "overload", - "winapi", + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", ] [[package]] @@ -3276,6 +3531,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -3286,6 +3563,15 @@ dependencies = [ "libm", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.4" @@ -3298,11 +3584,11 @@ dependencies = [ "chrono", "form_urlencoded", "futures", - "http 1.3.1", + "http 1.4.0", "http-body-util", "humantime", "hyper", - "itertools 0.14.0", + "itertools", "md-5", "parking_lot", "percent-encoding", @@ -3331,9 +3617,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "openssl-probe" @@ -3344,6 +3630,15 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "optd-catalog" version = "0.1.0" +dependencies = [ + "duckdb", + "futures", + "serde", + "serde_json", + "snafu", + "tempfile", + "tokio", +] [[package]] name = "optd-cli" @@ -3353,9 +3648,12 @@ dependencies = [ "datafusion", "datafusion-cli", "dirs", + "futures", "object_store", + "optd-catalog", "optd-datafusion", "regex", + "tempfile", "tokio", "tracing", "tracing-subscriber", @@ -3369,7 +3667,7 @@ dependencies = [ "anyhow", "bitvec", "console-subscriber", - "itertools 0.14.0", + "itertools", "pretty-xmlish", "snafu", "tokio", @@ -3382,16 +3680,17 @@ dependencies = [ name = "optd-datafusion" version = 
"0.1.0" dependencies = [ + "async-trait", "datafusion", - "itertools 0.14.0", + "itertools", + "optd-catalog", "optd-core", + "serde_json", + "tempfile", + "tokio", "tracing", ] -[[package]] -name = "optd-storage" -version = "0.1.0" - [[package]] name = "option-ext" version = "0.2.0" @@ -3413,17 +3712,11 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -3431,31 +3724,31 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "parquet" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", + "ahash 0.8.12", + "arrow-array 57.1.0", + "arrow-buffer 57.1.0", + "arrow-cast 57.1.0", + "arrow-data 57.1.0", "arrow-ipc", - "arrow-schema", - 
"arrow-select", + "arrow-schema 57.1.0", + "arrow-select 57.1.0", "base64 0.22.1", "brotli", "bytes", @@ -3463,7 +3756,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", "num-bigint", "num-integer", @@ -3500,7 +3793,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.0", + "indexmap 2.12.1", "serde", ] @@ -3539,7 +3832,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -3577,9 +3870,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -3607,12 +3900,12 @@ checksum = "96b8aab53732b7a9c5c39bb0e130f85671b48b188ef258c3b9f7f5da1877382a" [[package]] name = "prettyplease" -version = "0.2.36" +version = "0.2.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff24dfcda44452b9816fff4cd4227e1bb73ff5a2f1bc1105aa92fb8565ce44d2" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.111", ] [[package]] @@ -3626,9 +3919,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -3650,10 +3943,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -3667,13 +3960,34 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" dependencies = [ + "ar_archive_writer", "cc", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quad-rand" version = "0.2.3" @@ -3682,9 +3996,9 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] name = "quick-xml" -version = "0.38.1" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9845d9dccf565065824e69f9f235fafba1587031eda353c1f1561cd6a6be78f4" +checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" dependencies = [ "memchr", "serde", @@ -3692,18 +4006,18 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.8" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8" +checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20" dependencies = [ "bytes", "cfg_aliases", "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 
2.1.1", + "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.1", "thiserror", "tokio", "tracing", @@ -3712,16 +4026,16 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.12" +version = "0.11.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e" +checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", - "getrandom 0.3.3", + "getrandom 0.3.4", "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "rustls-pki-types", "slab", @@ -3733,16 +4047,16 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" +checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd" dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.1", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -3832,7 +4146,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -3852,14 +4166,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "redox_syscall" -version = "0.5.17" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] @@ -3883,17 +4197,8 @@ checksum = 
"843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.13", - "regex-syntax 0.8.6", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] @@ -3904,26 +4209,20 @@ checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.6", + "regex-syntax", ] [[package]] name = "regex-lite" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" +checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" [[package]] name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - -[[package]] -name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "relative-path" @@ -3931,18 +4230,28 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + [[package]] name = "reqwest" -version = "0.12.22" +version = "0.12.25" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531" +checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a" dependencies = [ "base64 0.22.1", "bytes", + "futures-channel", "futures-core", "futures-util", "h2", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper", @@ -3971,6 +4280,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", + "webpki-roots", ] [[package]] @@ -3987,6 +4297,35 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rstest" version = "0.26.1" @@ -4012,15 +4351,25 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn", + "syn 2.0.111", "unicode-ident", ] [[package]] -name = "rustc-hash" -version = "1.1.0" +name = "rust_decimal" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" +dependencies = [ + "arrayvec", + "borsh", + "bytes", + "num-traits", + "rand 0.8.5", + "rkyv", + "serde", + "serde_json", +] [[package]] name = "rustc-hash" @@ -4039,35 +4388,22 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.44" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustix" -version = "1.0.8" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.9.4", - "windows-sys 0.60.2", + "linux-raw-sys", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.31" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ebcbd2f03de0fc1122ad9bb24b127a5a6cd51d72604a3f3c50ac459762b6cc" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", "once_cell", @@ -4080,9 +4416,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -4101,9 +4437,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "708c0f9d5f54ba0272468c1d306a52c495b31fa155e91bc25371e6df7996908c" dependencies = [ "web-time", "zeroize", @@ -4111,9 +4447,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.4" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "aws-lc-rs", "ring", @@ -4129,9 +4465,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rustyline" -version = "17.0.1" +version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6614df0b6d4cfb20d1d5e295332921793ce499af3ebc011bf1e393380e1e492" +checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ "bitflags", "cfg-if", @@ -4166,11 +4502,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4179,11 +4515,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "security-framework" -version = "3.3.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80fb1d92c5028aa318b4b8bd7302a5bfcf48be96a37fc6fc790f806b0004ee0c" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags", "core-foundation", @@ -4194,9 +4536,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.14.0" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ "core-foundation-sys", "libc", @@ -4204,9 +4546,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "seq-macro" @@ -4226,11 +4568,12 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.17" +version = "0.11.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" +checksum = "a5d440709e79d88e51ac01c4b72fc6cb7314017bb7da9eeff678aa94c10e3ea8" dependencies = [ "serde", + "serde_core", ] [[package]] @@ -4250,19 +4593,20 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "serde_json" -version = "1.0.142" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -4305,18 +4649,18 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] [[package]] name = "simd-adler32" -version = "0.3.7" +version = 
"0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -4344,23 +4688,23 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "snafu" -version = "0.8.6" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320b01e011bf8d5d7a4a4a4be966d9160968935849c83b918827f6a435e7f627" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" dependencies = [ "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.8.6" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1961e2ef424c1424204d3a5d6975f934f56b6d50ff5732382d84ebf460e147f7" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -4381,12 +4725,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -4408,20 +4752,20 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = 
"0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", "cfg-if", @@ -4430,12 +4774,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "strsim" version = "0.11.1" @@ -4453,6 +4791,9 @@ name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros 0.27.2", +] [[package]] name = "strum_macros" @@ -4464,7 +4805,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 2.0.111", ] [[package]] @@ -4476,7 +4817,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -4487,9 +4828,20 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.110" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -4513,7 +4865,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -4522,17 
+4874,28 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" -version = "3.20.0" +version = "3.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", - "rustix 1.0.8", - "windows-sys 0.59.0", + "rustix", + "windows-sys 0.61.2", ] [[package]] @@ -4552,7 +4915,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -4577,9 +4940,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.41" +version = "0.3.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "num-conv", @@ -4591,15 +4954,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] name = "time-macros" -version = "0.2.22" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" 
+checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ "num-conv", "time-core", @@ -4616,9 +4979,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -4626,9 +4989,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" dependencies = [ "tinyvec_macros", ] @@ -4651,7 +5014,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.0", + "socket2 0.6.1", "tokio-macros", "tracing", "windows-sys 0.61.2", @@ -4665,14 +5028,14 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "tokio-rustls" -version = "0.26.2" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ "rustls", "tokio", @@ -4691,9 +5054,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -4713,11 +5076,11 @@ dependencies = [ [[package]] name = "toml_edit" 
-version = "0.23.7" +version = "0.23.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" +checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832" dependencies = [ - "indexmap 2.12.0", + "indexmap 2.12.1", "toml_datetime", "toml_parser", "winnow", @@ -4744,7 +5107,7 @@ dependencies = [ "base64 0.22.1", "bytes", "h2", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", "hyper", @@ -4799,14 +5162,14 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "iri-string", "pin-project-lite", @@ -4829,9 +5192,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "log", "pin-project-lite", @@ -4841,20 +5204,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -4873,14 +5236,14 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.19" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex", + "regex-automata", "sharded-slab", "smallvec", "thread_local", @@ -4907,7 +5270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" dependencies = [ "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -4918,21 +5281,21 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "twox-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-segmentation" @@ -4942,9 +5305,9 @@ checksum = 
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "untrusted" @@ -4984,13 +5347,13 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] @@ -5000,6 +5363,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version_check" version = "0.9.5" @@ -5038,45 +5407,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.106" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -5087,9 +5443,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5097,22 +5453,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", - "syn", - "wasm-bindgen-backend", + "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] @@ -5132,9 +5488,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -5151,89 +5507,58 @@ dependencies = [ ] [[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.44", -] - -[[package]] -name = "winapi" -version = "0.3.9" +name = "webpki-roots" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", + "rustls-pki-types", ] -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.1.3", + "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" @@ -5242,20 +5567,20 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - 
"windows-link 0.1.3", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -5282,7 +5607,7 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.3", + "windows-targets 0.53.5", ] [[package]] @@ -5291,7 +5616,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -5312,19 +5637,19 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.3" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.1.3", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -5335,9 +5660,9 @@ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = 
"windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -5347,9 +5672,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -5359,9 +5684,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -5371,9 +5696,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -5383,9 +5708,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = 
"1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -5395,9 +5720,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -5407,9 +5732,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -5419,33 +5744,30 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" dependencies = [ "memchr", ] [[package]] -name = "wit-bindgen-rt" -version = "0.39.0" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - 
"bitflags", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "wyz" @@ -5456,6 +5778,16 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "xmlparser" version = "0.13.6" @@ -5473,11 +5805,10 @@ dependencies = [ [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -5485,34 +5816,34 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.31" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", ] [[package]] @@ -5532,21 +5863,21 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", "synstructure", ] [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -5555,9 +5886,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -5566,20 +5897,46 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.111", +] + +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap 2.12.1", + "memchr", + "zopfli", ] [[package]] name = "zlib-rs" -version = "0.5.1" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235" + +[[package]] +name = "zopfli" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] [[package]] name = "zstd" @@ -5601,9 +5958,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 45a6fd1..d02d6a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,20 +1,11 @@ [workspace] resolver = "2" -members = [ - "cli", - "connectors/datafusion", - "optd/catalog", - "optd/core", - "optd/storage", -] +members = ["cli", "connectors/datafusion", "optd/core", "optd/catalog"] -# By default, only compiles the `optd-core` crate. 
default-members = ["optd/core"] [workspace.dependencies] -optd-datafusion = { path = "connectors/datafusion" } - tokio = { version = "1.47", features = ["macros", "rt", "sync"] } tracing = "0.1" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 7114cd2..5d1a862 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -23,3 +23,9 @@ object_store = "0.12.3" url = "2.5.4" tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } tracing = { workspace = true } + +futures = "0.3.31" +optd-catalog = { path = "../optd/catalog", version = "0.1" } + +[dev-dependencies] +tempfile = "3" diff --git a/cli/smoke_test_cli.sh b/cli/smoke_test_cli.sh new file mode 100755 index 0000000..726d1c8 --- /dev/null +++ b/cli/smoke_test_cli.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# CLI smoke test - verifies catalog integration is active + +set -e # Exit on error + +GREEN='\033[0;32m' +RED='\033[0;31m' +RESET='\033[0m' + +echo "=== CLI Smoke Test ===" + +# Build +echo "Building..." +cargo build --package optd-cli --quiet +if [ ! -f ./target/debug/optd-cli ]; then + echo -e "${RED}✗ Build failed${RESET}" + exit 1 +fi + +CLI=./target/debug/optd-cli + +# Test 1: Basic functionality +echo "Test 1: Basic query execution" +output=$($CLI -c "SELECT 1 as test;" 2>&1) +if [ $? -eq 0 ] && echo "$output" | grep -q "OptD catalog"; then + echo -e "${GREEN}✓ PASS${RESET} - CLI runs, catalog integration active" +else + echo -e "${RED}✗ FAIL${RESET}" + exit 1 +fi + +# Test 2: Session persistence (multiple commands) +echo "Test 2: Session state persistence" +output=$($CLI -c "CREATE TABLE t (x INT);" -c "INSERT INTO t VALUES (1);" -c "SELECT * FROM t;" 2>&1) +if [ $? 
-eq 0 ] && echo "$output" | grep -q "1 row"; then + echo -e "${GREEN}✓ PASS${RESET} - Multiple commands work, session persists" +else + echo -e "${RED}✗ FAIL${RESET}" + exit 1 +fi + +# Test 3: Metadata path configuration +echo "Test 3: Metadata path environment variable" +TMPDIR_PATH=$(mktemp -d) +export OPTD_METADATA_CATALOG_PATH="$TMPDIR_PATH/test.ducklake" +output=$($CLI -c "SELECT 1;" 2>&1) +unset OPTD_METADATA_CATALOG_PATH +rm -rf "$TMPDIR_PATH" +if echo "$output" | grep -q "Using OptD catalog with metadata path"; then + echo -e "${GREEN}✓ PASS${RESET} - Metadata path recognized" +else + echo -e "${RED}✗ FAIL${RESET}" + exit 1 +fi + +echo "" +echo -e "${GREEN}✓ All smoke tests passed!${RESET}" diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 5d4111b..89a878d 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -1,11 +1,14 @@ -use std::sync::Arc; - use datafusion::{ + common::{DataFusionError, Result, exec_err, not_impl_err}, + datasource::TableProvider, execution::{SessionStateBuilder, runtime_env::RuntimeEnv}, + logical_expr::{CreateExternalTable, LogicalPlanBuilder}, prelude::{DataFrame, SessionConfig, SessionContext}, + sql::TableReference, }; use datafusion_cli::cli_context::CliSessionContext; use optd_datafusion::{OptdExtensionConfig, SessionStateBuilderOptdExt}; +use std::sync::Arc; pub struct OptdCliSessionContext { inner: SessionContext, @@ -39,10 +42,62 @@ impl OptdCliSessionContext { &self.inner } - pub fn return_empty_dataframe(&self) -> datafusion::common::Result { - let plan = datafusion::logical_expr::LogicalPlanBuilder::empty(false).build()?; + pub fn return_empty_dataframe(&self) -> Result { + let plan = LogicalPlanBuilder::empty(false).build()?; Ok(DataFrame::new(self.inner.state(), plan)) } + + async fn create_external_table(&self, cmd: &CreateExternalTable) -> Result { + let exist = self.inner.table_exist(cmd.name.clone())?; + + if cmd.temporary { + return not_impl_err!("Temporary tables not supported"); + } + + if exist { + match 
cmd.if_not_exists { + true => return self.return_empty_dataframe(), + false => { + return exec_err!("Table '{}' already exists", cmd.name); + } + } + } + + let table_provider: Arc = self.create_custom_table(cmd).await?; + self.register_table(cmd.name.clone(), table_provider)?; + + self.return_empty_dataframe() + } + + async fn create_custom_table( + &self, + cmd: &CreateExternalTable, + ) -> Result> { + let state = self.inner.state_ref().read().clone(); + let file_type = cmd.file_type.to_uppercase(); + let factory = state + .table_factories() + .get(file_type.as_str()) + .ok_or_else(|| { + DataFusionError::Execution(format!("Unable to find factory for {}", cmd.file_type)) + })?; + let table = (*factory).create(&state, cmd).await?; + Ok(table) + } + + pub fn register_table( + &self, + table_ref: impl Into, + provider: Arc, + ) -> Result>> { + let table_ref: TableReference = table_ref.into(); + let table = table_ref.table().to_owned(); + self.inner + .state_ref() + .read() + .schema_for_ref(table_ref)? 
+ .register_table(table, provider) + } } impl CliSessionContext for OptdCliSessionContext { @@ -72,12 +127,8 @@ impl CliSessionContext for OptdCliSessionContext { plan: datafusion::logical_expr::LogicalPlan, ) -> ::core::pin::Pin< Box< - dyn ::core::future::Future< - Output = Result< - datafusion::prelude::DataFrame, - datafusion::common::DataFusionError, - >, - > + ::core::marker::Send + dyn ::core::future::Future> + + ::core::marker::Send + 'async_trait, >, > @@ -102,8 +153,14 @@ impl CliSessionContext for OptdCliSessionContext { } _ => (), } + } else if let datafusion::logical_expr::LogicalPlan::Ddl(ddl) = &plan { + match ddl { + datafusion::logical_expr::DdlStatement::CreateExternalTable(create_table) => { + return self.create_external_table(&create_table).await; + } + _ => (), + } } - self.inner.execute_logical_plan(plan).await }; diff --git a/cli/src/main.rs b/cli/src/main.rs index 6379169..88547d0 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -44,7 +44,9 @@ use datafusion::common::config_err; use datafusion::config::ConfigOptions; use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; +use optd_catalog::{CatalogService, DuckLakeCatalog}; use optd_cli::OptdCliSessionContext; +use optd_datafusion::OptdCatalogProviderList; #[derive(Debug, Parser, PartialEq)] #[clap(author, version, about, long_about= None)] @@ -214,11 +216,35 @@ async fn main_inner() -> Result<()> { let cli_ctx = cli_ctx.enable_url_table(); let ctx = cli_ctx.inner(); + // Initialize catalog with optional DuckLake catalog service + let catalog_handle = if let Ok(metadata_path) = env::var("OPTD_METADATA_CATALOG_PATH") { + if !args.quiet { + println!("Using OptD catalog with metadata path: {}", metadata_path); + } + let ducklake_catalog = DuckLakeCatalog::try_new(None, Some(&metadata_path)) + .map_err(|e| DataFusionError::External(Box::new(e)))?; + let (service, handle) = CatalogService::new(ducklake_catalog); + tokio::spawn(async move { service.run().await }); + 
Some(handle) + } else { + if !args.quiet { + println!("OptD catalog integration enabled (no persistent metadata)"); + } + None + }; + + // Wrap the catalog list with OptdCatalogProviderList + let original_catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = + OptdCatalogProviderList::new(original_catalog_list.clone(), catalog_handle); + // install dynamic catalog provider that can register required object stores - ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new( - ctx.state().catalog_list().clone(), + // and wrap it with OptD catalog provider + let dynamic_catalog = Arc::new(DynamicObjectStoreCatalog::new( + Arc::new(optd_catalog_list), ctx.state_weak_ref(), - ))); + )); + ctx.register_catalog_list(dynamic_catalog); // register `parquet_metadata` table function to get metadata from parquet files ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {})); diff --git a/cli/tests/catalog_service_integration.rs b/cli/tests/catalog_service_integration.rs new file mode 100644 index 0000000..d893b50 --- /dev/null +++ b/cli/tests/catalog_service_integration.rs @@ -0,0 +1,323 @@ +// Integration tests for OptD catalog service handle functions + +use datafusion::{ + arrow::array::{Int32Array, RecordBatch}, + arrow::datatypes::{DataType, Field, Schema}, + catalog::CatalogProviderList, + prelude::SessionContext, +}; +use optd_catalog::{CatalogService, DuckLakeCatalog}; +use optd_datafusion::OptdCatalogProviderList; +use std::sync::Arc; +use tempfile::TempDir; + +#[tokio::test] +async fn test_catalog_service_handle() -> Result<(), Box> { + // Setup catalog with test data + let temp_dir = TempDir::new()?; + let metadata_path = temp_dir.path().join("metadata.ducklake"); + + { + let setup_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = setup_catalog.get_connection(); + conn.execute_batch("CREATE TABLE test_table (id INTEGER, name VARCHAR, age INTEGER)")?; + conn.execute_batch( + 
"INSERT INTO test_table VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Carol', 35)", + )?; + } + + // Start catalog service again to check restart resilience + let catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + // Test catalog service handle functions + let snapshot = handle.current_snapshot().await?; + assert_eq!( + snapshot.0, 2, + "Snapshot should be 2 (CREATE TABLE and INSERT)" + ); + + let snapshot_info = handle.current_snapshot_info().await?; + assert!( + snapshot_info.schema_version >= 0, + "Schema version should be greater than or equal to 0" + ); + assert_eq!(snapshot_info.id.0, snapshot.0, "Snapshot IDs should match"); + + let schema = handle.current_schema(None, "test_table").await?; + assert_eq!(schema.fields().len(), 3, "Should have 3 fields"); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(1).name(), "name"); + assert_eq!(schema.field(2).name(), "age"); + + // Test statistics + let query_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = query_catalog.get_connection(); + + let table_id: i64 = conn.query_row( + "SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'", + [], + |row| row.get(0), + )?; + + let age_column_id: i64 = conn.query_row( + "SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'age'", + [table_id], + |row| row.get(0), + )?; + + // Test statistics update API + handle + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .await?; + + let updated_snapshot = handle.current_snapshot().await?; + assert_eq!( + updated_snapshot.0, 3, + "Should be snapshot 3 after stats update" + ); + + let stats = handle + .table_statistics("test_table", updated_snapshot) + .await? + .unwrap(); + assert_eq!(stats.row_count, 3, "Should have 3 rows"); + + let age_stats = stats + .column_statistics + .iter() + .find(|c| c.name == "age") + .expect("Should have statistics for 'age' column"); + + assert_eq!(age_stats.name, "age"); + assert_eq!(age_stats.column_type, "int32"); + + // Verify the ndv statistic was actually persisted + assert_eq!( + age_stats.advanced_stats.len(), + 1, + "Should have 1 advanced statistic" + ); + assert_eq!(age_stats.advanced_stats[0].stats_type, "ndv"); + assert_eq!( + age_stats.advanced_stats[0] + .data + .get("distinct_count") + .and_then(|v| v.as_i64()), + Some(3), + "Should have distinct_count of 3 in ndv statistic" + ); + + // Test multiple statistics on the same column (add histogram) + handle + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [{"lower": 25, "upper": 30, "count": 2}, {"lower": 30, "upper": 35, "count": 1}]}"# + ) + .await?; + + let updated_snapshot2 = handle.current_snapshot().await?; + assert_eq!( + updated_snapshot2.0, 4, + "Should be snapshot 4 after histogram update" + ); + + let stats2 = handle + .table_statistics("test_table", updated_snapshot2) + .await? 
+ .unwrap(); + + let age_stats2 = stats2 + .column_statistics + .iter() + .find(|c| c.name == "age") + .expect("Should have statistics for 'age' column"); + + // Should now have both ndv and histogram statistics + assert_eq!( + age_stats2.advanced_stats.len(), + 2, + "Should have 2 advanced statistics" + ); + + let ndv_stat = age_stats2 + .advanced_stats + .iter() + .find(|s| s.stats_type == "ndv") + .expect("Should have ndv"); + let histogram_stat = age_stats2 + .advanced_stats + .iter() + .find(|s| s.stats_type == "histogram") + .expect("Should have histogram"); + + assert_eq!( + ndv_stat.data.get("distinct_count").and_then(|v| v.as_i64()), + Some(3), + "ndv statistic should persist" + ); + + assert!( + histogram_stat + .data + .get("buckets") + .and_then(|v| v.as_array()) + .is_some(), + "histogram should have buckets array" + ); + + let buckets = histogram_stat + .data + .get("buckets") + .unwrap() + .as_array() + .unwrap(); + assert_eq!(buckets.len(), 2, "Should have 2 histogram buckets"); + + Ok(()) +} + +#[tokio::test] +async fn test_datafusion_catalog_integration() -> Result<(), Box> { + // Setup catalog with test data and statistics + let temp_dir = TempDir::new()?; + let metadata_path = temp_dir.path().join("metadata.ducklake"); + + { + let setup_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = setup_catalog.get_connection(); + conn.execute_batch("CREATE TABLE df_test (id INTEGER, value INTEGER)")?; + conn.execute_batch( + "INSERT INTO df_test VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50)", + )?; + } + + let catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + // Setup statistics for testing + let query_catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap()))?; + let conn = query_catalog.get_connection(); + + let table_id: i64 = conn.query_row( 
+ "SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'df_test'", + [], + |row| row.get(0), + )?; + + let value_column_id: i64 = conn.query_row( + "SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'value'", + [table_id], + |row| row.get(0), + )?; + + // Add test statistics + handle + .update_table_column_stats(value_column_id, table_id, "ndv", r#"{"distinct_count": 5}"#) + .await?; + handle + .update_table_column_stats( + value_column_id, + table_id, + "histogram", + r#"{"buckets": [{"lower": 10, "upper": 30, "count": 3}, {"lower": 30, "upper": 50, "count": 2}]}"# + ) + .await?; + + // Test DataFusion catalog integration + let ctx = SessionContext::new(); + ctx.register_batch( + "df_test", + RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("value", DataType::Int32, false), + ])), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50])), + ], + )?, + )?; + + let optd_catalog_list = + OptdCatalogProviderList::new(ctx.state().catalog_list().clone(), Some(handle.clone())); + + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + let optd_catalog = catalog + .as_any() + .downcast_ref::() + .expect("Should be OptdCatalogProvider"); + + assert!( + optd_catalog.catalog_handle().is_some(), + "Catalog handle should propagate through DataFusion integration" + ); + + // Verify statistics retrieval through DataFusion catalog + let stats_via_catalog = optd_catalog + .catalog_handle() + .unwrap() + .table_statistics( + "df_test", + optd_catalog + .catalog_handle() + .unwrap() + .current_snapshot() + .await?, + ) + .await? 
+ .unwrap(); + + assert_eq!(stats_via_catalog.row_count, 5); + + let value_stats = stats_via_catalog + .column_statistics + .iter() + .find(|c| c.name == "value") + .expect("Should find value column statistics"); + + assert_eq!( + value_stats.advanced_stats.len(), + 2, + "Should have both ndv and histogram stats" + ); + + // Verify ndv statistic + assert_eq!( + value_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "ndv") + .and_then(|s| s.data.get("distinct_count").and_then(|v| v.as_i64())), + Some(5), + "Should retrieve ndv statistics through DataFusion catalog" + ); + + // Verify histogram statistic + let histogram = value_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "histogram") + .expect("Should have histogram statistic"); + let buckets = histogram + .data + .get("buckets") + .and_then(|v| v.as_array()) + .expect("Should have buckets"); + assert_eq!(buckets.len(), 2); + assert_eq!(buckets[0].get("lower").and_then(|v| v.as_i64()), Some(10)); + assert_eq!(buckets[0].get("count").and_then(|v| v.as_i64()), Some(3)); + + Ok(()) +} diff --git a/connectors/datafusion/Cargo.toml b/connectors/datafusion/Cargo.toml index e4704a9..93c7c62 100644 --- a/connectors/datafusion/Cargo.toml +++ b/connectors/datafusion/Cargo.toml @@ -8,4 +8,11 @@ repository.workspace = true datafusion = { workspace = true } tracing = { workspace = true, features = ["log"] } optd-core = { path = "../../optd/core", version = "0.1" } +optd-catalog = { path = "../../optd/catalog", version = "0.1" } itertools = "0.14.0" +async-trait = "0.1" + +[dev-dependencies] +tempfile = "3.13" +serde_json = "1" +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/connectors/datafusion/src/catalog.rs b/connectors/datafusion/src/catalog.rs new file mode 100644 index 0000000..1b48572 --- /dev/null +++ b/connectors/datafusion/src/catalog.rs @@ -0,0 +1,150 @@ +use async_trait::async_trait; +use datafusion::{ + catalog::{CatalogProvider, CatalogProviderList, 
SchemaProvider, TableProvider}, + common::DataFusionError, + error::Result, +}; +use optd_catalog::CatalogServiceHandle; +use std::any::Any; +use std::sync::Arc; + +use crate::table::OptdTableProvider; + +#[derive(Debug)] +pub struct OptdCatalogProviderList { + inner: Arc, + catalog_handle: Option, +} + +impl OptdCatalogProviderList { + pub fn new( + inner: Arc, + catalog_handle: Option, + ) -> Self { + Self { + inner, + catalog_handle, + } + } +} + +impl CatalogProviderList for OptdCatalogProviderList { + fn as_any(&self) -> &dyn Any { + self + } + + fn register_catalog( + &self, + name: String, + catalog: Arc, + ) -> Option> { + self.inner.register_catalog(name, catalog) + } + + fn catalog_names(&self) -> Vec { + self.inner.catalog_names() + } + + fn catalog(&self, name: &str) -> Option> { + let catalog_handle = self.catalog_handle.clone(); + self.inner.catalog(name).map(|catalog| { + Arc::new(OptdCatalogProvider::new(catalog, catalog_handle)) as Arc + }) + } +} + +#[derive(Debug, Clone)] +pub struct OptdCatalogProvider { + inner: Arc, + catalog_handle: Option, +} + +impl OptdCatalogProvider { + pub fn new( + inner: Arc, + catalog_handle: Option, + ) -> Self { + Self { + inner, + catalog_handle, + } + } + + pub fn catalog_handle(&self) -> Option<&CatalogServiceHandle> { + self.catalog_handle.as_ref() + } +} + +impl CatalogProvider for OptdCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + self.inner.schema_names() + } + + fn schema(&self, name: &str) -> Option> { + self.inner + .schema(name) + .map(|schema| Arc::new(OptdSchemaProvider::new(schema)) as Arc) + } + + fn register_schema( + &self, + name: &str, + schema: Arc, + ) -> Result>> { + self.inner.register_schema(name, schema) + } +} + +#[derive(Debug)] +pub struct OptdSchemaProvider { + inner: Arc, +} + +impl OptdSchemaProvider { + pub fn new(inner: Arc) -> Self { + Self { inner } + } +} + +#[async_trait] +impl SchemaProvider for OptdSchemaProvider { + fn 
as_any(&self) -> &(dyn std::any::Any + 'static) { + self + } + + fn table_names(&self) -> Vec { + self.inner.table_names() + } + + async fn table(&self, name: &str) -> Result>, DataFusionError> { + let table_opt = self.inner.table(name).await?; + + if let Some(table) = table_opt { + let optd_table = Arc::new(OptdTableProvider::new(table, name.to_string())); + + Ok(Some(optd_table as Arc)) + } else { + Ok(None) + } + } + + fn register_table( + &self, + name: String, + table: Arc, + ) -> Result>> { + self.inner.register_table(name, table) + } + + fn deregister_table(&self, name: &str) -> Result>> { + self.inner.deregister_table(name) + } + + fn table_exist(&self, name: &str) -> bool { + self.inner.table_exist(name) + } +} diff --git a/connectors/datafusion/src/lib.rs b/connectors/datafusion/src/lib.rs index 9c50034..b2123e5 100644 --- a/connectors/datafusion/src/lib.rs +++ b/connectors/datafusion/src/lib.rs @@ -1,10 +1,14 @@ +mod catalog; mod extension; mod planner; +mod table; use std::sync::Arc; +pub use catalog::{OptdCatalogProvider, OptdCatalogProviderList, OptdSchemaProvider}; pub use extension::{OptdExtension, OptdExtensionConfig}; pub use planner::OptdQueryPlanner; +pub use table::{OptdTable, OptdTableProvider}; pub trait SessionStateBuilderOptdExt: Sized { fn with_optd_planner(self) -> Self; diff --git a/connectors/datafusion/src/table.rs b/connectors/datafusion/src/table.rs new file mode 100644 index 0000000..c4b65d8 --- /dev/null +++ b/connectors/datafusion/src/table.rs @@ -0,0 +1,150 @@ +use std::{any::Any, borrow::Cow, sync::Arc}; + +use datafusion::{ + arrow::datatypes::SchemaRef, + catalog::{Session, TableProvider}, + common::{Constraints, Statistics}, + datasource::{TableType, listing::ListingTable}, + error::Result, + logical_expr::{LogicalPlan, TableProviderFilterPushDown, dml::InsertOp}, + physical_plan::ExecutionPlan, + prelude::Expr, + sql::TableReference, +}; + +#[allow(dead_code)] +pub struct OptdTable { + inner: Box, + name: String, + 
table_reference: TableReference, +} + +impl OptdTable { + pub fn try_new( + inner: ListingTable, + name: String, + table_reference: TableReference, + ) -> Result { + Ok(Self { + inner: Box::new(inner), + name, + table_reference, + }) + } + + pub fn new_with_inner( + inner: Box, + name: String, + table_reference: TableReference, + ) -> Self { + Self { + inner, + name, + table_reference, + } + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn table_reference(&self) -> &TableReference { + &self.table_reference + } +} + +#[derive(Debug, Clone)] +pub struct OptdTableProvider { + inner: Arc, + table_name: String, +} + +impl OptdTableProvider { + pub fn new(inner: Arc, table_name: String) -> Self { + Self { inner, table_name } + } + + pub fn table_name(&self) -> &str { + &self.table_name + } +} + +#[async_trait::async_trait] +impl TableProvider for OptdTableProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + self.inner.schema() + } + + fn table_type(&self) -> TableType { + self.inner.table_type() + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + self.inner.scan(state, projection, filters, limit).await + } + + fn constraints(&self) -> Option<&Constraints> { + self.inner.constraints() + } + + fn get_table_definition(&self) -> Option<&str> { + self.inner.get_table_definition() + } + + fn get_logical_plan(&'_ self) -> Option> { + self.inner.get_logical_plan() + } + + fn get_column_default(&self, _column: &str) -> Option<&Expr> { + None + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + Ok(vec![ + TableProviderFilterPushDown::Unsupported; + filters.len() + ]) + } + + fn statistics(&self) -> Option { + let stats = self.inner.statistics(); + + if let Some(ref s) = stats { + tracing::debug!( + "Retrieved statistics from inner provider for table {} (num_rows={:?}, total_byte_size={:?})", + 
self.table_name, + s.num_rows, + s.total_byte_size + ); + } else { + tracing::debug!( + "No statistics available for table {} from inner provider", + self.table_name + ); + } + + stats + } + + async fn insert_into( + &self, + _state: &dyn Session, + _input: Arc, + _insert_op: InsertOp, + ) -> Result> { + self.inner.insert_into(_state, _input, _insert_op).await + } +} diff --git a/connectors/datafusion/tests/integration_test.rs b/connectors/datafusion/tests/integration_test.rs new file mode 100644 index 0000000..3030a3e --- /dev/null +++ b/connectors/datafusion/tests/integration_test.rs @@ -0,0 +1,989 @@ +use datafusion::{ + arrow::{ + array::{Float64Array, Int32Array, Int64Array, RecordBatch, StringArray}, + datatypes::{DataType, Field, Schema}, + }, + catalog::{CatalogProviderList, MemorySchemaProvider, TableProvider}, + datasource::MemTable, + execution::context::SessionContext, + prelude::*, +}; +use optd_catalog::{CatalogService, DuckLakeCatalog}; +use optd_datafusion::{OptdCatalogProvider, OptdCatalogProviderList, OptdTableProvider}; +use serde_json; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; +use tempfile::TempDir; + +static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); + +/// Creates a test catalog with isolated metadata directory +/// TempDir is returned to keep the directory alive +fn create_test_catalog() -> (TempDir, DuckLakeCatalog) { + let temp_dir = TempDir::new().unwrap(); + let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let unique_dir = temp_dir + .path() + .join(format!("df_test_{}_{}", timestamp, counter)); + std::fs::create_dir_all(&unique_dir).unwrap(); + let metadata_path = unique_dir.join("metadata.ducklake"); + + let catalog = DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap())).unwrap(); + + (temp_dir, catalog) +} + +/// Creates test schema and batch +fn 
create_test_data( + fields: Vec<(&str, DataType)>, + columns: Vec>, +) -> (Arc, RecordBatch) { + let schema = Arc::new(Schema::new( + fields + .into_iter() + .map(|(name, dtype)| Field::new(name, dtype, false)) + .collect::>(), + )); + let batch = RecordBatch::try_new(schema.clone(), columns).unwrap(); + (schema, batch) +} + +/// Wraps a catalog list as OptdCatalogProvider +async fn get_wrapped_catalog( + catalog_list: Arc, + catalog_handle: Option, +) -> Arc { + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, catalog_handle); + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + Arc::new( + catalog + .as_any() + .downcast_ref::() + .unwrap() + .clone(), + ) +} + +/// Retrieves a table as OptdTableProvider +async fn get_wrapped_table( + catalog_list: Arc, + catalog_handle: Option, + table_name: &str, +) -> Arc { + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, catalog_handle); + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + let schema = catalog.schema("public").unwrap(); + let table = schema + .table(table_name) + .await + .expect("Failed to retrieve table") + .expect("Table not found"); + Arc::new( + table + .as_any() + .downcast_ref::() + .unwrap() + .clone(), + ) +} + +#[tokio::test] +async fn test_catalog_provider_list_wrapping() { + let ctx = SessionContext::new(); + let catalog_list = ctx.state().catalog_list().clone(); + + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list.clone(), None); + + let original_names = catalog_list.catalog_names(); + let wrapped_names = optd_catalog_list.catalog_names(); + assert_eq!(original_names, wrapped_names); + assert!(wrapped_names.contains(&"datafusion".to_string())); +} + +#[tokio::test] +async fn test_table_provider_wrapping() { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + ])); + + let batch = RecordBatch::try_new( + schema.clone(), + vec![ + 
Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ) + .unwrap(); + + let mem_table = Arc::new(MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap()); + let optd_table = OptdTableProvider::new(mem_table.clone(), "test_table".to_string()); + + assert_eq!(optd_table.table_name(), "test_table"); + assert_eq!(optd_table.schema(), schema); + assert!(optd_table.statistics().is_none()); +} + +#[tokio::test] +async fn test_schema_retrieval() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50])), + ], + ); + ctx.register_batch("numbers", batch).unwrap(); + + let optd_table = get_wrapped_table(ctx.state().catalog_list().clone(), None, "numbers").await; + assert_eq!(optd_table.table_name(), "numbers"); + + let schema = optd_table.schema(); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(0).data_type(), &DataType::Int32); + assert_eq!(schema.field(1).name(), "value"); + assert_eq!(schema.field(1).data_type(), &DataType::Int32); + + let expected_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("value", DataType::Int32, false), + ])); + + assert_eq!(schema.as_ref(), expected_schema.as_ref()); +} + +#[tokio::test] +async fn test_query_execution_with_wrapped_catalog() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![10, 20, 30, 40, 50])), + ], + ); + ctx.register_batch("test_data", batch).unwrap(); + + let results = ctx + .sql("SELECT id, value FROM test_data WHERE value > 20") + .await + .unwrap() + .collect() + .await + .unwrap(); + + assert_eq!(results.len(), 1); + 
assert_eq!(results[0].num_rows(), 3); + assert_eq!( + results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .values(), + &[3, 4, 5] + ); +} + +#[tokio::test] +async fn test_table_provider_accessibility_from_plan() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("name", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + ); + ctx.register_batch("users", batch).unwrap(); + + let df = ctx.sql("SELECT * FROM users").await.unwrap(); + assert!(format!("{:?}", df.logical_plan()).contains("users")); + + let results = df.collect().await.unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + assert_eq!( + results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap() + .values(), + &[1, 2] + ); + assert_eq!( + results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap() + .iter() + .collect::>(), + vec![Some("Alice"), Some("Bob")] + ); +} + +#[tokio::test] +async fn test_table_metadata_access_through_catalog() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![ + ("customer_id", DataType::Int32), + ("order_amount", DataType::Int32), + ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 1, 3, 2, 1])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250, 120])), + ], + ); + ctx.register_batch("orders", batch).unwrap(); + + let optd_table = get_wrapped_table(ctx.state().catalog_list().clone(), None, "orders").await; + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), None).await; + + assert_eq!(optd_table.table_name(), "orders"); + assert!(catalog.catalog_handle().is_none()); + assert!(optd_table.statistics().is_none()); + + let results = ctx + .sql("SELECT customer_id, SUM(order_amount) FROM orders GROUP BY customer_id") + .await + .unwrap() + .collect() + .await + .unwrap(); + let total_rows: usize = results.iter().map(|batch| 
batch.num_rows()).sum(); + assert_eq!(total_rows, 3, "Should have 3 rows for 3 unique customers"); + assert!(!results.is_empty(), "Should have at least one batch"); + assert_eq!( + results[0].num_columns(), + 2, + "Each batch should have 2 columns (customer_id and sum)" + ); + + // Collect all results into vectors for verification + let mut all_customer_ids = Vec::new(); + let mut all_sums = Vec::new(); + for batch in &results { + let customer_ids = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let sums = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + all_customer_ids.extend(customer_ids.values()); + all_sums.extend(sums.values()); + } + + // Sort by customer_id for consistent verification + let mut pairs: Vec<_> = all_customer_ids + .iter() + .zip(all_sums.iter()) + .map(|(c, s)| (*c, *s)) + .collect(); + pairs.sort_by_key(|p| p.0); + + assert_eq!( + pairs, + vec![(1, 370), (2, 450), (3, 300)], + "Expected customer_id 1->370, 2->450, 3->300" + ); +} + +#[tokio::test] +async fn test_csv_table_wrapping() { + let _tmp_dir = tempfile::TempDir::new().unwrap(); + let csv_path = _tmp_dir.path().join("test.csv"); + let mut file = std::fs::File::create(&csv_path).unwrap(); + std::io::Write::write_all(&mut file, b"id,value\n1,10\n2,20\n").unwrap(); + + let ctx = SessionContext::new(); + + ctx.register_csv( + "test_csv", + csv_path.to_str().unwrap(), + CsvReadOptions::default(), + ) + .await + .unwrap(); + + let df = ctx.sql("SELECT * FROM test_csv").await.unwrap(); + let results = df.collect().await.unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + + // CSV columns are typically parsed as Int64, not Int32 + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let value_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[1, 2]); + assert_eq!(value_col.values(), &[10, 20]); +} + +#[tokio::test] +async fn 
test_full_optimizer_integration_pipeline() { + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![ + ("product_id", DataType::Int32), + ("category", DataType::Utf8), + ("price", DataType::Int32), + ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(StringArray::from(vec!["A", "B", "A", "C", "B"])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250])), + ], + ); + ctx.register_batch("products", batch).unwrap(); + + let catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, None); + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + assert!(catalog.schema_names().contains(&"public".to_string())); + + let df = ctx + .sql("SELECT category, AVG(price) as avg_price FROM products GROUP BY category") + .await + .unwrap(); + + assert!(format!("{:?}", df.logical_plan()).contains("products")); + + let results = df.collect().await.unwrap(); + let total_rows: usize = results.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(total_rows, 3, "Should have 3 categories"); + assert_eq!(results[0].num_columns(), 2); + + // Collect and verify exact AVG results: A->125, B->225, C->300 + let mut category_avgs = Vec::new(); + for batch in &results { + let categories = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let avg_prices = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + category_avgs.push((categories.value(i).to_string(), avg_prices.value(i))); + } + } + category_avgs.sort_by(|a, b| a.0.cmp(&b.0)); + + assert_eq!(category_avgs.len(), 3); + assert_eq!(category_avgs[0].0, "A"); + assert!( + (category_avgs[0].1 - 125.0).abs() < 0.01, + "Category A avg should be 125" + ); + assert_eq!(category_avgs[1].0, "B"); + assert!( + (category_avgs[1].1 - 225.0).abs() < 0.01, + "Category B avg should be 225" + ); + assert_eq!(category_avgs[2].0, "C"); + assert!( + (category_avgs[2].1 - 
300.0).abs() < 0.01, + "Category C avg should be 300" + ); +} + +// Tests with CatalogService integration + +#[tokio::test] +async fn test_catalog_service_handle_propagation() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (schema, batch) = create_test_data( + vec![("id", DataType::Int32), ("name", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ); + ctx.register_batch("users", batch).unwrap(); + + let optd_table = get_wrapped_table( + ctx.state().catalog_list().clone(), + Some(handle.clone()), + "users", + ) + .await; + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle)).await; + + assert!(catalog.catalog_handle().is_some()); + assert_eq!(optd_table.table_name(), "users"); + assert_eq!(optd_table.schema(), schema); +} + +#[tokio::test] +async fn test_catalog_service_snapshot_retrieval() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32)], + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ); + ctx.register_batch("test", batch).unwrap(); + + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle)).await; + let catalog_handle = catalog.catalog_handle().unwrap(); + + let snapshot = catalog_handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); + + let snapshot_info = catalog_handle.current_snapshot_info().await.unwrap(); + assert_eq!(snapshot_info.id.0, 0); + assert_eq!(snapshot_info.schema_version, 0); + assert!(snapshot_info.next_catalog_id >= 0); + assert!(snapshot_info.next_file_id >= 
0); +} + +#[tokio::test] +async fn test_catalog_service_schema_retrieval() { + let (_temp_dir, catalog) = create_test_catalog(); + let conn = catalog.get_connection(); + conn.execute_batch( + "CREATE TABLE test_schema_table (id INTEGER, value VARCHAR, amount DECIMAL(10,2))", + ) + .unwrap(); + + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let schema = handle + .current_schema(None, "test_schema_table") + .await + .unwrap(); + + assert_eq!(schema.fields().len(), 3); + assert_eq!(schema.field(0).name(), "id"); + assert_eq!(schema.field(1).name(), "value"); + assert_eq!(schema.field(2).name(), "amount"); +} + +#[tokio::test] +async fn test_full_workflow_with_catalog_service() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![ + ("product_id", DataType::Int32), + ("category", DataType::Utf8), + ("price", DataType::Int32), + ], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(StringArray::from(vec!["A", "B", "A", "C", "B"])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250])), + ], + ); + ctx.register_batch("products", batch).unwrap(); + + let catalog = + get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle.clone())).await; + + assert!(catalog.catalog_handle().is_some()); + + let snapshot = catalog + .catalog_handle() + .unwrap() + .current_snapshot() + .await + .unwrap(); + assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); + + let results = ctx + .sql("SELECT category, AVG(price) as avg_price FROM products GROUP BY category") + .await + .unwrap() + .collect() + .await + .unwrap(); + + let total_rows: usize = results.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!(total_rows, 3, "Should have 3 categories"); + + // Verify exact AVG results + 
let mut category_avgs = Vec::new(); + for batch in &results { + let categories = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let avg_prices = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + category_avgs.push((categories.value(i).to_string(), avg_prices.value(i))); + } + } + category_avgs.sort_by(|a, b| a.0.cmp(&b.0)); + + assert_eq!( + category_avgs, + vec![ + ("A".to_string(), 125.0), + ("B".to_string(), 225.0), + ("C".to_string(), 300.0) + ] + ); +} + +#[tokio::test] +async fn test_catalog_service_statistics_update_and_retrieval() { + let (_temp_dir, catalog) = create_test_catalog(); + let conn = catalog.get_connection(); + + // Create a table with known structure + conn.execute_batch( + "CREATE TABLE stats_table (id INTEGER, name VARCHAR, age INTEGER); + INSERT INTO stats_table VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Charlie', 35);", + ) + .unwrap(); + + // Get table_id and column_id for statistics + let table_id: i64 = conn.query_row( + "SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'stats_table'", + [], + |row| row.get(0), + ).unwrap(); + + let age_column_id: i64 = conn + .query_row( + "SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'age'", + [table_id], + |row| row.get(0), + ) + .unwrap(); + + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + // Update statistics through the catalog service + handle + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .await + .unwrap(); + + handle + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .await + .unwrap(); + + handle + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .await + .unwrap(); + + // Retrieve statistics + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("stats_table", snapshot) + .await + .unwrap(); + + assert!(stats.is_some(), "Statistics should be available"); + let stats = stats.unwrap(); + + // Verify table-level statistics + assert_eq!(stats.row_count, 3, "Table should have 3 rows"); + + // Verify column statistics + let age_stats = stats + .column_statistics + .iter() + .find(|c| c.name == "age") + .expect("age column should have statistics"); + + assert_eq!( + age_stats.advanced_stats.len(), + 3, + "Should have 3 stat types" + ); + + let ndv_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "ndv") + .expect("Should have ndv statistic"); + assert_eq!(ndv_stat.data, serde_json::json!({"distinct_count": 3})); + + let min_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "min_value") + .expect("Should have min_value statistic"); + assert_eq!(min_stat.data, serde_json::json!(25)); + + let max_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "max_value") + .expect("Should have max_value statistic"); + assert_eq!(max_stat.data, serde_json::json!(35)); +} + +#[tokio::test] +async fn test_catalog_service_with_datafusion_integration() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async 
move { service.run().await }); + + let ctx = SessionContext::new(); + let (_, batch) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])), + Arc::new(Int32Array::from(vec![100, 200, 150, 300, 250])), + ], + ); + ctx.register_batch("test_table", batch).unwrap(); + + let catalog = get_wrapped_catalog(ctx.state().catalog_list().clone(), Some(handle)).await; + + let snapshot = catalog + .catalog_handle() + .unwrap() + .current_snapshot() + .await + .unwrap(); + assert_eq!(snapshot.0, 0); + + let results = ctx + .sql("SELECT id, value FROM test_table WHERE value > 150") + .await + .unwrap() + .collect() + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 3); + + // Verify exact filtered results: rows with value > 150 are (2,200), (4,300), (5,250) + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let value_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[2, 4, 5]); + assert_eq!(value_col.values(), &[200, 300, 250]); +} + +#[tokio::test] +async fn test_multiple_schemas_isolation() { + let ctx = SessionContext::new(); + + // Register tables in the default "public" schema + let (_, batch1) = create_test_data( + vec![("id", DataType::Int32), ("name", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + ); + ctx.register_batch("users", batch1).unwrap(); + + // Create a custom schema and register a table there + let (_, batch2) = create_test_data( + vec![("id", DataType::Int32), ("department", DataType::Utf8)], + vec![ + Arc::new(Int32Array::from(vec![10, 20])), + Arc::new(StringArray::from(vec!["Engineering", "Sales"])), + ], + ); + + // DataFusion's default catalog structure: catalog.schema.table + // We'll use the memory catalog provider to create multiple schemas + let mem_table = 
MemTable::try_new(batch2.schema(), vec![vec![batch2]]).unwrap(); + ctx.catalog("datafusion") + .unwrap() + .register_schema("custom_schema", Arc::new(MemorySchemaProvider::new())) + .unwrap(); + + ctx.catalog("datafusion") + .unwrap() + .schema("custom_schema") + .unwrap() + .register_table("departments".to_string(), Arc::new(mem_table)) + .unwrap(); + + // Wrap with OptdCatalogProviderList + let catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, None); + + // Test 1: Verify both schemas exist + let catalog = optd_catalog_list.catalog("datafusion").unwrap(); + let schema_names = catalog.schema_names(); + assert!(schema_names.contains(&"public".to_string())); + assert!(schema_names.contains(&"custom_schema".to_string())); + + // Test 2: Verify tables are isolated in their respective schemas + let public_schema = catalog.schema("public").unwrap(); + let custom_schema = catalog.schema("custom_schema").unwrap(); + + let users_in_public = public_schema.table("users").await.unwrap(); + assert!( + users_in_public.is_some(), + "users should exist in public schema" + ); + let departments_in_public = public_schema.table("departments").await.unwrap(); + assert!( + departments_in_public.is_none(), + "departments should not exist in public schema" + ); + + let departments_in_custom = custom_schema.table("departments").await.unwrap(); + assert!( + departments_in_custom.is_some(), + "departments should exist in custom_schema" + ); + let users_in_custom = custom_schema.table("users").await.unwrap(); + assert!( + users_in_custom.is_none(), + "users should not exist in custom_schema" + ); + + // Test 3: Verify OptdTableProvider wraps tables from both schemas + let users_table = users_in_public.unwrap(); + let users_optd = users_table + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(users_optd.table_name(), "users"); + + let departments_table = departments_in_custom.unwrap(); + let departments_optd = 
departments_table + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(departments_optd.table_name(), "departments"); + + // Test 4: Verify queries work with schema qualification + let results = ctx + .sql("SELECT * FROM public.users") + .await + .unwrap() + .collect() + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + + // Verify exact user data + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let name_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[1, 2]); + assert_eq!( + name_col.iter().collect::>(), + vec![Some("Alice"), Some("Bob")] + ); + + let results = ctx + .sql("SELECT * FROM custom_schema.departments") + .await + .unwrap() + .collect() + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].num_rows(), 2); + + // Verify exact department data + let id_col = results[0] + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let dept_col = results[0] + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(id_col.values(), &[10, 20]); + assert_eq!( + dept_col.iter().collect::>(), + vec![Some("Engineering"), Some("Sales")] + ); +} + +#[tokio::test] +async fn test_multiple_schemas_with_catalog_service() { + let (_temp_dir, catalog) = create_test_catalog(); + let (service, handle) = CatalogService::new(catalog); + tokio::spawn(async move { service.run().await }); + + let ctx = SessionContext::new(); + + // Register tables in public schema + let (_, batch1) = create_test_data( + vec![("id", DataType::Int32), ("value", DataType::Int32)], + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![100, 200, 300])), + ], + ); + ctx.register_batch("table1", batch1).unwrap(); + + // Create and register in custom schema + let (_, batch2) = create_test_data( + vec![("id", DataType::Int32), ("amount", DataType::Int32)], + vec![ + 
Arc::new(Int32Array::from(vec![10, 20])), + Arc::new(Int32Array::from(vec![500, 600])), + ], + ); + + let mem_table = MemTable::try_new(batch2.schema(), vec![vec![batch2]]).unwrap(); + ctx.catalog("datafusion") + .unwrap() + .register_schema("analytics", Arc::new(MemorySchemaProvider::new())) + .unwrap(); + + ctx.catalog("datafusion") + .unwrap() + .schema("analytics") + .unwrap() + .register_table("table2".to_string(), Arc::new(mem_table)) + .unwrap(); + + // Wrap with catalog service handle + let catalog_list = ctx.state().catalog_list().clone(); + let optd_catalog_list = OptdCatalogProviderList::new(catalog_list, Some(handle.clone())); + + // Verify handle propagates to tables in both schemas + let catalog_provider = optd_catalog_list.catalog("datafusion").unwrap(); + let optd_catalog = catalog_provider + .as_any() + .downcast_ref::() + .expect("Should be OptdCatalogProvider"); + + let table1 = catalog_provider + .schema("public") + .unwrap() + .table("table1") + .await + .unwrap() + .unwrap(); + let _table1_optd = table1.as_any().downcast_ref::().unwrap(); + + let table2 = catalog_provider + .schema("analytics") + .unwrap() + .table("table2") + .await + .unwrap() + .unwrap(); + let _table2_optd = table2.as_any().downcast_ref::().unwrap(); + + // Verify catalog has the handle (handle is at catalog level, not table level) + let handle = optd_catalog + .catalog_handle() + .expect("catalog should have catalog handle"); + + // Verify catalog service is accessible + let snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot.0, 0, "Fresh catalog should start at snapshot 0"); + + // Verify cross-schema query works + let results = ctx + .sql("SELECT t1.id, t1.value, t2.amount FROM public.table1 t1 CROSS JOIN analytics.table2 t2") + .await + .unwrap() + .collect() + .await + .unwrap(); + let total_rows: usize = results.iter().map(|batch| batch.num_rows()).sum(); + assert_eq!( + total_rows, 6, + "3 rows from table1 * 2 rows from table2 = 6 rows" + ); + 
+ // Verify exact cross join results + let mut all_rows = Vec::new(); + for batch in &results { + let t1_id = batch + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let t1_value = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + let t2_amount = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + for i in 0..batch.num_rows() { + all_rows.push((t1_id.value(i), t1_value.value(i), t2_amount.value(i))); + } + } + all_rows.sort(); + + // Expected: each row from table1 (1,100), (2,200), (3,300) paired with each row from table2 (10,500), (20,600) + assert_eq!( + all_rows, + vec![ + (1, 100, 500), + (1, 100, 600), + (2, 200, 500), + (2, 200, 600), + (3, 300, 500), + (3, 300, 600), + ] + ); +} diff --git a/optd/catalog/Cargo.toml b/optd/catalog/Cargo.toml index 332c535..cd4008f 100644 --- a/optd/catalog/Cargo.toml +++ b/optd/catalog/Cargo.toml @@ -5,3 +5,13 @@ edition.workspace = true repository.workspace = true [dependencies] +serde = { version = "1.0", features = ["derive"] } +duckdb = { version = "1.4.0", features = ["bundled"] } +snafu = "0.8.6" +serde_json = "1.0" +tokio = { workspace = true, features = ["sync", "rt"] } + +[dev-dependencies] +tempfile = "3.8" +tokio = { workspace = true, features = ["full", "test-util"] } +futures = "0.3" diff --git a/optd/catalog/src/lib.rs b/optd/catalog/src/lib.rs index 8b13789..0dcec8f 100644 --- a/optd/catalog/src/lib.rs +++ b/optd/catalog/src/lib.rs @@ -1 +1,618 @@ +use duckdb::{ + Connection, Error as DuckDBError, + arrow::datatypes::{Field, Schema, SchemaRef}, + params, + types::Null, +}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use snafu::{ResultExt, prelude::*}; +use std::{collections::HashMap, sync::Arc}; + +mod service; +pub use service::{CatalogBackend, CatalogRequest, CatalogService, CatalogServiceHandle}; + +/// Operations for managing table statistics with snapshot-based time travel. +pub trait Catalog { + /// Gets the current (most recent) snapshot ID. 
+ fn current_snapshot(&mut self) -> Result; + + /// Gets complete metadata for the current snapshot. + fn current_snapshot_info(&mut self) -> Result; + + /// Gets the Arrow schema for a table at the current snapshot. + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result; + + /// Gets schema information including name, ID, and snapshot range. + fn current_schema_info(&mut self) -> Result; + + /// Retrieves table and column statistics at a specific snapshot. + fn table_statistics( + &mut self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, Error>; + + /// Updates or inserts advanced statistics for a table column. + fn update_table_column_stats( + &mut self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error>; +} + +const DEFAULT_METADATA_FILE: &str = "metadata.ducklake"; + +const CREATE_EXTRA_TABLES_QUERY: &str = r#" + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats ( + column_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + table_id BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query ( + query_id BIGINT, + query_string VARCHAR, + root_group_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_query_instance ( + query_instance_id BIGINT PRIMARY KEY, + query_id BIGINT, + creation_time BIGINT, + snapshot_id BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_group_stats ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_execution_subplan_feedback ( + group_id BIGINT, + begin_snapshot BIGINT, + end_snapshot BIGINT, + 
stats_type VARCHAR, + payload VARCHAR + ); + + CREATE TABLE IF NOT EXISTS __ducklake_metadata_metalake.main.optd_subplan_scalar_feedback ( + scalar_id BIGINT, + group_id BIGINT, + stats_type VARCHAR, + payload VARCHAR, + query_instance_id BIGINT + ); +"#; + +// SQL query to fetch the latest snapshot information. +const SNAPSHOT_INFO_QUERY: &str = r#" + SELECT snapshot_id, schema_version, next_catalog_id, next_file_id + FROM __ducklake_metadata_metalake.main.ducklake_snapshot + WHERE snapshot_id = (SELECT MAX(snapshot_id) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot); +"#; + +// SQL query to fetch schema information including name, ID, and snapshot valid range. +const SCHEMA_INFO_QUERY: &str = r#" + SELECT ds.schema_id, ds.schema_name, ds.begin_snapshot, ds.end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema ds + WHERE ds.schema_name = current_schema(); +"#; + +/// SQL query to fetch table statistics including column metadata and advanced stats at a specific snapshot. +const FETCH_TABLE_STATS_QUERY: &str = r#" + SELECT + ts.table_id, + dc.column_id, + dc.column_name, + dc.column_type, + ts.record_count, + ts.next_row_id, + ts.file_size_bytes, + tcas.stats_type, + tcas.payload + FROM __ducklake_metadata_metalake.main.ducklake_table_stats ts + INNER JOIN __ducklake_metadata_metalake.main.ducklake_table dt ON ts.table_id = dt.table_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + INNER JOIN __ducklake_metadata_metalake.main.ducklake_column dc ON dt.table_id = dc.table_id + LEFT JOIN __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats tcas + ON dc.table_id = tcas.table_id + AND dc.column_id = tcas.column_id + AND ? >= tcas.begin_snapshot + AND (? < tcas.end_snapshot OR tcas.end_snapshot IS NULL) + WHERE + ds.schema_name = current_schema() + AND dt.table_name = ? + AND ts.record_count IS NOT NULL + AND ts.file_size_bytes IS NOT NULL + AND ? >= dc.begin_snapshot + AND (? 
< dc.end_snapshot OR dc.end_snapshot IS NULL) + ORDER BY ts.table_id, dc.column_id, tcas.stats_type; +"#; + +/// SQL query to close an existing advanced statistics entry by setting its end_snapshot. +const UPDATE_ADV_STATS_QUERY: &str = r#" + UPDATE __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + SET end_snapshot = ? + WHERE end_snapshot IS NULL + AND stats_type = ? + AND column_id = ? + AND table_id = ?; +"#; + +/// SQL query to insert a new advanced statistics entry. +const INSERT_ADV_STATS_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + (column_id, begin_snapshot, end_snapshot, table_id, stats_type, payload) + VALUES (?, ?, ?, ?, ?, ?); +"#; + +/// SQL query to insert a new snapshot record. +const INSERT_SNAPSHOT_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot + (snapshot_id, snapshot_time, schema_version, next_catalog_id, next_file_id) + VALUES (?, NOW(), ?, ?, ?); +"#; + +/// SQL query to record a snapshot change in the change log. +const INSERT_SNAPSHOT_CHANGE_QUERY: &str = r#" + INSERT INTO __ducklake_metadata_metalake.main.ducklake_snapshot_changes + (snapshot_id, changes_made, author, commit_message, commit_extra_info) + VALUES (?, ?, ?, ?, ?); +"#; + +/// Error types for statistics operations. 
+#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Database connection error: {}", source))] + Connection { source: DuckDBError }, + #[snafu(display("Query execution failed: {}", source))] + QueryExecution { source: DuckDBError }, + #[snafu(display("Transaction error: {}", source))] + Transaction { source: DuckDBError }, + #[snafu(display("JSON serialization error: {}", source))] + JsonSerialization { source: serde_json::Error }, + #[snafu(display("ARROW DataType conversion error: {}", source))] + ArrowDataTypeConversion { source: duckdb::Error }, + #[snafu(display( + "Get statistics failed for table: {}, column: {}, snapshot: {}", + table, + column, + snapshot + ))] + GetStatsFailed { + table: String, + column: String, + snapshot: i64, + }, + #[snafu(display( + "Group statistics not found for group_id: {}, stats_type: {}, snapshot: {}", + group_id, + stats_type, + snapshot + ))] + GroupStatsNotFound { + group_id: i64, + stats_type: String, + snapshot: i64, + }, +} + +/// Internal representation of a row from the table statistics query. +/// Used for collecting data before aggregating into TableStatistics. +struct TableColumnStatisticsEntry { + _table_id: i64, + column_id: i64, + column_name: String, + column_type: String, + record_count: i64, + _next_row_id: i64, + _file_size_bytes: i64, + stats_type: Option, + payload: Option, +} + +/// Statistics for a table including row count and per-column statistics. +/// Main structure returned when querying table statistics. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TableStatistics { + pub row_count: usize, + pub column_statistics: Vec, +} + +impl FromIterator> for TableStatistics { + fn from_iter>>( + iter: T, + ) -> Self { + let mut row_flag = false; + let mut row_count = 0; + let mut column_statistics = Vec::new(); + + // Stats will be ordered by table_id then column_id + for e in iter.into_iter().flatten() { + // Check if unique table/column combination + if column_statistics + .last() + .is_none_or(|last: &ColumnStatistics| last.column_id != e.column_id) + { + // New column encountered + column_statistics.push(ColumnStatistics::new( + e.column_id, + e.column_type.clone(), + e.column_name.clone(), + Vec::new(), + )); + } + + assert!( + !column_statistics.is_empty() + && column_statistics.last().unwrap().column_id == e.column_id, + "Column statistics should not be empty and last column_id should match current column_id" + ); + + if let Some(last_column_stat) = column_statistics.last_mut() + && let (Some(stats_type), Some(payload)) = (e.stats_type, e.payload) + { + let data = serde_json::from_str(&payload).unwrap_or(Value::Null); + last_column_stat.add_advanced_stat(AdvanceColumnStatistics { stats_type, data }); + } + + // Assuming all columns have the same record_count, only need to set once + if !row_flag { + row_count = e.record_count as usize; + row_flag = true; + } + } + + TableStatistics { + row_count, + column_statistics, + } + } +} + +/// Statistics for a single column including type, name, and advanced statistics. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ColumnStatistics { + pub column_id: i64, + pub column_type: String, + pub name: String, + pub advanced_stats: Vec, +} + +impl ColumnStatistics { + fn new( + column_id: i64, + column_type: String, + name: String, + advanced_stats: Vec, + ) -> Self { + Self { + column_id, + column_type, + name, + advanced_stats, + } + } + + fn add_advanced_stat(&mut self, stat: AdvanceColumnStatistics) { + self.advanced_stats.push(stat); + } +} + +/// An advanced statistics entry with type and serialized data at a snapshot. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AdvanceColumnStatistics { + /// Type of the statistical summaries (e.g., histogram, distinct count). + pub stats_type: String, + /// Serialized data for the statistics at a snapshot. + pub data: Value, +} + +/// Identifier for a snapshot in the statistics database. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct SnapshotId(pub i64); + +/// Snapshot metadata including schema version and next IDs. +#[derive(Debug, Clone, Serialize, Deserialize)] + +pub struct SnapshotInfo { + pub id: SnapshotId, + pub schema_version: i64, + pub next_catalog_id: i64, + pub next_file_id: i64, +} + +/// Schema information including name, ID, and valid snapshot range. +#[derive(Debug, Clone, Serialize, Deserialize)] + +pub struct CurrentSchema { + pub schema_name: String, + pub schema_id: i64, + pub begin_snapshot: i64, + pub end_snapshot: Option, +} + +/// A catalog implementation using DuckDB with snapshot management. 
pub struct DuckLakeCatalog {
    /// Owned DuckDB connection with the ducklake metadata catalog attached.
    conn: Connection,
}

impl Catalog for DuckLakeCatalog {
    /// Gets the current (most recent) snapshot ID, inside a transaction.
    fn current_snapshot(&mut self) -> Result<SnapshotId, Error> {
        let txn = self.conn.transaction().context(TransactionSnafu)?;
        let result = Self::current_snapshot_inner(&txn);
        txn.commit().context(TransactionSnafu)?;
        result
    }

    /// Gets complete metadata for the current snapshot, inside a transaction.
    fn current_snapshot_info(&mut self) -> Result<SnapshotInfo, Error> {
        let txn = self.conn.transaction().context(TransactionSnafu)?;
        let result = Self::current_snapshot_info_inner(&txn);
        txn.commit().context(TransactionSnafu)?;
        result
    }

    /// Gets the Arrow schema for `table` (optionally qualified by `schema`),
    /// inside a transaction.
    fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result<SchemaRef, Error> {
        let txn = self.conn.transaction().context(TransactionSnafu)?;
        let result = Self::current_schema_inner(&txn, schema, table);
        txn.commit().context(TransactionSnafu)?;
        result
    }

    /// Gets schema information for the current schema, inside a transaction.
    fn current_schema_info(&mut self) -> Result<CurrentSchema, Error> {
        let txn = self.conn.transaction().context(TransactionSnafu)?;
        let result = Self::current_schema_info_inner(&txn);
        txn.commit().context(TransactionSnafu)?;
        result
    }

    /// Retrieves table and column statistics at `snapshot`, inside a
    /// transaction.
    fn table_statistics(
        &mut self,
        table: &str,
        snapshot: SnapshotId,
    ) -> Result<Option<TableStatistics>, Error> {
        let txn = self.conn.transaction().context(TransactionSnafu)?;
        let result = Self::table_statistics_inner(&txn, table, snapshot);
        txn.commit().context(TransactionSnafu)?;
        result
    }

    /// Updates table column statistics, closing the previous entry and
    /// recording a new snapshot, inside a single transaction.
    fn update_table_column_stats(
        &mut self,
        column_id: i64,
        table_id: i64,
        stats_type: &str,
        payload: &str,
    ) -> Result<(), Error> {
        let txn = self.conn.transaction().context(TransactionSnafu)?;
        let result =
            Self::update_table_column_stats_inner(&txn, column_id, table_id, stats_type, payload);
        txn.commit().context(TransactionSnafu)?;
        result
    }
}

impl DuckLakeCatalog {
    /// Creates a new `DuckLakeCatalog` with optional file paths.
    /// If `location` is None, uses in-memory database. If `metadata_path` is None, uses default metadata file.
+ pub fn try_new(location: Option<&str>, metadata_path: Option<&str>) -> Result { + let conn = if let Some(path) = location { + Connection::open(path).context(ConnectionSnafu)? + } else { + Connection::open_in_memory().context(ConnectionSnafu)? + }; + + // Use provided metadata path or default to DEFAULT_METADATA_FILE + let metadata_file = metadata_path.unwrap_or(DEFAULT_METADATA_FILE); + let setup_query = format!( + r#" + INSTALL ducklake; + LOAD ducklake; + ATTACH 'ducklake:{metadata_file}' AS metalake; + USE metalake; + + {CREATE_EXTRA_TABLES_QUERY} + "# + ); + conn.execute_batch(&setup_query).context(ConnectionSnafu)?; + Ok(Self { conn }) + } + + /// Returns a reference to the underlying DuckDB connection. + pub fn get_connection(&self) -> &Connection { + &self.conn + } + + fn current_snapshot_inner(conn: &Connection) -> Result { + conn.prepare("FROM ducklake_current_snapshot('metalake');") + .context(QueryExecutionSnafu)? + .query_row([], |row| Ok(SnapshotId(row.get(0)?))) + .context(QueryExecutionSnafu) + } + + fn current_snapshot_info_inner(conn: &Connection) -> Result { + conn.prepare(SNAPSHOT_INFO_QUERY) + .context(QueryExecutionSnafu)? 
+ .query_row([], |row| { + Ok(SnapshotInfo { + id: SnapshotId(row.get("snapshot_id")?), + schema_version: row.get("schema_version")?, + next_catalog_id: row.get("next_catalog_id")?, + next_file_id: row.get("next_file_id")?, + }) + }) + .context(QueryExecutionSnafu) + } + + fn current_schema_inner( + conn: &Connection, + schema: Option<&str>, + table: &str, + ) -> Result { + let table_ref = schema + .map(|s| format!("{}.{}", s, table)) + .unwrap_or_else(|| table.to_string()); + + // Use SELECT * with LIMIT 0 to get schema with data types + let schema_query = format!("SELECT * FROM {table_ref} LIMIT 0;"); + let mut stmt = conn.prepare(&schema_query).context(QueryExecutionSnafu)?; + let arrow_result = stmt.query_arrow([]).context(QueryExecutionSnafu)?; + let arrow_schema = arrow_result.get_schema(); + + // Get nullable info from DESCRIBE + // This is to fix Arrow API limitation with nullable info + let describe_query = format!("DESCRIBE {table_ref}"); + let mut stmt = conn.prepare(&describe_query).context(QueryExecutionSnafu)?; + let mut nullable_map = HashMap::new(); + let mut rows = stmt.query([]).context(QueryExecutionSnafu)?; + + while let Some(row) = rows.next().context(QueryExecutionSnafu)? { + let col_name: String = row.get(0).context(QueryExecutionSnafu)?; + let null_str: String = row.get(2).context(QueryExecutionSnafu)?; + nullable_map.insert(col_name, null_str == "YES"); + } + + // Rebuild schema with correct nullable flags + let fields: Vec<_> = arrow_schema + .fields() + .iter() + .map(|field| { + let nullable = nullable_map + .get(field.name().as_str()) + .copied() + .unwrap_or(true); + Arc::new(Field::new( + field.name().as_str(), + field.data_type().clone(), + nullable, + )) + }) + .collect(); + + Ok(Arc::new(Schema::new(fields))) + } + + fn current_schema_info_inner(conn: &Connection) -> Result { + conn.prepare(SCHEMA_INFO_QUERY) + .context(QueryExecutionSnafu)? 
+ .query_row([], |row| { + Ok(CurrentSchema { + schema_name: row.get("schema_name")?, + schema_id: row.get("schema_id")?, + begin_snapshot: row.get("begin_snapshot")?, + end_snapshot: row.get("end_snapshot")?, + }) + }) + .context(QueryExecutionSnafu) + } + + fn table_statistics_inner( + conn: &Connection, + table: &str, + snapshot: SnapshotId, + ) -> Result, Error> { + let mut stmt = conn + .prepare(FETCH_TABLE_STATS_QUERY) + .context(QueryExecutionSnafu)?; + + let entries = stmt + .query_map( + params![&snapshot.0, &snapshot.0, table, &snapshot.0, &snapshot.0,], + |row| { + Ok(TableColumnStatisticsEntry { + _table_id: row.get("table_id")?, + column_id: row.get("column_id")?, + column_name: row.get("column_name")?, + column_type: row.get("column_type")?, + record_count: row.get("record_count")?, + _next_row_id: row.get("next_row_id")?, + _file_size_bytes: row.get("file_size_bytes")?, + stats_type: row.get("stats_type")?, + payload: row.get("payload")?, + }) + }, + ) + .context(QueryExecutionSnafu)? + .map(|result| result.context(QueryExecutionSnafu)); + + Ok(Some(TableStatistics::from_iter(entries))) + } + + fn update_table_column_stats_inner( + conn: &Connection, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + // Fetch current snapshot info + let curr_snapshot = Self::current_snapshot_info_inner(conn)?; + + // Update matching past snapshot to close it + conn.prepare(UPDATE_ADV_STATS_QUERY) + .context(QueryExecutionSnafu)? + .execute(params![ + curr_snapshot.id.0 + 1, + stats_type, + column_id, + table_id, + ]) + .context(QueryExecutionSnafu)?; + + // Insert new snapshot + conn.prepare(INSERT_ADV_STATS_QUERY) + .context(QueryExecutionSnafu)? + .execute(params![ + column_id, + curr_snapshot.id.0 + 1, + Null, + table_id, + stats_type, + payload, + ]) + .context(QueryExecutionSnafu)?; + + conn.prepare(INSERT_SNAPSHOT_QUERY) + .context(QueryExecutionSnafu)? 
+ .execute(params![ + curr_snapshot.id.0 + 1, + curr_snapshot.schema_version, + curr_snapshot.next_catalog_id, + curr_snapshot.next_file_id, + ]) + .context(QueryExecutionSnafu)?; + + conn.prepare(INSERT_SNAPSHOT_CHANGE_QUERY) + .context(QueryExecutionSnafu)? + .execute(params![ + curr_snapshot.id.0 + 1, + format!( + r#"updated_stats:"main"."ducklake_table_column_adv_stats",{stats_type}:{payload}"#, + ), + Null, + Null, + Null, + ]) + .context(QueryExecutionSnafu)?; + + Ok(()) + } +} diff --git a/optd/catalog/src/service.rs b/optd/catalog/src/service.rs new file mode 100644 index 0000000..c6d4f1f --- /dev/null +++ b/optd/catalog/src/service.rs @@ -0,0 +1,337 @@ +use crate::{ + Catalog, CurrentSchema, DuckLakeCatalog, Error, SchemaRef, SnapshotId, SnapshotInfo, + TableStatistics, +}; +use tokio::sync::{mpsc, oneshot}; + +/// Max pending requests +const CHANNEL_BUFFER_SIZE: usize = 1000; + +/// Trait defining the catalog backend that can be used with the service. +pub trait CatalogBackend: Send + 'static { + fn current_snapshot(&mut self) -> Result; + fn current_snapshot_info(&mut self) -> Result; + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result; + fn current_schema_info(&mut self) -> Result; + fn table_statistics( + &mut self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, Error>; + fn update_table_column_stats( + &mut self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error>; +} + +/// Implement CatalogBackend for any type that implements Catalog +impl CatalogBackend for T { + fn current_snapshot(&mut self) -> Result { + Catalog::current_snapshot(self) + } + + fn current_snapshot_info(&mut self) -> Result { + Catalog::current_snapshot_info(self) + } + + fn current_schema(&mut self, schema: Option<&str>, table: &str) -> Result { + Catalog::current_schema(self, schema, table) + } + + fn current_schema_info(&mut self) -> Result { + Catalog::current_schema_info(self) + } + + fn 
table_statistics( + &mut self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, Error> { + Catalog::table_statistics(self, table_name, snapshot) + } + + fn update_table_column_stats( + &mut self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + Catalog::update_table_column_stats(self, column_id, table_id, stats_type, payload) + } +} + +#[derive(Debug)] +pub enum CatalogRequest { + CurrentSnapshot { + respond_to: oneshot::Sender>, + }, + + CurrentSnapshotInfo { + respond_to: oneshot::Sender>, + }, + + CurrentSchema { + schema: Option, + table: String, + respond_to: oneshot::Sender>, + }, + + CurrentSchemaInfo { + respond_to: oneshot::Sender>, + }, + + TableStatistics { + table_name: String, + snapshot: SnapshotId, + respond_to: oneshot::Sender, Error>>, + }, + + UpdateTableColumnStats { + column_id: i64, + table_id: i64, + stats_type: String, + payload: String, + respond_to: oneshot::Sender>, + }, + + Shutdown, +} + +/// Handle for catalog service interaction +#[derive(Clone, Debug)] +pub struct CatalogServiceHandle { + sender: mpsc::Sender, +} + +impl CatalogServiceHandle { + pub async fn current_snapshot(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSnapshot { respond_to: tx }) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn current_snapshot_info(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSnapshotInfo { respond_to: tx }) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? 
+ } + + pub async fn current_schema( + &self, + schema: Option<&str>, + table: &str, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSchema { + schema: schema.map(|s| s.to_string()), + table: table.to_string(), + respond_to: tx, + }) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn current_schema_info(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::CurrentSchemaInfo { respond_to: tx }) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn table_statistics( + &self, + table_name: &str, + snapshot: SnapshotId, + ) -> Result, Error> { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::TableStatistics { + table_name: table_name.to_string(), + snapshot, + respond_to: tx, + }) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? + } + + pub async fn update_table_column_stats( + &self, + column_id: i64, + table_id: i64, + stats_type: &str, + payload: &str, + ) -> Result<(), Error> { + let (tx, rx) = oneshot::channel(); + self.sender + .send(CatalogRequest::UpdateTableColumnStats { + column_id, + table_id, + stats_type: stats_type.to_string(), + payload: payload.to_string(), + respond_to: tx, + }) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })?; + + rx.await.map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + })? 
+ } + + pub async fn shutdown(&self) -> Result<(), Error> { + self.sender + .send(CatalogRequest::Shutdown) + .await + .map_err(|_| Error::QueryExecution { + source: duckdb::Error::ExecuteReturnedResults, + }) + } +} + +/// The catalog service that processes requests in the background +pub struct CatalogService { + backend: B, + receiver: mpsc::Receiver, +} + +impl CatalogService { + /// Create service with provided backend catalog + pub fn new(backend: B) -> (Self, CatalogServiceHandle) { + let (sender, receiver) = mpsc::channel(CHANNEL_BUFFER_SIZE); + + let service = CatalogService { backend, receiver }; + let handle = CatalogServiceHandle { sender }; + + (service, handle) + } + + /// Run the service, processing requests until shutdown + /// + /// Spawn with tokio: + /// ```ignore + /// tokio::spawn(async move { + /// service.run().await; + /// }); + /// ``` + pub async fn run(mut self) { + while let Some(request) = self.receiver.recv().await { + match request { + CatalogRequest::CurrentSnapshot { respond_to } => { + let result = self.backend.current_snapshot(); + let _ = respond_to.send(result); + } + + CatalogRequest::CurrentSnapshotInfo { respond_to } => { + let result = self.backend.current_snapshot_info(); + let _ = respond_to.send(result); + } + + CatalogRequest::CurrentSchema { + schema, + table, + respond_to, + } => { + let result = self.backend.current_schema(schema.as_deref(), &table); + let _ = respond_to.send(result); + } + + CatalogRequest::CurrentSchemaInfo { respond_to } => { + let result = self.backend.current_schema_info(); + let _ = respond_to.send(result); + } + + CatalogRequest::TableStatistics { + table_name, + snapshot, + respond_to, + } => { + let result = self.backend.table_statistics(&table_name, snapshot); + let _ = respond_to.send(result); + } + + CatalogRequest::UpdateTableColumnStats { + column_id, + table_id, + stats_type, + payload, + respond_to, + } => { + let result = self.backend.update_table_column_stats( + column_id, + 
table_id, + &stats_type, + &payload, + ); + let _ = respond_to.send(result); + } + + CatalogRequest::Shutdown => { + // drop the receiver to stop accepting new requests + break; + } + } + } + } +} + +// Convenience methods for creating service with DuckLakeCatalog +impl CatalogService { + /// Create service from location paths using DuckLakeCatalog backend + pub fn try_new_from_location( + location: Option<&str>, + metadata_path: Option<&str>, + ) -> Result<(Self, CatalogServiceHandle), Error> { + let catalog = DuckLakeCatalog::try_new(location, metadata_path)?; + Ok(Self::new(catalog)) + } + + /// Get a reference to the underlying DuckLakeCatalog for test setup only. + /// Only available in test/debug builds and should + /// only be used for setting up test fixtures. + #[cfg(any(test, debug_assertions))] + pub fn catalog_for_setup(&self) -> &DuckLakeCatalog { + &self.backend + } +} diff --git a/optd/catalog/tests/service_tests.rs b/optd/catalog/tests/service_tests.rs new file mode 100644 index 0000000..75079ee --- /dev/null +++ b/optd/catalog/tests/service_tests.rs @@ -0,0 +1,1046 @@ +use optd_catalog::{CatalogService, CatalogServiceHandle, DuckLakeCatalog}; +use std::time::Duration; +use tempfile::TempDir; + +/// Helper to create a test catalog service +fn create_test_service() -> ( + TempDir, + CatalogService, + CatalogServiceHandle, +) { + let temp_dir = TempDir::new().unwrap(); + let metadata_path = temp_dir.path().join("metadata.ducklake"); + + let (service, handle) = + CatalogService::try_new_from_location(None, Some(metadata_path.to_str().unwrap())).unwrap(); + + (temp_dir, service, handle) +} + +// ============================================================================ +// Basic Functionality Tests +// ============================================================================ + +#[tokio::test] +async fn test_service_creation_and_shutdown() { + let (_temp_dir, service, handle) = create_test_service(); + + // Verify handle is cloneable (multi-producer 
capability) + let handle_clone = handle.clone(); + + let service_handle = tokio::spawn(async move { + service.run().await; + }); + + // Both handles should work + let snapshot1 = handle.current_snapshot().await.unwrap(); + let snapshot2 = handle_clone.current_snapshot().await.unwrap(); + assert_eq!( + snapshot1.0, snapshot2.0, + "Cloned handles should access same service" + ); + + // Shutdown should complete gracefully + handle.shutdown().await.unwrap(); + + // Service task should complete + tokio::time::timeout(Duration::from_secs(1), service_handle) + .await + .expect("Service should shutdown within timeout") + .unwrap(); + + // Verify shutdown is idempotent + let result = handle_clone.shutdown().await; + assert!(result.is_err(), "Second shutdown should fail gracefully"); +} + +#[tokio::test] +async fn test_current_snapshot_basic() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot.0, 0, "Initial snapshot should be 0"); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_current_snapshot_info() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let info = handle.current_snapshot_info().await.unwrap(); + assert_eq!(info.id.0, 0); + assert_eq!(info.schema_version, 0); + assert!(info.next_catalog_id > 0); + assert_eq!(info.next_file_id, 0); + + // Verify snapshot info is consistent with current_snapshot + let snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!( + info.id.0, snapshot.0, + "Snapshot info ID should match current snapshot" + ); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_current_schema_info() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let schema_info = 
handle.current_schema_info().await.unwrap(); + assert_eq!(schema_info.schema_name, "main"); + assert_eq!(schema_info.schema_id, 0); + assert_eq!(schema_info.begin_snapshot, 0); + assert!(schema_info.end_snapshot.is_none()); + + handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Table and Schema Tests +// ============================================================================ + +#[tokio::test] +async fn test_current_schema_with_table() { + let (_temp_dir, service, handle) = create_test_service(); + + // Get the catalog to create a test table BEFORE spawning service + let conn = service.catalog_for_setup().get_connection(); + conn.execute_batch( + r#" + CREATE TABLE test_table ( + id INTEGER NOT NULL, + name VARCHAR, + age INTEGER + ); + "#, + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Fetch schema + let schema = handle.current_schema(None, "test_table").await.unwrap(); + + assert_eq!(schema.fields().len(), 3); + assert!(schema.field_with_name("id").is_ok()); + assert!(schema.field_with_name("name").is_ok()); + assert!(schema.field_with_name("age").is_ok()); + + // Check nullable constraints + let id_field = schema.field_with_name("id").unwrap(); + assert!(!id_field.is_nullable(), "id should not be nullable"); + + let name_field = schema.field_with_name("name").unwrap(); + assert!(name_field.is_nullable(), "name should be nullable"); + + // Verify data types are correctly mapped + use duckdb::arrow::datatypes::DataType; + assert!( + matches!(id_field.data_type(), DataType::Int32), + "id should be Int32" + ); + assert!( + matches!(name_field.data_type(), DataType::Utf8), + "name should be Utf8/String" + ); + + // Verify field order matches CREATE TABLE order + assert_eq!(schema.fields()[0].name(), "id"); + assert_eq!(schema.fields()[1].name(), "name"); + assert_eq!(schema.fields()[2].name(), "age"); + + handle.shutdown().await.unwrap(); +} + 
+#[tokio::test] +async fn test_table_statistics_empty_table() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + conn.execute_batch( + r#" + CREATE TABLE empty_table (id INTEGER, name VARCHAR); + "#, + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("empty_table", snapshot) + .await + .unwrap(); + + assert!(stats.is_some()); + let stats = stats.unwrap(); + assert_eq!(stats.row_count, 0); + + // For empty tables with no data, the statistics system may not return column metadata + // This is expected behavior - verify it's empty or has minimal stats + assert_eq!( + stats.column_statistics.len(), + 0, + "Empty table with no data should have 0 column statistics" + ); + + // If there were column statistics, verify no advanced stats would be present + for col_stat in &stats.column_statistics { + assert_eq!( + col_stat.advanced_stats.len(), + 0, + "Empty table should have no advanced stats for {}", + col_stat.name + ); + } + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_table_statistics_nonexistent_table() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("nonexistent_table", snapshot) + .await + .unwrap(); + + assert!(stats.is_some()); + assert_eq!(stats.unwrap().column_statistics.len(), 0); + + handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Statistics Update Tests +// ============================================================================ + +#[tokio::test] +async fn test_update_and_retrieve_statistics() { + let (_temp_dir, service, handle) = 
create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + // Create table and get IDs + conn.execute_batch( + r#" + CREATE TABLE stats_test (id INTEGER, value DOUBLE); + INSERT INTO stats_test VALUES (1, 10.5), (2, 20.5); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'stats_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let value_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'value'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Update statistics + handle + .update_table_column_stats(value_column_id, table_id, "min_value", "10.5") + .await + .unwrap(); + + handle + .update_table_column_stats(value_column_id, table_id, "max_value", "20.5") + .await + .unwrap(); + + // Retrieve and verify + let snapshot = handle.current_snapshot().await.unwrap(); + // Table creation creates initial snapshots, then 2 updates create 2 more + assert!( + snapshot.0 >= 2, + "Should have at least 2 snapshots after updates" + ); + + let stats = handle + .table_statistics("stats_test", snapshot) + .await + .unwrap() + .unwrap(); + + let value_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "value") + .expect("Should have stats for value column"); + + assert_eq!(value_stats.advanced_stats.len(), 2); + assert!( + value_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "min_value") + ); + assert!( + value_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "max_value") + ); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async 
fn test_statistics_versioning() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE version_test (id INTEGER, count INTEGER); + INSERT INTO version_test VALUES (1, 100); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'version_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let count_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'count'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Take snapshot before updates + let snapshot_0 = handle.current_snapshot().await.unwrap(); + + // Update 1 + handle + .update_table_column_stats( + count_column_id, + table_id, + "ndv", + r#"{"distinct_count": 100}"#, + ) + .await + .unwrap(); + + let snapshot_1 = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot_1.0, snapshot_0.0 + 1); + + // Update 2 (new value) + handle + .update_table_column_stats( + count_column_id, + table_id, + "ndv", + r#"{"distinct_count": 150}"#, + ) + .await + .unwrap(); + + let snapshot_2 = handle.current_snapshot().await.unwrap(); + assert_eq!(snapshot_2.0, snapshot_1.0 + 1); + + // Verify stats at snapshot_1 + let stats_1 = handle + .table_statistics("version_test", snapshot_1) + .await + .unwrap() + .unwrap(); + + let count_stats_1 = stats_1 + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert_eq!(count_stats_1.advanced_stats.len(), 1); + assert!( + count_stats_1.advanced_stats[0] + .data + .to_string() + 
.contains("100") + ); + + // Verify stats at snapshot_2 + let stats_2 = handle + .table_statistics("version_test", snapshot_2) + .await + .unwrap() + .unwrap(); + + let count_stats_2 = stats_2 + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert_eq!(count_stats_2.advanced_stats.len(), 1); + assert!( + count_stats_2.advanced_stats[0] + .data + .to_string() + .contains("150") + ); + + // Verify snapshot_1 still returns old value + let stats_1_again = handle + .table_statistics("version_test", snapshot_1) + .await + .unwrap() + .unwrap(); + + let count_stats_1_again = stats_1_again + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert!( + count_stats_1_again.advanced_stats[0] + .data + .to_string() + .contains("100"), + "Time-travel query should return historical value, not current value" + ); + + // Verify snapshot_0 has no stats (before any updates) + let stats_0 = handle + .table_statistics("version_test", snapshot_0) + .await + .unwrap() + .unwrap(); + + let count_stats_0 = stats_0 + .column_statistics + .iter() + .find(|cs| cs.name == "count") + .unwrap(); + + assert_eq!( + count_stats_0.advanced_stats.len(), + 0, + "Snapshot before updates should have no advanced stats" + ); + + handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Concurrency Tests +// ============================================================================ + +#[tokio::test(flavor = "multi_thread")] +async fn test_concurrent_read_operations() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Spawn multiple concurrent snapshot requests + let mut tasks = vec![]; + for _ in 0..50 { + let handle_clone = handle.clone(); + tasks.push(tokio::spawn(async move { + handle_clone.current_snapshot().await.unwrap() + })); + } + + // All should succeed with same snapshot ID + for task in 
tasks { + let snapshot = task.await.unwrap(); + assert_eq!(snapshot.0, 0); + } + + handle.shutdown().await.unwrap(); +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_concurrent_mixed_operations() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE concurrent_test (id INTEGER, data VARCHAR); + INSERT INTO concurrent_test VALUES (1, 'test'); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'concurrent_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + let initial_snapshot = handle.current_snapshot().await.unwrap(); + + let mut tasks = vec![]; + + // Mix of reads and writes + for i in 0..20 { + let handle_clone = handle.clone(); + + if i % 2 == 0 { + // Read operation + tasks.push(tokio::spawn(async move { + let _ = handle_clone.current_snapshot().await; + })); + } else { + // Write operation + tasks.push(tokio::spawn(async move { + let _ = handle_clone + .update_table_column_stats( + id_column_id, + table_id, + &format!("stat_{}", i), + &format!(r#"{{"value": {}}}"#, i), + ) + .await; + })); + } + } + + // Wait for all + for task in tasks { + task.await.unwrap(); + } + + // Verify final snapshot progressed + let final_snapshot = handle.current_snapshot().await.unwrap(); + assert!(final_snapshot.0 >= 10, "Should have progressed snapshots"); + + // Verify all writes succeeded by checking stats + let stats = handle + .table_statistics("concurrent_test", final_snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have stats for id column"); + + // Should have 10 stats (one for each odd i: 1,3,5,7,9,11,13,15,17,19) + assert_eq!( + id_stats.advanced_stats.len(), + 10, + "Should have 10 write operations worth of stats" + ); + + // Verify no stats were lost (check for specific stat names) + let stat_names: Vec<&str> = id_stats + .advanced_stats + .iter() + .map(|s| s.stats_type.as_str()) + .collect(); + for i in (1..20).step_by(2) { + let expected_name = format!("stat_{}", i); + assert!( + stat_names.contains(&expected_name.as_str()), + "Should have stat_{} but got {:?}", + i, + stat_names + ); + } + + // Verify snapshot progression matches write count + let snapshot_diff = final_snapshot.0 - initial_snapshot.0; + assert_eq!( + snapshot_diff, 10, + "Snapshot should have advanced by 
exactly 10 (one per write)" + ); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_multiple_handles_same_service() { + let (_temp_dir, service, handle1) = create_test_service(); + + // Clone handles + let handle2 = handle1.clone(); + let handle3 = handle1.clone(); + + tokio::spawn(async move { + service.run().await; + }); + + // All handles should work independently + let snapshot1 = handle1.current_snapshot().await.unwrap(); + let snapshot2 = handle2.current_snapshot().await.unwrap(); + let snapshot3 = handle3.current_snapshot().await.unwrap(); + + assert_eq!(snapshot1.0, snapshot2.0); + assert_eq!(snapshot2.0, snapshot3.0); + + handle1.shutdown().await.unwrap(); +} + +// ============================================================================ +// Edge Cases and Error Handling +// ============================================================================ + +#[tokio::test] +async fn test_operations_after_shutdown() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Shutdown the service + handle.shutdown().await.unwrap(); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Operations after shutdown should fail + let result = handle.current_snapshot().await; + assert!(result.is_err(), "Operations after shutdown should fail"); + + // Verify multiple operations fail consistently + assert!(handle.current_snapshot_info().await.is_err()); + assert!(handle.current_schema_info().await.is_err()); + assert!( + handle + .table_statistics("any_table", optd_catalog::SnapshotId(0)) + .await + .is_err() + ); + + // Verify error type is consistent (channel closed) + match result { + Err(e) => { + let err_msg = format!("{:?}", e); + assert!( + err_msg.contains("ExecuteReturnedResults") || err_msg.contains("channel"), + "Error should indicate channel/connection issue, got: {}", + err_msg + ); + } + Ok(_) => panic!("Expected error after shutdown"), + } +} + 
+#[tokio::test] +async fn test_invalid_table_schema_request() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Request schema for non-existent table + let result = handle.current_schema(None, "does_not_exist").await; + assert!(result.is_err(), "Should error for non-existent table"); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_large_json_statistics() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE large_stats_test (id INTEGER); + INSERT INTO large_stats_test VALUES (1); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'large_stats_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Create large histogram data + let large_histogram: Vec<i32> = (0..1000).collect(); + let large_payload = serde_json::json!({ + "buckets": large_histogram, + "metadata": "x".repeat(1000) + }) + .to_string(); + + // Should handle large payloads + let result = handle + .update_table_column_stats(id_column_id, table_id, "large_histogram", &large_payload) + .await; + + assert!(result.is_ok(), "Should handle large statistics payloads"); + + // Verify retrieval + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("large_stats_test", snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .unwrap(); + + let large_stat = id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "large_histogram") + .unwrap(); + + assert!(large_stat.data.to_string().len() > 1000); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_special_characters_in_statistics() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE special_chars_test (id INTEGER); + INSERT INTO special_chars_test VALUES (1); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'special_chars_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + // Statistics with special characters + let special_payload = + r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "emoji": "🚀"}"#; + + handle + .update_table_column_stats(id_column_id, table_id, "special_test", special_payload) + .await + .unwrap(); + + // Retrieve and verify + let snapshot = handle.current_snapshot().await.unwrap(); + let stats = handle + .table_statistics("special_chars_test", snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .unwrap(); + + let special_stat = id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "special_test") + .unwrap(); + + let data_str = special_stat.data.to_string(); + assert!(data_str.contains("测试")); + assert!(data_str.contains("🚀")); + + handle.shutdown().await.unwrap(); +} + +#[tokio::test] +async fn test_rapid_sequential_updates() { + let (_temp_dir, service, handle) = create_test_service(); + + // Setup before spawning service + let conn = service.catalog_for_setup().get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE rapid_test (id INTEGER); + INSERT INTO rapid_test VALUES (1); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'rapid_test'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let id_column_id: i64 = conn + .query_row( + r#" + SELECT column_id FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'id'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + tokio::spawn(async move { + service.run().await; + }); + + let initial_snapshot = handle.current_snapshot().await.unwrap(); + + // Perform 10 rapid updates + for i in 0..10 { + handle + .update_table_column_stats( + id_column_id, + table_id, + "counter", + &format!(r#"{{"count": {}}}"#, i), + ) + .await + .unwrap(); + } + + let final_snapshot = handle.current_snapshot().await.unwrap(); + assert_eq!( + final_snapshot.0, + initial_snapshot.0 + 10, + "Should have 10 new snapshots" + ); + + // Verify the final value is the last update + let final_stats = handle + .table_statistics("rapid_test", final_snapshot) + .await + .unwrap() + .unwrap(); + + let id_stats = final_stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have stats for id column"); + + // Should have only 1 stat since same stat_type was updated + assert_eq!(id_stats.advanced_stats.len(), 1); + + let counter_stat = id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "counter") + .expect("Should have counter stat"); + + // Final value should be 9 (last iteration) + assert!( + counter_stat.data.to_string().contains("9"), + "Final counter value should be 9, got: {}", + counter_stat.data + ); + + // Verify we can query intermediate snapshots + let mid_snapshot = optd_catalog::SnapshotId(initial_snapshot.0 + 5); + let mid_stats = handle + .table_statistics("rapid_test", mid_snapshot) + .await + .unwrap() + .unwrap(); + + let mid_id_stats = mid_stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have stats for id column at mid snapshot"); + + if let Some(mid_counter) = mid_id_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "counter") + { + // Mid-point should have value 4 (5th update, 0-indexed) + assert!( + mid_counter.data.to_string().contains("4"), + "Mid-point counter should be 4, got: {}", + mid_counter.data + ); + } + + 
handle.shutdown().await.unwrap(); +} + +// ============================================================================ +// Performance and Stress Tests +// ============================================================================ + +#[tokio::test(flavor = "multi_thread")] +async fn test_high_concurrency_stress() { + let (_temp_dir, service, handle) = create_test_service(); + + tokio::spawn(async move { + service.run().await; + }); + + // Spawn 50 concurrent tasks + let mut tasks = vec![]; + for i in 0..50 { + let handle_clone = handle.clone(); + tasks.push(tokio::spawn(async move { + if i % 3 == 0 { + let _ = handle_clone.current_snapshot().await; + } else if i % 3 == 1 { + let _ = handle_clone.current_snapshot_info().await; + } else { + let _ = handle_clone.current_schema_info().await; + } + })); + } + + // Should complete without errors + let results: Vec<_> = futures::future::join_all(tasks).await; + for result in results { + assert!(result.is_ok(), "All concurrent operations should succeed"); + } + + handle.shutdown().await.unwrap(); +} diff --git a/optd/catalog/tests/statistics_tests.rs b/optd/catalog/tests/statistics_tests.rs new file mode 100644 index 0000000..8cf7112 --- /dev/null +++ b/optd/catalog/tests/statistics_tests.rs @@ -0,0 +1,1408 @@ +use optd_catalog::{Catalog, DuckLakeCatalog, SnapshotId}; +use serde_json::json; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; +use tempfile::TempDir; + +static TEST_COUNTER: AtomicU64 = AtomicU64::new(0); + +/// Creates a test catalog with isolated metadata directory. 
+fn create_test_catalog(for_file: bool) -> (TempDir, DuckLakeCatalog) { + let temp_dir = TempDir::new().unwrap(); + let counter = TEST_COUNTER.fetch_add(1, Ordering::SeqCst); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let unique_dir = temp_dir + .path() + .join(format!("db_{}_{}", timestamp, counter)); + std::fs::create_dir_all(&unique_dir).unwrap(); + let metadata_path = unique_dir.join("metadata.ducklake"); + + let catalog = if for_file { + let db_path = unique_dir.join("test.db"); + DuckLakeCatalog::try_new( + Some(db_path.to_str().unwrap()), + Some(metadata_path.to_str().unwrap()), + ) + } else { + DuckLakeCatalog::try_new(None, Some(metadata_path.to_str().unwrap())) + } + .unwrap(); + + (temp_dir, catalog) +} + +/// Creates a test catalog with a pre-populated test_table (id, name, age columns). +fn create_test_catalog_with_data() -> (TempDir, DuckLakeCatalog, i64, i64) { + let (temp_dir, catalog) = create_test_catalog(false); + let conn = catalog.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE test_table (id INTEGER, name VARCHAR, age INTEGER); + INSERT INTO test_table VALUES (1, 'Alice', 30), (2, 'Bob', 25), (3, 'Charlie', 35); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let age_column_id: i64 = conn + .query_row( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? AND column_name = 'age'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + (temp_dir, catalog, table_id, age_column_id) +} + +#[test] +fn test_ducklake_statistics_provider_creation() { + // Test both memory-based and file-based provider creation. 
+ let (_temp_dir, _provider) = create_test_catalog(false); + let (_temp_dir, _provider) = create_test_catalog(true); +} + +#[test] +fn test_table_stats_insertion() { + // Test basic statistics insertion without errors. + let (_temp_dir, mut provider) = create_test_catalog(true); + + let result = provider.update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#); + assert!(result.is_ok()); +} + +#[test] +fn test_table_stats_insertion_and_retrieval() { + // Test inserting and retrieving multiple statistics types for a column. + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [{"min": 20, "max": 30, "count": 2}]}"#, + ) + .unwrap(); + + let latest_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", latest_snapshot) + .unwrap() + .unwrap(); + + assert_eq!(stats.column_statistics.len(), 3); + assert_eq!(stats.row_count, 3); + + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .expect("Should have statistics for age column"); + + assert_eq!(age_stats.advanced_stats.len(), 3); + assert!( + age_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "min_value" && (s.data == json!(25) || s.data == json!("25"))) + ); + assert!( + age_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "max_value" && (s.data == json!(35) || s.data == json!("35"))) + ); + assert!( + age_stats + .advanced_stats + .iter() + .any(|s| s.stats_type == "histogram" && s.data.to_string().contains("buckets")) + ); +} + +#[test] +fn test_current_schema() { + // Test fetching current schema info returns valid metadata. 
+ let (_temp_dir, mut provider) = create_test_catalog(true); + + let schema = provider.current_schema_info().unwrap(); + + assert_eq!(schema.schema_name, "main"); + assert_eq!(schema.schema_id, 0); + assert!(schema.begin_snapshot >= 0); + assert!(schema.end_snapshot.is_none()); +} + +#[test] +fn test_snapshot_versioning_and_stats_types() { + // Test snapshot creation, versioning, and continuity for multiple stats updates. + let (_temp_dir, mut provider) = create_test_catalog(true); + + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + provider + .update_table_column_stats(3, 1, "histogram", r#"{"buckets": [1,2,3]}"#) + .unwrap(); + + let snapshots: Vec<(i64, i64)> = { + let conn = provider.get_connection(); + conn.prepare( + r#" + SELECT column_id, begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 + ORDER BY begin_snapshot; + "#, + ) + .unwrap() + .query_map([], |row| Ok((row.get(0)?, row.get(1)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + assert_eq!(snapshots.len(), 3); + assert!(snapshots[1].1 > snapshots[0].1); + assert!(snapshots[2].1 > snapshots[1].1); + + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1500}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + + let versions: Vec<(i64, Option<i64>, String)> = { + let conn = provider.get_connection(); + conn.prepare( + r#" + SELECT begin_snapshot, end_snapshot, payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND stats_type = 'ndv' + ORDER BY begin_snapshot; + "#, + ) + .unwrap() + .query_map([], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?))) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + + assert_eq!(versions.len(), 3); + 
assert!(versions[0].1.is_some() && versions[1].1.is_some() && versions[2].1.is_none()); + assert_eq!(versions[0].1.unwrap(), versions[1].0); + assert_eq!(versions[1].1.unwrap(), versions[2].0); + assert!(versions[0].2.contains("1000")); + assert!(versions[1].2.contains("1500")); + assert!(versions[2].2.contains("2000")); + + provider + .update_table_column_stats(1, 1, "histogram", r#"{"buckets": [1,2,3,4,5]}"#) + .unwrap(); + provider + .update_table_column_stats(1, 1, "minmax", r#"{"min": 0, "max": 100}"#) + .unwrap(); + + let type_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( + r#" + SELECT COUNT(DISTINCT stats_type) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 AND column_id = 1 AND end_snapshot IS NULL + "#, + [], + |row| row.get(0), + ) + .unwrap() + }; + assert_eq!(type_count, 3); +} + +#[test] +fn test_snapshot_tracking_and_multi_table_stats() { + // Test snapshot creation tracking and statistics isolation across multiple tables. 
+ let (_temp_dir, mut provider) = create_test_catalog(true); + + let initial_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", + [], + |row| row.get(0), + ) + .unwrap() + }; + + provider + .update_table_column_stats(1, 1, "ndv", r#"{"distinct_count": 1000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 1, "ndv", r#"{"distinct_count": 2000}"#) + .unwrap(); + provider + .update_table_column_stats(3, 1, "ndv", r#"{"distinct_count": 3000}"#) + .unwrap(); + + let after_table1_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( + "SELECT COUNT(*) FROM __ducklake_metadata_metalake.main.ducklake_snapshot", + [], + |row| row.get(0), + ) + .unwrap() + }; + assert_eq!(after_table1_count - initial_count, 3); + + let changes_count: i64 = { + let conn = provider.get_connection(); + conn.query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_snapshot_changes + WHERE changes_made LIKE 'updated_stats:%' + "#, + [], + |row| row.get(0), + ) + .unwrap() + }; + assert_eq!(changes_count, 3); + + provider + .update_table_column_stats(1, 2, "ndv", r#"{"distinct_count": 5000}"#) + .unwrap(); + provider + .update_table_column_stats(2, 2, "ndv", r#"{"distinct_count": 6000}"#) + .unwrap(); + + let (table1_count, table2_count): (i64, i64) = { + let conn = provider.get_connection(); + let table1_count: i64 = conn + .query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 1 + "#, + [], + |row| row.get(0), + ) + .unwrap(); + let table2_count: i64 = conn + .query_row( + r#" + SELECT COUNT(*) + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = 2 + "#, + [], + |row| row.get(0), + ) + .unwrap(); + (table1_count, table2_count) + }; + + assert_eq!(table1_count, 3); + assert_eq!(table2_count, 2); + + let all_snapshots: Vec = { + let 
conn = provider.get_connection(); + conn.prepare( + r#" + SELECT begin_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + ORDER BY begin_snapshot + "#, + ) + .unwrap() + .query_map([], |row| row.get(0)) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + + for i in 1..all_snapshots.len() { + assert!(all_snapshots[i] > all_snapshots[i - 1]); + } +} + +#[test] +fn test_update_and_fetch_table_column_stats() { + // Test updating min/max values and advanced statistics with snapshot progression. + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + let initial_snapshot = provider.current_snapshot().unwrap(); + assert!( + provider + .table_statistics("test_table", initial_snapshot) + .unwrap() + .is_some() + ); + + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + let snapshot_after_min = provider.current_snapshot().unwrap(); + assert_eq!(snapshot_after_min.0, initial_snapshot.0 + 1); + + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + let snapshot_after_max = provider.current_snapshot().unwrap(); + assert_eq!(snapshot_after_max.0, initial_snapshot.0 + 2); + + let (min_val, max_val): (Option, Option) = { + let conn = provider.get_connection(); + conn.query_row( + r#" + SELECT min_value, max_value + FROM __ducklake_metadata_metalake.main.ducklake_table_column_stats + WHERE table_id = ? AND column_id = ?; + "#, + [table_id, age_column_id], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .unwrap() + }; + + assert_eq!(min_val, Some("25".to_string())); + assert_eq!(max_val, Some("35".to_string())); + + let adv_stats: Vec<(String, String, i64, Option)> = { + let conn = provider.get_connection(); + conn.prepare( + r#" + SELECT stats_type, payload, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? 
+ ORDER BY stats_type, begin_snapshot; + "#, + ) + .unwrap() + .query_map([table_id, age_column_id], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) + }) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + + assert_eq!(adv_stats.len(), 2); + assert!( + adv_stats + .iter() + .any(|(st, p, _, e)| st == "max_value" && p == "35" && e.is_none()) + ); + assert!( + adv_stats + .iter() + .any(|(st, p, _, e)| st == "min_value" && p == "25" && e.is_none()) + ); + + provider.update_table_column_stats(age_column_id, + table_id, + "histogram", + &json!({"buckets": [{"min": 20, "max": 30, "count": 2}, {"min": 30, "max": 40, "count": 1}]}).to_string(), + ) + .unwrap(); + + let snapshot_after_histogram = provider.current_snapshot().unwrap(); + assert_eq!(snapshot_after_histogram.0, initial_snapshot.0 + 3); +} + +#[test] +fn test_fetch_table_stats_with_snapshot_time_travel() { + // Test time-travel capability by fetching statistics at different snapshot points. + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + let snapshot_0 = provider.current_snapshot().unwrap(); + + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 1, "buckets": [1, 2, 3]}"#, + ) + .unwrap(); + let snapshot_1 = provider.current_snapshot().unwrap(); + + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 2, "buckets": [1, 2, 3, 4, 5]}"#, + ) + .unwrap(); + let snapshot_2 = provider.current_snapshot().unwrap(); + + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"version": 3, "buckets": [10, 20, 30]}"#, + ) + .unwrap(); + let snapshot_3 = provider.current_snapshot().unwrap(); + + let stats_at_0 = provider + .table_statistics("test_table", snapshot_0) + .unwrap() + .unwrap(); + let age_stats_0 = stats_at_0 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + 
assert_eq!(age_stats_0.advanced_stats.len(), 0); + + let stats_at_1 = provider + .table_statistics("test_table", snapshot_1) + .unwrap() + .unwrap(); + let age_stats_1 = stats_at_1 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_1.advanced_stats.len(), 1); + assert!( + age_stats_1.advanced_stats[0] + .data + .to_string() + .contains("\"version\":1") + ); + + let stats_at_2 = provider + .table_statistics("test_table", snapshot_2) + .unwrap() + .unwrap(); + let age_stats_2 = stats_at_2 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_2.advanced_stats.len(), 1); + assert!( + age_stats_2.advanced_stats[0] + .data + .to_string() + .contains("\"version\":2") + ); + + let stats_at_3 = provider + .table_statistics("test_table", snapshot_3) + .unwrap() + .unwrap(); + let age_stats_3 = stats_at_3 + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + assert_eq!(age_stats_3.advanced_stats.len(), 1); + assert!( + age_stats_3.advanced_stats[0] + .data + .to_string() + .contains("\"version\":3") + ); +} + +#[test] +fn test_fetch_table_stats_multiple_stat_types() { + // Test fetching when multiple statistics types exist for the same column. 
+ let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "max_value", "35") + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "histogram", + r#"{"buckets": [20, 25, 30, 35]}"#, + ) + .unwrap(); + provider + .update_table_column_stats(age_column_id, table_id, "ndv", r#"{"distinct_count": 3}"#) + .unwrap(); + provider + .update_table_column_stats( + age_column_id, + table_id, + "quantiles", + r#"{"p50": 30, "p95": 34, "p99": 35}"#, + ) + .unwrap(); + + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", current_snapshot) + .unwrap() + .unwrap(); + + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + + assert_eq!(age_stats.advanced_stats.len(), 5); + + let stat_types: Vec<&str> = age_stats + .advanced_stats + .iter() + .map(|s| s.stats_type.as_str()) + .collect(); + + assert!(stat_types.contains(&"min_value")); + assert!(stat_types.contains(&"max_value")); + assert!(stat_types.contains(&"histogram")); + assert!(stat_types.contains(&"ndv")); + assert!(stat_types.contains(&"quantiles")); +} + +#[test] +fn test_fetch_table_stats_columns_without_stats() { + // Test that columns without advanced statistics are still returned in fetch results. 
+ let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", current_snapshot) + .unwrap() + .unwrap(); + + assert_eq!(stats.column_statistics.len(), 3); + + let id_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "id") + .expect("Should have id column"); + let name_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "name") + .expect("Should have name column"); + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .expect("Should have age column"); + + assert_eq!(id_stats.advanced_stats.len(), 0); + assert_eq!(name_stats.advanced_stats.len(), 0); + assert_eq!(age_stats.advanced_stats.len(), 1); +} + +#[test] +fn test_fetch_table_stats_row_count() { + // Test that row_count is correctly populated from table statistics. + let (_temp_dir, mut provider) = create_test_catalog(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE large_table (col1 INTEGER, col2 VARCHAR); + INSERT INTO large_table SELECT i, 'value_' || i::VARCHAR FROM range(1, 101) t(i); + "#, + ) + .unwrap(); + + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id + FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds + ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() + AND dt.table_name = 'large_table'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + + let col1_id: i64 = conn + .query_row( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'col1'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + + provider + .update_table_column_stats(col1_id, table_id, "ndv", r#"{"distinct_count": 100}"#) + .unwrap(); + + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("large_table", current_snapshot) + .unwrap() + .unwrap(); + + assert_eq!(stats.row_count, 100); + assert_eq!(stats.column_statistics.len(), 2); +} + +#[test] +fn test_current_schema_arrow() { + // Test fetching Arrow schema from DuckDB table with type conversions. + let (_temp_dir, mut provider) = create_test_catalog(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE schema_test_table ( + id INTEGER, + name VARCHAR, + value DOUBLE, + active BOOLEAN + ); + "#, + ) + .unwrap(); + + let schema = provider.current_schema(None, "schema_test_table").unwrap(); + + assert_eq!(schema.fields().len(), 4); + + let field_names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect(); + assert!(field_names.contains(&"id")); + assert!(field_names.contains(&"name")); + assert!(field_names.contains(&"value")); + assert!(field_names.contains(&"active")); + + assert!(matches!( + schema.field_with_name("id").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int32 + )); + assert!(matches!( + schema.field_with_name("name").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Utf8 + )); + assert!(matches!( + schema.field_with_name("value").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Float64 + )); + assert!(matches!( + schema.field_with_name("active").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Boolean + )); + + let schema_explicit = provider + .current_schema(Some("main"), "schema_test_table") + .unwrap(); + assert_eq!(schema_explicit.fields().len(), 4); +} + +#[test] +fn test_multiple_schemas_comprehensive() { + // Test schema fetching and metadata tracking across multiple database 
schemas. + let (_temp_dir, mut provider) = create_test_catalog(false); + + let initial_schema_info = provider.current_schema_info().unwrap(); + assert_eq!(initial_schema_info.schema_name, "main"); + assert_eq!(initial_schema_info.schema_id, 0); + assert!(initial_schema_info.end_snapshot.is_none()); + + { + let conn = provider.get_connection(); + conn.execute_batch( + r#" + CREATE SCHEMA analytics; + CREATE SCHEMA reporting; + CREATE TABLE main.users (user_id INTEGER, username VARCHAR, email VARCHAR, created_at TIMESTAMP); + CREATE TABLE analytics.metrics (metric_id BIGINT, metric_name VARCHAR, value DOUBLE, recorded_at DATE); + CREATE TABLE reporting.summary (report_id SMALLINT, report_name TEXT, data BLOB, is_published BOOLEAN); + "#, + ) + .unwrap(); + } + + let main_users_schema = provider.current_schema(None, "users").unwrap(); + assert_eq!(main_users_schema.fields().len(), 4); + assert!(matches!( + main_users_schema + .field_with_name("user_id") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int32 + )); + assert!(matches!( + main_users_schema + .field_with_name("username") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Utf8 + )); + assert!(matches!( + main_users_schema + .field_with_name("created_at") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Timestamp(_, _) + )); + + let analytics_metrics_schema = provider + .current_schema(Some("analytics"), "metrics") + .unwrap(); + assert_eq!(analytics_metrics_schema.fields().len(), 4); + assert!(matches!( + analytics_metrics_schema + .field_with_name("metric_id") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int64 + )); + assert!(matches!( + analytics_metrics_schema + .field_with_name("value") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Float64 + )); + assert!(matches!( + analytics_metrics_schema + .field_with_name("recorded_at") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Date32 + )); + + 
let reporting_summary_schema = provider + .current_schema(Some("reporting"), "summary") + .unwrap(); + assert_eq!(reporting_summary_schema.fields().len(), 4); + assert!(matches!( + reporting_summary_schema + .field_with_name("report_id") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int16 + )); + assert!(matches!( + reporting_summary_schema + .field_with_name("data") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Binary + )); + assert!(matches!( + reporting_summary_schema + .field_with_name("is_published") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Boolean + )); + + let current_schema_info = provider.current_schema_info().unwrap(); + assert_eq!(current_schema_info.schema_name, "main"); + + { + let conn = provider.get_connection(); + conn.execute("USE analytics;", []).unwrap(); + } + let analytics_schema_info = provider.current_schema_info().unwrap(); + assert_eq!(analytics_schema_info.schema_name, "analytics"); + assert!(analytics_schema_info.end_snapshot.is_none()); + + let metrics_schema_implicit = provider.current_schema(None, "metrics").unwrap(); + assert_eq!(metrics_schema_implicit.fields().len(), 4); + + let users_from_main = provider.current_schema(Some("main"), "users").unwrap(); + assert_eq!(users_from_main.fields().len(), 4); + + { + let conn = provider.get_connection(); + conn.execute("USE reporting;", []).unwrap(); + } + let reporting_schema_info = provider.current_schema_info().unwrap(); + assert_eq!(reporting_schema_info.schema_name, "reporting"); + + let schemas: Vec<(String, i64, i64, Option)> = { + let conn = provider.get_connection(); + conn.prepare( + r#" + SELECT schema_name, schema_id, begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_schema + ORDER BY schema_id; + "#, + ) + .unwrap() + .query_map([], |row| { + Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)) + }) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + + assert!(schemas.len() >= 
3); + + let schema_names: Vec<&str> = schemas + .iter() + .map(|(name, _, _, _)| name.as_str()) + .collect(); + assert!(schema_names.contains(&"main")); + assert!(schema_names.contains(&"analytics")); + assert!(schema_names.contains(&"reporting")); + + for (name, _, _, end_snapshot) in &schemas { + assert!(end_snapshot.is_none(), "Schema {} should be active", name); + } +} + +#[test] +fn test_error_handling_edge_cases() { + // Test various error scenarios: non-existent tables, invalid snapshots, invalid IDs. + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + // Non-existent table returns empty results + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("nonexistent_table", current_snapshot) + .unwrap(); + assert!(stats.is_some()); + assert_eq!(stats.unwrap().column_statistics.len(), 0); + + // Invalid/future snapshot still returns data + provider + .update_table_column_stats(age_column_id, table_id, "min_value", "25") + .unwrap(); + let future_stats = provider + .table_statistics("test_table", SnapshotId(99999)) + .unwrap(); + assert!(future_stats.is_some()); + assert_eq!(future_stats.unwrap().column_statistics.len(), 3); + + // Updating with invalid IDs succeeds without error + let result = + provider.update_table_column_stats(9999, 9999, "ndv", r#"{"distinct_count": 100}"#); + assert!(result.is_ok()); + + // Fetching schema for non-existent table returns error + assert!(provider.current_schema(None, "nonexistent_table").is_err()); + + // Invalid schema name returns error + { + let conn = provider.get_connection(); + conn.execute_batch("CREATE TABLE test (id INTEGER);") + .unwrap(); + } + assert!( + provider + .current_schema(Some("nonexistent_schema"), "test") + .is_err() + ); +} + +#[test] +fn test_update_same_stat_rapidly() { + // Test updating the same statistic multiple times in rapid succession. 
+ let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + let initial_snapshot = provider.current_snapshot().unwrap(); + + for i in 1..=5 { + provider + .update_table_column_stats( + age_column_id, + table_id, + "ndv", + &format!(r#"{{"distinct_count": {}}}"#, i * 100), + ) + .unwrap(); + } + + let final_snapshot = provider.current_snapshot().unwrap(); + assert_eq!(final_snapshot.0, initial_snapshot.0 + 5); + + let versions: Vec<(i64, Option)> = { + let conn = provider.get_connection(); + conn.prepare( + r#" + SELECT begin_snapshot, end_snapshot + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE table_id = ? AND column_id = ? AND stats_type = 'ndv' + ORDER BY begin_snapshot; + "#, + ) + .unwrap() + .query_map([table_id, age_column_id], |row| { + Ok((row.get(0)?, row.get(1)?)) + }) + .unwrap() + .map(|r| r.unwrap()) + .collect() + }; + + assert_eq!(versions.len(), 5); + for i in 0..4 { + assert!(versions[i].1.is_some()); + assert_eq!(versions[i].1.unwrap(), versions[i + 1].0); + } + assert!(versions[4].1.is_none()); +} + +#[test] +fn test_data_edge_cases() { + // Test empty tables, single columns, special characters, and large payloads. 
+ let (_temp_dir, mut provider) = create_test_catalog(false); + let conn = provider.get_connection(); + + // Empty table with zero rows + conn.execute_batch("CREATE TABLE empty_table (id INTEGER, name VARCHAR);") + .unwrap(); + + let current_snapshot = provider.current_snapshot().unwrap(); + let empty_stats = provider + .table_statistics("empty_table", current_snapshot) + .unwrap() + .unwrap(); + assert_eq!(empty_stats.row_count, 0); + + // Single column table + let conn = provider.get_connection(); + conn.execute_batch( + r#" + CREATE TABLE single_col (value INTEGER); + INSERT INTO single_col VALUES (1), (2), (3); + "#, + ) + .unwrap(); + + let single_snapshot = provider.current_snapshot().unwrap(); + let single_stats = provider + .table_statistics("single_col", single_snapshot) + .unwrap() + .unwrap(); + assert_eq!(single_stats.column_statistics.len(), 1); + assert_eq!(single_stats.row_count, 3); + assert_eq!(single_stats.column_statistics[0].name, "value"); + + // Special characters in payload + let (table_id, age_column_id) = { + let conn = provider.get_connection(); + conn.execute_batch( + r#" + CREATE TABLE test_table (id INTEGER, age INTEGER); + INSERT INTO test_table VALUES (1, 25), (2, 30); + "#, + ) + .unwrap(); + let table_id: i64 = conn + .query_row( + r#" + SELECT table_id FROM __ducklake_metadata_metalake.main.ducklake_table dt + INNER JOIN __ducklake_metadata_metalake.main.ducklake_schema ds ON dt.schema_id = ds.schema_id + WHERE ds.schema_name = current_schema() AND dt.table_name = 'test_table'; + "#, + [], + |row| row.get(0), + ) + .unwrap(); + let age_column_id: i64 = conn + .query_row( + r#" + SELECT column_id + FROM __ducklake_metadata_metalake.main.ducklake_column + WHERE table_id = ? 
AND column_name = 'age'; + "#, + [table_id], + |row| row.get(0), + ) + .unwrap(); + (table_id, age_column_id) + }; + + let special_payload = + r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; + provider + .update_table_column_stats(age_column_id, table_id, "special_test", special_payload) + .unwrap(); + let retrieved: String = { + let conn = provider.get_connection(); + conn.query_row( + r#" + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' + AND end_snapshot IS NULL; + "#, + [age_column_id, table_id], + |row| row.get(0), + ) + .unwrap() + }; + assert_eq!(retrieved, special_payload); + + // Large payload + let large_histogram: Vec = (0..1000).collect(); + let large_payload = json!({ + "buckets": large_histogram, + "metadata": "x".repeat(1000) + }) + .to_string(); + provider + .update_table_column_stats(age_column_id, table_id, "large_histogram", &large_payload) + .unwrap(); + let new_snapshot = provider.current_snapshot().unwrap(); + let large_stats = provider + .table_statistics("test_table", new_snapshot) + .unwrap() + .unwrap(); + let age_stats = large_stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + let large_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "large_histogram") + .unwrap(); + assert!(large_stat.data.to_string().len() > 1000); +} + +#[test] +fn test_schema_edge_cases() { + // Test schema fetching with nullable/non-nullable columns and complex types. 
+ let (_temp_dir, mut provider) = create_test_catalog(false); + let conn = provider.get_connection(); + + // Mixed nullable and non-nullable columns + conn.execute_batch( + r#" + CREATE TABLE mixed_nulls ( + id INTEGER NOT NULL, + optional_name VARCHAR, + required_age INTEGER NOT NULL, + optional_value DOUBLE + ); + "#, + ) + .unwrap(); + + let mixed_schema = provider.current_schema(None, "mixed_nulls").unwrap(); + assert_eq!(mixed_schema.fields().len(), 4); + assert!(!mixed_schema.field_with_name("id").unwrap().is_nullable()); + assert!( + mixed_schema + .field_with_name("optional_name") + .unwrap() + .is_nullable() + ); + assert!( + !mixed_schema + .field_with_name("required_age") + .unwrap() + .is_nullable() + ); + assert!( + mixed_schema + .field_with_name("optional_value") + .unwrap() + .is_nullable() + ); + + // Complex types + let conn = provider.get_connection(); + conn.execute_batch( + r#" + CREATE TABLE complex_types ( + tiny_col TINYINT, + small_col SMALLINT, + int_col INTEGER, + big_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + time_col TIME, + timestamp_col TIMESTAMP, + blob_col BLOB, + bool_col BOOLEAN + ); + "#, + ) + .unwrap(); + + let complex_schema = provider.current_schema(None, "complex_types").unwrap(); + assert_eq!(complex_schema.fields().len(), 11); + assert!(matches!( + complex_schema + .field_with_name("tiny_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int8 + )); + assert!(matches!( + complex_schema + .field_with_name("small_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Int16 + )); + assert!(matches!( + complex_schema + .field_with_name("float_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Float32 + )); + assert!(matches!( + complex_schema + .field_with_name("date_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Date32 + )); + assert!(matches!( + complex_schema + .field_with_name("time_col") + .unwrap() + 
.data_type(), + &duckdb::arrow::datatypes::DataType::Time64(_) + )); + assert!(matches!( + complex_schema + .field_with_name("blob_col") + .unwrap() + .data_type(), + &duckdb::arrow::datatypes::DataType::Binary + )); +} + +#[test] +fn test_concurrent_snapshot_isolation() { + // Test statistics with special characters and edge case JSON values. + let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + let special_payload = + r#"{"value": "test\"with\\special\nchars", "unicode": "测试", "empty": ""}"#; + let result = provider.update_table_column_stats( + age_column_id, + table_id, + "special_test", + special_payload, + ); + + assert!(result.is_ok()); + + let retrieved_payload: String = { + let conn = provider.get_connection(); + conn.query_row( + r#" + SELECT payload + FROM __ducklake_metadata_metalake.main.ducklake_table_column_adv_stats + WHERE column_id = ? AND table_id = ? AND stats_type = 'special_test' + AND end_snapshot IS NULL; + "#, + [age_column_id, table_id], + |row| row.get(0), + ) + .unwrap() + }; + + assert_eq!(retrieved_payload, special_payload); +} + +#[test] +fn test_large_statistics_payload() { + // Test handling of large statistics payloads. 
+ let (_temp_dir, mut provider, table_id, age_column_id) = create_test_catalog_with_data(); + + let large_histogram: Vec = (0..1000).collect(); + let large_payload = json!({ + "buckets": large_histogram, + "metadata": "x".repeat(1000) + }) + .to_string(); + + let result = provider.update_table_column_stats( + age_column_id, + table_id, + "large_histogram", + &large_payload, + ); + + assert!(result.is_ok()); + + let current_snapshot = provider.current_snapshot().unwrap(); + let stats = provider + .table_statistics("test_table", current_snapshot) + .unwrap() + .unwrap(); + + let age_stats = stats + .column_statistics + .iter() + .find(|cs| cs.name == "age") + .unwrap(); + + let large_stat = age_stats + .advanced_stats + .iter() + .find(|s| s.stats_type == "large_histogram") + .expect("Should have large_histogram stat"); + + assert!(large_stat.data.to_string().len() > 1000); +} + +#[test] +fn test_mixed_null_and_non_null_columns() { + // Test schema fetching with mixed nullable and non-nullable columns. 
+ let (_temp_dir, mut provider) = create_test_catalog(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE mixed_nulls ( + id INTEGER NOT NULL, + optional_name VARCHAR, + required_age INTEGER NOT NULL, + optional_value DOUBLE + ); + "#, + ) + .unwrap(); + + let schema = provider.current_schema(None, "mixed_nulls").unwrap(); + + assert_eq!(schema.fields().len(), 4); + + let id_field = schema.field_with_name("id").unwrap(); + assert!(!id_field.is_nullable()); + + let optional_name_field = schema.field_with_name("optional_name").unwrap(); + assert!(optional_name_field.is_nullable()); + + let required_age_field = schema.field_with_name("required_age").unwrap(); + assert!(!required_age_field.is_nullable()); + + let optional_value_field = schema.field_with_name("optional_value").unwrap(); + assert!(optional_value_field.is_nullable()); +} + +#[test] +fn test_schema_with_complex_types() { + // Test schema fetching with various complex and edge case data types. 
+ let (_temp_dir, mut provider) = create_test_catalog(false); + let conn = provider.get_connection(); + + conn.execute_batch( + r#" + CREATE TABLE complex_types ( + tiny_col TINYINT, + small_col SMALLINT, + int_col INTEGER, + big_col BIGINT, + float_col FLOAT, + double_col DOUBLE, + date_col DATE, + time_col TIME, + timestamp_col TIMESTAMP, + blob_col BLOB, + bool_col BOOLEAN + ); + "#, + ) + .unwrap(); + + let schema = provider.current_schema(None, "complex_types").unwrap(); + + assert_eq!(schema.fields().len(), 11); + + assert!(matches!( + schema.field_with_name("tiny_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int8 + )); + assert!(matches!( + schema.field_with_name("small_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int16 + )); + assert!(matches!( + schema.field_with_name("int_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int32 + )); + assert!(matches!( + schema.field_with_name("big_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Int64 + )); + assert!(matches!( + schema.field_with_name("float_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Float32 + )); + assert!(matches!( + schema.field_with_name("double_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Float64 + )); + assert!(matches!( + schema.field_with_name("date_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Date32 + )); + assert!(matches!( + schema.field_with_name("time_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Time64(_) + )); + assert!(matches!( + schema.field_with_name("timestamp_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Timestamp(_, _) + )); + assert!(matches!( + schema.field_with_name("blob_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Binary + )); + assert!(matches!( + schema.field_with_name("bool_col").unwrap().data_type(), + &duckdb::arrow::datatypes::DataType::Boolean + )); +} diff --git 
a/optd/storage/Cargo.toml b/optd/storage/Cargo.toml deleted file mode 100644 index a23a2b5..0000000 --- a/optd/storage/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "optd-storage" -version.workspace = true -edition.workspace = true -repository.workspace = true - -[dependencies] diff --git a/optd/storage/src/lib.rs b/optd/storage/src/lib.rs deleted file mode 100644 index b4a7cf8..0000000 --- a/optd/storage/src/lib.rs +++ /dev/null @@ -1,269 +0,0 @@ -#![allow(dead_code)] - -use std::collections::BTreeSet; - -enum CompactionType { - MergeAdjacentTables, - RewriteDeletes, -} - -enum CleanupType { - OldFiles, - OrphanedFiles, -} - -struct DuckLakeTag { - key: String, - value: String, -} - -struct DuckLakeSchemaSetting { - schema_id: usize, - tag: DuckLakeTag, -} - -struct DuckLakeTableSetting { - schema_id: usize, - tag: DuckLakeTag, -} - -struct DuckLakeMetadata { - tags: Vec, - schema_settings: Vec, - table_settings: Vec, -} - -struct DuckLakeSchemaInfo { - id: usize, - uuid: String, - name: String, - path: String, - tags: Vec, -} - -struct DuckLakeColumnInfo { - /// Field index. - id: usize, - name: String, - typ: String, - // TODO: switch to value type? - initial_default: String, - default_value: String, - nulls_allowed: bool, - children: Vec, - tags: Vec, -} - -struct DuckLakeInlinedTableInfo { - table_name: String, - schema_version: usize, -} - -struct DuckLakeTableInfo { - /// Table index. - id: usize, - /// Schema index. 
- schema_id: usize, - uuid: String, - name: String, - columns: Vec, - tags: Vec, - inlined_data_tables: Vec, -} - -struct DuckLakeColumnStatsInfo { - column_id: usize, - value_count: String, - null_count: String, - column_size_bytes: String, - min_val: String, - max_val: String, - contains_nan: String, - extra_stats: String, -} - -struct DuckLakeFilePartitionInfo { - partition_column_index: usize, - partition_value: String, -} - -struct DuckLakePartialFileInfo { - snapshot_id: usize, - max_row_count: usize, -} - -struct DuckLakeFileInfo { - // DataFileIndex, - id: usize, - // TableIndex - table_id: usize, - file_name: String, - row_count: usize, - file_size_bytes: usize, - footer_size: Option, - row_id_start: Option, - partition_id: Option, - begin_snapshot: Option, - max_partial_file_snapshot: Option, - encryption_key: Option, - mapping_id: usize, - column_stats: Vec, - partition_values: Vec, - partial_file_info: Vec, -} - -// struct DuckLakeInlinedData { -// data: Box, -// column_stats: BTreeMap, -// } - -// struct DuckLakeInlinedDataDeletes { -// rows: BTreeSet, -// } - -// struct DuckLakeInlinedDataInfo { -// table_id: usize, -// row_id_start: usize, -// data: Option>, -// } - -struct DuckLakeDeletedInlinedDataInfo { - table_id: usize, - table_name: String, - deleted_row_ids: Vec, -} - -struct DuckLakeDeleteFileInfo { - id: usize, - table_id: usize, - data_file_id: usize, - path: String, - delete_count: usize, - file_size_bytes: usize, - footer_size: usize, - encryption_key: String, -} - -struct DuckLakePartitionFieldInfo { - // default = 0 - partition_key_index: usize, - field_id: usize, - transform: String, -} - -struct DuckLakePartitionInfo { - id: Option, - table_id: usize, - fields: Vec, -} - -struct DuckLakeGlobalColumnStatsInfo { - column_id: usize, - contains_null: bool, - has_contains_null: bool, - contains_nan: bool, - has_contains_nan: bool, - min_val: String, - has_min: bool, - // TODO(yuchen): should this be Option? 
- max_val: String, - has_max: bool, - extra_stats: String, - has_extra_stats: bool, -} - -struct DuckLakeGlobalStatsInfo { - table_id: usize, - initialized: bool, - record_count: usize, - next_row_id: usize, - table_size_bytes: usize, - column_stats: Vec, -} - -struct SnapshotChangeInfo { - changes_made: String, -} - -struct SnapshotDeletedFromFiles { - /// DataFileIndex - deleted_from_files: BTreeSet, -} - -struct DuckLakeSnapshotInfo { - id: usize, - // TODO: timestamp_tz_t - time: String, - schema_version: usize, - change_info: SnapshotChangeInfo, - author: String, - commit_message: String, - commit_extra_info: String, -} - -struct DuckLakeViewInfo { - id: usize, - schema_id: usize, - uuid: String, - name: String, - dialect: String, - column_aliases: Vec, - sql: String, - tags: Vec, -} - -struct DuckLakeTagInfo { - id: usize, - key: String, - value: String, -} - -struct DuckLakeColumnTagInfo { - table_id: usize, - field_index: usize, - key: String, - value: String, -} - -struct DuckLakeDroppedColumn { - table_id: usize, - field_id: usize, -} - -struct DuckLakeNewColumn { - table_id: usize, - column_info: DuckLakeColumnInfo, - parent_index: Option, -} - -struct DuckLakeCatalogInfo { - schemas: Vec, - tables: Vec, - views: Vec, - partitions: Vec, -} - -struct DuckLakeFileData { - path: String, - encryption_key: String, - file_size_bytes: usize, - footer_size: Option, -} - -enum DuckLakeDataType { - DataFile, - InlinedData, - TransactionLocalInlinedData, -} - -struct DuckLakeFileListEntry { - file: DuckLakeFileData, - delete_file: DuckLakeFileData, - row_id_start: Option, - snapshot_id: Option, - max_row_count: Option, - snapshot_filter: Option, - mapping_id: usize, - /// default: DuckLakeDataType::DataFile; - data_type: DuckLakeDataType, -}