database-stream-processor
database-stream-processor copied to clipboard
[JIT] Design document
Document the JIT architecture and API. The doc should serve two purposes:
- Explain the semantics of the bytecode supported by the JIT compiler and the architecture of the compiler. This should give the reader enough information to be able to understand the source code of the compiler.
- Explain how a frontend (e.g., the SQL compiler) and a runtime (e.g., the DBSP pipeline runner) should interact with the JIT.
Topics to cover:
- [ ] String allocation and string lifetimes
- [ ] Block parameters and arguments
- [ ] How to run a JIT program
- [ ] How nested tuples can be implemented using the JIT
- [ ] How to call functions that are not built into the JIT (if there will be any), e.g., functions from the SQL runtime library
- [ ] How datatypes such as DECIMAL can be implemented
- [ ] How to pass tuples as function arguments
How to run a JIT program
I do a lot of that here: running programs from JSON and ingesting CSV into them.
Block parameters
Here's some example JSON:
{
"nodes": {
"n1": {
"Source": {
"layout": 3
}
},
"n2": {
"Source": {
"layout": 4
}
},
"n3": {
"IndexByColumn": {
"input": 1,
"input_layout": 3,
"key_column": 0,
"discarded_values": [
2,
3,
5,
6,
7,
8,
9,
10,
11
],
"key_layout": 5,
"value_layout": 6
}
},
"n4": {
"IndexByColumn": {
"input": 2,
"input_layout": 4,
"key_column": 1,
"discarded_values": [
2,
3,
4,
5,
6,
7,
8,
9
],
"key_layout": 5,
"value_layout": 7
}
},
"n5": {
"JoinCore": {
"lhs": 4,
"rhs": 3,
"join_fn": {
"args": [
{
"id": 1,
"layout": 5,
"flags": "input"
},
{
"id": 2,
"layout": 7,
"flags": "input"
},
{
"id": 3,
"layout": 6,
"flags": "input"
},
{
"id": 4,
"layout": 8,
"flags": "output"
},
{
"id": 5,
"layout": 1,
"flags": "output"
}
],
"ret": "Unit",
"entry_block": 1,
"blocks": {
"bb1": {
"id": 1,
"params": [],
"body": [
[
6,
{
"Load": {
"source": 2,
"source_layout": 7,
"column": 0,
"column_type": "Timestamp"
}
}
],
[
7,
{
"Load": {
"source": 1,
"source_layout": 5,
"column": 0,
"column_type": "F64"
}
}
],
[
8,
{
"Store": {
"target": 4,
"target_layout": 8,
"column": 0,
"value": {
"Expr": 6
},
"value_type": "Timestamp"
}
}
],
[
9,
{
"Store": {
"target": 4,
"target_layout": 8,
"column": 1,
"value": {
"Expr": 7
},
"value_type": "F64"
}
}
],
[
10,
{
"IsNull": {
"target": 3,
"target_layout": 6,
"column": 0
}
}
],
[
11,
{
"SetNull": {
"target": 4,
"target_layout": 8,
"column": 2,
"is_null": {
"Expr": 10
}
}
}
]
],
"terminator": {
"Branch": {
"cond": {
"Expr": 10
},
"truthy": 3,
"true_params": [],
"falsy": 2,
"false_params": []
}
}
},
"bb2": {
"id": 2,
"params": [],
"body": [
[
12,
{
"Load": {
"source": 3,
"source_layout": 6,
"column": 0,
"column_type": "String"
}
}
],
[
13,
{
"Copy": {
"value": 12,
"value_ty": "String"
}
}
],
[
14,
{
"Store": {
"target": 4,
"target_layout": 8,
"column": 2,
"value": {
"Expr": 13
},
"value_type": "String"
}
}
]
],
"terminator": {
"Jump": {
"target": 3,
"params": []
}
}
},
"bb3": {
"id": 3,
"params": [],
"body": [
[
15,
{
"IsNull": {
"target": 3,
"target_layout": 6,
"column": 1
}
}
],
[
16,
{
"SetNull": {
"target": 4,
"target_layout": 8,
"column": 3,
"is_null": {
"Expr": 15
}
}
}
]
],
"terminator": {
"Branch": {
"cond": {
"Expr": 15
},
"truthy": 5,
"true_params": [],
"falsy": 4,
"false_params": []
}
}
},
"bb4": {
"id": 4,
"params": [],
"body": [
[
17,
{
"Load": {
"source": 3,
"source_layout": 6,
"column": 1,
"column_type": "String"
}
}
],
[
18,
{
"Copy": {
"value": 17,
"value_ty": "String"
}
}
],
[
19,
{
"Store": {
"target": 4,
"target_layout": 8,
"column": 3,
"value": {
"Expr": 18
},
"value_type": "String"
}
}
]
],
"terminator": {
"Jump": {
"target": 5,
"params": []
}
}
},
"bb5": {
"id": 5,
"params": [],
"body": [],
"terminator": {
"Return": {
"value": {
"Imm": "Unit"
}
}
}
}
}
},
"key_layout": 8,
"value_layout": 1,
"output_kind": "Set"
}
},
"n6": {
"Sink": {
"input": 5
}
}
}
}