uast_extract does not allow extracting nested properties
There is currently no way to extract nested properties of a node, which would be useful for getting, for example, the line and column of the position.
It should be done with https://github.com/src-d/gitbase/issues/701
It's doable, but we have to discuss what format we want to return the extracted data in. Here is one big object in JSON:
[
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 8,
"line": 4,
"offset": 31
},
"start": {
"@type": "uast:Position",
"col": 5,
"line": 4,
"offset": 28
}
},
"@role": [
"Function",
"Declaration",
"Name",
"Identifier"
],
"@token": "sum",
"@type": "FunctionDef",
"args": {
"@pos": {
"@type": "uast:Positions"
},
"@role": [
"Function",
"Declaration",
"Argument",
"Incomplete"
],
"@type": "arguments",
"args": [
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 10,
"line": 4,
"offset": 33
},
"start": {
"@type": "uast:Position",
"col": 9,
"line": 4,
"offset": 32
}
},
"@role": [
"Function",
"Declaration",
"Argument",
"Name"
],
"@token": "a",
"@type": "arg",
"annotation": null,
"noops_previous": {
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 1,
"line": 3,
"offset": 23
},
"start": {
"@type": "uast:Position",
"col": 1,
"line": 1,
"offset": 0
}
},
"@role": [
"Noop"
],
"@type": "PreviousNoops",
"lines": [
{
"@pos": {
"@type": "uast:Positions",
"start": {
"@type": "uast:Position",
"col": 1,
"line": 2,
"offset": 1
}
},
"@role": [
"Comment",
"Noop"
],
"@token": "#!/usr/bin/env python\\n",
"@type": "NoopLine"
}
]
}
},
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 13,
"line": 4,
"offset": 36
},
"start": {
"@type": "uast:Position",
"col": 12,
"line": 4,
"offset": 35
}
},
"@role": [
"Function",
"Declaration",
"Argument",
"Name"
],
"@token": "b",
"@type": "arg",
"annotation": null
}
]
},
"body": {
"@role": [
"Function",
"Declaration",
"Body"
],
"@type": "FunctionDef.body",
"body_stmts": [
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 8,
"line": 5,
"offset": 46
},
"start": {
"@type": "uast:Position",
"col": 2,
"line": 5,
"offset": 40
}
},
"@role": [
"Return",
"Statement"
],
"@token": "return",
"@type": "Return",
"value": {
"@pos": {
"@type": "uast:Positions",
"start": {
"@type": "uast:Position",
"col": 9,
"line": 5,
"offset": 47
}
},
"@role": [
"Expression",
"Binary"
],
"@type": "BinOp",
"left": {
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 10,
"line": 5,
"offset": 48
},
"start": {
"@type": "uast:Position",
"col": 9,
"line": 5,
"offset": 47
}
},
"@role": [
"Identifier",
"Expression",
"Binary",
"Left"
],
"@token": "a",
"@type": "Name",
"ctx": "Load"
},
"op": {
"@pos": {
"@type": "uast:Positions"
},
"@role": [
"Operator",
"Arithmetic",
"Add",
"Binary"
],
"@token": "+",
"@type": "Add"
},
"right": {
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 14,
"line": 5,
"offset": 52
},
"start": {
"@type": "uast:Position",
"col": 13,
"line": 5,
"offset": 51
}
},
"@role": [
"Identifier",
"Expression",
"Binary",
"Right"
],
"@token": "b",
"@type": "Name",
"ctx": "Load"
}
}
}
]
},
"decorator_list": {
"@role": [
"Function",
"Declaration",
"Incomplete"
],
"@type": "FunctionDef.decorators",
"decorators": []
},
"returns": null
},
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 10,
"line": 5,
"offset": 48
},
"start": {
"@type": "uast:Position",
"col": 9,
"line": 5,
"offset": 47
}
},
"@role": [
"Identifier",
"Expression",
"Binary",
"Left"
],
"@token": "a",
"@type": "Name",
"ctx": "Load"
},
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 14,
"line": 5,
"offset": 52
},
"start": {
"@type": "uast:Position",
"col": 13,
"line": 5,
"offset": 51
}
},
"@role": [
"Identifier",
"Expression",
"Binary",
"Right"
],
"@token": "b",
"@type": "Name",
"ctx": "Load"
},
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 10,
"line": 7,
"offset": 63
},
"start": {
"@type": "uast:Position",
"col": 7,
"line": 7,
"offset": 60
}
},
"@role": [
"Identifier",
"Expression",
"Call",
"Callee"
],
"@token": "sum",
"@type": "Name",
"ctx": "Load"
},
{
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 6,
"line": 7,
"offset": 59
},
"start": {
"@type": "uast:Position",
"col": 1,
"line": 7,
"offset": 54
}
},
"@role": [
"Identifier",
"Expression",
"Call",
"Callee"
],
"@token": "print",
"@type": "Name",
"ctx": "Load",
"noops_previous": {
"@pos": {
"@type": "uast:Positions",
"end": {
"@type": "uast:Position",
"col": 1,
"line": 6,
"offset": 53
},
"start": {
"@type": "uast:Position",
"col": 1,
"line": 6,
"offset": 53
}
},
"@role": [
"Noop"
],
"@type": "PreviousNoops",
"lines": []
}
}
]
Same as we're returning now, right? As for the syntax, maybe JSONPath? We already use it in the JSON_EXTRACT UDF.
@erizocosmico - this is what we get from bblfsh (the whole node) converted to JSON. But the question is about extract. So far, extract returns a slice of all extracted sub-node values, e.g.:
["FunctionDef","Name","Name","Name","Name"]
["sum","a","b","sum","print"]
[["Function","Declaration","Name","Identifier"],["Identifier","Expression","Binary","Left"],["Identifier","Expression","Binary","Right"],["Identifier","Expression","Call","Callee"],["Identifier","Expression","Call","Callee"]]
@ajnavarro what shall we do with this?
I would leave it here just to keep it in mind.