gitbase icon indicating copy to clipboard operation
gitbase copied to clipboard

uast_extract does not allow extracting nested properties

Open erizocosmico opened this issue 6 years ago • 6 comments

There is currently no way to extract nested properties of a node, which would be useful for getting, for example, the line and column of the position.

erizocosmico avatar Feb 19 '19 09:02 erizocosmico

it should be done with https://github.com/src-d/gitbase/issues/701

kuba-- avatar Feb 28 '19 12:02 kuba--

It's doable, but we have to discuss in what format we want to return the extracted data. Here is one big object in JSON:

[
    {
        "@pos": {
            "@type": "uast:Positions",
            "end": {
                "@type": "uast:Position",
                "col": 8,
                "line": 4,
                "offset": 31
            },
            "start": {
                "@type": "uast:Position",
                "col": 5,
                "line": 4,
                "offset": 28
            }
        },
        "@role": [
            "Function",
            "Declaration",
            "Name",
            "Identifier"
        ],
        "@token": "sum",
        "@type": "FunctionDef",
        "args": {
            "@pos": {
                "@type": "uast:Positions"
            },
            "@role": [
                "Function",
                "Declaration",
                "Argument",
                "Incomplete"
            ],
            "@type": "arguments",
            "args": [
                {
                    "@pos": {
                        "@type": "uast:Positions",
                        "end": {
                            "@type": "uast:Position",
                            "col": 10,
                            "line": 4,
                            "offset": 33
                        },
                        "start": {
                            "@type": "uast:Position",
                            "col": 9,
                            "line": 4,
                            "offset": 32
                        }
                    },
                    "@role": [
                        "Function",
                        "Declaration",
                        "Argument",
                        "Name"
                    ],
                    "@token": "a",
                    "@type": "arg",
                    "annotation": null,
                    "noops_previous": {
                        "@pos": {
                            "@type": "uast:Positions",
                            "end": {
                                "@type": "uast:Position",
                                "col": 1,
                                "line": 3,
                                "offset": 23
                            },
                            "start": {
                                "@type": "uast:Position",
                                "col": 1,
                                "line": 1,
                                "offset": 0
                            }
                        },
                        "@role": [
                            "Noop"
                        ],
                        "@type": "PreviousNoops",
                        "lines": [
                            {
                                "@pos": {
                                    "@type": "uast:Positions",
                                    "start": {
                                        "@type": "uast:Position",
                                        "col": 1,
                                        "line": 2,
                                        "offset": 1
                                    }
                                },
                                "@role": [
                                    "Comment",
                                    "Noop"
                                ],
                                "@token": "#!/usr/bin/env python\\n",
                                "@type": "NoopLine"
                            }
                        ]
                    }
                },
                {
                    "@pos": {
                        "@type": "uast:Positions",
                        "end": {
                            "@type": "uast:Position",
                            "col": 13,
                            "line": 4,
                            "offset": 36
                        },
                        "start": {
                            "@type": "uast:Position",
                            "col": 12,
                            "line": 4,
                            "offset": 35
                        }
                    },
                    "@role": [
                        "Function",
                        "Declaration",
                        "Argument",
                        "Name"
                    ],
                    "@token": "b",
                    "@type": "arg",
                    "annotation": null
                }
            ]
        },
        "body": {
            "@role": [
                "Function",
                "Declaration",
                "Body"
            ],
            "@type": "FunctionDef.body",
            "body_stmts": [
                {
                    "@pos": {
                        "@type": "uast:Positions",
                        "end": {
                            "@type": "uast:Position",
                            "col": 8,
                            "line": 5,
                            "offset": 46
                        },
                        "start": {
                            "@type": "uast:Position",
                            "col": 2,
                            "line": 5,
                            "offset": 40
                        }
                    },
                    "@role": [
                        "Return",
                        "Statement"
                    ],
                    "@token": "return",
                    "@type": "Return",
                    "value": {
                        "@pos": {
                            "@type": "uast:Positions",
                            "start": {
                                "@type": "uast:Position",
                                "col": 9,
                                "line": 5,
                                "offset": 47
                            }
                        },
                        "@role": [
                            "Expression",
                            "Binary"
                        ],
                        "@type": "BinOp",
                        "left": {
                            "@pos": {
                                "@type": "uast:Positions",
                                "end": {
                                    "@type": "uast:Position",
                                    "col": 10,
                                    "line": 5,
                                    "offset": 48
                                },
                                "start": {
                                    "@type": "uast:Position",
                                    "col": 9,
                                    "line": 5,
                                    "offset": 47
                                }
                            },
                            "@role": [
                                "Identifier",
                                "Expression",
                                "Binary",
                                "Left"
                            ],
                            "@token": "a",
                            "@type": "Name",
                            "ctx": "Load"
                        },
                        "op": {
                            "@pos": {
                                "@type": "uast:Positions"
                            },
                            "@role": [
                                "Operator",
                                "Arithmetic",
                                "Add",
                                "Binary"
                            ],
                            "@token": "+",
                            "@type": "Add"
                        },
                        "right": {
                            "@pos": {
                                "@type": "uast:Positions",
                                "end": {
                                    "@type": "uast:Position",
                                    "col": 14,
                                    "line": 5,
                                    "offset": 52
                                },
                                "start": {
                                    "@type": "uast:Position",
                                    "col": 13,
                                    "line": 5,
                                    "offset": 51
                                }
                            },
                            "@role": [
                                "Identifier",
                                "Expression",
                                "Binary",
                                "Right"
                            ],
                            "@token": "b",
                            "@type": "Name",
                            "ctx": "Load"
                        }
                    }
                }
            ]
        },
        "decorator_list": {
            "@role": [
                "Function",
                "Declaration",
                "Incomplete"
            ],
            "@type": "FunctionDef.decorators",
            "decorators": []
        },
        "returns": null
    },
    {
        "@pos": {
            "@type": "uast:Positions",
            "end": {
                "@type": "uast:Position",
                "col": 10,
                "line": 5,
                "offset": 48
            },
            "start": {
                "@type": "uast:Position",
                "col": 9,
                "line": 5,
                "offset": 47
            }
        },
        "@role": [
            "Identifier",
            "Expression",
            "Binary",
            "Left"
        ],
        "@token": "a",
        "@type": "Name",
        "ctx": "Load"
    },
    {
        "@pos": {
            "@type": "uast:Positions",
            "end": {
                "@type": "uast:Position",
                "col": 14,
                "line": 5,
                "offset": 52
            },
            "start": {
                "@type": "uast:Position",
                "col": 13,
                "line": 5,
                "offset": 51
            }
        },
        "@role": [
            "Identifier",
            "Expression",
            "Binary",
            "Right"
        ],
        "@token": "b",
        "@type": "Name",
        "ctx": "Load"
    },
    {
        "@pos": {
            "@type": "uast:Positions",
            "end": {
                "@type": "uast:Position",
                "col": 10,
                "line": 7,
                "offset": 63
            },
            "start": {
                "@type": "uast:Position",
                "col": 7,
                "line": 7,
                "offset": 60
            }
        },
        "@role": [
            "Identifier",
            "Expression",
            "Call",
            "Callee"
        ],
        "@token": "sum",
        "@type": "Name",
        "ctx": "Load"
    },
    {
        "@pos": {
            "@type": "uast:Positions",
            "end": {
                "@type": "uast:Position",
                "col": 6,
                "line": 7,
                "offset": 59
            },
            "start": {
                "@type": "uast:Position",
                "col": 1,
                "line": 7,
                "offset": 54
            }
        },
        "@role": [
            "Identifier",
            "Expression",
            "Call",
            "Callee"
        ],
        "@token": "print",
        "@type": "Name",
        "ctx": "Load",
        "noops_previous": {
            "@pos": {
                "@type": "uast:Positions",
                "end": {
                    "@type": "uast:Position",
                    "col": 1,
                    "line": 6,
                    "offset": 53
                },
                "start": {
                    "@type": "uast:Position",
                    "col": 1,
                    "line": 6,
                    "offset": 53
                }
            },
            "@role": [
                "Noop"
            ],
            "@type": "PreviousNoops",
            "lines": []
        }
    }
]

kuba-- avatar Feb 28 '19 18:02 kuba--

Same as we're returning now, right? As for the syntax, maybe JSONPath? We already use it in the JSON_EXTRACT UDF.

erizocosmico avatar Feb 28 '19 22:02 erizocosmico

@erizocosmico - this is what we get from bblfsh (whole node) converted to json. But the question is about extract. So far, extract returns a slice of all extracted sub-node values, e.g.:

["FunctionDef","Name","Name","Name","Name"]

["sum","a","b","sum","print"]

[["Function","Declaration","Name","Identifier"],["Identifier","Expression","Binary","Left"],["Identifier","Expression","Binary","Right"],["Identifier","Expression","Call","Callee"],["Identifier","Expression","Call","Callee"]]

kuba-- avatar Mar 01 '19 10:03 kuba--

@ajnavarro what shall we do with this?

erizocosmico avatar Oct 09 '19 08:10 erizocosmico

I would leave it here just to keep it in mind.

ajnavarro avatar Oct 09 '19 09:10 ajnavarro