add timelines for slow log
Feature Request
Is your feature request related to a problem? Please describe: For now, the format of the slow log is hard to read and comprehend.
{
"start": "15:35:31.156",
"end": "15:35:32.733",
"duration": "1577ms",
"func": "get",
"key": "indexInfo_:_pf01_:_APD01_:_0202250000062470157",
"spans": [
{
"name": "callWithRetry pdpb.PD/GetStore",
"start": "15:35:31.156",
"end": "15:35:31.623",
"duration": "467ms"
},
{
"name": "gRPC pdpb.PD/GetStore",
"start": "15:35:31.156",
"end": "15:35:31.156",
"duration": "0ms"
},
{
"name": "backoff BoPDRPC",
"start": "15:35:31.156",
"end": "15:35:31.239",
"duration": "83ms"
},
{
"name": "gRPC pdpb.PD/GetStore",
"start": "15:35:31.240",
"end": "15:35:31.240",
"duration": "0ms"
},
{
"name": "backoff BoPDRPC",
"start": "15:35:31.240",
"end": "15:35:31.375",
"duration": "135ms"
},
{
"name": "gRPC pdpb.PD/GetStore",
"start": "15:35:31.376",
"end": "15:35:31.376",
"duration": "0ms"
},
{
"name": "backoff BoPDRPC",
"start": "15:35:31.376",
"end": "15:35:31.622",
"duration": "246ms"
},
{
"name": "gRPC pdpb.PD/GetStore",
"start": "15:35:31.622",
"end": "15:35:31.622",
"duration": "0ms"
},
{
"name": "backoff BoPDRPC",
"start": "15:35:31.622",
"end": "N/A",
"duration": "N/A"
},
{
"name": "backoff BoRegionMiss",
"start": "15:35:31.623",
"end": "15:35:31.643",
"duration": "20ms"
},
...
]
}
However, the timeline graph will be more illustrated:

Describe the feature you'd like: Add a tool to produce a simple timeline graph while given a log file.
Describe alternatives you've considered: Use tracing for the future, slow log can't trace the event relationship.
Teachability, Documentation, Adoption, Migration Strategy:
compared with a timeline tool to be displayed in the terminal, maybe a better replacement is a request stats which can analyze the grpc and backoff total time and order all the items in a specific order like the following:
the python script
#!/usr/bin/env python3
#!coding:utf-8
import re
import json
import argparse
def main():
pattern = r'.*SlowLog.*'
slowstr = "SlowLog:"
args = parse_args()
items = []
with open(args.slowlog, encoding = 'utf-8') as f:
for line in f.readlines():
matched = re.match(pattern, line, re.M|re.I)
if matched is not None:
log = json.loads(line[(line.index(slowstr) + len(slowstr)):])
item = {
'req': log['func'],
'start': log['start'],
'total latency': int(log['duration'][:len(log['duration'])-2]),
'total grpc latency': 0,
'total backoff latency': 0,
}
for span in log['spans']:
if 'gRPC tikvpb.Tikv' in span['name'] and span['duration'] != 'N/A':
item['total grpc latency'] += int(span['duration'][:len(span['duration'])-2])
if 'backoff ' in span['name'] and span['duration'] != 'N/A':
item['total backoff latency'] += int(span['duration'][:len(span['duration'])-2])
items.append(item)
if args.order == "total":
items = sorted(items, key=lambda d: d['total latency'], reverse=True)
elif args.order == "grpc":
items = sorted(items, key=lambda d: d['total grpc latency'], reverse=True)
elif args.order == "backoff":
items = sorted(items, key=lambda d: d['total backoff latency'], reverse=True)
else:
print("unsupported order field, use default value: total")
items = sorted(items, key=lambda d: d['total latency'], reverse=True)
fmtStr = "{:<12} {:<14} {:<14} {:<20} {:<20}"
print(fmtStr.format("Request", "Start", "Total Lat(ms)", "Total gRPC Lat(ms)", "Total Backoff Lat(ms)"))
for item in items:
print(fmtStr.format(item['req'], item['start'], item['total latency'], item['total grpc latency'], item['total backoff latency']))
def parse_args():
parser = argparse.ArgumentParser(description="rstats: A TiKV Java Client Request Stats Analyzer")
parser.add_argument(
"--slowlog",
dest="slowlog",
help="slowlog file, default: slow.log",
default="slow.log")
parser.add_argument(
"--order",
dest="order",
help="sort the output, default: total. Accepted value: total, grpc, backoff",
default="total")
return parser.parse_args()
if __name__ == '__main__':
main()
➜ ./rstat.py --order grpc
Request Start Total Lat(ms) Total gRPC Lat(ms) Total Backoff Lat(ms)
putIfAbsent 22:50:36.364 868 751 116
putIfAbsent 22:50:36.945 625 571 46
get 05:23:05.893 11 11 0
putIfAbsent 05:23:10.665 23 0 21
This issue is stale because it has been open 30 days with no activity.