Commit 1bc84ee8 authored by Zhiqiang Xie

parsing scripts, a no-tracing ("none") baseline, and script tweaks for the faulty and event horizon experiments

parent 5b866e5a
Branch: zxie
Merge request !12 "up to date"
@@ -10,6 +10,9 @@ SERVERS = {'brain15': 48, 'brain16': 48, 'brain17': 48, 'brain18': 48, 'brain19'
'pinky11': 16, 'pinky12': 16}
# pinky09 is for the client and pinky10 (139.19.171.33) is for the collectors
# write the hindsight buffer and sampling settings to the config file (commented out below):
# \" echo -en \\\"cap 25000\\nbuf_length 4096\\naddr 127.0.0.1\\nport 5050\\nlc_addr 127.0.0.1\\nlc_port 5252\\nr_addr 127.0.0.1\\nr_port 5253\\npayload 1000\\nretroactive_sampling_percentage 1.0\\nhead_sampling_probability 0.0\\\" > /etc/hindsight_conf/default.conf;\
TMUX = """
export BENCH_LOG_PATH=$HOME/hindsight-grpc/config/alibaba/benchmark.log
@@ -28,6 +31,7 @@ server_hindsight () {
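# Arguments, as used below (inferred from the call sites; names here are descriptive only):
# $1 service/container name, $2 and $3 published ports, $4 tracer variant,
# $5 value for -c, $6 threshold for --trigger=6, $7 value for -i, $8 host passed to the agent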
extra_l=0
extra_s=0
rate=0
delay=0
if [ $4 = "hs_faulty" ]
then
@@ -37,7 +41,7 @@ server_hindsight () {
extra_s=0.001
fi
n=-n
rate=10
rate=1
tracer=hindsight
else
extra=0
@@ -45,6 +49,12 @@ server_hindsight () {
n=
fi
if [ $4 = "hs_event" ]
then
delay=0
tracer=hindsight
fi
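# hs_event: same hindsight deployment, but the agent additionally gets an explicit -delay
# (0 here) for the event horizon experiments.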
send "docker run -it -v ${HOME}/hindsight-grpc/config/alibaba:/config/ \\
-d --name $1 -p $2:$2 -p $3:$3 --shm-size=2gb --rm tracer bash -c \\
\\"cd build; ./server -a /config/sub_alibaba_addresses.json \\
@@ -52,7 +62,7 @@ server_hindsight () {
-x $tracer -c $5 --trigger=6:$6 --trigger=7:$extra_l --trigger=8:$extra_s -i $7 $n $1 \\
& sleep 5; cd /root/hindsight/agent/; \\
go run cmd/agent2/main.go -lc pinky10:5252 -r pinky10:5253 \\
-serv $1 -port $3 -host $8 -rate $rate\\""
-serv $1 -port $3 -host $8 -rate $rate -delay $delay\\""
}
server_run () {
@@ -63,7 +73,7 @@ server_run () {
else
trigger=0
fi
if [ $4 = "hindsight" ] || [ $4 = "ot-hindsight" ] || [ $4 = "hs_faulty" ]
if [ $4 = "hindsight" ] || [ $4 = "ot-hindsight" ] || [ $4 = "hs_faulty" ] || [ $4 = "hs_event" ]
then
server_hindsight $2 $3 $9 $4 $5 $trigger $7 $10
else
@@ -215,7 +225,7 @@ run_bench () {
sample=0.1
elif [ $1 = "hs_faulty" ]
then
limit=30000
limit=10000
fi
send "./client -a /config/sub_alibaba_addresses.json \\
-t /config/sub_alibaba_topology.json \\
@@ -223,10 +233,13 @@ run_bench () {
sleep $4
# make sure that the client has been shut down
send C-c
# send C-c
send "tail -n 4 temp.log >> /config/benchmark.log"
if [ $1 = "hindsight" ] || [ $1 = "hs_faulty" ]
if [ $1 = "none" ]
then
:
elif [ $1 = "hindsight" ] || [ $1 = "hs_faulty" ]
then
log_net 1
tmux select-pane -t 2
@@ -234,7 +247,7 @@ run_bench () {
sleep 10
if [ $1 = "hs_faulty" ]
then
extract=12
extract=20
else
extract=5
fi
@@ -257,18 +270,21 @@ run_all () {
do
for i in {50..700..50}
do
for tracer in ot-jaeger-head-1 ot-jaeger-head-10 hindsight ot-jaeger ot-jaeger-simple
for tracer in none ot-jaeger-head-1 ot-jaeger-head-10 hindsight ot-jaeger ot-jaeger-simple
do
tmux select-pane -t 1
send "kill_servers"
sleep 50
sleep 20
let "throughput = $i * 20 "
echo "bench $tracer with setting $throughput" >> $BENCH_LOG_PATH
tmux select-pane -t 0
send "run_collectors $tracer"
sleep 5
if [ $tracer != "none" ]
then
tmux select-pane -t 0
send "run_collectors $tracer"
sleep 5
fi
tmux select-pane -t 1
let "wait = 10000 / $i + 50"
@@ -276,18 +292,26 @@ run_all () {
then
let "wait = wait + 50"
send "deploy_servers ot-jaeger 1024 0.01 simple"
elif [ $tracer = "ot-jaeger-head-1" ] || [ $tracer = "ot-jaeger-head-10" ]
elif [ $tracer = "ot-jaeger-head-1" ] || [ $tracer = "ot-jaeger-head-10" ] || [ $tracer = "ot-jaeger" ]
then
send "deploy_servers ot-jaeger 1024 0.01"
else
send "deploy_servers $tracer 1024 0.01"
sleep 50
fi
sleep 30
if [ $tracer != "none" ]
then
sleep 30
fi
tmux select-pane -t 0
send "run_bench $tracer $i $j $wait"
let "wait = wait + 50"
if [ $tracer != "none" ]
then
let "wait = wait + 20"
fi
sleep $wait
done
done
@@ -296,11 +320,11 @@ run_all () {
run_faulty () {
for i in {500..1500..50}
for i in {50..1400..50}
do
tmux select-pane -t 1
send "kill_servers"
sleep 50
sleep 20
let "throughput = $i * 20"
echo "bench hs_faulty with setting $throughput" >> $BENCH_LOG_PATH
@@ -311,12 +335,12 @@ run_faulty () {
tmux select-pane -t 1
send "deploy_servers hs_faulty 1024 0.01"
sleep 80
sleep 70
tmux select-pane -t 0
let "wait = 30000 / $i + 10"
let "wait = 10000 / $i + 15"
send "run_bench hs_faulty $i 0 $wait"
let "wait = wait + 50"
let "wait = wait + 20"
sleep $wait
done
}
......
@@ -3,7 +3,7 @@ import sys
import re
data_dict = {m: {t: {i: [] for i in range(1000, 15001, 1000)}
for t in ["hindsight", "ot-jaeger", "ot-jaeger-simple", "ot-jaeger-head-1", "ot-jaeger-head-10"]}
for t in ["hindsight", "ot-jaeger", "ot-jaeger-simple", "ot-jaeger-head-1", "ot-jaeger-head-10", "none"]}
for m in ["complete_throughput", "complete_rate", "extra_bandwidth", "avg_latency", "achieved_throughput"]}
lines = sys.stdin.readlines()
@@ -25,32 +25,46 @@ while index < len(lines):
index += 1
continue
try:
assert "net_stat" in lines[index+1]
net_start = int(lines[index+2])
assert "Duration:" in lines[index+3]
assert "Total requests:" in lines[index+4]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+5]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+6])
avg_, min_, max_ = latency.groups()
assert "net_stat" in lines[index+7]
net_traffic = int(lines[index+8]) - net_start
est_time = 200000 / throughput
# MBytes/s
bandwidth = net_traffic / est_time / 1024 / 1024
except:
index += 1
continue
if tracer == "none":
try:
assert "Duration:" in lines[index+1]
assert "Total requests:" in lines[index+2]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+3]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+4])
avg_, min_, max_ = latency.groups()
bandwidth = 0
index += 5
except:
index += 1
continue
else:
try:
assert "net_stat" in lines[index+1]
net_start = int(lines[index+2])
assert "Duration:" in lines[index+3]
assert "Total requests:" in lines[index+4]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+5]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+6])
avg_, min_, max_ = latency.groups()
assert "net_stat" in lines[index+7]
net_traffic = int(lines[index+8]) - net_start
est_time = 200000 / throughput
# MBytes/s
bandwidth = net_traffic / est_time / 1024 / 1024
index += 9
except:
index += 1
continue
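# Expected benchmark.log block per run (matching the asserts above):
#   net_stat
#   <bytes before the run>
#   Duration: ...
#   Total requests: ...
#   overall throughput: <x> requests/s
#   Average / Max / Min latency of a request is: <avg>/<max>/<min> ms
#   net_stat
#   <bytes after the run>
# For the "none" baseline the two net_stat/byte pairs are absent and bandwidth is recorded as 0.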
data_dict["extra_bandwidth"][tracer][bound_throughput].append(bandwidth)
data_dict["avg_latency"][tracer][bound_throughput].append(float(avg_))
data_dict["achieved_throughput"][tracer][bound_throughput].append(
throughput)
index += 9
if "hindsight" == tracer:
try:
assert "Processing hindsight_traces" in lines[index]
......
import pandas as pd
traversal = {i: {} for i in range(1000, 15001, 1000)}
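# traversal[offered throughput][total_agents] -> list of dissemination times (ms);
# offered throughput = client setting * 20, and agents/time come from the
# total_agents and dissemination_time_ms columns of the breadcrumbs CSVs.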
for setting in range(50, 751, 50):
for r in range(1, 5):
try:
with open("../build/breadcrumbs_hindsight_{}_{}".format(setting, r)) as f:
lines = f.readlines()
assert lines[0] == "t,queue,total_agents,dissemination_time_ms\n"
dissemination_time = []
for l in lines[1:]:
agent, time = [int(i) for i in l.split(',')[2:4]]
time = 0.5 if time == 0 else time
if agent in traversal[setting * 20]:
traversal[setting * 20][agent].append(time)
else:
traversal[setting * 20][agent] = [time]
except:
pass
traversal = {k: {a: sum(l)/len(l) for a, l in v.items()}
for k, v in traversal.items()}
indices = [i for i in range(1, 51)]
to_excel = {k: [v[i] if i in v else 0 for i in indices]
for k, v in traversal.items()}
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/breadcrumbs.xlsx')
traversal = {i: {} for i in range(1000, 28001, 1000)}
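# same aggregation for the hs_faulty breadcrumbs: one repetition per setting (file suffix _0),
# settings 50..1400 (offered throughput up to 28000); zero times are kept as 0 here.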
for setting in range(50, 1401, 50):
try:
with open("../build/breadcrumbs_hs_faulty_{}_0".format(setting)) as f:
lines = f.readlines()
assert lines[0] == "t,queue,total_agents,dissemination_time_ms\n"
dissemination_time = []
for l in lines[1:]:
agent, time = [int(i) for i in l.split(',')[2:4]]
if agent in traversal[setting * 20]:
traversal[setting * 20][agent].append(time)
else:
traversal[setting * 20][agent] = [time]
except:
pass
traversal = {k: {a: sum(l)/len(l) for a, l in v.items()}
for k, v in traversal.items()}
max_agents = max([i for v in traversal.values() for i in v])
assert max_agents <= 50
indices = [i for i in range(1, 51)]
to_excel = {k: [v[i] if i in v else 0 for i in indices]
for k, v in traversal.items()}
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/breadcrumbs_faulty.xlsx')
import pandas as pd
import sys
import re
data_dict = {m: {t: {i: [] for i in range(1000, 28001, 1000)}
for t in ["trigger001", "trigger050", "trigger0001"]}
for m in ["hs_complete_throughput", "hs_complete_rate"]}
throughput_dict = {i: [] for i in range(1000, 28001, 1000)}
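# data_dict[metric][trigger name][offered throughput] -> list of per-run samples;
# throughput_dict[offered throughput] -> list of achieved client throughputs.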
lines = sys.stdin.readlines()
index = 0
while index < len(lines):
parts = lines[index].split()
if len(parts) <= 1:
index += 1
continue
if parts[0] != "bench":
index += 1
# print("error processing line", index, lines[index])
continue
print(lines[index])
assert "bench hs_faulty with setting" in lines[index]
bound_throughput = int(parts[-1])
try:
assert "net_stat" in lines[index+1]
net_start = int(lines[index+2])
assert "Duration:" in lines[index+3]
assert "Total requests:" in lines[index+4]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+5]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+6])
avg_, min_, max_ = latency.groups()
assert "net_stat" in lines[index+7]
net_traffic = int(lines[index+8]) - net_start
est_time = 200000 / throughput
# MBytes/s
bandwidth = net_traffic / est_time / 1024 / 1024
index += 9
except:
index += 1
continue
print(throughput)
throughput_dict[bound_throughput].append(throughput)
try:
assert "Processing hindsight_traces" in lines[index]
assert "buffers from hindsight_traces" in lines[index+1]
collected = int(
re.match("(.*) traces total", lines[index+2]).group(1))
assert "Trigger" in lines[index+3]
completed = int(lines[index+4].split()[3])
mapping = {"6": "trigger001", "7": "trigger050", "8": "trigger0001"}
mapping_expect = {"6": 2000, "7": 100000, "8": 200}
for line in lines[index+5:index+15]:
splits = line.split()
if len(splits) < 6 or splits[0] != "All" or splits[5] != "Valid" or splits[1] not in mapping:
index += 1
continue
data_dict["hs_complete_throughput"][mapping[splits[1]]
][bound_throughput].append(float(splits[3]) / est_time)
if splits[1] == "7":
data_dict["hs_complete_rate"][mapping[splits[1]]][bound_throughput].append(
float(splits[3]) / mapping_expect[splits[1]])
else:
data_dict["hs_complete_rate"][mapping[splits[1]]
][bound_throughput].append(float(splits[4]))
except:
continue
# print('complete_throughput', data_dict['complete_throughput'])
# print('complete_rate', data_dict['complete_rate'])
# print('extra_bandwidth', data_dict['extra_bandwidth'])
# print('avg_latency', data_dict['avg_latency'])
def avg(data):
if len(data) == 0:
return 0
return float(sum(data)) / len(data)
def write_to_excel(name):
to_excel = {t: [avg(v) for v in d.values()]
for t, d in data_dict[name].items()}
indices = [i for i in range(1000, 28001, 1000)]
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/' + name + '.xlsx')
for name in data_dict:
write_to_excel(name)
to_excel = {"throughput": [avg(v) for v in throughput_dict.values()]}
indices = [i for i in range(1000, 28001, 1000)]
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/hs_achieved_throughput.xlsx')