Commit 1bc84ee8 authored by Zhiqiang Xie

parsing scripts, a no-tracing ("none") baseline, and script tweaks for the faulty and event horizon experiments

parent 5b866e5a
Branch: zxie
Merge request !12 "up to date"
@@ -10,6 +10,9 @@ SERVERS = {'brain15': 48, 'brain16': 48, 'brain17': 48, 'brain18': 48, 'brain19'
'pinky11': 16, 'pinky12': 16}
# pinky09 is for the client and pinky10 (139.19.171.33) is for the collectors
# write the hindsight buffer and sampling settings to the config file (commented out below):
# \" echo -en \\\"cap 25000\\nbuf_length 4096\\naddr 127.0.0.1\\nport 5050\\nlc_addr 127.0.0.1\\nlc_port 5252\\nr_addr 127.0.0.1\\nr_port 5253\\npayload 1000\\nretroactive_sampling_percentage 1.0\\nhead_sampling_probability 0.0\\\" > /etc/hindsight_conf/default.conf;\
TMUX = """
export BENCH_LOG_PATH=$HOME/hindsight-grpc/config/alibaba/benchmark.log
@@ -28,6 +31,7 @@ server_hindsight () {
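# Arguments, as used below (inferred from the call sites; names here are descriptive only):
# $1 service/container name, $2 and $3 published ports, $4 tracer variant,
# $5 value for -c, $6 threshold for --trigger=6, $7 value for -i, $8 host passed to the agent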
extra_l=0
extra_s=0
rate=0
delay=0
if [ $4 = "hs_faulty" ]
then
@@ -37,7 +41,7 @@ server_hindsight () {
extra_s=0.001
fi
n=-n
rate=10
rate=1
tracer=hindsight
else
extra=0
@@ -45,6 +49,12 @@ server_hindsight () {
n=
fi
if [ $4 = "hs_event" ]
then
delay=0
tracer=hindsight
fi
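# hs_event: same hindsight deployment, but the agent additionally gets an explicit -delay
# (0 here) for the event horizon experiments.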
send "docker run -it -v ${HOME}/hindsight-grpc/config/alibaba:/config/ \\
-d --name $1 -p $2:$2 -p $3:$3 --shm-size=2gb --rm tracer bash -c \\
\\"cd build; ./server -a /config/sub_alibaba_addresses.json \\
@@ -52,7 +62,7 @@ server_hindsight () {
-x $tracer -c $5 --trigger=6:$6 --trigger=7:$extra_l --trigger=8:$extra_s -i $7 $n $1 \\
& sleep 5; cd /root/hindsight/agent/; \\
go run cmd/agent2/main.go -lc pinky10:5252 -r pinky10:5253 \\
-serv $1 -port $3 -host $8 -rate $rate\\""
-serv $1 -port $3 -host $8 -rate $rate -delay $delay\\""
}
server_run () {
@@ -63,7 +73,7 @@ server_run () {
else
trigger=0
fi
if [ $4 = "hindsight" ] || [ $4 = "ot-hindsight" ] || [ $4 = "hs_faulty" ]
if [ $4 = "hindsight" ] || [ $4 = "ot-hindsight" ] || [ $4 = "hs_faulty" ] || [ $4 = "hs_event" ]
then
server_hindsight $2 $3 $9 $4 $5 $trigger $7 $10
else
@@ -215,7 +225,7 @@ run_bench () {
sample=0.1
elif [ $1 = "hs_faulty" ]
then
limit=30000
limit=10000
fi
send "./client -a /config/sub_alibaba_addresses.json \\
-t /config/sub_alibaba_topology.json \\
@@ -223,10 +233,13 @@ run_bench () {
sleep $4
# make sure that the client has been shut down
send C-c
# send C-c
send "tail -n 4 temp.log >> /config/benchmark.log"
if [ $1 = "hindsight" ] || [ $1 = "hs_faulty" ]
if [ $1 = "none" ]
then
:
elif [ $1 = "hindsight" ] || [ $1 = "hs_faulty" ]
then
log_net 1
tmux select-pane -t 2
@@ -234,7 +247,7 @@ run_bench () {
sleep 10
if [ $1 = "hs_faulty" ]
then
extract=12
extract=20
else
extract=5
fi
@@ -257,18 +270,21 @@ run_all () {
do
for i in {50..700..50}
do
for tracer in ot-jaeger-head-1 ot-jaeger-head-10 hindsight ot-jaeger ot-jaeger-simple
for tracer in none ot-jaeger-head-1 ot-jaeger-head-10 hindsight ot-jaeger ot-jaeger-simple
do
tmux select-pane -t 1
send "kill_servers"
sleep 50
sleep 20
let "throughput = $i * 20 "
echo "bench $tracer with setting $throughput" >> $BENCH_LOG_PATH
tmux select-pane -t 0
send "run_collectors $tracer"
sleep 5
if [ $tracer != "none" ]
then
tmux select-pane -t 0
send "run_collectors $tracer"
sleep 5
fi
tmux select-pane -t 1
let "wait = 10000 / $i + 50"
@@ -276,18 +292,26 @@ run_all () {
then
let "wait = wait + 50"
send "deploy_servers ot-jaeger 1024 0.01 simple"
elif [ $tracer = "ot-jaeger-head-1" ] || [ $tracer = "ot-jaeger-head-10" ]
elif [ $tracer = "ot-jaeger-head-1" ] || [ $tracer = "ot-jaeger-head-10" ] || [ $tracer = "ot-jaeger" ]
then
send "deploy_servers ot-jaeger 1024 0.01"
else
send "deploy_servers $tracer 1024 0.01"
sleep 50
fi
sleep 30
if [ $tracer != "none" ]
then
sleep 30
fi
tmux select-pane -t 0
send "run_bench $tracer $i $j $wait"
let "wait = wait + 50"
if [ $tracer != "none" ]
then
let "wait = wait + 20"
fi
sleep $wait
done
done
@@ -296,11 +320,11 @@ run_all () {
run_faulty () {
for i in {500..1500..50}
for i in {50..1400..50}
do
tmux select-pane -t 1
send "kill_servers"
sleep 50
sleep 20
let "throughput = $i * 20"
echo "bench hs_faulty with setting $throughput" >> $BENCH_LOG_PATH
@@ -311,12 +335,12 @@ run_faulty () {
tmux select-pane -t 1
send "deploy_servers hs_faulty 1024 0.01"
sleep 80
sleep 70
tmux select-pane -t 0
let "wait = 30000 / $i + 10"
let "wait = 10000 / $i + 15"
send "run_bench hs_faulty $i 0 $wait"
let "wait = wait + 50"
let "wait = wait + 20"
sleep $wait
done
}
......
@@ -3,7 +3,7 @@ import sys
import re
data_dict = {m: {t: {i: [] for i in range(1000, 15001, 1000)}
for t in ["hindsight", "ot-jaeger", "ot-jaeger-simple", "ot-jaeger-head-1", "ot-jaeger-head-10"]}
for t in ["hindsight", "ot-jaeger", "ot-jaeger-simple", "ot-jaeger-head-1", "ot-jaeger-head-10", "none"]}
for m in ["complete_throughput", "complete_rate", "extra_bandwidth", "avg_latency", "achieved_throughput"]}
lines = sys.stdin.readlines()
@@ -25,32 +25,46 @@ while index < len(lines):
index += 1
continue
try:
assert "net_stat" in lines[index+1]
net_start = int(lines[index+2])
assert "Duration:" in lines[index+3]
assert "Total requests:" in lines[index+4]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+5]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+6])
avg_, min_, max_ = latency.groups()
assert "net_stat" in lines[index+7]
net_traffic = int(lines[index+8]) - net_start
est_time = 200000 / throughput
# MBytes/s
bandwidth = net_traffic / est_time / 1024 / 1024
except:
index += 1
continue
if tracer == "none":
try:
assert "Duration:" in lines[index+1]
assert "Total requests:" in lines[index+2]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+3]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+4])
avg_, min_, max_ = latency.groups()
bandwidth = 0
index += 5
except:
index += 1
continue
else:
try:
assert "net_stat" in lines[index+1]
net_start = int(lines[index+2])
assert "Duration:" in lines[index+3]
assert "Total requests:" in lines[index+4]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+5]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+6])
avg_, min_, max_ = latency.groups()
assert "net_stat" in lines[index+7]
net_traffic = int(lines[index+8]) - net_start
est_time = 200000 / throughput
# MBytes/s
bandwidth = net_traffic / est_time / 1024 / 1024
index += 9
except:
index += 1
continue
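# Expected benchmark.log block per run (matching the asserts above):
#   net_stat
#   <bytes before the run>
#   Duration: ...
#   Total requests: ...
#   overall throughput: <x> requests/s
#   Average / Max / Min latency of a request is: <avg>/<max>/<min> ms
#   net_stat
#   <bytes after the run>
# For the "none" baseline the two net_stat/byte pairs are absent and bandwidth is recorded as 0.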
data_dict["extra_bandwidth"][tracer][bound_throughput].append(bandwidth)
data_dict["avg_latency"][tracer][bound_throughput].append(float(avg_))
data_dict["achieved_throughput"][tracer][bound_throughput].append(
throughput)
index += 9
if "hindsight" == tracer:
try:
assert "Processing hindsight_traces" in lines[index]
......
import pandas as pd
traversal = {i: {} for i in range(1000, 15001, 1000)}
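# traversal[offered throughput][total_agents] -> list of dissemination times (ms);
# offered throughput = client setting * 20, and agents/time come from the
# total_agents and dissemination_time_ms columns of the breadcrumbs CSVs.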
for setting in range(50, 751, 50):
for r in range(1, 5):
try:
with open("../build/breadcrumbs_hindsight_{}_{}".format(setting, r)) as f:
lines = f.readlines()
assert lines[0] == "t,queue,total_agents,dissemination_time_ms\n"
dissemination_time = []
for l in lines[1:]:
agent, time = [int(i) for i in l.split(',')[2:4]]
time = 0.5 if time == 0 else time
if agent in traversal[setting * 20]:
traversal[setting * 20][agent].append(time)
else:
traversal[setting * 20][agent] = [time]
except:
pass
traversal = {k: {a: sum(l)/len(l) for a, l in v.items()}
for k, v in traversal.items()}
indices = [i for i in range(1, 51)]
to_excel = {k: [v[i] if i in v else 0 for i in indices]
for k, v in traversal.items()}
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/breadcrumbs.xlsx')
traversal = {i: {} for i in range(1000, 28001, 1000)}
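# same aggregation for the hs_faulty breadcrumbs: one repetition per setting (file suffix _0),
# settings 50..1400 (offered throughput up to 28000); zero times are kept as 0 here.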
for setting in range(50, 1401, 50):
try:
with open("../build/breadcrumbs_hs_faulty_{}_0".format(setting)) as f:
lines = f.readlines()
assert lines[0] == "t,queue,total_agents,dissemination_time_ms\n"
dissemination_time = []
for l in lines[1:]:
agent, time = [int(i) for i in l.split(',')[2:4]]
if agent in traversal[setting * 20]:
traversal[setting * 20][agent].append(time)
else:
traversal[setting * 20][agent] = [time]
except:
pass
traversal = {k: {a: sum(l)/len(l) for a, l in v.items()}
for k, v in traversal.items()}
max_agents = max([i for v in traversal.values() for i in v])
assert max_agents <= 50
indices = [i for i in range(1, 51)]
to_excel = {k: [v[i] if i in v else 0 for i in indices]
for k, v in traversal.items()}
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/breadcrumbs_faulty.xlsx')
import pandas as pd
import sys
import re
data_dict = {m: {t: {i: [] for i in range(1000, 28001, 1000)}
for t in ["trigger001", "trigger050", "trigger0001"]}
for m in ["hs_complete_throughput", "hs_complete_rate"]}
throughput_dict = {i: [] for i in range(1000, 28001, 1000)}
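# data_dict[metric][trigger name][offered throughput] -> list of per-run samples;
# throughput_dict[offered throughput] -> list of achieved client throughputs.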
lines = sys.stdin.readlines()
index = 0
while index < len(lines):
parts = lines[index].split()
if len(parts) <= 1:
index += 1
continue
if parts[0] != "bench":
index += 1
# print("error processing line", index, lines[index])
continue
print(lines[index])
assert "bench hs_faulty with setting" in lines[index]
bound_throughput = int(parts[-1])
try:
assert "net_stat" in lines[index+1]
net_start = int(lines[index+2])
assert "Duration:" in lines[index+3]
assert "Total requests:" in lines[index+4]
throughput = float(
re.match("overall throughput: (.*) requests/s", lines[index+5]).group(1))
latency = re.match(
"Average / Max / Min latency of a request is: (.*)/(.*)/(.*) ms", lines[index+6])
avg_, min_, max_ = latency.groups()
assert "net_stat" in lines[index+7]
net_traffic = int(lines[index+8]) - net_start
est_time = 200000 / throughput
# MBytes/s
bandwidth = net_traffic / est_time / 1024 / 1024
index += 9
except:
index += 1
continue
print(throughput)
throughput_dict[bound_throughput].append(throughput)
try:
assert "Processing hindsight_traces" in lines[index]
assert "buffers from hindsight_traces" in lines[index+1]
collected = int(
re.match("(.*) traces total", lines[index+2]).group(1))
assert "Trigger" in lines[index+3]
completed = int(lines[index+4].split()[3])
mapping = {"6": "trigger001", "7": "trigger050", "8": "trigger0001"}
mapping_expect = {"6": 2000, "7": 100000, "8": 200}
for line in lines[index+5:index+15]:
splits = line.split()
if len(splits) < 6 or splits[0] != "All" or splits[5] != "Valid" or splits[1] not in mapping:
index += 1
continue
data_dict["hs_complete_throughput"][mapping[splits[1]]
][bound_throughput].append(float(splits[3]) / est_time)
if splits[1] == "7":
data_dict["hs_complete_rate"][mapping[splits[1]]][bound_throughput].append(
float(splits[3]) / mapping_expect[splits[1]])
else:
data_dict["hs_complete_rate"][mapping[splits[1]]
][bound_throughput].append(float(splits[4]))
except:
continue
# print('complete_throughput', data_dict['complete_throughput'])
# print('complete_rate', data_dict['complete_rate'])
# print('extra_bandwidth', data_dict['extra_bandwidth'])
# print('avg_latency', data_dict['avg_latency'])
def avg(data):
if len(data) == 0:
return 0
return float(sum(data)) / len(data)
def write_to_excel(name):
to_excel = {t: [avg(v) for v in d.values()]
for t, d in data_dict[name].items()}
indices = [i for i in range(1000, 28001, 1000)]
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/' + name + '.xlsx')
for name in data_dict:
write_to_excel(name)
to_excel = {"throughput": [avg(v) for v in throughput_dict.values()]}
indices = [i for i in range(1000, 28001, 1000)]
df = pd.DataFrame(data=to_excel, index=indices)
df.to_excel('alibaba/hs_achieved_throughput.xlsx')