From 923f267c91e885940a574b0deda0c82e6a2c4d19 Mon Sep 17 00:00:00 2001
From: Aastha Mehta <aasthakm@mpi-sws.org>
Date: Tue, 1 Mar 2022 22:31:21 +0100
Subject: [PATCH] commit pending changes

---
 sme_extn/sme_helper.php | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/sme_extn/sme_helper.php b/sme_extn/sme_helper.php
index da3e09b5..77eaddc4 100644
--- a/sme_extn/sme_helper.php
+++ b/sme_extn/sme_helper.php
@@ -477,6 +477,23 @@ function filesize_binning_wiktionary($file_size)
   return "extra-large";
 }
 
+function filesize_binning_wiktionary_unzipped($file_size)
+{
+  /* Bin label for $file_size: <= 24820 "medium", <= 81760 "large", otherwise "extra-large".
+   * #frames in profiles: S - 21, M - 21, L - 64, XL - 3879 --> 4363
+   * NOTE(review): the "small" bin below is commented out, so sizes <= 14600 return "medium". */
+#  if (intval($file_size) <= 14600)
+#    return "small";
+
+  if (intval($file_size) <= 24820)
+    return "medium";
+
+  if (intval($file_size) <= 81760)
+    return "large";
+
+  return "extra-large";
+}
+
 function filesize_binning_medical_wiktionary($file_size)
 {
   /*
@@ -499,7 +516,7 @@ function filesize_binning_medical_wiktionary($file_size)
     return "large";
 
   if ((intval($file_size) > 12000 && intval($file_size) <= 49348)
-    || (intval($file_size) > 85432 && intval($file_size) <= 600000))
+    || (intval($file_size) > 85432))
     return "extra-large";
 }
 
@@ -546,13 +563,24 @@ function set_request_id($pubinput, $privinput)
   #print_r($privinput);
   for ($i = 0; $i < $num_pub; $i++) {
 
-    $cluster_name= $cpu_load . $pubinput[$i];
-    $pubinput[$i]= $cpu_load . $pubinput[$i];
+    $cluster_name = $cpu_load . $pubinput[$i];
+    $pubinput[$i] = $cpu_load . $pubinput[$i];
 
     if (isset($wgSMEPrivBinning) && $wgSMEPrivBinning == 1) {
-      $hash=md5($privinput[$i]);
-      $file_size = filesize($wgFileCacheDirectory.'/'.substr($hash,0,1).'/'.substr($hash,0,2).'/'.urlencode($privinput[$i]).'.html.gz');
-      $cluster_suff = filesize_binning_medical_wiktionary(intval($file_size));
+      $hash = md5($privinput[$i]);
+      if (str_contains($wgFileCacheDirectory, "padded") == True) {
+        $sz_fpath = $wgFileCacheDirectory;
+        $sz_fpath .= '/'.substr($hash,0,1).'/'.substr($hash,0,2);
+        $sz_fpath .= '/'.urlencode($privinput[$i]).'.html';
+        $file_size = filesize($sz_fpath);
+      } else {
+        $sz_fpath = $wgFileCacheDirectory;
+        $sz_fpath .= '/'.substr($hash,0,1).'/'.substr($hash,0,2);
+        $sz_fpath .= '/'.urlencode($privinput[$i]).'.html.gz';
+        $file_size = filesize($sz_fpath);
+      }
+      $cluster_suff = filesize_binning_wiktionary(intval($file_size));
+#      $cluster_suff = filesize_binning_wiktionary_unzipped(intval($file_size));
       $cluster_name = $cluster_name . $cluster_suff;
       $public_input_hash[] = md5($cluster_name, True);
     } else {
-- 
GitLab