How Easily Can You Fake Statistical Significance?

Posted in Psychology

I have already written about p-hacking, and John Oliver explained very well how it’s done in his show (the video is embedded in my previous post about it).

Statistical significance means – within the social sciences – that you can be 95 % sure that your results were not produced purely by chance. But that isn’t something that many people can work with intuitively. I decided to code a small showcase in JavaScript right here to show you that it is easily possible to get statistically significant results simply by chance.

Showcase

For my demonstration, let’s assume we have two groups, each consisting of 250 people: one intervention group and one control group. The intervention group received special training to improve their reaction times, while the control group did not. We don’t know whether the training might actually have worsened people’s performance because it wore them out too much, so we are testing two-tailed. This simply means that we can’t be sure in advance which direction the results will go in.

In any case, the reaction times of every single person within the two groups were measured and should now be tested for statistical significance. This is usually done with a two-sample t-test (a fancy name for a test that simply compares the means of our two groups and tells us whether they differ significantly from each other). Below (after you click on “Generate Significance”) you will find the data for each group – it has been generated completely by chance, using JavaScript, right in your browser.

Sample size group 1:
Sample size group 2:
Mean Group 1:
Mean Group 2:
Standard Deviation Group 1:
Standard Deviation Group 2:
Trials:
t value:

For the JavaScript-inclined, here is the code for that (if you find errors in the calculation, please submit a comment):

// Run calculate() whenever the element with id "go" is clicked.
document.getElementById("go").onclick = calculate;
/**
* Descriptive-statistics helpers for arrays of numbers.
* Every helper refers to its siblings through `arr`, not `this`.
* Big thanks to https://gist.github.com/Daniel-Hug/7273430
*/
var arr = {
    // Adds the entries together, front to back, starting from 0.
    sum: function(array) {
      var total = 0;
      var index = 0;
      var count = array.length;
      while (index < count) {
        total += array[index];
        index += 1;
      }
      return total;
    },
    // Arithmetic mean: the sum divided by the number of entries.
    mean: function(array) {
      var total = arr.sum(array);
      return total / array.length;
    },
    // Mean of the squared deviations from the mean (divides by the entry count).
    variance: function(array) {
      var center = arr.mean(array);
      var squaredDeviations = array.map(function(value) {
        var delta = value - center;
        return Math.pow(delta, 2);
      });
      return arr.mean(squaredDeviations);
    },
    // Square root of the variance computed above.
    standardDeviation: function(array) {
      var spread = arr.variance(array);
      return Math.sqrt(spread);
    },
 };
/**
 * Keep generating random data for two groups of 250 values until a t test on
 * them exceeds the significance threshold, then display that trial.
 *
 * Every trial fills both groups with fresh gaussianRandom(1, 10) values and
 * computes the t value with ttest(). The loop runs synchronously until one
 * trial succeeds. The successful trial is then written to the page: sample
 * sizes (g1_n, g2_n), means (m_g1, m_g2), standard deviations (sd_g1, sd_g2),
 * the number of trials needed (trials, expl_trials) and the t value (tvalue).
 * Finally the "explanation" element is made visible.
 */
 function calculate() {
    var groupSize = 250;
    // a t-value that is bigger than 2 is certainly significant. 2 is used instead of 1.96
    // to leave a small margin for floating-point rounding errors
    var threshold = 2;
    var data_g1;
    var data_g2;
    var t;
    var trials = 0;
    do {
        // let's create random data following a normal distribution for our two groups;
        // both arrays start empty on every trial
        data_g1 = [];
        data_g2 = [];
        for (var i = 0; i < groupSize; i++) {
            data_g1.push(gaussianRandom(1, 10));
            data_g2.push(gaussianRandom(1, 10));
        }
        t = ttest(data_g1, data_g2, groupSize);
        trials++;
    } while (!(t > threshold));

    // The sample size is the array length (no +1 needed), and the t value shown
    // is the very one that passed the threshold check above.
    document.getElementById("g1_n").textContent = String(data_g1.length);
    document.getElementById("g2_n").textContent = String(data_g2.length);
    document.getElementById("m_g1").textContent = arr.mean(data_g1).toFixed(3);
    document.getElementById("m_g2").textContent = arr.mean(data_g2).toFixed(3);
    document.getElementById("sd_g1").textContent = arr.standardDeviation(data_g1).toFixed(3);
    document.getElementById("sd_g2").textContent = arr.standardDeviation(data_g2).toFixed(3);
    document.getElementById("trials").textContent = String(trials);
    document.getElementById("expl_trials").textContent = String(trials);
    document.getElementById("tvalue").textContent = t.toFixed(3);
    // DOM elements expose inline styles through `.style`; there is no `.css` property
    document.getElementById("explanation").style.display = "block";
 }
 /**
  * Perform a two-sample t test on two groups of equal size and return the
  * absolute t value: |mean1 - mean2| / sqrt(s1^2 / n + s2^2 / n), where s^2 is
  * the unbiased sample variance of each group.
  * @param {array} group1 The first group with the data
  * @param {array} group2 The second group with the data
  * @param {Number} n The sample size of each group (must be at least 2)
  * @returns {Number} the absolute t value
  */
 function ttest(group1, group2, n) {
    var v1 = arr.variance(group1);
    var v2 = arr.variance(group2);
    var upper = arr.mean(group1) - arr.mean(group2);
    // arr.variance divides by n (population variance). The standard error needs
    // the unbiased sample variance s^2 = v * n / (n - 1), and s^2 / n simplifies
    // to v / (n - 1).
    var lower = Math.sqrt(v1 / (n - 1) + v2 / (n - 1));
    // the direction of the difference does not matter for a two-tailed test
    return Math.abs(upper / lower);
 }
 
 // thanks to Dorian http://stackoverflow.com/questions/25582882/javascript-math-random-normal-distribution-gaussian-bell-curve/39187274#39187274
 // Averages six Math.random() draws, so the result clusters around 0.5.
 function gaussianRand() {
    var draws = 6;
    var total = 0;
    var remaining = draws;
    while (remaining > 0) {
        total += Math.random();
        remaining -= 1;
    }
    return total / draws;
 }
 
 // Scales a gaussianRand() draw onto the whole numbers starting at `start`,
 // using a span of (end - start + 1) and rounding down.
 function gaussianRandom(start, end) {
    var span = end - start + 1;
    var offset = gaussianRand() * span;
    return Math.floor(start + offset);
 }

Leave a Reply

Your email address will not be published. Required fields are marked *