THE \(t\)-TEST

Overview

\(t\) vs. \(z\)

\(z\)-test

  • Useful if we know everything about original population

\(z = \dfrac{M - \mu}{\sigma_M}\)

Problem

  • Often don’t know everything about original population

\(z = \dfrac{M - \mu}{\require{enclose}\enclose{horizontalstrike}{\sigma_M}}\)

\(t\)-test solution

  • Estimate population variability using sample

\(t = \dfrac{M-\mu}{s_M}\)

The \(t\) statistic

  • Estimated standard error \(s_M\) used in place of (unknown) population standard error \(\sigma_M\)

\[z = \dfrac{M - \mu}{\require{enclose}\enclose{horizontalstrike}{\sigma_M}} \ \ \ \ \ \ \ \ \ \ \ \ \ t = \dfrac{M - \mu}{s_M}\]

\[ \begin{align} \text{Standard error} = \sigma_M = \dfrac{\sigma}{\sqrt{n}} \ \text{or...} \ \dfrac{\sqrt{\sigma^2}}{\sqrt{n}} \ \text{or...} \ \sqrt{\dfrac{\sigma^2}{n}} \\ \\ \text{Estimated standard error} = s_M = \dfrac{s}{\sqrt{n}} \ \text{or...} \ \dfrac{\sqrt{s^2}}{\sqrt{n}} \ \text{or...} \ \sqrt{\dfrac{s^2}{n}} \end{align} \]
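The chain from sample standard deviation to estimated standard error can be sketched in a few lines of Python (the scores below are hypothetical, chosen only to illustrate the arithmetic):

```python
import math
import statistics

# Hypothetical sample of n = 5 scores (illustrative values only)
sample = [3, 5, 7, 4, 6]
n = len(sample)

# Sample standard deviation s divides SS by n - 1 (statistics.stdev does this)
s = statistics.stdev(sample)

# Estimated standard error of the mean: s_M = s / sqrt(n)
s_M = s / math.sqrt(n)
```

For this sample, \(s = \sqrt{2.5} \approx 1.58\) and \(s_M = \sqrt{0.5} \approx 0.71\).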

Degrees of freedom

  • \(df\) depends on kind of \(t\)-test you’re doing
  • Single sample \(t\)-test: \(df = n - 1\)

\(\text{Population variance} = \sigma^2 = \dfrac{SS}{N}\)

\(\text{Sample variance} = s^2 = \dfrac{SS}{df} = \dfrac{SS}{n-1}\)

\(\text{Sample standard deviation} = s = \sqrt{\dfrac{SS}{df}} = \sqrt{\dfrac{SS}{n-1}}\)
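The \(SS \rightarrow s^2 \rightarrow s\) chain above can be made concrete with a short sketch (hypothetical scores, for illustration only):

```python
# Sketch of the SS -> variance -> SD chain, with hypothetical scores
scores = [2, 4, 6, 8]
n = len(scores)
mean = sum(scores) / n

# Sum of squared deviations from the mean
SS = sum((x - mean) ** 2 for x in scores)

# Sample variance divides by df = n - 1, not by n
s2 = SS / (n - 1)
s = s2 ** 0.5
```

Here \(SS = 20\), so \(s^2 = 20/3 \approx 6.67\) rather than the population-style \(20/4 = 5\).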

The \(t\) distribution

  • Degrees of freedom
    • \(t\) distribution is actually a family of distributions
    • Slightly different curve for each value of \(df\)
    • As \(df\) increases, \(t\) distribution gets closer to normal
    • Because the more degrees of freedom, the better \(s^2\) represents \(\sigma^2\)
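The convergence toward the normal curve can be checked numerically. This sketch assumes scipy is available and uses `scipy.stats.t.ppf` / `norm.ppf` as Python analogues of R's `qt()` / `qnorm()`:

```python
from scipy.stats import t, norm

# Lower 5% point of the standard normal, about -1.645
z_crit = norm.ppf(0.05)

# t critical values for the same tail probability approach z as df grows
t_crits = {df: t.ppf(0.05, df) for df in (5, 10, 30, 100)}
```

With \(df = 5\) the cutoff is about \(-2.015\); by \(df = 100\) it has shrunk to about \(-1.660\), close to the normal \(-1.645\).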

The \(t\) distribution

[Figure: normal distribution compared with the \(t\) distribution]

\(t\) table

 \(df\)   Proportion in 1 tail:    0.1      0.05     0.025    0.01     0.005
          Proportion in 2 tails:   0.2      0.1      0.05     0.02     0.01

   1                               3.078    6.314    12.706   31.821   63.657
   2                               1.886    2.920    4.303    6.965    9.925
   3                               1.638    2.353    3.182    4.541    5.841
   4                               1.533    2.132    2.776    3.747    4.604
   5                               1.476    2.015    2.571    3.365    4.032
   6                               1.440    1.943    2.447    3.143    3.707
   7                               1.415    1.895    2.365    2.998    3.499
   8                               1.397    1.860    2.306    2.896    3.355
   9                               1.383    1.833    2.262    2.821    3.250
  10                               1.372    1.812    2.228    2.764    3.169
  11                               1.363    1.796    2.201    2.718    3.106
  12                               1.356    1.782    2.179    2.681    3.055
  13                               1.350    1.771    2.160    2.650    3.012
  14                               1.345    1.761    2.145    2.624    2.977
  15                               1.341    1.753    2.131    2.602    2.947
 ...                               ...      ...      ...      ...      ...

\(t\) table & R

(Same \(t\) table as shown above.)
  • Using R: pt() and qt() instead of pnorm() and qnorm()
  • Note that qt() requires a df argument
qnorm(.05)
[1] -1.644854
qt(.05)
Error in qt(0.05): argument "df" is missing, with no default
qt(.05, df = 5)
[1] -2.015048
qt(.05, df = 10)
[1] -1.812461
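The snippet above demonstrates `qt()` but not `pt()`. For completeness, here is a sketch of the cumulative-probability side in Python, assuming scipy is available (`scipy.stats.t.cdf` is the analogue of R's `pt()`):

```python
from scipy.stats import t

# pt() in R gives the cumulative probability; scipy's t.cdf is the analogue.
# One-tailed probability beyond an observed t of 2.015 with df = 5:
p_one_tail = 1 - t.cdf(2.015, df=5)   # about .05

# A two-tailed p-value doubles the one-tail probability
p_two_tail = 2 * p_one_tail           # about .10
```

This inverts the `qt(.05, df = 5)` result above: the quantile \(-2.015\) and the tail probability \(.05\) are two views of the same point on the curve.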

The \(t\)-test

  • Hypothesis testing with \(t\)
  • Single-sample \(t\) test

Class reaction times

 [1] 327.0 335.0 359.0 430.0 275.4 272.0 350.0 343.2 278.0 354.0 303.0 328.0
[13] 371.0 312.0 346.0 359.0    NA 259.0 313.6 258.0 244.0 374.4    NA 338.0
[25] 290.0
RT (ms)    \(f\)
240-259    3
260-279    3
280-299    1
300-319    3
320-339    4
340-359    6
360-379    2
380-399    0
400-419    0
420-439    1

Hypothesis test

  • Four steps:
    • 1: State the null and alternative hypotheses
    • 2: Locate the critical region using the \(t\) distribution probabilities, \(df\), and \(\alpha\)
    • 3: Calculate the \(t\) test statistic
    • 4: Make a decision regarding \(H_0\) (null hypothesis)

1. State hypotheses

  • Step 1: State hypotheses
    • \(H_0\): Stats students have the same average reaction time as the general population (\(\mu = 284\))
    • \(H_1\): Stats students have a different average reaction time from the general population (\(\mu \neq 284\))

2. Decision criterion

  • Specify \(\alpha\), identify critical region(s)
  • For \(t\), depends on \(df\) and thus \(n\)
  • For single-sample \(t\)-test, \(df = n - 1\)

 \(df\)    \(\alpha = .05\) (two-tailed)
  1        12.706
  2        4.303
  3        3.182
  4        2.776
  5        2.571
 ...       ...
 20        2.086
 21        2.080
 22        2.074
 23        2.069
 24        2.064
 25        2.060
 26        2.056
 27        2.052
 28        2.048
 29        2.045
 30        2.042
 ...       ...
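Rather than reading the critical value from the table, it can be computed directly. This sketch assumes scipy is available and uses `scipy.stats.t.ppf` (the analogue of R's `qt()`) with the two-tailed \(\alpha = .05\) split into \(.025\) per tail:

```python
from scipy.stats import t

# Two-tailed alpha = .05 puts .025 in each tail; with n = 23, df = 22
alpha = 0.05
n = 23
t_crit = t.ppf(1 - alpha / 2, df=n - 1)   # upper critical value, about 2.074
```

This reproduces the \(df = 22\) row of the table: the critical region is \(t < -2.074\) or \(t > 2.074\).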

3. Calculate statistic

  • Calculate \(t\)-statistic for the sample mean
  • Quantifies the difference between the observed sample mean and the hypothesized population mean, divided by the estimated standard error

\(\mu = 284 \\ M = 322.59 \\ SD = 45.31 \\ n = 23\)

\[\begin{align} t = \dfrac{M - \mu}{s_M} &= \dfrac{322.59 - 284}{45.31/\sqrt{23}} \\ &= \dfrac{38.59}{9.45} \\ &= 4.08 \end{align}\]
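The arithmetic above can be verified in a few lines of Python, using the sample statistics from the slide; the p-value step assumes scipy is available (`t.cdf` playing the role of R's `pt()`):

```python
import math
from scipy.stats import t

# Sample statistics from the slide
mu, M, SD, n = 284, 322.59, 45.31, 23

s_M = SD / math.sqrt(n)          # estimated standard error, about 9.45
t_stat = (M - mu) / s_M          # about 4.08

# Two-tailed p-value with df = n - 1 = 22
p_two_tail = 2 * (1 - t.cdf(t_stat, df=n - 1))
```

The resulting p-value is well below \(\alpha = .05\), consistent with the decision in the next step.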

4. Make decision

  • Step 4a: Make a decision about \(H_0\)
    • \(t = 4.08\) falls outside the critical values \(\pm 2.074\) (\(df = 22\))
    • \(p < \alpha\)
    • “Statistically significant” difference

Conclusion

Learning checks

  1. What is the difference between the \(t\) distribution and the \(z\) distribution?
  2. The results of a hypothesis test are reported as follows: \(t(21) = 2.38, p < .05\). How many people were in the sample and what was the statistical decision?
  3. True/False
    • An effect that exists is more likely to be detected if sample size \(n\) is large
    • An effect that exists is less likely to be detected if the sample standard deviation \(s\) is large