overview

in RStudio

# IMPORT DATA
# THE BACKSLASHES ARE ESCAPED, SO THE FILE READ IS \skipping\skipping_data.csv
path = "\\skipping\\skipping_data.csv"
df = read.csv(path)

# CONDITIONALLY REMOVE DATA
# WHEN I LOSE TRACK OF THE COUNT, I ASSIGN A VALUE OF 0
# ROWS WITH A MISSING COUNT ARE DROPPED TOO: A BARE `!= 0` TEST GIVES NA FOR THEM,
# AND INDEXING ROWS WITH NA INSERTS ALL-NA ROWS INTO THE RESULT
has_count = !is.na(df$number_of_jumps) & df$number_of_jumps != 0
data = df[has_count, ]

# RENAME VARIABLES
x1 = data$number_of_seconds
x2 = data$number_of_errors
t = data$date # THE DATE AS DAYS FROM JAN 1, 1900
y = data$number_of_jumps

# A LITTLE GRAPH : NUMBER OF JUMPS AGAINST NUMBER OF SECONDS
plot(x1, y)

# OLS
# y, x1, x2 AND t ARE THE VECTORS DEFINED ABOVE. lm LOOKS FOR THEM IN data FIRST
# AND FALLS BACK TO THE WORKSPACE WHEN data HAS NO COLUMN OF THAT NAME
model = lm(y ~ x1 + x2 + t, data=data)
summary(model)

using rafficot

n_total
n_omitted
n_included
xTx
(xTx)_inv
xTy

the coefficients

b = (xTx)_inv * xTy

b = (xTx)^{-1} * xTy

b = (x^{T}x)^{-1} x^{T}y

  # TRY SOMETHING ELSE : MODEL THE RATE (JUMPS PER SECOND) INSTEAD OF THE RAW COUNT
  y2 = y/x1
  plot(x1, y2)
  model2 = lm(y2 ~ x1 + x2)
  summary(model2)
  # abline() GIVEN A FIT WITH MORE THAN TWO COEFFICIENTS USES ONLY THE FIRST TWO AND WARNS.
  # PASSING THE INTERCEPT AND THE x1 SLOPE EXPLICITLY DRAWS THE SAME LINE
  # (THE FITTED RATE AT x2 = 0) WITHOUT THE WARNING
  b2 = coef(model2)
  abline(a = b2[["(Intercept)"]], b = b2[["x1"]], col="orange")

  # CALCULATE THE BINS. BE CAREFUL. THE BREAKS MUST SPAN ALL THE DATA OR hist() STOPS.
  # THEY DO NOT HAVE TO BE NUMBER LITERALS, SO THE DEFAULT RANGE 2.0 TO 3.0
  # (0.05 * 40:60) IS WIDENED WHENEVER A RATE FALLS OUTSIDE IT

  # A USEFUL HISTOGRAM : THE AVG NUMBER OF JUMPS PER SECOND
  rate_bin_width = 0.05
  y2_finite = y2[is.finite(y2)] # hist() IGNORES NA AND INFINITE VALUES, SO THE BINS DO TOO
  rate_bin_lo = min(40, floor(y2_finite/rate_bin_width))
  rate_bin_hi = max(60, ceiling(y2_finite/rate_bin_width))
  hist(y2,
       main="average jumps per second",
       breaks = rate_bin_width*rate_bin_lo:rate_bin_hi)

  # ANOTHER USEFUL HISTOGRAM : NUMBER OF SECONDS (DURATION OF A SET)
  # BINS ARE 10 SECONDS WIDE, FROM 0 UP TO THE FIRST MULTIPLE OF 10 AT OR ABOVE THE MAXIMUM
  x1_max = max(x1, na.rm = TRUE)
  bin_width = 10
  bin_upper_limit = ceiling(x1_max/bin_width)
  hist(x1,
       main="number of seconds",
       breaks = bin_width*0:bin_upper_limit)

  # MAYBE LETS INCLUDE TIME (t REPRESENTS THE DAYS FROM JAN 1, 1900)
  # MAKES MORE SENSE TO LOOK AT x1*t I THINK
  x1_sq = x1 * x1
  x1t = x1 * t
  # NOTE: THIS ASSIGNS TO THE NAME model, REPLACING ANY FIT ALREADY STORED UNDER IT
  model = lm(y ~ x1 + x1_sq + x2 + x1t)
  summary(model)

  # FUNCTIONS USED FOR THE SUMMARIES : max(), mean(), sum()
  # KEPT AS A NOTE ONLY : CALLED WITH NO ARGUMENT, mean() IS AN ERROR
  # AND max() RETURNS -Inf WITH A WARNING

histogram

total number of sets
relevant sets
within range
number of bins
-
+
frequency of number of seconds
(the duration of a set)
number of seconds
number of jumps
vs
skipping duration (number of seconds)
number of seconds