First commit
This commit is contained in:
87
Project.R
Normal file
87
Project.R
Normal file
@@ -0,0 +1,87 @@
|
||||
# Task 1
|
||||
|
||||
|
||||
# b)
|
||||
# Probability mass function of a geometric distribution with success
# probability 0.45, evaluated at k = 0, ..., 12.
X <- 0:12
# Spell out `prob` rather than relying on partial argument matching (`p`),
# which silently breaks if the function ever gains another `p*` argument.
Y <- dgeom(X, prob = 0.45)
# Vertical spikes (type = "h") topped with filled dots: one spike per k.
plot(
  X, Y,
  type = "h",
  main = "Geometric distribution (p=0.45)",
  ylab = "P(X=k)"
)
points(X, Y, pch = 16)
|
||||
|
||||
# Task 2
|
||||
|
||||
# b)
|
||||
# Probability mass function of a binomial distribution with 30 trials and
# success probability 0.6, evaluated at every possible outcome k = 0, ..., 30.
X <- 0:30
Y <- dbinom(X, size = 30, prob = 0.6)
# Vertical spikes (type = "h") topped with filled dots: one spike per k.
# The title previously read "Binominal"; corrected to "Binomial".
plot(
  X, Y,
  type = "h",
  main = "Binomial distribution (p=0.6)",
  ylab = "P(X=k)"
)
points(X, Y, pch = 16)
|
||||
|
||||
# Task 3
|
||||
|
||||
# 2)
|
||||
# Probability mass function of a Poisson distribution with rate 20,
# evaluated on the integer grid k = 0, ..., 40.
X <- seq_len(41L) - 1L
Y <- dpois(x = X, lambda = 20)
# Draw the spikes first, then mark each probability with a filled dot.
plot(
  x = X,
  y = Y,
  type = "h",
  ylab = "P(X=k)",
  main = "Poisson distribution (lambda=20)"
)
points(x = X, y = Y, pch = 16)
|
||||
|
||||
# Task 4
|
||||
# Load the first data set and report basic descriptive statistics of its
# `Val` column. The numbers in the comments are the recorded answers.
library(readr)
data_set1 <- read_csv("data_set1.csv")
# View() opens a data viewer; only do that when someone is there to look at
# it, so the script also runs unattended (e.g. via Rscript).
if (interactive()) {
  View(data_set1)
}

# 1) The name of the column is "Val".
names(data_set1)
# 2) There are 1029 rows.
nrow(data_set1)
# Every statistic below is taken on the `Val` column explicitly. Calling
# max()/min() on the whole data frame only works while every column is
# numeric and would mix columns together if more were added.
# 3) Max = 109.379
max(data_set1$Val)
# 4) Min = 4.193534
min(data_set1$Val)
# 5) Mean = 50.49665
mean(data_set1$Val)
# 6) Median = 50.52415
median(data_set1$Val)
# 7) Variance = 218.7175
var(data_set1$Val)
# 8) Standard deviation = 14.7891
sd(data_set1$Val)
|
||||
|
||||
# Task 5
|
||||
# 1)
|
||||
# Compare the data with a normal distribution that has the same mean and
# standard deviation: black = fitted normal curve, red = kernel density
# estimate of the data, green = sample mean.
library(readr)
data_set1 <- read_csv("data_set1.csv")
# Compute the sample moments once instead of on every use.
val_mean <- mean(data_set1$Val)
val_sd <- sd(data_set1$Val)
# Integer grid for the normal curve. It covers at least 0..100 as before, but
# is widened when the data fall outside that range so that the tails of the
# density estimate are not cut off by the plot limits.
x_from <- min(0, floor(min(data_set1$Val)))
x_to <- max(100, ceiling(max(data_set1$Val)))
X <- x_from:x_to
Y <- dnorm(X, mean = val_mean, sd = val_sd)
plot(
  X, Y,
  type = "l",
  ylim = c(0, 0.03),
  main = "Data set vs normal distribution"
)
# 2)
d <- density(data_set1$Val, bw = 3)
# 3)
# lines() is the idiomatic way to overlay a curve on an existing plot.
lines(d, col = "red")
# 4)
abline(v = val_mean, col = "green")
|
||||
|
||||
# Task 6
|
||||
# 4 variables most correlated with hp: mpg, cyl, disp, carb
|
||||
# Pairwise correlations between all mtcars variables, rounded to two
# decimals; the `hp` row/column is the one used to pick the predictors.
cars <- mtcars
cor_all <- cor(cars)
round(cor_all, digits = 2)
|
||||
|
||||
# Task 7
|
||||
# 1)
|
||||
# Regress horsepower on cyl, disp, carb and mpg using the mtcars data.
model <- lm(formula = hp ~ cyl + disp + carb + mpg, data = mtcars)
# 2)
# Without new data, predict() returns the fitted value for each car.
hp_hat <- predict(object = model)
# 3)
# Residual = observed horsepower minus fitted horsepower.
residuals <- with(mtcars, hp - hp_hat)
# 4)
# Kernel density estimate of the residuals, then plot it.
hpplot <- density(residuals)
plot(hpplot, main = "Density of residuals")
# 6)
model_summary <- summary(model)
model_summary$r.squared # 0.8594845 - Correct and accurate
|
||||
|
||||
# Task 8
|
||||
# Compare the measured stick lengths with a normal distribution that has the
# same mean and standard deviation: black = fitted normal curve, red = kernel
# density estimate of the data, green = sample mean.
library(readr)
data_set2 <- read_csv("data_set2.csv")
# Compute the sample moments once instead of on every use.
len_mean <- mean(data_set2$Val)
len_sd <- sd(data_set2$Val)
# Evaluate the normal curve on a fine, evenly spaced grid spanning exactly the
# observed range of `Val`. The earlier `min(df):max(df)` form took the range
# of the whole data frame and stepped by 1 from a fractional start, giving a
# coarse curve that could stop short of the maximum.
X <- seq(min(data_set2$Val), max(data_set2$Val), length.out = 200)
Y <- dnorm(X, mean = len_mean, sd = len_sd)
plot(X, Y, type = "l", main = "Normal distribution of stick lengths")
d <- density(data_set2$Val, bw = 1)
# lines() is the idiomatic way to overlay a curve on an existing plot.
lines(d, col = "red")
abline(v = len_mean, col = "green")
|
||||
# The stick lengths are not acceptable: the sample mean is much higher than the value µ = 30 stated in the null hypothesis, and most values do not lie close to 30.
|
||||
BIN
Statistical mathematics project work.pdf
Normal file
BIN
Statistical mathematics project work.pdf
Normal file
Binary file not shown.
1030
data_set1.csv
Normal file
1030
data_set1.csv
Normal file
File diff suppressed because it is too large
Load Diff
1201
data_set2.csv
Normal file
1201
data_set2.csv
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user