Abstract
In this report, the team GDM has visualized stock price data of three vaccine companies BioNTech, AstraZeneca, and Moderna Inc., has made statistical comments about them separately. Each team member was assigned to the analysis of one vaccine company. Team has decided to assign Mert Taşdemir to BioNTech, Yasin Doğa Özkütük to AstraZeneca and Gizem Miçooğulları to Moderna Inc.. In each section almost the same process is done, this includes determining critical dates for the time interval, visualization of whole data set which specifies critical dates, categorizing data, bar chart for average and variance for each ‘period’(interval between critical dates), quantile-quantile plot to test whether periods of data fits into a normal distribution or not and density ridge graph which includes \(%95-\)confidence intervals for each section. In final section, we combined our foundings and concluded some statistical results.library(rmarkdown)
library(readxl)
library(tidyverse)
library(knitr)
library(ggridges)
library(plotly)
library(cowplot)
biontechdata=read_excel("biontech.xlsx")
biontechdata= biontechdata %>% select(Date | Close)
cridateseventsbiontech=read_excel("criticaldatesbiontech.xlsx")
cridateseventsbiontech %>% kable()
Date | Event |
---|---|
2020-04-23 | First Volunteer in Germany Receives Vaccine |
2020-05-05 | First Volunteer in United States Receives Vaccine |
2020-07-27 | Phase 2 and 3 are started |
2020-12-02 | Authorized in United Kingdom |
2020-12-11 | Authorized in United States |
2020-12-21 | Authorized in European Union |
2021-08-23 | FDA grants full approval |
cridateseventsbiontech %>%
filter(Date==cridateseventsbiontech$Date[2] |Date==cridateseventsbiontech$Date[3] |
Date==cridateseventsbiontech$Date[6] |Date==cridateseventsbiontech$Date[7]) %>%
kable()
Date | Event |
---|---|
2020-05-05 | First Volunteer in United States Receives Vaccine |
2020-07-27 | Phase 2 and 3 are started |
2020-12-21 | Authorized in European Union |
2021-08-23 | FDA grants full approval |
biontechdata$Date <- as.Date(biontechdata$Date)
newcridates=as.Date(c("2020-05-05","2020-07-27","2020-12-21","2021-08-23"))
biontechdata=biontechdata %>%
mutate(date_type=case_when(biontechdata$Date
%in% newcridates ~ "Critical", TRUE ~ "Not Critical"))
highlight=biontechdata %>%
filter(date_type=="Critical")
closevalue=ggplot(biontechdata, aes(x = Date, y = Close))+
geom_point()+
geom_point(data=highlight,aes(x = Date, y = Close,color=date_type))+
labs(x = "Date", y="Close Price Value($)",
title= "Close Price Value of BioNTech stocks between 2020-01-01 and 2021-10-01")+
scale_color_manual(values = c("Critical" = "red",
"Not Critical" = "black"))
closevalue= closevalue+
theme_minimal()+
theme(legend.position = "bottom")
ggplotly(closevalue)
biontechdata = biontechdata %>%
mutate(Period=case_when(Date <= "2020-05-05" ~ 1,
Date > "2020-05-05" & Date <= "2020-07-27" ~ 2,
Date > "2020-07-27" & Date <= "2021-08-23" ~ 3,
Date > "2021-08-23" ~ 4 ))
avg=biontechdata %>%
group_by(Period) %>%
summarise(Average=mean(Close))
ggplot(avg,aes(x= Average,y=factor(Period)))+
geom_col(fill="lightblue")+
scale_y_discrete(labels=c("First","Second","Third","Fourth"))+
labs(x = "Average Close Price Value($)",
y="Period",
title= "Average Close Price Value of BioNTech Stocks Among Periods")+
theme_minimal()+
theme(legend.position = "bottom")
ggplot(biontechdata,aes(sample=Close))+
stat_qq(color="blue")+
stat_qq_line(color="red")+
facet_wrap(~Period)+
labs(x = " ", y=" ",
title= "Quantile-Quantile Plot of Each Period")+
theme_minimal()+
theme(legend.position = "bottom")
ggplot(biontechdata, aes(x= Close, y= factor(Period), fill=factor(stat(quantile))))+
stat_density_ridges(geom="density_ridges_gradient",
calc_ecdf=T, quantiles
=c(0.025,0.975))+ylab("Period")+
scale_fill_manual(name="Probability",
values=c("pink","lightblue","green"),
labels=c("(0,0.025)","(0.025,0.975)","(0.975,1)"))+
theme_minimal()
variance=biontechdata %>%
group_by(Period) %>%
summarise(Variance=var(Close))
ggplot(variance,aes(x=Variance,y=factor(Period)))+
geom_col(fill="green")+
scale_y_discrete(labels=c("First","Second","Third","Fourth"))+
labs(x = "Variance of Period",
y="Period",
title= "Variance of Close Price Value of BioNTech Stocks Among Periods")+
theme_minimal()+
theme(legend.position = "bottom")
ModernaData = read_excel("Moderna.xlsx")
ModernaData = ModernaData %>% select(Date | Close)
criticalDatesModerna = read_excel("criticalDateModerna.xlsx")
criticalDatesModerna %>% kable()
Date | Event |
---|---|
2020-03-16 | First Volunteer in America receives first vax |
2020-07-27 | Phase 2 & 3 have started |
2020-12-02 | Authorized in UK |
2020-12-18 | Authorized in USA |
2021-01-06 | Authorized in EU |
2021-12-18 | FDA grants full approval |
criticalDatesModerna %>%
filter(Date == criticalDatesModerna$Date[1] |
Date == criticalDatesModerna$Date[2] | Date== criticalDatesModerna$Date[3] |
Date == criticalDatesModerna$Date[4] |
Date == criticalDatesModerna$Date[5]) %>%
kable()
Date | Event |
---|---|
2020-03-16 | First Volunteer in America receives first vax |
2020-07-27 | Phase 2 & 3 have started |
2020-12-02 | Authorized in UK |
2020-12-18 | Authorized in USA |
2021-01-06 | Authorized in EU |
ModernaData$Date <- as.Date(ModernaData$Date)
newCriticalDates=as.Date(c("2020-03-16","2020-07-27",
"2020-12-02","2020-12-18",
"2021-01-06"))
ModernaData=ModernaData %>%
mutate(dateType=case_when(ModernaData$Date %in%
newCriticalDates ~ "Critical",
TRUE ~ "Not Critical"))
highLight=ModernaData %>%
filter(dateType=="Critical")
closeValue=ggplot(ModernaData, aes(x = Date, y = Close))+
geom_point()+
geom_point(data=highLight,aes(x = Date, y = Close,color=dateType))+
labs(x = "Date", y="Close Price Value($)",
title= "Close Price Value of Moderna Inc.
stocks between 2020-01-01 and 2021-10-01")+
scale_color_manual(values = c("Critical" = "red",
"Not Critical" = "black"))
closevalue=closeValue+theme_minimal()+
theme(legend.position = "bottom")
ggplotly(closevalue)
ModernaData = ModernaData %>%
mutate(Period=case_when(Date <= "2020-03-16" ~ 1,
Date > "2020-03-16" & Date <= "2020-07-27" ~ 2,
Date > "2020-07-27" & Date <= "2020-12-02" ~ 3,
Date > "2020-12-02" & Date <= "2020-12-18" ~ 4,
Date > "2020-12-18" & Date <= "2021-01-06" ~ 5,
Date > "2021-01-06" & Date <= "2021-12-18" ~ 6))
average=ModernaData %>%
group_by(Period) %>%
summarise(Average=mean(Close))
ggplot(average,aes(x= Average,y=factor(Period)))+
geom_col(fill="lightblue")+
scale_y_discrete(labels=c("First","Second","Third",
"Fourth", "Five", "Six"))+
labs(x = "Average Close Price Value",
y="Period",
title= "Average Close Price Value of Moderna Inc. Stocks Among Periods")+
theme_minimal()+
theme(legend.position = "bottom")
ggplot(ModernaData,aes(sample=Close))+
stat_qq(color="blue")+
stat_qq_line(color="red")+
facet_wrap(~Period)+
labs(x = " ", y=" ",title= "Quantile-Quantile
Plot of Each Period")+
theme_minimal()+
theme(legend.position = "bottom")
ggplot(ModernaData, aes(x= Close, y= factor(Period),
fill=factor(stat(quantile))))+
stat_density_ridges(geom="density_ridges_gradient",
calc_ecdf=T, quantiles = c(0.025,0.975))+
ylab("Period")+
scale_fill_manual(name="Probability",
values=c("pink","lightblue","green"),
labels=c("(0,0.025)","(0.025,0.975)","(0.975,1)"))+
theme_minimal()
varians=ModernaData %>%
group_by(Period) %>%
summarise(Variance=var(Close))
ggplot(varians,aes(x=Variance,y=factor(Period)))+
geom_col(fill="green")+
scale_y_discrete(labels=c("First","Second","Third",
"Fourth", "Five", "Six"))+
labs(x = "Variance of Periods", y="Period",
title= "Variance of Close Price Value of
Moderna Inc. Stocks Among Periods")+
theme_minimal()+
theme(legend.position = "bottom")
astrazenecadata=read_excel("astrazeneca.xlsx")
astrazenecadata=astrazenecadata %>% select(Date | Close)
datesAstrazeneca=as.Date(c("2020-04-23","2020-05-22","2020-08-31",
"2020-12-3020","21-01-29","2021-03-25","2021-04-07"))
eventsAstrazeneca=c("First volunteer given vaccine",
"Phase 2","US phase 3 trial begins","Authorized in Uk",
"Authorized in EU",
"Reuses data after US",
"EMA funds possible casval link between vaccine
and rare clotting events")
cridatesAstraZeneca=tibble(datesAstrazeneca,eventsAstrazeneca)
cridatesAstraZeneca %>% rename("Date"=datesAstrazeneca ,
"Event"=eventsAstrazeneca) %>%
kable()
Date | Event |
---|---|
2020-04-23 | First volunteer given vaccine |
2020-05-22 | Phase 2 |
2020-08-31 | US phase 3 trial begins |
2020-12-30 | Authorized in Uk |
0021-01-29 | Authorized in EU |
2021-03-25 | Reuses data after US |
2021-04-07 | EMA funds possible casval link between vaccine |
and rare clotting events |
astrazenecadata$Date <- as.Date(astrazenecadata$Date)
newCriticalDates=as.Date(c("2020-04-23","2020-05-22",
"2020-08-31","2020-12-30",
"2021-01-29" , "2021-03-25", "2021-04-07"))
astrazenecadata=astrazenecadata %>%
mutate(datetype=case_when(astrazenecadata$Date %in% newCriticalDates ~ "Critical",
TRUE ~ "Not Critical"))
highLight=astrazenecadata %>%
filter(datetype=="Critical")
closeValue=ggplot(astrazenecadata,
aes(x = Date, y = Close))+
geom_point()+
geom_point(data=highLight,
aes(x = Date, y = Close,color=datetype))+
labs(x = "Date",
y="Close Price Value($)",
title= "Close Price of Astrazeneca in NasdaqGS
Between 2020-01-01 and 2021-10-01")+
scale_color_manual(values = c("Critical" = "red",
"Not Critical" = "black"))
closeValue=closeValue+theme_minimal()+
theme(legend.position = "bottom")
ggplotly(closeValue)
astrazenecadata = astrazenecadata %>%
mutate(Section=case_when(Date <= "2020-04-23" ~ 1,
Date > "2020-04-23" & Date <= "2020-05-22" ~ 2,
Date > "2020-05-22" & Date <= "2020-08-31" ~ 3,
Date > "2020-08-31" & Date <= "2020-12-30" ~ 4,
Date > "2020-12-30" & Date <= "2021-01-29" ~ 5,
Date > "2021-01-29" & Date <= "2021-03-25" ~ 6,
Date > "2021-03-25" ~ 7))
average=astrazenecadata %>%
group_by(Section) %>%
summarise(Average=mean(Close))
ggplot(average,aes(x= Average,y=factor(Section)))+
geom_col(fill="lightblue")+
scale_y_discrete(labels=c("First","Second","Third",
"Fourth", "Five", "Six", "Seven"))+
labs(x = "Average Day-Close Price Value",
y="Section",
title= "Average Day-Close Price Value of Astrazeneca Stocks Among Sections")+
theme_minimal()+
theme(legend.position = "bottom")
ggplot(astrazenecadata,aes(sample=Close))+
stat_qq(color="blue")+
stat_qq_line(color="red")+
facet_wrap(~Section)+
labs(x = " ", y=" ",title= "Quantile-Quantile
Plot of Each Section")+
theme_minimal()+
theme(legend.position = "bottom")
ggplot(astrazenecadata, aes(x=Close ,y=factor(Section)))+
geom_boxplot(fill="yellow")+
scale_y_discrete(labels=c("First","Second",
"Third","Fourth", "Five",
"Six", "Seven"))+
labs(x = "Close Values", y="Section",
title= "Distribution of Close Values by Section")+
theme_minimal()+
theme(legend.position = "bottom")
varians=astrazenecadata %>%
group_by(Section) %>%
summarise(Variance=var(Close))
ggplot(varians,aes(x=Variance,y=factor(Section)))+
geom_col(fill="green")+
scale_y_discrete(labels=c("First","Second","Third",
"Fourth", "Five", "Six", "Seven"))+
labs(x = "Variance of Section", y="Section",
title= "Variance of Day-Close Price Value of
Astrazeneca Stocks Among Sections")+
theme_minimal()+
theme(legend.position = "bottom")
merged_data=read_excel("mergedata.xlsx")
merged_data$Date=as.Date(merged_data$Date)
companies=c("Moderna Inc.","BioNTech","AstraZeneca")
p=ggplot(merged_data,aes(x=Date, y))+ geom_point(aes(x=Date, y=ModernaClose,color=companies[1]))+geom_point(aes(x=Date, y=AstrazenecaClose,colour=companies[3]))+geom_point(aes(x=Date, y=BiontechClose,colour=companies[2]))+labs(title = "Close Price Values of BioNTech, Moderna Inc. and AstraZeneca", x="Date",y="Close Price Value($)",colour=" Companies")+scale_color_manual(values =c("BioNTech" = "blue", "Moderna Inc." = "salmon", "AstraZeneca"="orange"))+theme_minimal()
ggplotly(p)
biontech_first_volunteer=biontechdata %>%
filter(Period==1) %>%
mutate(company="BioNTech")
moderna_first_volunteer=ModernaData %>%
filter(Period==1) %>%
mutate(company="Moderna Inc.")
astrazeneca_first_volunteer= astrazenecadata %>%
filter(Section==1) %>%
mutate(company="AstraZeneca")
biontech_phase=biontechdata %>%
filter(Period==2) %>%
mutate(company="BioNTech")
moderna_phase=ModernaData %>%
filter(Period==2) %>%
mutate(company="Moderna Inc.")
astrazeneca_phase= astrazenecadata %>%
filter(Section==2 | Section==3) %>%
mutate(company="AstraZeneca")
biontech_authorization=biontechdata %>%
filter(Period==3) %>%
mutate(company="BioNTech")
moderna_authorization=ModernaData %>%
filter(Period==3 | Period==4 | Period==5) %>%
mutate(company="Moderna Inc.")
astrazeneca_authorization= astrazenecadata %>%
filter(Section==4 | Section==4) %>%
mutate(company="AstraZeneca")
biontech_FDA=biontechdata %>%
filter(Period==4) %>%
mutate(company="BioNTech")
astrazeneca_EMA=astrazenecadata %>%
filter(Section==7)
The following graphs includes \(%95\) confidence intervals for mean of periods of each company. First the following distribution graph compares the distribution of period “first volunteer gets vaccine” of each company.
firstvolunteer=data.frame(a=biontech_first_volunteer$Close %>% append(moderna_first_volunteer$Close) %>% append(astrazeneca_first_volunteer$Close),b=biontech_first_volunteer$company %>% append(moderna_first_volunteer$company) %>% append(astrazeneca_first_volunteer$company))
ggplot(firstvolunteer, aes(x= a, y= factor(b),
fill=factor(stat(quantile))))+
stat_density_ridges(geom="density_ridges_gradient",
calc_ecdf=T, quantiles = c(0.025,0.975))+
ylab("Companies")+xlab("Close Price Value")+
scale_fill_manual(name="Probability",
values=c("pink","lightblue","green"),
labels=c("(0,0.025)","(0.025,0.975)","(0.975,1)"))+
theme_minimal()
phase=data.frame(a=biontech_phase$Close %>% append(moderna_phase$Close) %>% append(astrazeneca_phase$Close),b=biontech_phase$company %>% append(moderna_phase$company) %>% append(astrazeneca_phase$company))
ggplot(phase, aes(x= a, y= factor(b),
fill=factor(stat(quantile))))+
stat_density_ridges(geom="density_ridges_gradient",
calc_ecdf=T, quantiles = c(0.025,0.975))+
ylab("Companies")+xlab("Close Price Value")+
scale_fill_manual(name="Probability",
values=c("pink","lightblue","green"),
labels=c("(0,0.025)","(0.025,0.975)","(0.975,1)"))+
theme_minimal()
authorization=data.frame(a=biontech_authorization$Close %>% append(moderna_authorization$Close) %>% append(astrazeneca_authorization$Close),b=biontech_authorization$company %>% append(moderna_authorization$company) %>% append(astrazeneca_authorization$company))
ggplot(authorization, aes(x= a, y= factor(b),
fill=factor(stat(quantile))))+
stat_density_ridges(geom="density_ridges_gradient",
calc_ecdf=T, quantiles = c(0.025,0.975))+
ylab("Companies")+xlab("Close Price Value")+
scale_fill_manual(name="Probability",
values=c("pink","lightblue","green"),
labels=c("(0,0.025)","(0.025,0.975)","(0.975,1)"))+
theme_minimal()
| The following code calculates the rate of changes in company stock prices in each period.
rates=data.frame(Company=
c("BioNTech","BioNTech","BioNTech","Moderna Inc.","Moderna Inc.","Moderna Inc.","AstraZeneca","AstraZeneca","AstraZeneca"),
rate_types=
c("First Volunteer","Phase","Authorization","First Volunteer","Phase","Authorization","First Volunteer","Phase","Authorization"),
Values=
c((firstvolunteer[86,1]-firstvolunteer[1,1])/firstvolunteer[1,1]*100,
(phase[57,1]-phase[1,1])/phase[1,1]*100,
(authorization[271,1]-authorization[1,1])/authorization[1,1]*100,
(firstvolunteer[129,1]-firstvolunteer[87,1])/firstvolunteer[87,1]*100,
(phase[149,1]-phase[58,1])/phase[58,1]*100,
(authorization[384,1]-authorization[272,1])/authorization[272,1]*100,
(firstvolunteer[209,1]-firstvolunteer[130,1])/firstvolunteer[130,1]*100,
(phase[239,1]-phase[150,1])/phase[150,1]*100 ,
(authorization[468,1]-authorization[385,1])/authorization[385,1]*100))
| The following chart visualizes the values that calculated above.
p=ggplot(rates,aes(x=Company,y=Values,fill=rate_types))+geom_col(position = "dodge")+ylab("Rate of Change( % ) ")+ guides(fill=guide_legend(title="Periods"))+theme_minimal()
ggplotly(p)
| According to the chart, the most increase in AstraZeneca company occurred in period “First vVolunteer” and there were slightly decrease in “Authorization” period. This is expected since AstraZeneca has not become as known vaccine as other vaccine companies, we will argue the reasons in next analyses. The company BioNTech increased its value in each period but authorization period has most effect on stock prices on it. On the other hand, phase studies in Moderna Inc. effected Moderna Inc. most positively, other periods had not an important impact on it.
\(H_0: \mu_{First Volunteer}=\mu_{Authorization}=\mu_{Phase} \ \ H_1: \mu_{First Volunteer}\neq\mu_{Authorization}\neq\mu_{phase}\)
astrazen_rate <- data.frame(
Y=
c(astrazeneca_first_volunteer$Close %>%
append(astrazeneca_phase$Close) %>%
append(astrazeneca_authorization$Close)),
Period=
factor(rep(c("First Volunteer","Phase", "Authorization"),
c(length(astrazeneca_first_volunteer$Close),
length(astrazeneca_phase$Close),
length(astrazeneca_authorization$Close)))))
k=anova(lm(astrazen_rate$Y~astrazen_rate$Period))
rownames(k) <- c("Period", "Residuals")
k %>% kable()
Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
---|---|---|---|---|---|
Period | 2 | 2663.909 | 1331.954330 | 196.5779 | 0 |
Residuals | 249 | 1687.151 | 6.775708 | NA | NA |
p1=ggplot()+ geom_line(biontech_FDA ,mapping=aes(x=Date,y=Close))+theme_minimal()
p2=ggplot()+geom_line(astrazeneca_EMA,mapping=aes(x=Date,y=Close))+theme_minimal()
plot_grid(p1, p2,labels=c("BioNTech", "AstraZeneca"), ncol = 2, nrow = 1)