#load necessary packages
library(tidyverse)
library(ggplot2)
library(here)
Flu Analysis
Wrangling
This is the first file of a four-part data analysis exercise conducted on the dataset from McKay et al. (2020), found here. This file contains the data wrangling and cleaning steps that prepare the data for further analysis.
Load Packages/Data
# Load the raw data from <project root>/fluanalysis/data/ into `flu_raw`.
# NOTE(review): the file has an .Rda extension but is read with read_rds();
# confirm it really is an RDS-serialized object (otherwise use load()).
flu_raw <- readr::read_rds(
  here::here("fluanalysis", "data", "SympAct_Any_Pos.Rda")
)

# Inspect the column names, types, and first few values of the raw data.
glimpse(flu_raw)
Rows: 735
Columns: 63
$ DxName1 <fct> "Influenza like illness - Clinical Dx", "Acute tonsi~
$ DxName2 <fct> NA, "Influenza like illness - Clinical Dx", "Acute p~
$ DxName3 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Fever, unspecified"~
$ DxName4 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Other fatigue", NA,~
$ DxName5 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Headache", NA, NA, ~
$ Unique.Visit <chr> "340_17632125", "340_17794836", "342_17737773", "342~
$ ActivityLevel <int> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, ~
$ ActivityLevelF <fct> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, ~
$ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y~
$ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
$ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, ~
$ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
$ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, ~
$ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, ~
$ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
$ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes~
$ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes~
$ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi~
$ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
$ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve~
$ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes~
$ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, ~
$ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
$ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No~
$ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N~
$ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, ~
$ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,~
$ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye~
$ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y~
$ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,~
$ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y~
$ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y~
$ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,~
$ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, ~
$ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, ~
$ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N~
$ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, ~
$ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N~
$ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N~
$ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, ~
$ RapidFluA <fct> Presumptive Negative For Influenza A, NA, Presumptiv~
$ RapidFluB <fct> Presumptive Negative For Influenza B, NA, Presumptiv~
$ PCRFluA <fct> NA, NA, NA, NA, NA, NA, Influenza A Not Detected, N~
$ PCRFluB <fct> NA, NA, NA, NA, NA, NA, Influenza B Not Detected, N~
$ TransScore1 <dbl> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5~
$ TransScore1F <fct> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5~
$ TransScore2 <dbl> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4~
$ TransScore2F <fct> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4~
$ TransScore3 <dbl> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3~
$ TransScore3F <fct> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3~
$ TransScore4 <dbl> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4~
$ TransScore4F <fct> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4~
$ ImpactScore <int> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9~
$ ImpactScore2 <int> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,~
$ ImpactScore3 <int> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, ~
$ ImpactScoreF <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9~
$ ImpactScore2F <fct> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,~
$ ImpactScore3F <fct> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, ~
$ ImpactScoreFD <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9~
$ TotalSymp1 <dbl> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1~
$ TotalSymp1F <fct> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1~
$ TotalSymp2 <dbl> 8, 10, 17, 16, 11, 14, 10, 11, 13, 10, 14, 19, 13, 1~
$ TotalSymp3 <dbl> 8, 9, 16, 15, 11, 14, 10, 10, 12, 9, 13, 18, 12, 16,~
# Print per-column summaries of the raw data (counts for factors,
# quantiles for numeric columns, and NA counts where present).
flu_raw %>%
  summary()
DxName1
Influenza like illness - Clinical Dx :328
Influenza - Virus Identified :131
Fever, unspecified :101
Cough : 66
Acute pharyngitis, unspecified : 50
Acute upper respiratory infection, unspecified: 22
(Other) : 37
DxName2
Influenza - Virus Identified :126
Influenza like illness - Clinical Dx:115
Fever, unspecified : 45
Cough : 41
Acute pharyngitis, unspecified : 31
(Other) : 97
NA's :280
DxName3
Influenza - Virus Identified : 23
Influenza like illness - Clinical Dx: 14
Cough : 10
Fever, unspecified : 6
Acute pharyngitis, unspecified : 4
(Other) : 52
NA's :626
DxName4
Influenza - Virus Identified : 3
Acute upper respiratory infection, unspecified: 2
Encounter for immunization : 2
Influenza like illness - Clinical Dx : 2
Acute pharyngitis, unspecified : 1
(Other) : 9
NA's :716
DxName5
Acute suppurative otitis media without spontaneous rupture of ear drum, right ear : 0
Encounter for immunization : 0
Headache : 1
Other infectious mononucleosis without complication : 0
Strain of other flexor muscle, fascia and tendon at forearm level, right arm, subsequent encounter: 0
NA's :734
Unique.Visit ActivityLevel ActivityLevelF SwollenLymphNodes
Length:735 Min. : 0.000 3 :125 No :421
Class :character 1st Qu.: 3.000 5 : 97 Yes:314
Mode :character Median : 4.000 4 : 95
Mean : 4.463 2 : 80
3rd Qu.: 6.000 7 : 68
Max. :10.000 6 : 66
(Other):204
ChestCongestion ChillsSweats NasalCongestion CoughYN Sneeze Fatigue
No :326 No :131 No :170 No : 75 No :340 No : 64
Yes:409 Yes:604 Yes:565 Yes:660 Yes:395 Yes:671
SubjectiveFever Headache Weakness WeaknessYN CoughIntensity CoughYN2
No :230 No :115 None : 49 No : 49 None : 47 No : 47
Yes:505 Yes:620 Mild :224 Yes:686 Mild :156 Yes:688
Moderate:341 Moderate:360
Severe :121 Severe :172
Myalgia MyalgiaYN RunnyNose AbPain ChestPain Diarrhea EyePn
None : 79 No : 79 No :211 No :642 No :501 No :636 No :622
Mild :214 Yes:656 Yes:524 Yes: 93 Yes:234 Yes: 99 Yes:113
Moderate:327
Severe :115
Insomnia ItchyEye Nausea EarPn Hearing Pharyngitis Breathless
No :316 No :553 No :477 No :573 No :705 No :121 No :438
Yes:419 Yes:182 Yes:258 Yes:162 Yes: 30 Yes:614 Yes:297
ToothPn Vision Vomit Wheeze BodyTemp
No :569 No :716 No :656 No :514 Min. : 97.20
Yes:166 Yes: 19 Yes: 79 Yes:221 1st Qu.: 98.20
Median : 98.50
Mean : 98.94
3rd Qu.: 99.30
Max. :103.10
NA's :5
RapidFluA
Positive for Influenza A :169
Presumptive Negative For Influenza A:159
NA's :407
RapidFluB PCRFluA
Positive for Influenza B : 26 Influenza A Detected :120
Presumptive Negative For Influenza B:302 Influenza A Not Detected: 33
NA's :407 Assay Invalid : 0
Indeterminate : 1
NA's :581
PCRFluB TransScore1 TransScore1F TransScore2
Influenza B Detected : 9 Min. :0.000 0: 13 Min. :0.000
Influenza B Not Detected:145 1st Qu.:3.000 1: 53 1st Qu.:2.000
Assay Invalid : 0 Median :4.000 2:107 Median :3.000
NA's :581 Mean :3.473 3:157 Mean :2.917
3rd Qu.:5.000 4:210 3rd Qu.:4.000
Max. :5.000 5:195 Max. :4.000
TransScore2F TransScore3 TransScore3F TransScore4 TransScore4F
0: 13 Min. :0.000 0: 24 Min. :0.000 0: 50
1: 89 1st Qu.:1.000 1:166 1st Qu.:2.000 1:103
2:138 Median :2.000 2:222 Median :3.000 2:154
3:201 Mean :2.148 3:323 Mean :2.576 3:230
4:294 3rd Qu.:3.000 3rd Qu.:4.000 4:198
Max. :3.000 Max. :4.000
ImpactScore ImpactScore2 ImpactScore3 ImpactScoreF ImpactScore2F
Min. : 2.000 Min. : 2.000 Min. : 0.00 8 :105 7 :107
1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 3.00 9 :104 8 :102
Median : 9.000 Median : 8.000 Median : 5.00 10 : 88 9 : 90
Mean : 9.514 Mean : 8.581 Mean : 5.06 7 : 84 10 : 86
3rd Qu.:11.000 3rd Qu.:10.000 3rd Qu.: 7.00 11 : 82 6 : 85
Max. :18.000 Max. :17.000 Max. :13.00 12 : 58 11 : 59
(Other):214 (Other):206
ImpactScore3F ImpactScoreFD TotalSymp1 TotalSymp1F TotalSymp2
4 :134 8 :105 Min. : 5.00 12 : 86 Min. : 4.00
5 :112 9 :104 1st Qu.:11.00 13 : 84 1st Qu.:10.00
3 :108 10 : 88 Median :13.00 14 : 80 Median :12.00
6 :102 7 : 84 Mean :12.99 11 : 72 Mean :12.43
7 : 66 11 : 82 3rd Qu.:15.00 10 : 62 3rd Qu.:15.00
2 : 64 12 : 58 Max. :23.00 15 : 61 Max. :22.00
(Other):149 (Other):214 (Other):290
TotalSymp3
Min. : 3.00
1st Qu.:10.00
Median :12.00
Mean :11.66
3rd Qu.:14.00
Max. :21.00
Cleaning/Wrangling
This dataset requires only a small amount of cleaning: I will remove the variables that will not be useful for future analysis and then drop any rows that contain missing values.
# Build the cleaned dataset `flu_data` from `flu_raw`:
#   1. drop every column whose name contains one of the listed substrings
#      (score/total summaries, diagnosis names, activity level, flu test
#      results, and the visit identifier);
#   2. drop every row that still has a missing value in any remaining column.
flu_data <- flu_raw %>%
  select(
    -contains(
      c(
        "Score", "Total", "DxName", "Activity",
        "FluA", "FluB", "Unique.Visit"
      )
    )
  ) %>%
  drop_na()

# Check the structure of the cleaned data.
glimpse(flu_data)
Rows: 730
Columns: 32
$ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y~
$ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
$ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, ~
$ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y~
$ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, ~
$ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, ~
$ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
$ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes~
$ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes~
$ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi~
$ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
$ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve~
$ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes~
$ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, ~
$ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye~
$ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No~
$ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N~
$ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, ~
$ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,~
$ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye~
$ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y~
$ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,~
$ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y~
$ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y~
$ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,~
$ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, ~
$ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, ~
$ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N~
$ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, ~
$ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N~
$ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N~
$ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, ~
Save Cleaned Data
This completes the data cleaning that I will be doing, so I will now save the clean data as a new RDS file.
# Write the cleaned data to <project root>/fluanalysis/data/ as an RDS file
# so that later analysis scripts can read it back in.
saveRDS(
  flu_data,
  file = here::here("fluanalysis", "data", "flu_data_clean.RDS")
)