# We use the 'patients' dataset in this exercise
library(dplyr)
# Preview the first rows of the dataset.
print(head(patients))
print('----------The details of the dataset: -----')
# str() prints the structure itself and returns NULL invisibly, so it is
# called directly; wrapping it in print() would emit an extra "NULL" line.
str(patients)
print('--------- The result: ----------')
# Mean BMI per gender, ignoring missing values. These are used below to
# impute missing BMI entries.
avg_f <- mean(patients$BMI[patients$gender == 'Female'], na.rm = TRUE) # Female
avg_m <- mean(patients$BMI[patients$gender == 'Male'], na.rm = TRUE) # Male

# Build the table of patients flagged on all three measurements.
result <- patients %>%
  # Impute a missing BMI with the mean BMI of the patient's gender group.
  # The two assignments run in order, so the second sees the first's output.
  mutate(
    BMI = ifelse(is.na(BMI) & gender == 'Female', avg_f, BMI),
    BMI = ifelse(is.na(BMI) & gender == 'Male', avg_m, BMI)
  ) %>%
  # fasting_glucose is converted to double so arithmetic can be done on it.
  mutate(fasting_glucose = as.double(fasting_glucose)) %>%
  # Absolute z-score of each fasting glucose value. na.rm = TRUE keeps a
  # single missing (or non-numeric, hence NA after conversion) value from
  # turning the mean/sd, and with them the whole column, into NA.
  mutate(
    glucose_diff = abs(
      (fasting_glucose - mean(fasting_glucose, na.rm = TRUE)) /
        sd(fasting_glucose, na.rm = TRUE)
    )
  ) %>%
  # Average of the two blood pressure readings.
  mutate(avg_bp = (high_bp + low_bp) / 2) %>%
  # Flag patients whose average blood pressure is at least 100.
  # NOTE(review): the previous comment on this step said 110 while the code
  # used 100; the code's value is kept -- confirm the intended cut-off.
  mutate(in_danger = avg_bp >= 100) %>%
  # Keep only the columns needed for the report.
  select(id, gender, BMI, fasting_glucose, glucose_diff, avg_bp, in_danger) %>%
  # Keep patients who exceed every threshold; rows where any condition is NA
  # are dropped by filter().
  filter(glucose_diff >= 2, BMI >= 25, in_danger)
# Show the flagged patients before the table is reduced to a count.
print(result)
print('-------- The number of rows: -------')
# From here on `result` holds the number of flagged patients (those who need
# care) rather than the table itself.
result <- nrow(result)
print(result)