This report summarizes the public-safe synthetic gradebook reconstruction workflow. The source artifacts are generated in R from a private reference structure, but this report reads only the synthetic outputs.
# Load the three synthetic CSV exports used throughout the report.
# check.names = FALSE keeps the wide gradebook's column headers exactly as
# written in the file (several contain spaces, e.g. "SIS User ID").
wide_gradebook <- utils::read.csv(
  file = "data/synthetic/synthetic_gradebook.csv",
  check.names = FALSE,
  stringsAsFactors = FALSE
)
score_records <- utils::read.csv(
  file = "data/synthetic/synthetic_student_scores_long.csv",
  stringsAsFactors = FALSE
)
assignment_metadata <- utils::read.csv(
  file = "data/synthetic/synthetic_assignment_metadata.csv",
  stringsAsFactors = FALSE
)
# Show the column layout of the wide gradebook.
utils::str(object = wide_gradebook)
## 'data.frame': 287 obs. of 39 variables:
## $ Student : chr "Synthetic Student 001" "Synthetic Student 002" "Synthetic Student 003" "Synthetic Student 004" ...
## $ ID : chr "SYN-ID-000001" "SYN-ID-000002" "SYN-ID-000003" "SYN-ID-000004" ...
## $ SIS User ID : chr "SYN000001" "SYN000002" "SYN000003" "SYN000004" ...
## $ SIS Login ID : chr "synthetic001" "synthetic002" "synthetic003" "synthetic004" ...
## $ Section : chr "Section A" "Section A" "Section A" "Section A" ...
## $ Assignment 06 : chr "Complete" "Exempt" "Submitted" "Submitted" ...
## $ Current Score : logi NA NA NA NA NA NA ...
## $ Final Score : num 10 NA NA NA NA 12 26.4 NA NA NA ...
## $ Unposted Final Score: num 10 NA NA NA NA 12 26.4 NA NA NA ...
## $ Current Grade : chr "F" "" "" "" ...
## $ Final Grade : chr "F" "" "" "" ...
## $ Assignment 12 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 13 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 14 : int NA NA NA NA NA NA 100 NA NA NA ...
## $ Assignment 15 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 16 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 17 : int NA NA NA NA NA NA 2 NA NA NA ...
## $ Assignment 18 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 19 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 20 : int NA NA NA NA NA NA 10 NA NA NA ...
## $ Assignment 21 : int 10 NA NA NA NA NA NA NA NA NA ...
## $ Assignment 22 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 23 : int NA NA NA NA NA NA 10 NA NA NA ...
## $ Assignment 24 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 25 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 26 : int NA NA NA NA NA NA 10 NA NA NA ...
## $ Assignment 27 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 28 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 29 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 30 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 31 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 32 : int NA NA NA NA NA 12 NA NA NA NA ...
## $ Assignment 33 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 34 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 35 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 36 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 37 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 38 : int NA NA NA NA NA NA NA NA NA NA ...
## $ Assignment 39 : int NA NA NA NA NA NA NA NA NA NA ...
# Show the column layout of the long-form score records.
utils::str(object = score_records)
## 'data.frame': 8036 obs. of 17 variables:
## $ synthetic_student_id : chr "synthetic_student_001" "synthetic_student_002" "synthetic_student_003" "synthetic_student_004" ...
## $ synthetic_section : chr "Section A" "Section A" "Section A" "Section A" ...
## $ assignment_id : chr "assignment_01" "assignment_01" "assignment_01" "assignment_01" ...
## $ assignment_label : chr "Assignment 12" "Assignment 12" "Assignment 12" "Assignment 12" ...
## $ assignment_sequence : int 1 1 1 1 1 1 1 1 1 1 ...
## $ assignment_family : chr "Diagnostic" "Diagnostic" "Diagnostic" "Diagnostic" ...
## $ skill_domain : chr "Conceptual Fluency" "Conceptual Fluency" "Conceptual Fluency" "Conceptual Fluency" ...
## $ reference_column_index: int 12 12 12 12 12 12 12 12 12 12 ...
## $ score : int NA NA NA NA NA NA NA NA NA NA ...
## $ score_min : int 100 100 100 100 100 100 100 100 100 100 ...
## $ score_max : int 100 100 100 100 100 100 100 100 100 100 ...
## $ score_percent : logi NA NA NA NA NA NA ...
## $ completed : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ missingness_reason : chr "missing_submission" "missing_submission" "missing_submission" "not_administered" ...
## $ ability_band : chr "approaching" "developing" "developing" "developing" ...
## $ engagement_band : chr "steady" "steady" "low" "emerging" ...
## $ risk_band : chr "elevated" "high" "high" "low" ...
# Show the column layout of the per-assignment metadata.
utils::str(object = assignment_metadata)
## 'data.frame': 28 obs. of 17 variables:
## $ source_column_index : int 12 13 14 15 16 17 18 19 20 21 ...
## $ assignment_id : chr "assignment_01" "assignment_02" "assignment_03" "assignment_04" ...
## $ assignment_label : chr "Assignment 12" "Assignment 13" "Assignment 14" "Assignment 15" ...
## $ assignment_sequence : int 1 2 3 4 5 6 7 8 9 10 ...
## $ assignment_family : chr "Diagnostic" "Diagnostic" "Diagnostic" "Diagnostic" ...
## $ skill_domain : chr "Conceptual Fluency" "Procedural Accuracy" "Modeling And Application" "Evidence And Explanation" ...
## $ reference_nonblank_count: int 1 1 1 1 1 1 1 1 1 1 ...
## $ reference_blank_rate : num 0.997 0.997 0.997 0.997 0.997 ...
## $ reference_mean : int 100 100 100 100 20 2 10 10 10 10 ...
## $ reference_sd : int 12 12 12 12 12 12 12 12 12 12 ...
## $ reference_min : int 100 100 100 100 20 2 10 10 10 10 ...
## $ reference_p25 : int 100 100 100 100 20 2 2 2 2 2 ...
## $ reference_p50 : int 100 100 100 100 20 2 2 2 2 2 ...
## $ reference_p75 : int 100 100 100 100 20 2 2 2 2 2 ...
## $ reference_max : int 100 100 100 100 20 2 10 10 10 10 ...
## $ difficulty_index : num -6.411 -6.411 -6.411 -6.411 0.256 ...
## $ discrimination : int 1 1 1 1 1 1 1 1 1 1 ...
# Headline counts describing the size of each synthetic input. Built inside
# local() so the helper and the intermediate vector do not leak into the
# report's workspace; the resulting two-column table is printed.
local({
  count_distinct <- function(values) length(unique(values))
  overview <- c(
    "Wide gradebook rows" = nrow(wide_gradebook),
    "Wide gradebook columns" = ncol(wide_gradebook),
    "Long-form score records" = nrow(score_records),
    "Synthetic students" = count_distinct(score_records$synthetic_student_id),
    "Synthetic sections" = count_distinct(score_records$synthetic_section),
    "Assignment metadata rows" = nrow(assignment_metadata),
    "Assignment families" = count_distinct(score_records$assignment_family),
    "Skill domains" = count_distinct(score_records$skill_domain)
  )
  data.frame(Metric = names(overview), Value = unname(overview))
})
## Metric Value
## 1 Wide gradebook rows 287
## 2 Wide gradebook columns 39
## 3 Long-form score records 8036
## 4 Synthetic students 287
## 5 Synthetic sections 1
## 6 Assignment metadata rows 28
## 7 Assignment families 5
## 8 Skill domains 5
# Percentage of records flagged as completed within each assignment family,
# rounded to one decimal place. The aggregated column is renamed from
# "completed" to "completion_rate" before printing.
completion_summary <- stats::setNames(
  stats::aggregate(
    completed ~ assignment_family,
    data = score_records,
    FUN = function(flags) round(mean(flags) * 100, digits = 1)
  ),
  c("assignment_family", "completion_rate")
)
completion_summary
## assignment_family completion_rate
## 1 Concept Check 2.5
## 2 Cumulative Review 3.9
## 3 Diagnostic 3.4
## 4 Skill Practice 3.1
## 5 Unit Assessment 3.4
# Count records per missingness reason; naming the table dimension and the
# response column yields the final column names directly.
missingness_summary <- as.data.frame(
  table(missingness_reason = score_records$missingness_reason),
  responseName = "records"
)
# Print the reasons from most to least frequent.
missingness_summary[order(-missingness_summary[["records"]]), ]
## missingness_reason records
## 4 missing_submission 3169
## 5 not_administered 2053
## 3 late_or_incomplete 1410
## 2 excused_or_absent 1151
## 1 completed 253
# Keep only records that are flagged completed and carry a score.
# which() discards positions where the condition is NA (for example a
# missing `completed` flag); plain logical indexing would instead insert
# an all-NA row for each of them.
is_scored_completion <- score_records$completed & !is.na(score_records$score)
completed_scores <- score_records[which(is_scored_completion), ]

# Prefer the percent scale whenever any percent value is present;
# otherwise fall back to the raw synthetic score.
if (any(!is.na(completed_scores$score_percent))) {
  completed_scores$score_metric <- completed_scores$score_percent
  score_metric_label <- "Score percent"
} else {
  completed_scores$score_metric <- completed_scores$score
  score_metric_label <- "Synthetic score"
}

# Rows are filtered on `score`, not `score_percent`, so the chosen metric
# can still hold NAs in the percent branch. na.rm = TRUE keeps one missing
# value from turning every summary statistic into NA.
data.frame(
  Metric = c(
    "Completed score records",
    paste("Mean", score_metric_label),
    paste("Median", score_metric_label),
    paste(score_metric_label, "SD")
  ),
  Value = c(
    nrow(completed_scores),
    round(mean(completed_scores$score_metric, na.rm = TRUE), 1),
    round(median(completed_scores$score_metric, na.rm = TRUE), 1),
    round(sd(completed_scores$score_metric, na.rm = TRUE), 1)
  )
)
## Metric Value
## 1 Completed score records 253.0
## 2 Mean Synthetic score 25.4
## 3 Median Synthetic score 10.0
## 4 Synthetic score SD 33.9
# Plot the distribution of the chosen score metric. ggplot2 is treated as an
# optional dependency: without it, a single base-graphics histogram is drawn
# instead of the per-family facets.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  hist(
    completed_scores$score_metric,
    breaks = 20,
    main = "Synthetic Score Distribution",
    xlab = score_metric_label
  )
} else {
  ggplot2::ggplot(
    data = completed_scores,
    mapping = ggplot2::aes(x = score_metric)
  ) +
    ggplot2::geom_histogram(binwidth = 5, fill = "#2563eb", color = "white") +
    ggplot2::facet_wrap(~ assignment_family) +
    ggplot2::labs(
      title = "Synthetic Score Distribution By Assignment Family",
      x = score_metric_label,
      y = "Completed records"
    ) +
    ggplot2::theme_minimal()
}
# Mean of the chosen score metric for every ability band / engagement band
# combination present in the completed records, rounded to one decimal.
stats::aggregate(
  score_metric ~ ability_band + engagement_band,
  FUN = function(values) round(mean(values), digits = 1),
  data = completed_scores
)
## ability_band engagement_band score_metric
## 1 advanced emerging 28.0
## 2 approaching emerging 10.0
## 3 developing emerging 28.4
## 4 proficient emerging 46.0
## 5 advanced high 26.6
## 6 approaching high 25.7
## 7 developing high 28.4
## 8 proficient high 23.9
## 9 advanced low 10.0
## 10 approaching low 32.5
## 11 developing low 28.8
## 12 proficient low 55.0
## 13 advanced steady 18.6
## 14 approaching steady 28.0
## 15 developing steady 10.0
## 16 proficient steady 26.0
# Percentage of records flagged as completed within each risk band, rounded
# to one decimal place. The result column keeps the name "completed".
stats::aggregate(
  completed ~ risk_band,
  FUN = function(flags) round(mean(flags) * 100, digits = 1),
  data = score_records
)
## risk_band completed
## 1 elevated 0.7
## 2 high 0.4
## 3 low 9.6
## 4 moderate 2.0
The synthetic dataset is designed to support realistic analytics workflows:
The report is not evidence about a real class or school. It is a reproducible artifact demonstrating how a private reference structure can be transformed into a public-safe dataset for analysis and portfolio presentation.