总体描述性统计

所用球员特征表:

Code
# 安装并加载所需包(如未安装请取消注释下面行)
# install.packages("readxl")
# install.packages("dplyr")

library(readxl)
library(dplyr)

# 读取 Excel 文件
df <- read_excel("C:/Users/86133/Desktop/object/MVP/常规赛.xlsx")

# 查看列名和前几行
names(df)
 [1] "name"   "Season" "FG"     "FGA"    "FG%"    "3P"     "3PA"    "3P%"   
 [9] "2P"     "2PA"    "2P%"    "eFG%"   "FT"     "FTA"    "FT%"    "ORB"   
[17] "DRB"    "TRB"    "AST"    "STL"    "BLK"    "TOV"    "PF"     "PTS"   
[25] "PER"    "TS%"    "3PAr"   "FTr"    "ORB%"   "DRB%"   "TRB%"   "AST%"  
[33] "STL%"   "BLK%"   "TOV%"   "USG%"   "OWS"    "DWS"    "WS"     "WS/48" 
[41] "OBPM"   "DBPM"   "BPM"    "VORP"   "win"   
Code
head(df)
# A tibble: 6 × 45
  name       Season    FG   FGA `FG%`  `3P` `3PA` `3P%`  `2P` `2PA` `2P%` `eFG%`
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 Rose         2010   8.8  19.7 0.445   1.6   4.8 0.332   7.2  14.9 0.483  0.611
2 1.Michael…   1984  10.2  19.8 0.515   0.1   0.6 0.173  10.1  19.2 0.526  0.518
3 2.Michael…   1985   8.3  18.2 0.457   0.2   1   0.167   8.2  17.2 0.474  0.462
4 3.Michael…   1986  13.4  27.8 0.482   0.1   0.8 0.182  13.2  27   0.491  0.484
5 4.Michael…   1987  13    24.4 0.535   0.1   0.6 0.132  13    23.7 0.546  0.537
6 5.Michael…   1988  11.9  22.2 0.538   0.3   1.2 0.276  11.6  21   0.553  0.546
# ℹ 33 more variables: FT <dbl>, FTA <dbl>, `FT%` <dbl>, ORB <dbl>, DRB <dbl>,
#   TRB <dbl>, AST <dbl>, STL <dbl>, BLK <dbl>, TOV <dbl>, PF <dbl>, PTS <dbl>,
#   PER <dbl>, `TS%` <dbl>, `3PAr` <dbl>, FTr <dbl>, `ORB%` <dbl>,
#   `DRB%` <dbl>, `TRB%` <dbl>, `AST%` <dbl>, `STL%` <dbl>, `BLK%` <dbl>,
#   `TOV%` <dbl>, `USG%` <dbl>, OWS <dbl>, DWS <dbl>, WS <dbl>, `WS/48` <dbl>,
#   OBPM <dbl>, DBPM <dbl>, BPM <dbl>, VORP <dbl>, win <dbl>
Code
# 去掉非数值列(如名字、赛季),只保留数值型列
numeric_df <- df %>%
  select(where(is.numeric))

# 基本描述性统计
summary(numeric_df)
     Season           FG              FGA             FG%        
 Min.   :1948   Min.   : 0.000   Min.   : 0.00   Min.   :0.0000  
 1st Qu.:1975   1st Qu.: 5.700   1st Qu.:11.60   1st Qu.:0.4410  
 Median :1993   Median : 7.500   Median :15.80   Median :0.4720  
 Mean   :1991   Mean   : 7.276   Mean   :15.24   Mean   :0.4772  
 3rd Qu.:2008   3rd Qu.: 9.000   3rd Qu.:18.90   3rd Qu.:0.5100  
 Max.   :2023   Max.   :20.000   Max.   :39.50   Max.   :1.0000  
                                                                 
       3P              3PA              3P%               2P        
 Min.   :0.0000   Min.   : 0.000   Min.   :0.0000   Min.   : 0.400  
 1st Qu.:0.0000   1st Qu.: 0.200   1st Qu.:0.2220   1st Qu.: 4.500  
 Median :0.5000   Median : 1.700   Median :0.3170   Median : 6.600  
 Mean   :0.8571   Mean   : 2.406   Mean   :0.2812   Mean   : 6.463  
 3rd Qu.:1.4000   3rd Qu.: 4.000   3rd Qu.:0.3750   3rd Qu.: 8.400  
 Max.   :5.3000   Max.   :13.200   Max.   :1.0000   Max.   :14.300  
 NA's   :423      NA's   :423      NA's   :494      NA's   :283     
      2PA             2P%              eFG%              FT        
 Min.   : 1.00   Min.   :0.3080   Min.   :0.2860   Min.   : 0.000  
 1st Qu.: 9.10   1st Qu.:0.4710   1st Qu.:0.4770   1st Qu.: 2.900  
 Median :12.80   Median :0.5010   Median :0.5070   Median : 4.400  
 Mean   :12.77   Mean   :0.5047   Mean   :0.5097   Mean   : 4.433  
 3rd Qu.:16.50   3rd Qu.:0.5360   3rd Qu.:0.5437   3rd Qu.: 5.800  
 Max.   :28.70   Max.   :0.7320   Max.   :0.7350   Max.   :11.500  
 NA's   :283     NA's   :283      NA's   :283                      
      FTA              FT%              ORB             DRB        
 Min.   : 0.000   Min.   :0.0000   Min.   :0.000   Min.   : 0.400  
 1st Qu.: 3.800   1st Qu.:0.7160   1st Qu.:0.800   1st Qu.: 3.200  
 Median : 5.600   Median :0.7830   Median :1.400   Median : 4.700  
 Mean   : 5.776   Mean   :0.7686   Mean   :1.785   Mean   : 5.199  
 3rd Qu.: 7.600   3rd Qu.:0.8350   3rd Qu.:2.500   3rd Qu.: 7.000  
 Max.   :17.000   Max.   :1.0000   Max.   :7.200   Max.   :13.700  
                                   NA's   :305     NA's   :305     
      TRB              AST              STL            BLK        
 Min.   : 0.500   Min.   : 0.000   Min.   :0.00   Min.   :0.0000  
 1st Qu.: 4.500   1st Qu.: 2.400   1st Qu.:0.80   1st Qu.:0.3000  
 Median : 6.600   Median : 3.700   Median :1.20   Median :0.6000  
 Mean   : 7.754   Mean   : 4.325   Mean   :1.24   Mean   :0.9156  
 3rd Qu.:10.500   3rd Qu.: 5.800   3rd Qu.:1.60   3rd Qu.:1.2000  
 Max.   :27.200   Max.   :14.500   Max.   :3.40   Max.   :5.0000  
 NA's   :3                         NA's   :313    NA's   :312     
      TOV            PF             PTS             PER             TS%        
 Min.   :0.2   Min.   :0.300   Min.   : 0.00   Min.   :-0.30   Min.   :0.3360  
 1st Qu.:2.0   1st Qu.:2.100   1st Qu.:14.90   1st Qu.:17.00   1st Qu.:0.5130  
 Median :2.6   Median :2.600   Median :20.00   Median :20.30   Median :0.5510  
 Mean   :2.6   Mean   :2.615   Mean   :19.59   Mean   :20.31   Mean   :0.5494  
 3rd Qu.:3.2   3rd Qu.:3.100   3rd Qu.:24.70   3rd Qu.:23.60   3rd Qu.:0.5840  
 Max.   :5.7   Max.   :4.500   Max.   :50.40   Max.   :34.10   Max.   :1.0420  
 NA's   :383                                   NA's   :8       NA's   :1       
      3PAr             FTr              ORB%             DRB%      
 Min.   :0.0000   Min.   :0.0830   Min.   : 0.000   Min.   : 3.50  
 1st Qu.:0.0120   1st Qu.:0.2870   1st Qu.: 2.800   1st Qu.:11.10  
 Median :0.1295   Median :0.3580   Median : 5.000   Median :16.10  
 Mean   :0.1689   Mean   :0.3789   Mean   : 6.042   Mean   :17.02  
 3rd Qu.:0.2818   3rd Qu.:0.4520   3rd Qu.: 8.500   3rd Qu.:22.80  
 Max.   :0.8170   Max.   :1.0000   Max.   :24.200   Max.   :37.80  
 NA's   :407      NA's   :17       NA's   :308      NA's   :308    
      TRB%             AST%            STL%            BLK%      
 Min.   : 2.700   Min.   : 0.00   Min.   :0.000   Min.   :0.000  
 1st Qu.: 7.475   1st Qu.:11.30   1st Qu.:1.300   1st Qu.:0.600  
 Median :10.500   Median :17.40   Median :1.700   Median :1.300  
 Mean   :11.575   Mean   :20.17   Mean   :1.817   Mean   :1.864  
 3rd Qu.:16.100   3rd Qu.:26.80   3rd Qu.:2.300   3rd Qu.:2.600  
 Max.   :29.700   Max.   :57.50   Max.   :4.400   Max.   :8.800  
 NA's   :237      NA's   :119     NA's   :313     NA's   :312    
      TOV%            USG%            OWS              DWS        
 Min.   : 3.00   Min.   : 6.40   Min.   :-2.400   Min.   :-0.300  
 1st Qu.:11.10   1st Qu.:21.40   1st Qu.: 2.400   1st Qu.: 1.800  
 Median :13.10   Median :25.40   Median : 4.800   Median : 3.000  
 Mean   :13.75   Mean   :25.10   Mean   : 5.151   Mean   : 3.202  
 3rd Qu.:15.68   3rd Qu.:29.27   3rd Qu.: 7.700   3rd Qu.: 4.300  
 Max.   :33.80   Max.   :41.70   Max.   :18.300   Max.   :16.000  
 NA's   :383     NA's   :383                                      
       WS             WS/48              OBPM             DBPM        
 Min.   :-0.500   Min.   :-0.1010   Min.   :-6.400   Min.   :-3.5000  
 1st Qu.: 5.000   1st Qu.: 0.1170   1st Qu.: 1.000   1st Qu.:-0.3000  
 Median : 8.200   Median : 0.1600   Median : 2.900   Median : 0.6000  
 Mean   : 8.353   Mean   : 0.1593   Mean   : 2.913   Mean   : 0.5993  
 3rd Qu.:11.300   3rd Qu.: 0.2030   3rd Qu.: 5.000   3rd Qu.: 1.6000  
 Max.   :25.400   Max.   : 0.3400   Max.   :10.300   Max.   : 4.9000  
                  NA's   :8         NA's   :315      NA's   :315      
      BPM              VORP             win       
 Min.   :-5.900   Min.   :-0.800   Min.   :10.00  
 1st Qu.: 1.200   1st Qu.: 1.600   1st Qu.:41.00  
 Median : 3.500   Median : 3.300   Median :48.50  
 Mean   : 3.513   Mean   : 3.489   Mean   :47.37  
 3rd Qu.: 5.575   3rd Qu.: 5.000   3rd Qu.:56.00  
 Max.   :13.700   Max.   :12.500   Max.   :73.00  
 NA's   :315      NA's   :315      NA's   :497    
Code
# 计算均值、标准差、最小值、最大值等更全面的统计量
Code
# 安装必要包(首次运行时需要)
#install.packages("gt")
library(dplyr)
library(tidyr)
library(gt)

# 计算全面统计量(宽数据格式)
stat_wide <- numeric_df %>%
  summarise_all(
    list(
      mean = ~mean(.x, na.rm = TRUE),
      sd = ~sd(.x, na.rm = TRUE),
      min = ~min(.x, na.rm = TRUE),
      q1 = ~quantile(.x, 0.25, na.rm = TRUE),
      median = ~median(.x, na.rm = TRUE),
      q3 = ~quantile(.x, 0.75, na.rm = TRUE),
      max = ~max(.x, na.rm = TRUE)
    )
  )

# 转换为长数据(变量与统计量分离)
stat_long <- stat_wide %>%
  pivot_longer(
    cols = everything(),
    names_to = c("variable", "statistic"),  # 拆分为变量名和统计量名
    names_sep = "_"                         # 列名通过下划线分割(如"mpg_mean" → "mpg"和"mean")
  )

# 转换为宽数据(统计量作为列,变量作为行),并保留variable列
stat_table <- stat_long %>%
  pivot_wider(
    names_from = statistic,       # 统计量名称作为列名
    values_from = value           # 统计量值作为列内容
  ) %>%
  select(
    variable,                     # 显式保留variable列(关键修正!)
    starts_with("mean"), 
    starts_with("sd"), 
    starts_with("min"), 
    starts_with("q1"), 
    starts_with("median"), 
    starts_with("q3"), 
    starts_with("max")
  )  # 按顺序排列列(可选)

# 用gt绘制表格(此时stat_table包含variable列)
stat_table %>%
  gt() %>%
  tab_header(
    title = "数值变量的描述性统计量",
    subtitle = "包含均值、标准差、分位数等"
  ) %>%
  cols_label(
    variable = "变量",  # 现在variable列存在,不会报错
    mean = "均值",
    sd = "标准差",
    min = "最小值",
    q1 = "第一四分位数 (25%)",
    median = "中位数",
    q3 = "第三四分位数 (75%)",
    max = "最大值"
  ) %>%
  fmt_number(
    columns = everything(), 
    decimals = 2
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"), 
    locations = cells_title(groups = "title")
  ) %>%
  tab_options(
    column_labels.font.weight = "bold", 
    table.font.size = 12,                
    data_row.padding = px(8)             
  )
数值变量的描述性统计量
包含均值、标准差、分位数等
变量 均值 标准差 最小值 第一四分位数 (25%) 中位数 第三四分位数 (75%) 最大值
Season 1,991.31 19.17 1,948.00 1,975.00 1,993.00 2,008.00 2,023.00
FG 7.28 2.59 0.00 5.70 7.50 9.00 20.00
FGA 15.24 5.26 0.00 11.60 15.80 18.90 39.50
FG% 0.48 0.06 0.00 0.44 0.47 0.51 1.00
3P 0.86 0.97 0.00 0.00 0.50 1.40 5.30
3PA 2.41 2.51 0.00 0.20 1.70 4.00 13.20
3P% 0.28 0.14 0.00 0.22 0.32 0.38 1.00
2P 6.46 2.61 0.40 4.50 6.60 8.40 14.30
2PA 12.77 5.09 1.00 9.10 12.80 16.50 28.70
2P% 0.50 0.05 0.31 0.47 0.50 0.54 0.73
eFG% 0.51 0.05 0.29 0.48 0.51 0.54 0.73
FT 4.43 2.10 0.00 2.90 4.40 5.80 11.50
FTA 5.78 2.69 0.00 3.80 5.60 7.60 17.00
FT% 0.77 0.10 0.00 0.72 0.78 0.83 1.00
ORB 1.78 1.22 0.00 0.80 1.40 2.50 7.20
DRB 5.20 2.50 0.40 3.20 4.70 7.00 13.70
TRB 7.75 4.43 0.50 4.50 6.60 10.50 27.20
AST 4.33 2.61 0.00 2.40 3.70 5.80 14.50
STL 1.24 0.58 0.00 0.80 1.20 1.60 3.40
BLK 0.92 0.87 0.00 0.30 0.60 1.20 5.00
TOV 2.60 0.89 0.20 2.00 2.60 3.20 5.70
PF 2.61 0.70 0.30 2.10 2.60 3.10 4.50
PTS 19.59 7.05 0.00 14.90 20.00 24.70 50.40
PER 20.31 4.79 −0.30 17.00 20.30 23.60 34.10
TS% 0.55 0.05 0.34 0.51 0.55 0.58 1.04
3PAr 0.17 0.17 0.00 0.01 0.13 0.28 0.82
FTr 0.38 0.13 0.08 0.29 0.36 0.45 1.00
ORB% 6.04 3.94 0.00 2.80 5.00 8.50 24.20
DRB% 17.02 7.12 3.50 11.10 16.10 22.80 37.80
TRB% 11.58 5.13 2.70 7.47 10.50 16.10 29.70
AST% 20.17 11.33 0.00 11.30 17.40 26.80 57.50
STL% 1.82 0.75 0.00 1.30 1.70 2.30 4.40
BLK% 1.86 1.62 0.00 0.60 1.30 2.60 8.80
TOV% 13.75 4.01 3.00 11.10 13.10 15.67 33.80
USG% 25.10 5.69 6.40 21.40 25.40 29.28 41.70
OWS 5.15 3.61 −2.40 2.40 4.80 7.70 18.30
DWS 3.20 1.94 −0.30 1.80 3.00 4.30 16.00
WS 8.35 4.64 −0.50 5.00 8.20 11.30 25.40
WS/48 0.16 0.06 −0.10 0.12 0.16 0.20 0.34
OBPM 2.91 2.77 −6.40 1.00 2.90 5.00 10.30
DBPM 0.60 1.39 −3.50 −0.30 0.60 1.60 4.90
BPM 3.51 3.38 −5.90 1.20 3.50 5.57 13.70
VORP 3.49 2.48 −0.80 1.60 3.30 5.00 12.50
win 47.37 11.51 10.00 41.00 48.50 56.00 73.00
Code
library(readxl)
library(dplyr)
# 总样本量(原始数据行数)
total_rows <- nrow(df)

# 每列的缺失值个数
missing_per_column <- colSums(is.na(df))

# 缺失值总数
total_missing <- sum(is.na(df))

# 删除所有含有缺失值的行
df_no_na <- df %>% drop_na()

# 删除后剩余的行数
remaining_rows <- nrow(df_no_na)

# 被删除的行数
removed_rows <- total_rows - remaining_rows

# 汇总信息为表格
summary_table <- data.frame(
  指标 = c("总样本量", "含缺失值的样本量", "删除后剩余样本量", "缺失值总个数","指标总数"),
  数值 = c(total_rows, removed_rows, remaining_rows, total_missing,43)
)

print(summary_table)
              指标 数值
1         总样本量 1473
2 含缺失值的样本量  551
3 删除后剩余样本量  922
4     缺失值总个数 8654
5         指标总数   43

指标随赛季变化折线图

Code
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)

df <- read_excel("C:/Users/86133/Desktop/object/MVP/常规赛.xlsx")

# ---------------------------
# 步骤1:处理常数列并归一化(避免NaN)
# ---------------------------
# 选择数值列并识别常数列(max == min)
numeric_cols <- df %>% select(where(is.numeric)) %>% colnames()
constant_cols <- sapply(df[numeric_cols], function(col) {
  isTRUE(all.equal(min(col, na.rm = TRUE), max(col, na.rm = TRUE)))
})

# 仅对非常数列进行归一化,常数列设为0(或根据需求处理)
df_scaled <- df %>%
  mutate(
    across(
      all_of(numeric_cols[!constant_cols]),  # 仅处理非常数列
      ~ (. - min(., na.rm = TRUE)) / (max(., na.rm = TRUE) - min(., na.rm = TRUE))
    ),
    across(
      all_of(numeric_cols[constant_cols]),  # 常数列设为0(或其他合理值)
      ~ 0
    ),
    Season = df$Season  # 添加赛季列
  ) %>%
  select(Season, all_of(numeric_cols))  # 整理列顺序(可选)

# ---------------------------
# 步骤2:计算各赛季均值(处理全NA情况)
# ---------------------------
mean_by_season <- df_scaled %>%
  group_by(Season) %>%
  summarise(
    across(where(is.numeric), 
           ~ ifelse(sum(is.na(.)) == length(.),  # 检查是否全为NA
                    NA_real_,                # 全NA则返回NA
                    mean(., na.rm = TRUE))),   # 否则计算均值
    .groups = "drop"
  )

# ---------------------------
# 步骤3:转换为长格式并整理
# ---------------------------
mean_long <- mean_by_season %>%
  pivot_longer(
    cols = -Season, 
    names_to = "Variable", 
    values_to = "Mean"
  ) %>%
  mutate(
    Season = factor(Season, levels = sort(unique(Season))),  # 控制赛季顺序
    Variable = factor(Variable)  # 确保变量为因子,正确分组
  ) %>%
  filter(!is.na(Mean))  # 移除均值中的NA(可选,根据需求保留)
Code
library(ggplot2)
library(dplyr)
library(patchwork)  # 用于图拼接

# 步骤1:变量分组
variables <- unique(mean_long$Variable)
n_vars <- length(variables)
group_size <- 5
group_ids <- ceiling(seq_len(n_vars) / group_size)
Group <- paste0("Group_", group_ids)
variable_group_map <- data.frame(Variable = variables, Group = Group)

mean_long_grouped <- mean_long %>%
  left_join(variable_group_map, by = "Variable")

# 步骤2:为每个 group 生成一个图(含图例)
plots <- list()

groups <- unique(mean_long_grouped$Group)

for (g in groups) {
  df_sub <- mean_long_grouped %>% filter(Group == g)
  
  p <- ggplot(df_sub, aes(x = Season, y = Mean, color = Variable, group = Variable)) +
    geom_line(size = 0.7, alpha = 0.5) +
    theme_minimal(base_size = 8) +
    theme(
      axis.text.x = element_text(angle = 90, hjust = 1, size = 4),
      axis.text.y = element_text(size = 4),
      strip.text = element_text(size = 5, face = "bold"),
      legend.position = "top",
      legend.text = element_text(size = 5),
      legend.title = element_blank(),
      plot.title = element_text(size = 5, face = "bold"),
      
    ) +
    labs(
      title = paste("归一化变量均值变化 -", g),
      x = "赛季",
      y = "归一化均值"
    )
  
  plots[[g]] <- p
}

# 步骤3:拼接所有图(2列排列)
final_plot <- wrap_plots(plots, ncol = 3)
final_plot

Code
#ggsave("grouped_plots.png", final_plot, width = 15, height = 10, dpi = 500)

1.上世纪50年代,总投篮出手次数逐渐减少而后趋于稳定,而命中率上升,可能表示技术精进、精细,团队合作

2.上世纪80-90年代,命中率(主要是两分球)达到峰值后90-2000又下降(可能是防守强度上升),后又逐渐上升(技术精进)

3.三分球占总投篮比例(3PAr)在1966年达到峰值后下降到73年逐步上升至今(幅度很大)

4.三分球出手数稳步提升于2017年超过两分球,同时三分球,两分球等投篮命中率在2000年后稳步提升

5.ORB(进攻篮板)逐年下降趋势(下降明显),AST助攻近年逐步上升

6.PF犯规逐年递减

总体和MVP对比柱状图

赛季均值数据:

Code
# 加载所需的包
library(readxl)
library(dplyr)
library(writexl)
#install.packages("writexl")
# 读取 Excel 文件
data <- read_excel("C:/Users/86133/Desktop/object/MVP/常规赛.xlsx")



# 假设年份列为 "Season",对其分组并求每列的均值
mean_by_year <- data %>%
  group_by(Season) %>%
  summarise(across(where(is.numeric), mean, na.rm = TRUE))

# 保存为新的 Excel 文件
write_xlsx(mean_by_year, path = "C:/Users/86133/Desktop/object/MVP/每年均值.xlsx")

2010-2023赛季 MVP 和 总体数据 对比柱状图:

Code
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)

# 读取数据
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_2010_23.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)

# 绘图
ggplot(compare_df, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    aes(label = round(Value, 1)),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(values = c("MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")) +
  labs(
    title = "MVP vs. League Mean: Average Performance Across All Seasons",
    x = "Metric",
    y = "Average Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=5),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

2010-2023赛季 MVP 和 总体数据 对比柱状图:

Code
# 将 MVP 和 League Mean 数据展开为宽格式
df_wide <- compare_df %>%
  pivot_wider(names_from = Type, values_from = Value)

# 计算提升百分比(%),并构建新数据框
improvement_df <- df_wide %>%
  mutate(`Improvement %` = (`MVP` - `League Mean`) / `League Mean` * 100) %>%
  select(Metric, `Improvement %`) %>%
  mutate(Type = "Improvement") %>%
  rename(Value = `Improvement %`)

compare_all <- bind_rows(compare_df, improvement_df)

library(ggplot2)

ggplot(compare_all, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(
    data = subset(compare_all, Type != "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7
  ) +
  geom_col(
    data = subset(compare_all, Type == "Improvement"),
    aes(fill = "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7,
    color = "black",
    fill = "orange",
    alpha = 0.1
  ) +
  geom_text(
    data = subset(compare_all, Type == "Improvement"),
    aes(label = paste0(round(Value, 1), "%")),
    vjust = -0.5,
    size = 3,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(
    values = c("Improvement" = "orange","MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")
  ) +
  labs(
    title = "MVP vs. League Mean + Improvement %",
    subtitle = "Average performance and MVP boost by metric (2010–2023)",
    x = "Metric",
    y = "Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=7),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

  1. 3PAr(三分出手率(3PA / FGA),三分出手占总投篮的比例) 数据低于均值
  2. 提升幅度最高的几项指标:DBPM 防守 Box Plus/Minus衡量球员对球队进攻影响, BPM 综合 Box Plus/Minus(越大越好), VORP 替代值胜利(Value Over Replacement Player),表示比替补平均水平好多少, OBPM 进攻 Box Plus/Minus(衡量球员对球队进攻影响), OWS 进攻 Box Plus/Minus(衡量球员对球队进攻影响), DWS 防守胜利贡献值(Defensive Win Shares), WS 总胜利贡献值(Win Shares)

2000-2009赛季:

Code
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)

# 读取数据
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_2000_09.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)

# 绘图
ggplot(compare_df, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    aes(label = round(Value, 1)),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(values = c("MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")) +
  labs(
    title = "MVP vs. League Mean: Average Performance Across All Seasons",
    x = "Metric",
    y = "Average Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=5),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

Code
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_2000_09.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)
# 将 MVP 和 League Mean 数据展开为宽格式
df_wide <- compare_df %>%
  pivot_wider(names_from = Type, values_from = Value)

# 计算提升百分比(%),并构建新数据框
improvement_df <- df_wide %>%
  mutate(`Improvement %` = (`MVP` - `League Mean`) / `League Mean` * 100) %>%
  select(Metric, `Improvement %`) %>%
  mutate(Type = "Improvement") %>%
  rename(Value = `Improvement %`)

compare_all <- bind_rows(compare_df, improvement_df)

library(ggplot2)

ggplot(compare_all, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(
    data = subset(compare_all, Type != "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7
  ) +
  geom_col(
    data = subset(compare_all, Type == "Improvement"),
    aes(fill = "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7,
    color = "black",
    fill = "orange",
    alpha = 0.1
  ) +
  geom_text(
    data = subset(compare_all, Type == "Improvement"),
    aes(label = paste0(round(Value, 1), "%")),
    vjust = -0.5,
    size = 3,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(
    values = c("Improvement" = "orange","MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")
  ) +
  labs(
    title = "MVP vs. League Mean + Improvement %",
    subtitle = "Average performance and MVP boost by metric (2000–2009)",
    x = "Metric",
    y = "Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=7),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

1990-1999赛季:

Code
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)

# 读取数据
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_1990_99.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)

# 绘图
ggplot(compare_df, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    aes(label = round(Value, 1)),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(values = c("MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")) +
  labs(
    title = "MVP vs. League Mean: Average Performance Across All Seasons",
    x = "Metric",
    y = "Average Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=5),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

Code
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_1990_99.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)
# 将 MVP 和 League Mean 数据展开为宽格式
df_wide <- compare_df %>%
  pivot_wider(names_from = Type, values_from = Value)

# 计算提升百分比(%),并构建新数据框
improvement_df <- df_wide %>%
  mutate(`Improvement %` = (`MVP` - `League Mean`) / `League Mean` * 100) %>%
  select(Metric, `Improvement %`) %>%
  mutate(Type = "Improvement") %>%
  rename(Value = `Improvement %`)

compare_all <- bind_rows(compare_df, improvement_df)

library(ggplot2)

ggplot(compare_all, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(
    data = subset(compare_all, Type != "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7
  ) +
  geom_col(
    data = subset(compare_all, Type == "Improvement"),
    aes(fill = "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7,
    color = "black",
    fill = "orange",
    alpha = 0.1
  ) +
  geom_text(
    data = subset(compare_all, Type == "Improvement"),
    aes(label = paste0(round(Value, 1), "%")),
    vjust = -0.5,
    size = 3,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(
    values = c("Improvement" = "orange","MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")
  ) +
  labs(
    title = "MVP vs. League Mean + Improvement %",
    subtitle = "Average performance and MVP boost by metric (1990–1999)",
    x = "Metric",
    y = "Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=7),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

1980-1989赛季:

Code
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)

# 读取数据
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_1980_89.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)

# 绘图
ggplot(compare_df, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    aes(label = round(Value, 1)),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(values = c("MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")) +
  labs(
    title = "MVP vs. League Mean: Average Performance Across All Seasons",
    x = "Metric",
    y = "Average Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=5),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )

Code
df <- read_excel("C:/Users/86133/Desktop/object/MVP/MVP_Mean/MVP_mean_1980_89.xlsx", sheet = "Sheet1")

# 提取年份和类型
df <- df %>%
  mutate(
    Season = str_extract(name, "\\d{4}"),
    Type = ifelse(grepl("Mean", name), "League Mean", "MVP")
  )

# 分别计算平均值(跨所有赛季)
mvp_mean <- df %>%
  filter(Type == "MVP") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "MVP")

league_mean <- df %>%
  filter(Type == "League Mean") %>%
  select(-name, -Season) %>%
  summarise(across(everything(), mean, na.rm = TRUE)) %>%
  pivot_longer(cols = everything(), names_to = "Metric", values_to = "Value") %>%
  mutate(Type = "League Mean")

# 合并
compare_df <- bind_rows(mvp_mean, league_mean)
# 将 MVP 和 League Mean 数据展开为宽格式
df_wide <- compare_df %>%
  pivot_wider(names_from = Type, values_from = Value)

# 计算提升百分比(%),并构建新数据框
improvement_df <- df_wide %>%
  mutate(`Improvement %` = (`MVP` - `League Mean`) / `League Mean` * 100) %>%
  select(Metric, `Improvement %`) %>%
  mutate(Type = "Improvement") %>%
  rename(Value = `Improvement %`)

compare_all <- bind_rows(compare_df, improvement_df)

library(ggplot2)

ggplot(compare_all, aes(x = Metric, y = Value, fill = Type)) +
  geom_col(
    data = subset(compare_all, Type != "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7
  ) +
  geom_col(
    data = subset(compare_all, Type == "Improvement"),
    aes(fill = "Improvement"),
    position = position_dodge(width = 0.9),
    width = 0.7,
    color = "black",
    fill = "orange",
    alpha = 0.1
  ) +
  geom_text(
    data = subset(compare_all, Type == "Improvement"),
    aes(label = paste0(round(Value, 1), "%")),
    vjust = -0.5,
    size = 3,
    position = position_dodge(width = 0.9)
  ) +
  scale_fill_manual(
    values = c("Improvement" = "orange","MVP" = "#1DA1F2", "League Mean" = "#7F8C8D")
  ) +
  labs(
    title = "MVP vs. League Mean + Improvement %",
    subtitle = "Average performance and MVP boost by metric (1980–1989)",
    x = "Metric",
    y = "Value",
    fill = "Type"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1,size=7),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "bottom"
  )