# КОД ДЛЯ ВОСПРОИЗВОДСТВА РЕЗУЛЬТАТОВ СТАТЬИ: # Гнидченко А.А. Концентрация мировой торговли и сегрегация товарных # рынков // Журнал Новой экономической ассоциации, 2025, №1 (66). # <используемая кодировка: UTF-8> # # 1) Подготовительный этап #### ## 1.1) Автоматическая установка директории #### bx <- "box" bx_new <- bx[!(bx %in% installed.packages()[,"Package"])] if(length(bx_new)) install.packages(bx_new) setwd(box::file()) getwd() # проверка ## 1.2) Загрузка пакетов #### packs <- c( "data.table","dplyr","Hmisc","grid","gridExtra","ggplot2","MASS","car", "Cairo","factoextra","gtools","openxlsx","glue","stats","reshape2") newpacks <- packs[!(packs %in% installed.packages()[,"Package"])] if(length(newpacks)) install.packages(newpacks) lapply(packs, library, character.only=T)[[1]] ## 1.3) Прописывание пользовательских функций #### `%!in%` = Negate(`%in%`) zv <- function(x){ifelse(x<0.01,"***",ifelse(x<0.05,"**",ifelse(x<0.1,"*","")))} NAP <- function(x) { x[is.na(x)] <- ""; x } HHI <- function(data) { data <- data.frame(as.numeric(as.character(data))) res <- sum((data/sum(data, na.rm=T))^2, na.rm=T); res } sumNA <- function(value,threshold) { value=as.numeric(value) if(missing(threshold)) { sum(value,na.rm = TRUE) } else { ifelse(threshold>1 | threshold<0, stop("threshold should vary between 0 and 1"), ifelse(threshold>=sum(is.na(value)==TRUE)/length(value), sum(value,na.rm = TRUE),sum(value,na.rm = FALSE))) } } medianNA <- function(value,threshold) { if(missing(threshold)) { median(value,na.rm = TRUE) } else { ifelse(threshold>1 | threshold<0, stop("threshold should vary between 0 and 1"), ifelse(threshold>=sum(is.na(value)==TRUE)/length(value), median(value,na.rm = TRUE),median(value,na.rm = FALSE))) } } meanNA <- function(value,threshold) { if(missing(threshold)) { mean(value,na.rm = TRUE) } else { ifelse(threshold>1 | threshold<0, stop("threshold should vary between 0 and 1"), ifelse(threshold>=sum(is.na(value)==TRUE)/length(value), mean(value,na.rm = TRUE),mean(value,na.rm = FALSE))) } } w.perc <- function (x, weights, probs, normwt = F, na.rm, type = c("quantile", "(i-1)/(n-1)", "i/(n+1)", "i/n")) { if (!length(weights)) return(quantile(x, probs = probs, na.rm = na.rm)) type <- match.arg(type) if (any(probs < 0 | probs > 1)) stop("Probabilities must be between 0 and 1 inclusive") nams <- paste(format(round(probs * 100, if ( length(probs) > 1) 2 - log10(diff(range(probs))) else 2)), "%", sep = "") if (type == "quantile") { library("Hmisc") w <- wtd.table(x, weights, na.rm = na.rm, normwt = normwt, type = "list") x <- w$x wts <- w$sum.of.weights n <- sum(wts) order <- 1 + (n - 1) * probs low <- pmax(floor(order), 1) high <- pmin(low + 1, n) order <- order%%1 allq <- tryCatch( { approx(cumsum(wts), x, xout = c(low, high), method = "constant", f = 1, rule = 2)$y }, error=function(cond) { message("Some minor errors executed:") message(cond) message("") return(NA) }, finally={ } ) k <- length(probs) quantiles <- (1 - order) * allq[1:k] + order * allq[-(1:k)] quantiles <- if (length(quantiles)==k) { quantiles } else { vector(length=k)*NA } names(quantiles) <- nams return(quantiles) } w <- wtd.Ecdf(x, weights, na.rm = na.rm, type = type, normwt = normwt) structure(approx(w$ecdf, w$x, xout = probs, rule = 2)$y, names = nams) } theme_GOST_base <- function (base_size = 14, base_family = "serif") { theme_bw(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(hjust = 0.5, size = 16, margin = margin(t = 0, b = 10)), axis.text = element_text(color = "black"), plot.margin = margin(0.5, 0.2, 0.25, 0.25, "cm") ) } # 2) Подготовка данных #### ## 2.1) Загрузка данных с сайта CEPII #### getOption('timeout') options(timeout=100) temp <- tempfile() download.file("http://www.cepii.fr/DATA_DOWNLOAD/baci/data/BACI_HS17_V202401.zip",temp) CepiiData <- read.csv(unz(temp, "BACI_HS17_Y2022_V202401.csv")) do.call(file.remove, list(temp)) unlink(temp) gc() year <- CepiiData$t[1] ## 2.2) Первичная обработка данных #### CepiiData <- CepiiData[, -which(names(CepiiData) %in% c("t","q"))] CepiiData$k[nchar(CepiiData$k)==5] <- paste0("0",CepiiData$k[nchar(CepiiData$k)==5]) CepiiData$i <- as.character(CepiiData$i) CepiiData$j <- as.character(CepiiData$j) CepiiData$c <- substr(CepiiData$k,1,4) gc() ## 2.3) Формирование данных на уровне 4 знаков ТН ВЭД #### Cepii4 <- data.table::dcast(as.data.table(CepiiData), i+j+c~"v", sum, value.var=c("v")) names(Cepii4)[ncol(Cepii4)] <- "v" rm(CepiiData) gc() # 3) Расчеты #### ## 3.1) Расчет мирового объема торговли по товару #### TOT <- Cepii4 %>% group_by(c) %>% summarise(v=sum(v)) %>% as.data.frame() TOT$v <- TOT$v/1000000 # млрд долл. ## 3.2) Расчет базовых показателей #### # (этот блок считается несколько минут) setDT(Cepii4)[, V_m := sumNA(v), by = .(j, c)] # объем импорта по товару c из всех стран setDT(Cepii4)[, V_x := sumNA(v), by = .(i, c)] # объем экспорта по товару c во все страны setDT(Cepii4)[, hi_m := HHI(v), by = .(j, c)] # HHI для импортера товара c по странам-экспортерам setDT(Cepii4)[, hi_x := HHI(v), by = .(i, c)] # HHI для экспортера товара c по странам-импортерам gc() ## 3.3) Рис. 1 #### options(scipen=7) tov_exmpl <- c("8601","2606") tit_exmpl <- c("Локомотивы железнодорожные (ТН ВЭД 8601)", "Руды и концентраты алюминиевые (ТН ВЭД 2606)") leg_pos <- c("none","bottom") list_pic <- list() i=1 while(i% group_by(clust3) %>% summarise( Kol = sum(as.numeric(clust3)/as.numeric(clust3)), HI2 = median(HI2), HJ2 = median(HJ2), HI1 = median(HI1), HJ1 = median(HJ1), RP = median(RP), SI = median(SI), SJ = median(SJ)) %>% as.data.frame() CLR[,3:ncol(CLR)] <- round(CLR[,3:ncol(CLR)],3) CLR <- as.data.frame(t(CLR)) MEDS <- DF_sub %>% group_by("всего") %>% summarise( Kol = sum(as.numeric(clust3)/as.numeric(clust3)), HI2 = median(HI2), HJ2 = median(HJ2), HI1 = median(HI1), HJ1 = median(HJ1), RP = median(RP), SI = median(SI), SJ = median(SJ)) %>% as.data.frame() MEDS[,3:ncol(MEDS)] <- round(MEDS[,3:ncol(MEDS)],3) MEDS <- as.data.frame(t(MEDS)) CLR$V4 <- MEDS$V1 openxlsx::write.xlsx(CLR,"./tab4.xlsx", rowNames=T, colNames=F) ## 5.4) Рис. 5 #### b1 <- ggplot(DF_sub) + geom_text(aes(PC1, PC2, label=clust3, col=clust3), size=2) + theme_GOST_base() + xlab(glue::glue("Первая компонента ({prc7[1]}%)")) + ylab(glue::glue("Вторая компонента \n({prc7[2]}%)")) + theme(legend.position="none") + scale_color_manual(values=c("grey60","grey40","black","red")) + ggtitle("(А)") + scale_y_continuous(breaks=seq(-6,6,2)) b2 <- ggplot(DF_sub) + geom_text(aes(HI2, HJ2, label=clust3, col=clust3), size=2) + theme_GOST_base() + xlab(glue::glue("Концентрация поставщиков (HI2)")) + ylab(glue::glue("Концентрация покупателей \n(HJ2)")) + theme(legend.position="none") + scale_color_manual(values=c("grey60","grey40","black","red")) + ggtitle("(Б)") + scale_x_continuous(breaks=seq(0,1,0.2)) + scale_y_continuous(breaks=seq(0,1,0.2)) b3 <- ggplot(DF_sub) + geom_text(aes(SJ, RP, label=clust3, col=clust3), size=2) + theme_GOST_base() + xlab(glue::glue("Сегрегация рынков по покупателям (SJ)")) + ylab(glue::glue("Относительная рыночная \nвласть поставщиков (RP)")) + theme(legend.position="none") + scale_color_manual(values=c("grey60","grey40","black","red")) + ggtitle("(В)") + scale_x_continuous(breaks=seq(0,1,0.2)) + scale_y_continuous(breaks=seq(-1,1,0.2)) b4 <- ggplot(DF_sub) + geom_text(aes(SI, RP, label=clust3, col=clust3), size=2) + theme_GOST_base() + xlab(glue::glue("Сегрегация рынков по поставщикам (SI)")) + ylab(glue::glue("Относительная рыночная \nвласть поставщиков (RP)")) + theme(legend.position="none") + scale_color_manual(values=c("grey60","grey40","black","red")) + ggtitle("(Г)") + scale_x_continuous(breaks=seq(0,1,0.2)) + scale_y_continuous(breaks=seq(-1,1,0.2)) # комбинированный график из 4 панелей с разделительными линиями между панелями line_h <- ggplot() + geom_hline(yintercept=1, linetype="solid", linewidth=0.75, col=alpha("black",0.5)) + theme_void() line_v <- ggplot() + geom_vline(xintercept=1, linetype="solid", linewidth=0.75, col=alpha("black",0.5)) + theme_void() + theme(plot.margin = unit(c(0.25, 0, 0.025, 0), "inches")) line_no <- ggplot() + theme_void() part1 <- gridExtra::grid.arrange(b1,line_v,b2, ncol=3, widths=c(8,0.1,8)) part2 <- gridExtra::grid.arrange(b3,line_no,b4, ncol=3, widths=c(8,0.1,8)) png('./pic5.png', width = 10, height = 7, units = 'in', res = 500) gridExtra::grid.arrange(part1,line_h,part2, heights=c(8,0.1,8)) dev.off() CairoPDF("./pic5.pdf", width = 10, height = 7) gridExtra::grid.arrange(part1,line_h,part2, heights=c(8,0.1,8)) dev.off() # Приложение (состав кластеров и их вклад в торговлю) #### options(scipen=7) DF_sub$V <- TOT$v[match(DF_sub$Product, TOT$c)] ann <- as.data.frame(c(1,2,3)) names(ann) <- "cluster" j=1 while(j < nrow(ann)+1){ ann$share[j] <- round(sum(DF_sub$V[DF_sub$clust3==j])/sum(DF_sub$V)*100,1) ann$prod_no[j] <- length(DF_sub$Product[DF_sub$clust3==j]) ann$products[j] <- paste(DF_sub$Product[DF_sub$clust3==j], collapse=', ') j=j+1 } ann$share[1] <- 100-sum(ann$share[2:3]) openxlsx::write.xlsx(ann, "./annex.xlsx") ### ### КОНЕЦ КОДА ###