This repository has been archived by the owner on May 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ntbk_asresults_graphs2015.Rmd
137 lines (121 loc) · 6.13 KB
/
ntbk_asresults_graphs2015.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
---
title: "GRAPHS-2015 Algorithm Selection Results"
output:
  html_document:
    df_print: paged
    toc: yes
bibliography: GRAPHS-2015/citation.bib
---
## Description
Script to reproduce algorithm selection results from [@kotthoff_portfolios_2016].
```{r results='hide', message=FALSE, warning=FALSE}
library(tidyr)
library(magrittr)
library(ggplot2)
library(ggthemes)
library(grid)
library(scales)
library(aslib)
library(llama)
library(randomForest)
library(parallelMap)
```
```{r load_dataset}
# Parse the GRAPHS-2015 scenario from disk and convert it into a LLAMA data
# object (with cross-validation folds).
dataset_raw <- parseASScenario("GRAPHS-2015")
dataset_structured <- convertToLlamaCVFolds(dataset_raw)

# Presolver 1: remove the instances solved by IncompleteLAD, i.e. those whose
# `lad_features` run status is "presolved".
runstatus <- dataset_raw$feature.runstatus
presolved_ids <- runstatus$instance_id[runstatus$lad_features == "presolved"]
keep_unpresolved <- !(dataset_structured$data$instance_id %in% presolved_ids)
dataset_notPresolved <- dataset_structured
# `data` and `best` are filtered with the same row mask so they stay aligned.
dataset_notPresolved$data <- subset(dataset_structured$data, keep_unpresolved)
dataset_notPresolved$best <- subset(dataset_structured$best, keep_unpresolved)

# Presolver 2: remove the instances VF2 solves within 50 ms; only instances
# with a `vf2` value above 50 are kept.
is_hard <- dataset_notPresolved$data$vf2 > 50
dataset_hard <- dataset_notPresolved
dataset_hard$data <- subset(dataset_notPresolved$data, is_hard)
dataset_hard$best <- subset(dataset_notPresolved$best, is_hard)
# The fold assignment is rebuilt for the reduced instance set.
dataset_hard <- cvFolds(dataset_hard)

# Sanity check on the instance counts: should be 5725 and 2336.
n_all <- nrow(dataset_structured$data)
n_hard <- nrow(dataset_hard$data)
cat("Count of all instances =", n_all, ", Count of hard instances =", n_hard)
```
## Train model
```{r train_hard}
# Train the pairwise random-forest regression selector on the hard instances,
# or reuse the cached model when "model_regr_hard.rds" already exists.
if (!file.exists("model_regr_hard.rds")) {
  # Training is spread over 4 local socket workers.
  parallelStartSocket(4)
  # The workers are shut down again even if training fails; previously the
  # socket cluster was never stopped and outlived this chunk.
  tryCatch(
    {
      parallelLibrary("llama", "mlr")
      start_time <- Sys.time()
      model_regr_hard <- regressionPairs(makeLearner("regr.randomForest"), dataset_hard)
      end_time <- Sys.time()
    },
    finally = parallelStop()
  )
  # Cache the trained model so later runs take the `else` branch.
  saveRDS(model_regr_hard, "model_regr_hard.rds")
  cat("Training started at", format(start_time, "%X"), "and ended at", format(end_time, "%X"), "\n")
  # Last expression of the branch: the elapsed training time.
  end_time - start_time
} else {
  model_regr_hard <- readRDS("model_regr_hard.rds")
  cat("Loaded model_regr_hard from disk.\n")
}
```
## Algorithm selection results
```{r asd}
# Build a one-row summary of a selector evaluated on `dataset_hard`.
#   label    - text stored in the `model` column.
#   selector - anything the LLAMA evaluation functions accept as their second
#              argument (here: `vbs`, `singleBest`, or a trained model).
# Returns a data frame with the mean misclassification penalty, the number of
# successes, and the mean and median PAR score computed with factor = 1.
summarise_selector <- function(label, selector) {
  par_scores <- parscores(dataset_hard, selector, factor = 1)
  data.frame(
    model = label,
    mean.misclassification.penalty = mean(misclassificationPenalties(dataset_hard, selector)),
    solved = sum(successes(dataset_hard, selector)),
    mean.performance = mean(par_scores),
    median.performance = median(par_scores)
  )
}

resvbs <- summarise_selector("Virtual best solver", vbs)
ressb <- summarise_selector("Single best solver", singleBest)
resrp <- summarise_selector("Pairwise random forest regression", model_regr_hard)

# Displayed order: virtual best, trained selector, single best.
rbind(resvbs, resrp, ressb)
```
## ECDF plot
```{r ecdf_all, warning=FALSE}
# ECDF of run times for every individual solver, the virtual best solver and
# the trained portfolio, with a zoomed inset drawn on top of the full plot.

# Scenario cutoff time; the x axis runs from 1 up to just below it.
cutoff <- dataset_raw$desc$algorithm_cutoff_time

# One row per (instance_id, iteration) of the virtual-best predictions; only
# the instance ids are kept, the score column is replaced by the PAR score
# (factor = 1).
vbs.agg <- aggregate(score ~ instance_id + iteration, vbs(dataset_hard), function(ss) { ss[1] })
# NOTE(review): this assumes parscores() returns values in the same instance
# order as the aggregated rows - confirm against LLAMA.
vbs.agg$virtual.best <- parscores(dataset_hard, vbs, factor = 1)
vbs.agg$iteration <- NULL
vbs.agg$score <- NULL

# PAR scores (factor = 1) of the trained selector, keyed by instance id.
# NOTE(review): same ordering assumption as above - confirm.
pmod <- data.frame(instance_id = unique(model_regr_hard$predictions$instance_id),
                   portfolio = parscores(dataset_hard, model_regr_hard, factor = 1))

# Per-solver performance columns joined with the two portfolio columns.
perfs <- subset(dataset_hard$data, TRUE, c("instance_id", dataset_hard$performance))
perfs <- merge(perfs, vbs.agg, by = "instance_id")
perfs <- merge(perfs, pmod, by = "instance_id")

# Long format: one row per (instance, solver) with its time.
wide <- gather(perfs, "solver", "time", names(perfs)[-1])
# "pf" marks the two portfolio curves, "alg" the individual solvers.
wide$type <- ifelse(wide$solver %in% c("virtual.best", "portfolio"), "pf", "alg")

# Full-range plot. Linetypes follow the sorted levels of `type`:
# "alg" -> 3 (dotted), "pf" -> 1 (solid). The linetype legend is suppressed
# with guide = "none"; the former guide = FALSE is deprecated in ggplot2.
# NOTE(review): a numeric legend.position is deprecated as of ggplot2 3.5.0
# (legend.position.inside); left unchanged to stay compatible with older
# ggplot2 versions.
p.full <- ggplot(wide, aes(x = time, col = solver, linetype = type)) +
  stat_ecdf() +
  scale_linetype_manual(values = c(3, 1), guide = "none") +
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x, n = 10),
                labels = trans_format("log10", math_format(10^.x)),
                limits = c(1, cutoff - 1)) +
  coord_cartesian(xlim = c(1, cutoff - 1),
                  ylim = c(0, 1)) +
  ylab("fraction of instances solved") + xlab("time [ms]") +
  annotation_logticks(sides = "b") +
  theme_tufte(base_family = 'Times', base_size = 14) +
  guides(col = guide_legend(ncol = 2, keyheight = .8)) +
  theme(legend.justification = c(1, 0), legend.position = c(1, 0.5), aspect.ratio = 0.6,
        axis.line = element_line(colour = "black"), panel.grid = element_line(),
        panel.grid.major = element_line(colour = "lightgray"))

# Zoomed plot: last decade of the time axis and the top 5% of the y axis,
# without legend or axis titles so it can be used as an inset.
p.zoom <- ggplot(wide, aes(x = time, col = solver, linetype = type)) +
  stat_ecdf() +
  scale_linetype_manual(values = c(3, 1), guide = "none") +
  scale_x_log10(breaks = trans_breaks("log10", function(x) 10^x, n = 3),
                labels = trans_format("log10", math_format(10^.x)),
                limits = c(1, cutoff - 1)) +
  coord_cartesian(xlim = c(cutoff / 10, cutoff - 1),
                  ylim = c(.95, 1)) +
  annotation_logticks(sides = "b") +
  theme_tufte(base_family = 'Times', base_size = 14) +
  theme(legend.position = "none",
        axis.title.x = element_blank(), axis.title.y = element_blank(),
        panel.background = element_rect(fill = 'white', colour = "white"),
        axis.line = element_line(colour = "black"),
        panel.grid = element_line(),
        panel.grid.major = element_line(colour = "lightgray"))

# Draw the full plot, then the zoomed plot inside a grid viewport on top.
vp <- viewport(width = 0.5, height = 0.3, x = 0.7, y = 0.42)
print(p.full)
print(p.zoom, vp = vp)
```
## Source code
- [Source Rmd file](https://github.com/kvrigor/oraqle-dev/blob/master/project/ntbk_asresults_graphs2015.Rmd)
- [GRAPHS-2015 EDA Sweave source code](https://github.com/ciaranm/lion2016-sip-portfolios-paper/blob/master/portfolio/as-graphs.Rnw)