Skip to content

Commit

Permalink
specialize sampler based on sample space cardinality
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Dec 7, 2023
1 parent 67d117a commit f3cc52e
Showing 1 changed file with 15 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,22 @@ fun CFG.solveSeq(tokens: List<String>): Sequence<String> =
/**
 * Enumerates strings for [tokens] by sampling the start parse tree without
 * replacement.
 *
 * Intended to never yield duplicates; noted as the second-fastest sampler and
 * the planned future default. Yields an empty sequence when no start parse
 * tree can be built for [tokens].
 */
fun CFG.enumSeq(tokens: List<String>): Sequence<String> {
  val parseTree = startPTree(tokens)
  // Fall back to an empty sequence when there is no parse tree to sample from.
  return parseTree?.sampleStrWithoutReplacement() ?: emptySequence()
}

// Picks a sampler based on the cardinality of the sample space: samples
// without replacement when the number of parse trees is small, and with
// replacement (so duplicates may occur) when it is large.
fun CFG.enumSeqSmart(tokens: List<String>): Sequence<String> =
startPTree(tokens)?.let { pt ->
if (BigInteger.ONE < pt.inverseDensity) pt.sampleStrWithoutReplacement()
if (BigInteger.ONE < pt.inverseDensity) {
if (pt.totalTrees < BigInteger(100_000)) {
println("Small number of parse trees (${pt.totalTrees}), sampling without replacement!")
pt.sampleStrWithoutReplacement()
}
else {
println("Large number of parse trees (${pt.totalTrees}), sampling with replacement!")
pt.sampleWithReplacement()
}
}
// This means the grammar is highly ambiguous and we would probably be
// better off sampling from the bottom-up, instead of from the top-down.
else {
Expand Down

0 comments on commit f3cc52e

Please sign in to comment.