Skip to content

Commit

Permalink
simplify and evaluate
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Sep 25, 2023
1 parent 68ffcfb commit 19492f1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,52 @@ package ai.hypergraph.kaliningraph.parsing
import ai.hypergraph.kaliningraph.tensor.UTMatrix
import ai.hypergraph.kaliningraph.types.*

typealias PForest = Set<PTree>
operator fun PForest.contains(v: Σᐩ) = PTree(v) in this
fun PSingleton(v: Σᐩ): List<Π2A<PTree>> = listOf(PTree(v) to PTree())
typealias PForest = Map<String, PTree>
fun PSingleton(v: String): List<Π2A<PTree>> = listOf(PTree(v) to PTree())

// Algebraic data type / polynomial functor for parse forests
class PTree(val root: Σᐩ = "ε", val branches: List<Π2A<PTree>> = listOf()) {
class PTree(val root: String = "ε", val branches: List<Π2A<PTree>> = listOf()) {
// Returns the set of all strings derivable from the given PTree
fun choose(): Sequence<Σᐩ> =
fun choose(): Sequence<String> =
if (branches.isEmpty()) sequenceOf(if("ε" in root) "" else root)
else branches.asSequence().flatMap { (l, r) ->
// TODO: Use weighted choice mechanism
(l.choose() * r.choose()).map { (a, b) ->
if (a == "") b else if (b == "") a else "$a $b"
}
}

override fun hashCode(): Int = root.hashCode()
override fun equals(other: Any?) = hashCode() == other.hashCode()
}

// Lazily computes all syntactically valid strings compatible with the given template
fun CFG.solveSeq(s: Σᐩ): Sequence<Σᐩ> =
initPForestMat(s.tokenizeByWhitespace()).seekFixpoint().diagonals.last()[0]
.firstOrNull { it.root == START_SYMBOL }?.choose() ?: emptySequence()
fun CFG.solveSeq(s: String): Sequence<String> =
initPForestMat(s.tokenizeByWhitespace()).seekFixpoint()
.diagonals.last()[0][START_SYMBOL]?.choose() ?: emptySequence()

fun CFG.initPForestMat(tokens: List<Σᐩ>): UTMatrix<PForest> =
fun CFG.initPForestMat(tokens: List<String>): UTMatrix<PForest> =
UTMatrix(
ts = tokens.map { token ->
(if (token != HOLE_MARKER) bimap[listOf(token)] else unitNonterminals)
.associateWith { nt ->
if (token != HOLE_MARKER) PSingleton(token)
else bimap.UNITS[nt]?.map { PSingleton(it) }?.flatten() ?: listOf()
}.map { (k, v) -> PTree(k, v) }.toSet()
}.map { (k, v) -> k to PTree(k, v) }.toMap()
}.toTypedArray(),
algebra = Ring.of(
nil = emptySet(),
nil = emptyMap(),
plus = { x, y -> merge(x, y) },
times = { x, y -> joinSeq(x, y) },
)
)

fun merge(X: PForest, Z: PForest): PForest =
(X.toList() + Z).groupBy { it.root }.map { (k, v) ->
PTree(k, v.map { it.branches }.flatten())
}.toSet()
(X.keys + Z.keys).associateWith { k ->
PTree(k, (X[k]?.branches ?: listOf()) + (Z[k]?.branches ?: listOf()))
}

// X ⊗ Z := { w | <x, z> ∈ X × Z, (w -> xz) ∈ P }
fun CFG.joinSeq(X: PForest, Z: PForest): PForest =
bimap.TRIPL.filter { (_, x, z) -> x in X && z in Z }
.groupingBy { it.first }.aggregate { _, acc: List<Π2A<PTree>>?, it, _->
val (w, x, z) = it
val ptreeX = X.first { it.root == x }
val ptreeZ = Z.first { it.root == z }
val pair = ptreeX to ptreeZ
.groupingBy { it.first }.aggregate { _, acc: List<Π2A<PTree>>?, it, _->
val pair = X[it.second]!! to Z[it.third]!!
if (acc == null) listOf(pair) else acc + pair
}.map { (k, v) -> PTree(k, v) }.toSet()
}.map { (k, v) -> k to PTree(k, v) }.toMap()
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,13 @@ Yield_Arg -> From_Keyword Test | Testlist_Endcomma
val refLst = refStr.tokenizeByWhitespace()
val template = List(refLst.size + 3) { "_" }.joinToString(" ")
measureTime {
seq2parsePythonCFG.solve(template, levMetric(refStr))
.onEach { println("Δ=${levenshtein(it, refStr)}: $it") }
// seq2parsePythonCFG.solve(template, levMetric(refStr))
seq2parsePythonCFG.solveSeq(template)
.map { it to levenshtein(it, refStr) }
.filter { it.second < 4 }.distinct()
.sortedWith(compareBy({ it.second }, { it.first.length })).toList()
.also { it.take(1000).forEach { println("Δ=${it.second}: ${it.first}") } }
// .onEach { println("Δ=${levenshtein(it, refStr)}: $it") }
.also { println("Found ${it.size} solutions!") }
}.also { println("Finished in ${it.inWholeMilliseconds}ms.") }
}
Expand Down

0 comments on commit 19492f1

Please sign in to comment.