diff --git a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt index 5e2f4a4b..8ee91209 100644 --- a/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt +++ b/src/commonMain/kotlin/ai/hypergraph/kaliningraph/parsing/SeqValiant.kt @@ -3,14 +3,13 @@ package ai.hypergraph.kaliningraph.parsing import ai.hypergraph.kaliningraph.tensor.UTMatrix import ai.hypergraph.kaliningraph.types.* -typealias PForest = Set -operator fun PForest.contains(v: Σᐩ) = PTree(v) in this -fun PSingleton(v: Σᐩ): List<Π2A> = listOf(PTree(v) to PTree()) +typealias PForest = Map +fun PSingleton(v: String): List<Π2A> = listOf(PTree(v) to PTree()) // Algebraic data type / polynomial functor for parse forests -class PTree(val root: Σᐩ = "ε", val branches: List<Π2A> = listOf()) { +class PTree(val root: String = "ε", val branches: List<Π2A> = listOf()) { // Returns the set of all strings derivable from the given PTree - fun choose(): Sequence<Σᐩ> = + fun choose(): Sequence = if (branches.isEmpty()) sequenceOf(if("ε" in root) "" else root) else branches.asSequence().flatMap { (l, r) -> // TODO: Use weighted choice mechanism @@ -18,44 +17,38 @@ class PTree(val root: Σᐩ = "ε", val branches: List<Π2A> = listOf()) if (a == "") b else if (b == "") a else "$a $b" } } - - override fun hashCode(): Int = root.hashCode() - override fun equals(other: Any?) = hashCode() == other.hashCode() } // Lazily computes all syntactically strings compatible with the given template -fun CFG.solveSeq(s: Σᐩ): Sequence<Σᐩ> = - initPForestMat(s.tokenizeByWhitespace()).seekFixpoint().diagonals.last()[0] - .firstOrNull { it.root == START_SYMBOL }?.choose() ?: emptySequence() +fun CFG.solveSeq(s: String): Sequence = + initPForestMat(s.tokenizeByWhitespace()).seekFixpoint() + .diagonals.last()[0][START_SYMBOL]?.choose() ?: emptySequence() -fun CFG.initPForestMat(tokens: List<Σᐩ>): UTMatrix = +fun CFG.initPForestMat(tokens: List): UTMatrix = UTMatrix( ts = tokens.map { token -> (if (token != HOLE_MARKER) bimap[listOf(token)] else unitNonterminals) .associateWith { nt -> if (token != HOLE_MARKER) PSingleton(token) else bimap.UNITS[nt]?.map { PSingleton(it) }?.flatten() ?: listOf() - }.map { (k, v) -> PTree(k, v) }.toSet() + }.map { (k, v) -> k to PTree(k, v) }.toMap() }.toTypedArray(), algebra = Ring.of( - nil = emptySet(), + nil = emptyMap(), plus = { x, y -> merge(x, y) }, times = { x, y -> joinSeq(x, y) }, ) ) fun merge(X: PForest, Z: PForest): PForest = - (X.toList() + Z).groupBy { it.root }.map { (k, v) -> - PTree(k, v.map { it.branches }.flatten()) - }.toSet() + (X.keys + Z.keys).associateWith { k -> + PTree(k, (X[k]?.branches ?: listOf()) + (Z[k]?.branches ?: listOf())) + } // X ⊗ Z := { w | ∈ X × Z, (w -> xz) ∈ P } fun CFG.joinSeq(X: PForest, Z: PForest): PForest = bimap.TRIPL.filter { (_, x, z) -> x in X && z in Z } - .groupingBy { it.first }.aggregate { _, acc: List<Π2A>?, it, _-> - val (w, x, z) = it - val ptreeX = X.first { it.root == x } - val ptreeZ = Z.first { it.root == z } - val pair = ptreeX to ptreeZ + .groupingBy { it.first }.aggregate { _, acc: List<Π2A>?, it, _-> + val pair = X[it.second]!! to Z[it.third]!! if (acc == null) listOf(pair) else acc + pair - }.map { (k, v) -> PTree(k, v) }.toSet() \ No newline at end of file + }.map { (k, v) -> k to PTree(k, v) }.toMap() \ No newline at end of file diff --git a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt index 0f611f83..6caadc2f 100644 --- a/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt +++ b/src/commonTest/kotlin/ai/hypergraph/kaliningraph/parsing/SetValiantTest.kt @@ -583,8 +583,13 @@ Yield_Arg -> From_Keyword Test | Testlist_Endcomma val refLst = refStr.tokenizeByWhitespace() val template = List(refLst.size + 3) { "_" }.joinToString(" ") measureTime { - seq2parsePythonCFG.solve(template, levMetric(refStr)) - .onEach { println("Δ=${levenshtein(it, refStr)}: $it") } +// seq2parsePythonCFG.solve(template, levMetric(refStr)) + seq2parsePythonCFG.solveSeq(template) + .map { it to levenshtein(it, refStr) } + .filter { it.second < 4 }.distinct() + .sortedWith(compareBy({ it.second }, { it.first.length })).toList() + .also { it.take(1000).forEach { println("Δ=${it.second}: ${it.first}") } } +// .onEach { println("Δ=${levenshtein(it, refStr)}: $it") } .also { println("Found ${it.size} solutions!") } }.also { println("Finished in ${it.inWholeMilliseconds}ms.") } }