From 69e9615dbc27ae3b4fa64d1fcfa01434f8af4cee Mon Sep 17 00:00:00 2001 From: Mohamed Elqdusy Date: Mon, 22 Jan 2018 20:00:29 +0100 Subject: [PATCH] Syntax: support multiprecision integer literals (#58) * Syntax: support multiprecision integer literals * Use the INT token type for both cases: the parser handles int64 and *big.Int values as tokens of type INT * Tests for bigInt * Testing large integer literals * Deleting some comments * Fixing a space --- eval.go | 8 +++++++- repl/repl.go | 6 ------ syntax/parse.go | 6 +++++- syntax/scan.go | 11 +++++++++++ syntax/scan_test.go | 9 ++++++++- syntax/syntax.go | 2 +- testdata/int.sky | 2 ++ 7 files changed, 34 insertions(+), 10 deletions(-) diff --git a/eval.go b/eval.go index ef03553..3c87862 100644 --- a/eval.go +++ b/eval.go @@ -9,6 +9,7 @@ import ( "fmt" "log" "math" + "math/big" "sort" "strings" "unicode" @@ -723,7 +724,12 @@ func eval(fr *Frame, e syntax.Expr) (Value, error) { case *syntax.Literal: switch e.Token { case syntax.INT: - return MakeInt64(e.Value.(int64)), nil + switch e.Value.(type) { + case int64: + return MakeInt64(e.Value.(int64)), nil + case *big.Int: + return Int{e.Value.(*big.Int)}, nil + } case syntax.FLOAT: return Float(e.Value.(float64)), nil case syntax.STRING: diff --git a/repl/repl.go b/repl/repl.go index 4334540..567773a 100644 --- a/repl/repl.go +++ b/repl/repl.go @@ -14,12 +14,6 @@ package repl // TODO(adonovan): // -// - Distinguish expressions from statements more precisely. -// Otherwise e.g. 1 is parsed as an expression but -// 1000000000000000000000000000 is parsed as a file -// because the scanner fails to convert it to an int64. -// The spec should clarify limits on numeric literals. 
-// // - Unparenthesized tuples are not parsed as a single expression: // >>> (1, 2) // (1, 2) diff --git a/syntax/parse.go b/syntax/parse.go index e4c5644..45bc7ab 100644 --- a/syntax/parse.go +++ b/syntax/parse.go @@ -778,7 +778,11 @@ func (p *parser) parsePrimary() Expr { tok := p.tok switch tok { case INT: - val = p.tokval.int + if p.tokval.bigInt != nil { + val = p.tokval.bigInt + } else { + val = p.tokval.int + } case FLOAT: val = p.tokval.float case STRING: diff --git a/syntax/scan.go b/syntax/scan.go index 6b6c5e8..c2f2537 100644 --- a/syntax/scan.go +++ b/syntax/scan.go @@ -11,6 +11,7 @@ import ( "io" "io/ioutil" "log" + "math/big" "strconv" "strings" "unicode" @@ -336,6 +337,7 @@ func (sc *scanner) readRune() rune { type tokenValue struct { raw string // raw text of token int int64 // decoded int + bigInt *big.Int // decoded integers > int64 float float64 // decoded float string string // decoded string pos Position // start position of token @@ -862,12 +864,21 @@ func (sc *scanner) scanNumber(val *tokenValue, c rune) Token { } else { var err error s := val.raw + val.bigInt = nil if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') { val.int, err = strconv.ParseInt(s[2:], 8, 64) } else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') { val.int, err = strconv.ParseInt(s[2:], 2, 64) } else { val.int, err = strconv.ParseInt(s, 0, 64) + if err != nil { + num := new(big.Int) + var ok bool = true + val.bigInt, ok = num.SetString(s, 0) + if ok { + err = nil + } + } } if err != nil { sc.error(start, "invalid int literal") diff --git a/syntax/scan_test.go b/syntax/scan_test.go index 6d1dba6..7a4d3e5 100644 --- a/syntax/scan_test.go +++ b/syntax/scan_test.go @@ -35,7 +35,11 @@ func scan(src interface{}) (tokens string, err error) { case IDENT: buf.WriteString(val.raw) case INT: - fmt.Fprintf(&buf, "%d", val.int) + if val.bigInt != nil { + fmt.Fprintf(&buf, "%d", val.bigInt) + } else { + fmt.Fprintf(&buf, "%d", val.int) + } case FLOAT: 
fmt.Fprintf(&buf, "%e", val.float) case STRING: @@ -152,6 +156,8 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated {"1e-1", `1.000000e-01 EOF`}, {"123", `123 EOF`}, {"123e45", `1.230000e+47 EOF`}, + {"999999999999999999999999999999999999999999999999999", `999999999999999999999999999999999999999999999999999 EOF`}, + {"12345678901234567890", `12345678901234567890 EOF`}, // hex {"0xA", `10 EOF`}, {"0xAAG", `170 G EOF`}, @@ -160,6 +166,7 @@ pass`, "pass newline pass EOF"}, // consecutive newlines are consolidated {"0XG", `foo.sky:1:1: invalid hex literal`}, {"0xA.", `10 . EOF`}, {"0xA.e1", `10 . e1 EOF`}, + {"0x12345678deadbeef12345678", `5634002672576678570168178296 EOF`}, // binary {"0b1010", `10 EOF`}, {"0B111101", `61 EOF`}, diff --git a/syntax/syntax.go b/syntax/syntax.go index 7850a41..2b1c965 100644 --- a/syntax/syntax.go +++ b/syntax/syntax.go @@ -217,7 +217,7 @@ type Literal struct { Token Token // = STRING | INT TokenPos Position Raw string // uninterpreted text - Value interface{} // = string | int + Value interface{} // = string | int64 | *big.Int } func (x *Literal) Span() (start, end Position) { diff --git a/testdata/int.sky b/testdata/int.sky index df11042..2596f0f 100644 --- a/testdata/int.sky +++ b/testdata/int.sky @@ -93,10 +93,12 @@ assert.eq(int("12", 16), 18) assert.eq(int("-12", 16), -18) assert.eq(int("0x12", 16), 18) assert.eq(int("-0x12", 16), -18) +assert.eq(0x1000000000000001 * 0x1000000000000001, 0x1000000000000002000000000000001) assert.eq(int("1010", 2), 10) assert.eq(int("111111101", 2), 509) assert.eq(int("0b0101", 0), 5) assert.eq(int("0b00000", 0), 0) +assert.eq(1111111111111111 * 1111111111111111, 1234567901234567654320987654321) assert.fails(lambda: int("0x123", 8), "invalid literal.*base 8") assert.fails(lambda: int("-0x123", 8), "invalid literal.*base 8") assert.fails(lambda: int("0o123", 16), "invalid literal.*base 16")