// Package scanner splits source code into a sequence of tokens.
package scanner

import "io"
import "fmt"
import "../syntax"
- type Code = []rune
- type Token struct {
- Id syntax.Id
- Content []rune
- Pos int
- }
- type TokenSequence = []Token
// Point is a position in the source text.
type Point struct {
	// Row is the line number; GetInfo starts counting at 1.
	Row int
	// Col is the column within the row. GetInfo gives the first rune of a
	// line column 1 and records a line feed as column 0 of the next row.
	Col int
}

// RowColInfo holds one Point per rune of the source, indexed by rune position.
type RowColInfo = []Point

// SemiInfo records where line feeds were removed from a token sequence:
// Scan sets the entry for index i when a line feed stood immediately before
// the token that ends up at index i of the returned sequence.
type SemiInfo = map[int]bool
// RuneListReader reads runes one at a time from an in-memory rune slice.
// Its ReadRune method has the signature required by io.RuneReader.
type RuneListReader struct {
	src []rune // runes to read from
	pos int    // index in src of the next rune to return
}

// ReadRune returns the rune at the current position and advances by one.
//
// The reported size is always 1, i.e. a rune count rather than a UTF-8 byte
// length. NOTE(review): presumably this is deliberate, so that the offsets
// reported by the pattern matching in MatchToken are rune indices into src.
//
// Once src is exhausted it returns (-1, 0, io.EOF).
func (r *RuneListReader) ReadRune() (rune, int, error) {
	if r.pos >= len(r.src) {
		return -1, 0, io.EOF
	}
	next := r.src[r.pos]
	r.pos++
	return next, 1, nil
}
- func GetInfo (code Code) RowColInfo {
- var info = make(RowColInfo, 0, 10000)
- var row = 1
- var col = 0
- for _, char := range code {
- if char != '\n' {
- col += 1
- } else {
- row += 1
- col = 0
- }
- info = append(info, Point { Row: row, Col: col })
- }
- return info
- }
- func MatchToken (code Code, pos int) (amount int, id syntax.Id) {
- for _, token := range syntax.Tokens {
- reader := &RuneListReader { src: code, pos: pos }
- loc := token.Pattern.FindReaderIndex(reader)
- // fmt.Printf("Try %v\n", token.Name)
- if loc != nil {
- if (loc[0] != 0) { panic("invalid token pattern") }
- return loc[1], syntax.Name2Id[token.Name]
- }
- }
- return 0, 0
- }
- func IsRightParOrName (token *Token) bool {
- if token != nil {
- var name = syntax.Id2Name[token.Id]
- if ( name == ")" || name == "]" || name == ">" ||
- name == "Name" || name == "EsId" ) {
- return true
- } else {
- return false
- }
- } else {
- return false
- }
- }
- func IsReturnKeyword (token *Token) bool {
- var NameId = syntax.Name2Id["Name"]
- if token != nil {
- return (token.Id == NameId && string(token.Content) == "return")
- } else {
- return false
- }
- }
- func Try2InsertExtra (tokens TokenSequence, current Token) TokenSequence {
- /*
- * A blank magic to distinguish
- * let t = f(g*h)(x)
- * between
- * let t = f
- * (g*h)(x)
- */
- var current_name = syntax.Id2Name[current.Id]
- if current_name == "(" || current_name == "<" {
- return append(tokens, Token {
- Id: syntax.Name2Id["Call"],
- Pos: current.Pos,
- Content: []rune(""),
- })
- } else if current_name == "[" || current_name == "." {
- return append(tokens, Token {
- Id: syntax.Name2Id["Get"],
- Pos: current.Pos,
- Content: []rune(""),
- })
- }
- return tokens
- }
- func Scan (code Code) (TokenSequence, RowColInfo, SemiInfo) {
- var BlankId = syntax.Name2Id["Blank"]
- var CommentId = syntax.Name2Id["Comment"]
- var LFId = syntax.Name2Id["LF"]
- var RCBId = syntax.Name2Id["}"]
- var LtId = syntax.Name2Id["<"]
- var tokens = make(TokenSequence, 0, 10000)
- var semi = make(SemiInfo)
- var info = GetInfo(code)
- var length = len(code)
- var previous_ptr *Token
- var has_blank_between_prev = false
- var pos = 0
- for pos < length {
- // fmt.Printf("pos %v\n", pos)
- amount, id := MatchToken(code, pos)
- if amount == 0 { break }
- if id == CommentId { pos += amount; continue }
- if id == BlankId {
- pos += amount
- has_blank_between_prev = true
- continue
- }
- var current = Token {
- Id: id,
- Pos: pos,
- Content: code[pos : pos+amount],
- }
- if IsRightParOrName(previous_ptr) {
- /* tell from "a [LF] (b+c).d" and "a(b+c).d" */
- if !(id == LtId && has_blank_between_prev) {
- /* tell from "S<T>" and "s < t" */
- tokens = Try2InsertExtra(tokens, current)
- }
- }
- if current.Id == LFId || current.Id == RCBId {
- /* tell from "return [LF] expr" and "return expr" */
- if IsReturnKeyword(previous_ptr) {
- tokens = append(tokens, Token {
- Id: syntax.Name2Id["Void"],
- Pos: current.Pos,
- Content: []rune(""),
- })
- }
- }
- tokens = append(tokens, current)
- previous_ptr = ¤t
- if has_blank_between_prev { has_blank_between_prev = false }
- pos += amount
- }
- var clear = make(TokenSequence, 0, 10000)
- for _, token := range tokens {
- if token.Id != LFId {
- clear = append(clear, token)
- } else {
- semi[len(clear)] = true
- }
- }
- if (pos < length) {
- panic(fmt.Sprintf("invalid token at %+v", info[pos]))
- }
- return clear, info, semi
- }
|