scanner.go 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. package scanner
  2. import "io"
  3. import "fmt"
  4. import "../syntax"
  // Code is the source text given to the scanner, held as a slice of runes so
  // that positions index characters rather than bytes.
  type Code = []rune
  6. type Token struct {
  7. Id syntax.Id
  8. Content []rune
  9. Pos int
  10. }
  11. type TokenSequence = []Token
  // Point is a row/column position in the source text.
  type Point struct {
  	Row int // line number
  	Col int // column within the line
  }

  // RowColInfo holds one Point per rune of the source, indexed by rune position.
  type RowColInfo = []Point
  // SemiInfo is a set of token indexes: an index maps to true when a line feed
  // was seen in the source just before the token at that index.
  type SemiInfo = map[int]bool
  // RuneListReader reads runes one at a time from an in-memory rune slice,
  // starting at a chosen offset.
  type RuneListReader struct {
  	src []rune // runes to read from
  	pos int    // index of the next rune to return
  }

  // ReadRune returns the next rune of the slice and advances the reader.
  //
  // The reported size is always 1, i.e. sizes are counted in runes rather than
  // in encoded bytes. Once the slice is exhausted it returns (-1, 0, io.EOF).
  func (r *RuneListReader) ReadRune() (rune, int, error) {
  	if r.pos < len(r.src) {
  		ch := r.src[r.pos]
  		r.pos++
  		return ch, 1, nil
  	}
  	return -1, 0, io.EOF
  }
  30. func GetInfo (code Code) RowColInfo {
  31. var info = make(RowColInfo, 0, 10000)
  32. var row = 1
  33. var col = 0
  34. for _, char := range code {
  35. if char != '\n' {
  36. col += 1
  37. } else {
  38. row += 1
  39. col = 0
  40. }
  41. info = append(info, Point { Row: row, Col: col })
  42. }
  43. return info
  44. }
  45. func MatchToken (code Code, pos int) (amount int, id syntax.Id) {
  46. for _, token := range syntax.Tokens {
  47. reader := &RuneListReader { src: code, pos: pos }
  48. loc := token.Pattern.FindReaderIndex(reader)
  49. // fmt.Printf("Try %v\n", token.Name)
  50. if loc != nil {
  51. if (loc[0] != 0) { panic("invalid token pattern") }
  52. return loc[1], syntax.Name2Id[token.Name]
  53. }
  54. }
  55. return 0, 0
  56. }
  57. func IsRightParOrName (token *Token) bool {
  58. if token != nil {
  59. var name = syntax.Id2Name[token.Id]
  60. if ( name == ")" || name == "]" || name == ">" ||
  61. name == "Name" || name == "EsId" ) {
  62. return true
  63. } else {
  64. return false
  65. }
  66. } else {
  67. return false
  68. }
  69. }
  70. func IsReturnKeyword (token *Token) bool {
  71. var NameId = syntax.Name2Id["Name"]
  72. if token != nil {
  73. return (token.Id == NameId && string(token.Content) == "return")
  74. } else {
  75. return false
  76. }
  77. }
  78. func Try2InsertExtra (tokens TokenSequence, current Token) TokenSequence {
  79. /*
  80. * A blank magic to distinguish
  81. * let t = f(g*h)(x)
  82. * between
  83. * let t = f
  84. * (g*h)(x)
  85. */
  86. var current_name = syntax.Id2Name[current.Id]
  87. if current_name == "(" || current_name == "<" {
  88. return append(tokens, Token {
  89. Id: syntax.Name2Id["Call"],
  90. Pos: current.Pos,
  91. Content: []rune(""),
  92. })
  93. } else if current_name == "[" || current_name == "." {
  94. return append(tokens, Token {
  95. Id: syntax.Name2Id["Get"],
  96. Pos: current.Pos,
  97. Content: []rune(""),
  98. })
  99. }
  100. return tokens
  101. }
  102. func Scan (code Code) (TokenSequence, RowColInfo, SemiInfo) {
  103. var BlankId = syntax.Name2Id["Blank"]
  104. var CommentId = syntax.Name2Id["Comment"]
  105. var LFId = syntax.Name2Id["LF"]
  106. var RCBId = syntax.Name2Id["}"]
  107. var LtId = syntax.Name2Id["<"]
  108. var tokens = make(TokenSequence, 0, 10000)
  109. var semi = make(SemiInfo)
  110. var info = GetInfo(code)
  111. var length = len(code)
  112. var previous_ptr *Token
  113. var has_blank_between_prev = false
  114. var pos = 0
  115. for pos < length {
  116. // fmt.Printf("pos %v\n", pos)
  117. amount, id := MatchToken(code, pos)
  118. if amount == 0 { break }
  119. if id == CommentId { pos += amount; continue }
  120. if id == BlankId {
  121. pos += amount
  122. has_blank_between_prev = true
  123. continue
  124. }
  125. var current = Token {
  126. Id: id,
  127. Pos: pos,
  128. Content: code[pos : pos+amount],
  129. }
  130. if IsRightParOrName(previous_ptr) {
  131. /* tell from "a [LF] (b+c).d" and "a(b+c).d" */
  132. if !(id == LtId && has_blank_between_prev) {
  133. /* tell from "S<T>" and "s < t" */
  134. tokens = Try2InsertExtra(tokens, current)
  135. }
  136. }
  137. if current.Id == LFId || current.Id == RCBId {
  138. /* tell from "return [LF] expr" and "return expr" */
  139. if IsReturnKeyword(previous_ptr) {
  140. tokens = append(tokens, Token {
  141. Id: syntax.Name2Id["Void"],
  142. Pos: current.Pos,
  143. Content: []rune(""),
  144. })
  145. }
  146. }
  147. tokens = append(tokens, current)
  148. previous_ptr = &current
  149. if has_blank_between_prev { has_blank_between_prev = false }
  150. pos += amount
  151. }
  152. var clear = make(TokenSequence, 0, 10000)
  153. for _, token := range tokens {
  154. if token.Id != LFId {
  155. clear = append(clear, token)
  156. } else {
  157. semi[len(clear)] = true
  158. }
  159. }
  160. if (pos < length) {
  161. panic(fmt.Sprintf("invalid token at %+v", info[pos]))
  162. }
  163. return clear, info, semi
  164. }