8000 Merge pull request #141 from rubychan/lua-scanner · tricknotes/coderay@1e330f1 · GitHub
[go: up one dir, main page]

Skip to content

Commit 1e330f1

Browse files
committed
Merge pull request rubychan#141 from rubychan/lua-scanner
Lua scanner, tweaked (finally!)
2 parents 546b489 + 90c401c commit 1e330f1

File tree

6 files changed

+284
-2
lines changed

6 files changed

+284
-2
lines changed

Changes.textile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ p=. _This file lists all changes in the CodeRay library since the 0.9.8 release
44

55
h2. Changes in 1.1
66

7+
* New scanner: Lua [#21, #22, thanks to Quintus]
78
* New scanner: Sass [#93]
89
* New scanner: Taskpaper [#39, thanks to shimomura]
910
* Diff scanner: Highlight inline changes in multi-line changes [#99]
@@ -17,6 +18,7 @@ h2. Changes in 1.1
1718
* @CodeRay::TokenKinds@ should not be frozen [#130, thanks to Gavin Kistner]
1819
* New token type @:id@ for CSS/Sass [#27]
1920
* New token type @:done@ for Taskpaper [#39]
21+
* New token type @:map@ for Lua, introducing a nice nested-shades trick [#22, thanks to Quintus and nathany]
2022
* Display line numbers in HTML @:table@ mode even for single-line code (remove special case) [#41, thanks to Ariejan de Vroom]
2123
* Override Bootstrap's pre word-break setting for line numbers [#102, thanks to lightswitch05]
2224
* Fixed @:docstring@ token type style

lib/coderay/encoders/debug_lint.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def begin_group kind
3535
end
3636

3737
def end_group kind
38-
raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind}" if @opened.pop != kind
38+
raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind} (end_group)" if @opened.pop != kind
3939
super
4040
end
4141

@@ -45,7 +45,7 @@ def begin_line kind
4545
end
4646

4747
def end_line kind
48-
raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind}" if @opened.pop != kind
48+
raise IncorrectTokenGroupNesting, "We are inside #{@opened.inspect}, not #{kind} (end_line)" if @opened.pop != kind
4949
super
5050
end
5151

lib/coderay/helpers/file_type.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def shebang filename
9696
'java' => :java,
9797
'js' => :java_script,
9898
'json' => :json,
99+
'lua' => :lua,
99100
'mab' => :ruby,
100101
'pas' => :delphi,
101102
'patch' => :diff,

lib/coderay/scanners/lua.rb

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
# encoding: utf-8
2+
3+
module CodeRay
4+
module Scanners
5+
6+
# Scanner for the Lua[http://lua.org] programming language.
7+
#
8+
# The language’s complete syntax is defined in
9+
# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
10+
# which is what this scanner tries to conform to.
11+
class Lua < Scanner
12+
13+
register_for :lua
14+
file_extension 'lua'
15+
title 'Lua'
16+
17+
# Keywords used in Lua.
18+
KEYWORDS = %w[and break do else elseif end
19+
for function goto if in
20+
local not or repeat return
21+
then until while
22+
]
23+
24+
# Constants set by the Lua core.
25+
PREDEFINED_CONSTANTS = %w[false true nil]
26+
27+
# The expressions contained in this array are parts of Lua’s `basic'
28+
# library. Although it’s not entirely necessary to load that library,
29+
# it is highly recommended and one would have to provide own implementations
30+
# of some of these expressions if one does not do so. They however aren’t
31+
# keywords, neither are they constants, but nearly predefined, so they
32+
# get tagged as `predefined' rather than anything else.
33+
#
34+
# This list excludes values of form `_UPPERCASE' because the Lua manual
35+
# requires such identifiers to be reserved by Lua anyway and they are
36+
# highlighted directly accordingly, without the need for specific
37+
# identifiers to be listed here.
38+
PREDEFINED_EXPRESSIONS = %w[
39+
assert collectgarbage dofile error getmetatable
40+
ipairs load loadfile next pairs pcall print
41+
rawequal rawget rawlen rawset select setmetatable
42+
tonumber tostring type xpcall
43+
]
44+
45+
# Automatic token kind selection for normal words.
46+
IDENT_KIND = CodeRay::WordList.new(:ident).
47+
add(KEYWORDS, :keyword).
48+
add(PREDEFINED_CONSTANTS, :predefined_constant).
49+
add(PREDEFINED_EXPRESSIONS, :predefined)
50+
51+
protected
52+
53+
# Scanner initialization.
54+
def setup
55+
@state = :initial
56+
@brace_depth = 0
57+
end
58+
59+
# CodeRay entry hook. Starts parsing.
60+
def scan_tokens(encoder, options)
61+
state = options[:state] || @state
62+
63+
until eos?
64+
case state
65+
66+
when :initial
67+
if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]]
68+
@num_equals = match.count("=") # Number must match for comment end
69+
encoder.begin_group(:comment)
70+
encoder.text_token(match, :delimiter)
71+
state = :long_comment
72+
73+
elsif match = scan(/--.*$/) # --Lua comment
74+
encoder.text_token(match, :comment)
75+
76+
elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]]
77+
@num_equals = match.count("=") # Number must match for string end
78+
encoder.begin_group(:string)
79+
encoder.text_token(match, :delimiter)
80+
state = :long_string
81+
82+
elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label::
83+
encoder.text_token(match, :label)
84+
85+
elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua
86+
encoder.text_token(match, :predefined)
87+
88+
elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
89+
kind = IDENT_KIND[match]
90+
91+
# Extra highlighting for entities following certain keywords
92+
if kind == :keyword and match == "function"
93+
state = :function_expected
94+
elsif kind == :keyword and match == "goto"
95+
state = :goto_label_expected
96+
elsif kind == :keyword and match == "local"
97+
state = :local_var_expected
98+
end
99+
100+
encoder.text_token(match, kind)
101+
102+
elsif match = scan(/\{/) # Opening table brace {
103+
encoder.begin_group(:map)
104+
encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter)
105+
@brace_depth += 1
106+
state = :map
107+
108+
elsif match = scan(/\}/) # Closing table brace }
109+
if @brace_depth == 1
110+
@brace_depth = 0
111+
encoder.text_token(match, :delimiter)
112+
encoder.end_group(:map)
113+
elsif @brace_depth == 0 # Mismatched brace
114+
encoder.text_token(match, :error)
115+
else
116+
@brace_depth -= 1
117+
encoder.text_token(match, :inline_delimiter)
118+
encoder.end_group(:map)
119+
state = :map
120+
end
121+
122+
elsif match = scan(/["']/) # String delimiters " and '
123+
encoder.begin_group(:string)
124+
encoder.text_token(match, :delimiter)
125+
@start_delim = match
126+
state = :string
127+
128+
# ↓Prefix hex number ←|→ decimal number
129+
elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
130+
encoder.text_token(match, :float)
131+
132+
# ↓Prefix hex number ←|→ decimal number
133+
elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
134+
encoder.text_token(match, :integer)
135+
136+
elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
137+
encoder.text_token(match, :operator)
138+
139+
elsif match = scan(/\s+/) # Space
140+
encoder.text_token(match, :space)
141+
142+
else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors.
143+
encoder.text_token(getch, :error)
144+
end
145+
146+
# It may be that we’re scanning a full-blown subexpression of a table
147+
# (tables can contain full expressions in parts).
148+
# If this is the case, return to :map scanning state.
149+
state = :map if state == :initial && @brace_depth >= 1
150+
151+
when :function_expected
152+
if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
153+
encoder.text_token(match, :operator)
154+
state = :initial
155+
elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
156+
encoder.text_token(match, :ident)
157+
elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
158+
encoder.text_token(match, :function)
159+
state = :initial
160+
elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
161+
encoder.text_token(match, :space)
162+
else
163+
encoder.text_token(getch, :error)
164+
state = :initial
165+
end
166+
167+
when :goto_label_expected
168+
if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
169+
encoder.text_token(match, :label)
170+
state = :initial
171+
elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
172+
encoder.text_token(match, :space)
173+
else
174+
encoder.text_token(getch, :error)
175+
end
176+
177+
when :local_var_expected
178+
if match = scan(/function/) # local function ...
179+
encoder.text_token(match, :keyword)
180+
state = :function_expected
181+
elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
182+
encoder.text_token(match, :local_variable)
183+
elsif match = scan(/,/)
184+
encoder.text_token(match, :operator)
185+
elsif match = scan(/\=/)
186+
encoder.text_token(match, :operator)
187+
# After encountering the equal sign, arbitrary expressions are
188+
# allowed again, so just return to the main state for further
189+
# parsing.
190+
state = :initial
191+
elsif match = scan(/\n/)
192+
encoder.text_token(match, :space)
193+
state = :initial
194+
elsif match = scan(/\s+/)
195+
encoder.text_token(match, :space)
196+
else
197+
encoder.text_token(getch, :error)
198+
end
199+
200+
when :long_comment
201+
if match = scan(/.*?(?=\]={#@num_equals}\])/m)
202+
encoder.text_token(match, :content)
203+
204+
delim = scan(/\]={#@num_equals}\]/)
205+
encoder.text_token(delim, :delimiter)
206+
else # No terminator found till EOF
207+
encoder.text_token(rest, :error)
208+
terminate
209+
end
210+
encoder.end_group(:comment)
211+
state = :initial
212+
213+
when :long_string
214+
if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences
215+
encoder.text_token(match, :content)
216+
217+
delim = scan(/\]={#@num_equals}\]/)
218+
encoder.text_token(delim, :delimiter)
219+
else # No terminator found till EOF
220+
encoder.text_token(rest, :error)
221+
terminate
222+
end
223+
encoder.end_group(:string)
224+
state = :initial
225+
226+
when :string
227+
if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
228+
encoder.text_token(match, :content)
229+
elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
230+
encoder.text_token(match, :char)
231+
elsif match = scan(Regexp.compile(@start_delim))
232+
encoder.text_token(match, :delimiter)
233+
encoder.end_group(:string)
234+
state = :initial
235+
elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
236+
encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
237+
encoder.end_group(:string)
238+
state = :initial
239+
else
240+
encoder.text_token(getch, :error)
241+
end
242+
243+
when :map
244+
if match = scan(/[,;]/)
245+
encoder.text_token(match, :operator)
246+
elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x)
247+
encoder.text_token(match, :key)
248+
encoder.text_token(scan(/\s+/), :space) if check(/\s+/)
249+
encoder.text_token(scan(/\=/), :operator)
250+
state = :initial
251+
elsif match = scan(/\s+/m)
252+
encoder.text_token(match, :space)
253+
else
254+
# Note this clause doesn’t advance the scan pointer, it’s a kind of
255+
# "retry with other options" (the :initial state then of course
256+
# advances the pointer).
257+
state = :initial
258+
end
259+
else
260+
raise
261+
end
262+
263+
end
264+
265+
if options[:keep_state]
266+
@state = state
267+
end
268+
269+
encoder
270+
end
271+
272+
end
273+
274+
end
275+
end

lib/coderay/styles/alpha.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ class Alpha < Style
9999
.keyword { color:#080; font-weight:bold }
100100
.label { color:#970; font-weight:bold }
101101
.local-variable { color:#963 }
102+
.map .content { color:#808 }
103+
.map .delimiter { color:#40A}
104+
.map { background-color:hsla(200,100%,50%,0.06); }
102105
.namespace { color:#707; font-weight:bold }
103106
.octal { color:#40E }
104107
.operator { }

lib/coderay/token_kinds.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ module CodeRay
5151
:keyword => 'keyword', # reserved word that's actually implemented; most scanners
5252
:label => 'label', # C, PHP
5353
:local_variable => 'local-variable', # local and magic variables; some scanners
54+
:map => 'map', # Lua tables
5455
:modifier => 'modifier', # used inside on strings; lots of scanners
5556
:namespace => 'namespace', # Clojure, Java, Taskpaper
5657
:octal => 'octal', # lots of scanners

0 commit comments

Comments
 (0)
0