From 9f568d9a82b2df68c814a0830ba725f563477a3b Mon Sep 17 00:00:00 2001 From: Quintus Date: Sat, 21 Apr 2012 22:47:39 +0200 Subject: [PATCH 01/14] Add Lua scanner --- lib/coderay/scanners/lua.rb | 137 ++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 lib/coderay/scanners/lua.rb diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb new file mode 100644 index 00000000..2540a2fa --- /dev/null +++ b/lib/coderay/scanners/lua.rb @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- + +# http://www.lua.org/manual/5.2/manual.html +class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner + + register_for :lua + file_extension "lua" + title "Lua" + + KEYWORDS = %w[and break do else elseif end + for function goto if in + local not or repeat return + then until while + ] + + PREDEFINED_CONSTANTS = %w[false true nil] + + IDENT_KIND = CodeRay::WordList.new(:ident) + .add(KEYWORDS, :keyword) + .add(PREDEFINED_CONSTANTS, :predefined_constant) + + protected + + def setup + @state = :initial + end + + def scan_tokens(encoder, options) + @encoder = encoder + @options = options + + send(:"handle_state_#@state") until eos? 
+ + + @encoder + end + + def handle_state_initial + if match = scan(/\-\-\[\=*\[/) #--[[ long (possibly multiline) comment ]] + @num_equals = match.count("=") # Number must match for comment end + @encoder.begin_group(:comment) + @encoder.text_token(match, :delimiter) + @state = :long_comment + elsif match = scan(/--.*?$/) # --Lua comment + @encoder.text_token(match, :comment) + elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] + @num_equals = match.count("=") # Number must match for comment end + @encoder.begin_group(:string) + @encoder.text_token(match, :delimiter) + @state = :long_string + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\{\}\[\]:;,] | \.(?!\d)/x) + @encoder.text_token(match, :operator) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) + kind = IDENT_KIND[match] + + if kind == :keyword and match == "function" + @state = :function_expected + end + + @encoder.text_token(match, kind) + elsif match = scan(/["']/) + @encoder.begin_group(:string) + @encoder.text_token(match, :delimiter) + @start_delim = match + @state = :string # hex number ←|→ decimal number + elsif match = scan(/0x(?:[0-9a-z])* \. [0-9a-z]+ (?:p-\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power + @encoder.text_token(match, :float) # hex | decimal + elsif match = scan(/0x[0-9a-z]+ (?:p-\d+)? | \d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power + @encoder.text_token(match, :integer) + elsif match = scan(/\s+/) + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + end + end + + def handle_state_function_expected + if match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] 
(?!\.\d))* \./x) # function tbl.subtbl.foo() + @encoder.text_token(match, :ident) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() + @encoder.text_token(match, :function) + @state = :initial + elsif match = scan(/\s+/) # Between the function keyword and the ident may be any amount of whitespace + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + @state = :initial + end + end + + def handle_state_long_comment + if match = scan(/.*?(?=\]={#@num_equals}\])/m) + @encoder.text_token(match, :content) + + delim = scan(/\]={#@num_equals}\]/) + @encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + @encoder.text_token(rest, :error) + terminate + end + @encoder.end_group(:comment) + @state = :initial + end + + def handle_state_long_string + if match = scan(/.*?(?=\]={#@num_equals}\])/m) # Long strings do not interpret any escape sequences + @encoder.text_token(match, :content) + + delim = scan(/\]={#@num_equals}\]/) + @encoder.text_token(delim, :delimiter) + else # No terminator found till EOF + @encoder.text_token(rest, :error) + terminate + end + @encoder.end_group(:string) + @state = :initial + end + + def handle_state_string + if match = scan(/[^\\#@start_delim\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z) + @encoder.text_token(match, :content) + elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m) + @encoder.text_token(match, :char) + elsif match = scan(Regexp.compile(@start_delim)) + @encoder.text_token(match, :delimiter) + @encoder.end_group(:string) + @state = :initial + elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings + @encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings + @encoder.end_group(:string) + @state = :initial + else + @encoder.text_token(getch, 
:error) + end + end + +end From 2a2ac128a1acd37e75450073f726bf9f7e176ab3 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sat, 21 Apr 2012 23:13:46 +0200 Subject: [PATCH 02/14] Correctly highlight obscure numbers such as 0x.a3p-5. --- lib/coderay/scanners/lua.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 2540a2fa..49abbb17 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -48,8 +48,6 @@ def handle_state_initial @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @state = :long_string - elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\{\}\[\]:;,] | \.(?!\d)/x) - @encoder.text_token(match, :operator) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) kind = IDENT_KIND[match] @@ -62,11 +60,13 @@ def handle_state_initial @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @start_delim = match - @state = :string # hex number ←|→ decimal number - elsif match = scan(/0x(?:[0-9a-z])* \. [0-9a-z]+ (?:p-\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power - @encoder.text_token(match, :float) # hex | decimal - elsif match = scan(/0x[0-9a-z]+ (?:p-\d+)? | \d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power + @state = :string # hex number ←|→ decimal number + elsif match = scan(/0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power + @encoder.text_token(match, :float) #hex | decimal + elsif match = scan(/0x\h+ (?:p[+\-]?\d+)? 
| \d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power @encoder.text_token(match, :integer) + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\{\}\[\]:;,] | \.(?!\d)/x) + @encoder.text_token(match, :operator) elsif match = scan(/\s+/) @encoder.text_token(match, :space) else From dcf7b1d0909e323675e90c7e701fb63e41ca042a Mon Sep 17 00:00:00 2001 From: Quintus Date: Sat, 21 Apr 2012 23:39:35 +0200 Subject: [PATCH 03/14] =?UTF-8?q?Recognize=20=5FUPPERCASE=20tokens=20as=20?= =?UTF-8?q?Lua=20reserved=20idents.=20:reserved=20would=20be=20a=20more=20?= =?UTF-8?q?fitting=20token=20kind=20than=20:predefined,=20but=20unfortunat?= =?UTF-8?q?ely=20:reserved=20looks=20like=20:keyword=20in=20Coderay?= =?UTF-8?q?=E2=80=99s=20default=20stylesheet=20and=20this=20is=20NOT=20the?= =?UTF-8?q?=20same=20in=20Lua.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/coderay/scanners/lua.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 49abbb17..b05798fb 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -48,6 +48,8 @@ def handle_state_initial @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @state = :long_string + elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua + @encoder.text_token(match, :predefined) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) kind = IDENT_KIND[match] From 955db8fdfb9f5c4b63cdd3cffad7b46cccbf89dc Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 12:05:49 +0200 Subject: [PATCH 04/14] Highlight goto labels --- lib/coderay/scanners/lua.rb | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index b05798fb..f6ea811e 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -48,13 +48,18 @@ def 
handle_state_initial @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @state = :long_string + elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label:: + @encoder.text_token(match, :label) elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua @encoder.text_token(match, :predefined) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) kind = IDENT_KIND[match] + # Extra highlighting for entities following certain keywords if kind == :keyword and match == "function" @state = :function_expected + elsif kind == :keyword and match == "goto" + @state = :goto_label_expected end @encoder.text_token(match, kind) @@ -82,7 +87,7 @@ def handle_state_function_expected elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() @encoder.text_token(match, :function) @state = :initial - elsif match = scan(/\s+/) # Between the function keyword and the ident may be any amount of whitespace + elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace @encoder.text_token(match, :space) else @encoder.text_token(getch, :error) @@ -90,6 +95,17 @@ def handle_state_function_expected end end + def handle_state_goto_label_expected + if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + @encoder.text_token(match, :label) + @state = :initial + elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + end + end + def handle_state_long_comment if match = scan(/.*?(?=\]={#@num_equals}\])/m) @encoder.text_token(match, :content) From 5d3ad0b97ff5d7476bfccc8747dd3f9e7df20d01 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 12:44:41 +0200 Subject: [PATCH 05/14] Highlight local variable declarations --- lib/coderay/scanners/lua.rb | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/lib/coderay/scanners/lua.rb 
b/lib/coderay/scanners/lua.rb index f6ea811e..52e53b2c 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -60,6 +60,8 @@ def handle_state_initial @state = :function_expected elsif kind == :keyword and match == "goto" @state = :goto_label_expected + elsif kind == :keyword and match == "local" + @state = :local_var_expected end @encoder.text_token(match, kind) @@ -106,6 +108,27 @@ def handle_state_goto_label_expected end end + def handle_state_local_var_expected + if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + @encoder.text_token(match, :local_variable) + elsif match = scan(/,/) + @encoder.text_token(match, :operator) + elsif match = scan(/\=/) + @encoder.text_token(match, :operator) + # After encountering the equal sign, arbitrary expressions are + # allowed again, so just return to the main state for further + # parsing. + @state = :initial + elsif match = scan(/\n/) + @encoder.text_token(match, :space) + @state = :initial + elsif match = scan(/\s+/) + @encoder.text_token(match, :space) + else + @encoder.text_token(getch, :error) + end + end + def handle_state_long_comment if match = scan(/.*?(?=\]={#@num_equals}\])/m) @encoder.text_token(match, :content) From 6e2c99766ac3223012a5cedf595a9d062d11d5fc Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 15:37:44 +0200 Subject: [PATCH 06/14] Recognize the various kinds of table definitions Lua allows. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tbl2 = { [30] = 10, a = 1, [{["a"] = "f"}] = 4, c = {a = 5}, d = {a = 3, b = "fff", {["foo"] = 1}}, e = 9, "foo", {}, true, 3 } Nice, ain’t it? 
--- lib/coderay/scanners/lua.rb | 66 ++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 52e53b2c..b1eeaef3 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -22,7 +22,8 @@ class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner protected def setup - @state = :initial + @state = :initial + @brace_depth = 0 end def scan_tokens(encoder, options) @@ -41,17 +42,22 @@ def handle_state_initial @encoder.begin_group(:comment) @encoder.text_token(match, :delimiter) @state = :long_comment + elsif match = scan(/--.*?$/) # --Lua comment @encoder.text_token(match, :comment) + elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]] @num_equals = match.count("=") # Number must match for comment end @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @state = :long_string + elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]+\s*::/) # ::goto_label:: @encoder.text_token(match, :label) + elsif match = scan(/_[A-Z]+/) # _UPPERCASE are names reserved for Lua @encoder.text_token(match, :predefined) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits) kind = IDENT_KIND[match] @@ -65,22 +71,54 @@ def handle_state_initial end @encoder.text_token(match, kind) + + elsif match = scan(/{/) + @encoder.begin_group(:table) + @encoder.text_token(match, @brace_depth >= 1 ? 
:inline_delimiter : :delimiter) + @brace_depth += 1 + @state = :table + + elsif match = scan(/}/) + if @brace_depth == 1 + @brace_depth = 0 + @encoder.text_token(match, :delimiter) + elsif @brace_depth == 0 # Mismatched brace + @encoder.text_token(match, :error) + else + @brace_depth -= 1 + @encoder.text_token(match, :inline_delimiter) + @state = :table + end + @encoder.end_group(:table) + elsif match = scan(/["']/) @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @start_delim = match - @state = :string # hex number ←|→ decimal number + @state = :string + + # hex number ←|→ decimal number elsif match = scan(/0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power - @encoder.text_token(match, :float) #hex | decimal + @encoder.text_token(match, :float) + + # hex number ←|→ decimal number elsif match = scan(/0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power @encoder.text_token(match, :integer) - elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\{\}\[\]:;,] | \.(?!\d)/x) + + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) @encoder.text_token(match, :operator) + elsif match = scan(/\s+/) @encoder.text_token(match, :space) + else @encoder.text_token(getch, :error) end + + # It may be that we’re scanning a full-blown subexpression of a table + # (tables can contain full expressions in parts). + # If this is the case, return to :table scanning state. 
+ @state = :table if @state == :initial && @brace_depth >= 1 end def handle_state_function_expected @@ -113,7 +151,7 @@ def handle_state_local_var_expected @encoder.text_token(match, :local_variable) elsif match = scan(/,/) @encoder.text_token(match, :operator) - elsif match = scan(/\=/) + elsif match = scan(/=/) @encoder.text_token(match, :operator) # After encountering the equal sign, arbitrary expressions are # allowed again, so just return to the main state for further @@ -175,4 +213,22 @@ def handle_state_string end end + def handle_state_table + if match = scan(/[,;]/) + @encoder.text_token(match, :operator) + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=)/x) + @encoder.text_token(match, :key) + @encoder.text_token(scan(/\s+/), :space) if check(/\s+/) + @encoder.text_token(scan(/=/), :operator) + @state = :initial + elsif match = scan(/\s+/m) + @encoder.text_token(match, :space) + else + # Note this clause doesn’t advance the scan pointer, it’s a kind of + # "retry with other options" (the :initial state then of course + # advances the pointer). + @state = :initial + end + end + end From e9f5c285306fcdee29f89875286b914ccec33bbc Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 18:22:11 +0200 Subject: [PATCH 07/14] Allow local functions --- lib/coderay/scanners/lua.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index b1eeaef3..3ba06130 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -147,7 +147,10 @@ def handle_state_goto_label_expected end def handle_state_local_var_expected - if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) + if match = scan(/function/) # local function ... 
+ @encoder.text_token(match, :keyword) + @state = :function_expected + elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) @encoder.text_token(match, :local_variable) elsif match = scan(/,/) @encoder.text_token(match, :operator) From 535ff2fb70f1a9f8849afafc139058ef119d4b60 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 18:31:59 +0200 Subject: [PATCH 08/14] Allow more obscure function definitions with colon as the last separator. tbl = {} function tbl:foo(self) print("foo") end --- lib/coderay/scanners/lua.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 3ba06130..f5a8c494 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -122,7 +122,7 @@ def handle_state_initial end def handle_state_function_expected - if match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* \./x) # function tbl.subtbl.foo() + if match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator @encoder.text_token(match, :ident) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() @encoder.text_token(match, :function) From 8c9f53fe40c10cafc629d3b332c74d01fdb0a049 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 18:44:18 +0200 Subject: [PATCH 09/14] Allow anonymous function definitions. x = function() print("anon") end --- lib/coderay/scanners/lua.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index f5a8c494..9915c14d 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -122,7 +122,10 @@ def handle_state_initial end def handle_state_function_expected - if match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] 
(?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator + if match = scan(/\(\s*\)/) # x = function() # "Anonymous" function without explicit name + @encoder.text_token(match, :operator) + @state = :initial + elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator @encoder.text_token(match, :ident) elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo() @encoder.text_token(match, :function) From dc0e15ae025dd075ac4ea061406a0546a580f2bb Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 19:21:25 +0200 Subject: [PATCH 10/14] Recognize predefined expressions, i.e. functions and other idents provided by the `base' library. print, error, collectgarbage, ... --- lib/coderay/scanners/lua.rb | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index 9915c14d..ad1ffb03 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -1,38 +1,65 @@ # -*- coding: utf-8 -*- -# http://www.lua.org/manual/5.2/manual.html +# Scanner for the Lua[http://lua.org] programming language. +# +# The language’s complete syntax is defined in +# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html], +# which is what this scanner tries to conform to. class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner register_for :lua file_extension "lua" title "Lua" + # Keywords used in Lua. KEYWORDS = %w[and break do else elseif end for function goto if in local not or repeat return then until while ] + # Constants set by the Lua core. PREDEFINED_CONSTANTS = %w[false true nil] + # The expressions contained in this array are parts of Lua’s `basic' + # library. 
Although it’s not entirely necessary to load that library, + # it is highly recommended and one would have to provide own implementations + # of some of these expressions if one does not do so. They however aren’t + # keywords, neither are they constants, but nearly predefined, so they + # get tagged as `predefined' rather than anything else. + # + # This list excludes values of form `_UPPERCASE' because the Lua manual + # requires such identifiers to be reserved by Lua anyway and they are + # highlighted directly accordingly, without the need for specific + # identifiers to be listed here. + PREDEFINED_EXPRESSIONS = %w[ + assert collectgarbage dofile error getmetatable + ipairs load loadfile next pairs pcall print + rawequal rawget rawlen rawset select setmetatable + tonumber tostring type xpcall + ] + + # Automatic token kind selection for normal words. IDENT_KIND = CodeRay::WordList.new(:ident) .add(KEYWORDS, :keyword) .add(PREDEFINED_CONSTANTS, :predefined_constant) + .add(PREDEFINED_EXPRESSIONS, :predefined) protected + # Scanner initialization. def setup @state = :initial @brace_depth = 0 end + # CodeRay entry hook. Starts parsing. def scan_tokens(encoder, options) @encoder = encoder @options = options send(:"handle_state_#@state") until eos? 
- @encoder end From 5b3efc31412371c32e0f7fc22fca2131c90ec933 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 19:40:40 +0200 Subject: [PATCH 11/14] Anonymous functions with parameters --- lib/coderay/scanners/lua.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index ad1ffb03..eb8100c6 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -149,7 +149,7 @@ def handle_state_initial end def handle_state_function_expected - if match = scan(/\(\s*\)/) # x = function() # "Anonymous" function without explicit name + if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name @encoder.text_token(match, :operator) @state = :initial elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator From 800431a00c0fe129b163859ab5c62d651d4cc3ad Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 20:17:46 +0200 Subject: [PATCH 12/14] Add CSS styles for table highlighting. 
--- lib/coderay/styles/alpha.rb | 3 +++ lib/coderay/token_kinds.rb | 1 + 2 files changed, 4 insertions(+) diff --git a/lib/coderay/styles/alpha.rb b/lib/coderay/styles/alpha.rb index 8506d103..257083e5 100644 --- a/lib/coderay/styles/alpha.rb +++ b/lib/coderay/styles/alpha.rb @@ -116,6 +116,9 @@ class Alpha < Style .symbol .content { color:#A60 } .symbol .delimiter { color:#630 } .symbol { color:#A60 } +.table .content { color:#808 } +.table .delimiter { color:#40A} +.table { background-color:hsla(200,100%,50%,0.06); } .tag { color:#070 } .type { color:#339; font-weight:bold } .value { color: #088; } diff --git a/lib/coderay/token_kinds.rb b/lib/coderay/token_kinds.rb index 3b8d07e4..e2456235 100755 --- a/lib/coderay/token_kinds.rb +++ b/lib/coderay/token_kinds.rb @@ -63,6 +63,7 @@ module CodeRay :shell => 'shell', :string => 'string', :symbol => 'symbol', + :table => 'table', :tag => 'tag', :type => 'type', :value => 'value', From 9dc21e57ea0fbd1e7869405d206dc906aef1aee6 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 20:32:52 +0200 Subject: [PATCH 13/14] Take unary - as part of the number rather than an operator. Note Lua has no unary +. --- lib/coderay/scanners/lua.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index eb8100c6..b7e745df 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -124,12 +124,12 @@ def handle_state_initial @start_delim = match @state = :string - # hex number ←|→ decimal number - elsif match = scan(/0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? 
| \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power @encoder.text_token(match, :float) - # hex number ←|→ decimal number - elsif match = scan(/0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?/ix) # hexadecimal constants have no E power, decimal ones no P power + # ↓Prefix hex number ←|→ decimal number + elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power @encoder.text_token(match, :integer) elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) From e339a68fbe1084fc22b735623c9bde42d9383d42 Mon Sep 17 00:00:00 2001 From: Quintus Date: Sun, 22 Apr 2012 20:47:51 +0200 Subject: [PATCH 14/14] Add some informative comments --- lib/coderay/scanners/lua.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/coderay/scanners/lua.rb b/lib/coderay/scanners/lua.rb index b7e745df..e7706fc1 100644 --- a/lib/coderay/scanners/lua.rb +++ b/lib/coderay/scanners/lua.rb @@ -99,13 +99,13 @@ def handle_state_initial @encoder.text_token(match, kind) - elsif match = scan(/{/) + elsif match = scan(/{/) # Opening table brace { @encoder.begin_group(:table) @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter) @brace_depth += 1 @state = :table - elsif match = scan(/}/) + elsif match = scan(/}/) # Closing table brace } if @brace_depth == 1 @brace_depth = 0 @encoder.text_token(match, :delimiter) @@ -118,11 +118,11 @@ def handle_state_initial end @encoder.end_group(:table) - elsif match = scan(/["']/) + elsif match = scan(/["']/) # String delimiters " and ' @encoder.begin_group(:string) @encoder.text_token(match, :delimiter) @start_delim = match - @state = :string + @state = :string # ↓Prefix hex number ←|→ decimal number elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? 
| \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power @@ -132,13 +132,13 @@ def handle_state_initial elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power @encoder.text_token(match, :integer) - elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) + elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators @encoder.text_token(match, :operator) - elsif match = scan(/\s+/) + elsif match = scan(/\s+/) # Space @encoder.text_token(match, :space) - else + else # Invalid stuff. Note that Lua doesn’t accept multibyte chars outside of strings, hence these are also errors. @encoder.text_token(getch, :error) end