Merge branch 'lexer-part' into 'master'
Lexer part See merge request saburly/reklamice/parser!1
This commit was merged in pull request #1.
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
.idea
|
||||
|
||||
lexer.rb
|
||||
14
README.md
14
README.md
@@ -1,3 +1,15 @@
|
||||
# parser
|
||||
|
||||
parser ba
|
||||
### Prerequisites
|
||||
|
||||
* Rexical (rex)
|
||||
|
||||
### Available commands
|
||||
|
||||
* `rake lexer` - generates `lexer.rb` file based on `specification.rex` file
|
||||
|
||||
### Testing
|
||||
|
||||
To run only the `lexer` tests, execute: `rspec spec/query_lexer_spec.rb`
|
||||
|
||||
To run all tests, execute: `rake spec`
|
||||
12
Rakefile
Normal file
12
Rakefile
Normal file
@@ -0,0 +1,12 @@
|
||||
require 'rspec/core/rake_task'
|
||||
|
||||
# Registers the RSpec rake task (invoked as `rake spec`); RSpec is run with
# coloured output and the `documentation` formatter.
RSpec::Core::RakeTask.new do |task|
  task.rspec_opts = ['--color', '--format', 'documentation']
end
|
||||
|
||||
desc "Generate Lexer"
task :lexer do
  # Regenerates lexer.rb from specification.rex using the `rex` executable.
  # `sh` (instead of backticks) echoes the command, lets rex's output reach
  # the terminal and fails the task when rex exits with a non-zero status,
  # so a broken specification is never silently ignored.
  sh 'rex specification.rex -o lexer.rb'
end
|
||||
250
spec/query_lexer_spec.rb
Normal file
250
spec/query_lexer_spec.rb
Normal file
@@ -0,0 +1,250 @@
|
||||
require './lexer'
|
||||
|
||||
# Specs for the generated Query lexer. Every example feeds a query string to
# Query#tokenize and checks the returned list of [type, text] token pairs.
class QueryLexerTester
  # RSpec.describe is used instead of the bare `describe` so the file does not
  # depend on RSpec's global monkey patching being enabled.
  RSpec.describe 'Testing the Lexer' do
    # A fresh lexer for every example.
    let(:lexer) { Query.new }

    # Expected-token builder for an unquoted term whose text is checked.
    def term(text)
      [:TERM_WITHOUT_QUOTES, text]
    end

    # Expected-token builder for a quoted term whose text (quotes included)
    # is checked.
    def quoted_term(text)
      [:TERM_WITH_QUOTES, text]
    end

    # Tokenizes +input+ and compares the result with +expected+.
    #
    # Each entry of +expected+ is either a bare type Symbol (only the token
    # type is compared) or a [type, text] pair (type and text are compared).
    # The number of tokens must match exactly.
    def expect_tokens(input, expected)
      tokens = lexer.tokenize(input)
      expect(tokens.length).to eq expected.length

      expected.each_with_index do |entry, index|
        type, text = Array(entry)
        expect(tokens[index][0]).to eq type
        expect(tokens[index][1]).to eq text unless text.nil?
      end
    end

    it 'tests bracket expression' do
      expect_tokens('()', %i[L_BRACKET R_BRACKET])
    end

    it 'tests bracket expression with spaces' do
      expect_tokens(' ( ) ', %i[L_BRACKET R_BRACKET])
    end

    it 'tests expression with OR operator' do
      expect_tokens(
        '() or () OR ()',
        %i[L_BRACKET R_BRACKET OPERATOR_OR
           L_BRACKET R_BRACKET OPERATOR_OR
           L_BRACKET R_BRACKET]
      )
    end

    it 'tests expression with AND operator' do
      expect_tokens(
        '() AND () and ()',
        %i[L_BRACKET R_BRACKET OPERATOR_AND
           L_BRACKET R_BRACKET OPERATOR_AND
           L_BRACKET R_BRACKET]
      )
    end

    it 'tests expression with NOT OR and NOT AND operator' do
      expect_tokens(
        '() NOT or () not AND ()',
        %i[L_BRACKET R_BRACKET OPERATOR_NOT OPERATOR_OR
           L_BRACKET R_BRACKET OPERATOR_NOT OPERATOR_AND
           L_BRACKET R_BRACKET]
      )
    end

    it 'tests search term under quotes' do
      expect_tokens('"123-456"', [quoted_term('"123-456"')])
    end

    it 'tests term without quotes' do
      expect_tokens('device_id', [term('device_id')])
    end

    it 'tests integer term without quotes' do
      expect_tokens('123', [term('123')])
    end

    it 'tests multiple terms without quotes' do
      expect_tokens(
        'device_id tag 123-456 name123',
        [term('device_id'), term('tag'), term('123-456'), term('name123')]
      )
    end

    it 'tests simple query with column name and search term without quotes' do
      expect_tokens('name:JF', [term('name'), :COLON, term('JF')])
    end

    it 'tests simple query with column name and search term with quotes' do
      expect_tokens(
        'name:"name with space"',
        [term('name'), :COLON, quoted_term('"name with space"')]
      )
    end

    it 'tests search term without quotes containing pipe characters' do
      expect_tokens('||', [term('||')])
    end

    it 'tests search term with quotes containing non alphanumerical characters' do
      expect_tokens('"|*|/\()#-!=<>&$"', [quoted_term('"|*|/\()#-!=<>&$"')])
    end

    it 'tests simple query in brackets' do
      expect_tokens(
        '(name:"name with space")',
        [
          :L_BRACKET,
          term('name'), :COLON, quoted_term('"name with space"'),
          :R_BRACKET
        ]
      )
    end

    it 'tests multiple query with brackets' do
      expect_tokens(
        '(name:"name with space") or (tag:mta)',
        [
          :L_BRACKET,
          term('name'), :COLON, quoted_term('"name with space"'),
          :R_BRACKET,
          :OPERATOR_OR,
          :L_BRACKET,
          term('tag'), :COLON, term('mta'),
          :R_BRACKET
        ]
      )
    end

    it 'tests complex query' do
      query = '(device-id:"with space" tag:mta no-quotes-id-123)'\
              'or "id with quotes-5" and ( ("id with q 10" or "id with q 20")'\
              'and ("id with Q 30" "id with Q 40") and not id-without-Q-50)'

      expect_tokens(
        query,
        [
          :L_BRACKET,
          term('device-id'), :COLON, quoted_term('"with space"'),
          term('tag'), :COLON, term('mta'),
          term('no-quotes-id-123'),
          :R_BRACKET,

          :OPERATOR_OR,
          quoted_term('"id with quotes-5"'),
          :OPERATOR_AND,

          :L_BRACKET,
          :L_BRACKET,
          quoted_term('"id with q 10"'),
          :OPERATOR_OR,
          quoted_term('"id with q 20"'),
          :R_BRACKET,

          :OPERATOR_AND,
          :L_BRACKET,
          quoted_term('"id with Q 30"'),
          quoted_term('"id with Q 40"'),
          :R_BRACKET,

          :OPERATOR_AND,
          :OPERATOR_NOT,
          term('id-without-Q-50'),
          :R_BRACKET
        ]
      )
    end

    it 'tests query with -or-, -and- and -not- words inside quoted expression' do
      expect_tokens(
        'tag:"tag with or and not inside"',
        [term('tag'), :COLON, quoted_term('"tag with or and not inside"')]
      )
    end
  end
end
|
||||
35
specification.rex
Normal file
35
specification.rex
Normal file
@@ -0,0 +1,35 @@
|
||||
class Query
# Rexical specification of the search-query lexer.
# Tokens are emitted as [type, text] pairs; spaces are skipped.
macro
  L_BRACKET            \(
  R_BRACKET            \)
  SPACE                \ +    # Space char

  # Operators are case-insensitive keywords. The negative lookahead makes
  # sure a keyword is not immediately followed by another term character, so
  # terms that merely start with a keyword ("order", "android", "notes",
  # "or-1") are lexed as one TERM_WITHOUT_QUOTES instead of being split into
  # an operator plus a truncated term.
  OPERATOR_OR          (?i)or(?![a-zA-Z0-9_|-])
  OPERATOR_AND         (?i)and(?![a-zA-Z0-9_|-])
  OPERATOR_NOT         (?i)not(?![a-zA-Z0-9_|-])

  TERM_WITH_QUOTES     "([^"]*)"
  # The hyphen is listed last in the class so it is unambiguously a literal
  # character rather than part of a range.
  TERM_WITHOUT_QUOTES  [a-zA-Z0-9_|-]+
  COLON                \:

rule
  {SPACE}                # No action
  {L_BRACKET}            { return [:L_BRACKET, text] }
  {R_BRACKET}            { return [:R_BRACKET, text] }
  {OPERATOR_OR}          { return [:OPERATOR_OR, text] }
  {OPERATOR_AND}         { return [:OPERATOR_AND, text] }
  {OPERATOR_NOT}         { return [:OPERATOR_NOT, text] }
  {TERM_WITH_QUOTES}     { return [:TERM_WITH_QUOTES, text] }
  {TERM_WITHOUT_QUOTES}  { return [:TERM_WITHOUT_QUOTES, text] }
  {COLON}                { return [:COLON, text] }

inner
  # Scans +code+ from start to end and returns every token produced by the
  # rules above, in input order, as an Array of [type, text] pairs.
  def tokenize(code)
    scan_setup(code)
    tokens = []
    while token = next_token
      tokens << token
    end
    tokens
  end
end
|
||||
Reference in New Issue
Block a user