diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..837c184
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea
+
+lexer.rb
diff --git a/README.md b/README.md
index 1da7fd1..3123cb2 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,15 @@
 # parser
 
-parser ba
\ No newline at end of file
+### Prerequisites
+
+* Rexical (rex)
+
+### Available commands
+
+* `rake lexer` - generates `lexer.rb` file based on `specification.rex` file
+
+### Testing
+
+To run only `lexer` tests, execute: `rspec spec/query_lexer_spec.rb`
+
+To run all tests, execute: `rake spec`
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..b63d98d
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,16 @@
+require 'rspec/core/rake_task'
+
+# Defines the RSpec rake task, reporting in colored documentation format.
+RSpec::Core::RakeTask.new do |c|
+  c.rspec_opts = %w[--color --format documentation]
+end
+
+desc 'Generate Lexer'
+task :lexer do
+  # Unlike backticks, `sh` fails the task when the command exits non-zero,
+  # so a broken specification.rex is no longer silently ignored.
+  sh 'rex specification.rex -o lexer.rb'
+end
+
+# The specs load the generated lexer.rb, so build it before running them.
+task spec: :lexer
diff --git a/spec/query_lexer_spec.rb b/spec/query_lexer_spec.rb
new file mode 100644
index 0000000..0da9c3b
--- /dev/null
+++ b/spec/query_lexer_spec.rb
@@ -0,0 +1,265 @@
+# lexer.rb is generated from specification.rex (`rake lexer`) and git-ignored;
+# load it relative to this file so the specs do not depend on the working directory.
+require_relative '../lexer'
+
+class QueryLexerTester
+  describe 'Testing the Lexer' do
+    before do
+      @evaluator = Query.new
+    end
+
+    it 'tests bracket expression' do
+      @result = @evaluator.tokenize('()')
+      expect(@result.length).to eq 2
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :R_BRACKET
+    end
+
+    it 'tests bracket expression with spaces' do
+      @result = @evaluator.tokenize(' ( ) ')
+      expect(@result.length).to eq 2
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :R_BRACKET
+    end
+
+    it 'tests expression with OR operator' do
+      @result = @evaluator.tokenize('() or () OR ()')
+      expect(@result.length).to eq 8
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :R_BRACKET
+      expect(@result[2][0]).to eq :OPERATOR_OR
+      expect(@result[3][0]).to eq :L_BRACKET
+      expect(@result[4][0]).to eq :R_BRACKET
+      expect(@result[5][0]).to eq :OPERATOR_OR
+      expect(@result[6][0]).to eq :L_BRACKET
+      expect(@result[7][0]).to eq :R_BRACKET
+    end
+
+    it 'tests expression with AND operator' do
+      @result = @evaluator.tokenize('() AND () and ()')
+      expect(@result.length).to eq 8
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :R_BRACKET
+      expect(@result[2][0]).to eq :OPERATOR_AND
+      expect(@result[3][0]).to eq :L_BRACKET
+      expect(@result[4][0]).to eq :R_BRACKET
+      expect(@result[5][0]).to eq :OPERATOR_AND
+      expect(@result[6][0]).to eq :L_BRACKET
+      expect(@result[7][0]).to eq :R_BRACKET
+    end
+
+    it 'tests expression with NOT OR and NOT AND operator' do
+      @result = @evaluator.tokenize('() NOT or () not AND ()')
+      expect(@result.length).to eq 10
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :R_BRACKET
+      expect(@result[2][0]).to eq :OPERATOR_NOT
+      expect(@result[3][0]).to eq :OPERATOR_OR
+      expect(@result[4][0]).to eq :L_BRACKET
+      expect(@result[5][0]).to eq :R_BRACKET
+      expect(@result[6][0]).to eq :OPERATOR_NOT
+      expect(@result[7][0]).to eq :OPERATOR_AND
+      expect(@result[8][0]).to eq :L_BRACKET
+      expect(@result[9][0]).to eq :R_BRACKET
+    end
+
+    it 'tests search term under quotes' do
+      @result = @evaluator.tokenize('"123-456"')
+      expect(@result.length).to eq 1
+
+      expect(@result[0][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[0][1]).to eq '"123-456"'
+    end
+
+    it 'tests term without quotes' do
+      @result = @evaluator.tokenize('device_id')
+      expect(@result.length).to eq 1
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq 'device_id'
+    end
+
+    it 'tests integer term without quotes' do
+      @result = @evaluator.tokenize('123')
+      expect(@result.length).to eq 1
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq '123'
+    end
+
+    it 'tests multiple terms without quotes' do
+      @result = @evaluator.tokenize('device_id tag 123-456 name123')
+
+      expect(@result.length).to eq 4
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq 'device_id'
+      expect(@result[1][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[1][1]).to eq 'tag'
+      expect(@result[2][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[2][1]).to eq '123-456'
+      expect(@result[3][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[3][1]).to eq 'name123'
+    end
+
+    it 'tests simple query with column name and search term without quotes' do
+      @result = @evaluator.tokenize('name:JF')
+
+      expect(@result.length).to eq 3
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq 'name'
+      expect(@result[1][0]).to eq :COLON
+      expect(@result[2][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[2][1]).to eq 'JF'
+    end
+
+    it 'tests simple query with column name and search term with quotes' do
+      @result = @evaluator.tokenize('name:"name with space"')
+
+      expect(@result.length).to eq 3
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq 'name'
+      expect(@result[1][0]).to eq :COLON
+      expect(@result[2][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[2][1]).to eq '"name with space"'
+    end
+
+    it 'tests search term without quotes containing pipe characters' do
+      @result = @evaluator.tokenize('||')
+
+      expect(@result.length).to eq 1
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq '||'
+    end
+
+    it 'tests search term with quotes containing non alphanumerical characters' do
+      @result = @evaluator.tokenize('"|*|/\()#-!=<>&$"')
+
+      expect(@result.length).to eq 1
+
+      expect(@result[0][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[0][1]).to eq '"|*|/\()#-!=<>&$"'
+    end
+
+    it 'tests simple query in brackets' do
+      @result = @evaluator.tokenize('(name:"name with space")')
+
+      expect(@result.length).to eq 5
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[1][1]).to eq 'name'
+      expect(@result[2][0]).to eq :COLON
+      expect(@result[3][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[3][1]).to eq '"name with space"'
+      expect(@result[4][0]).to eq :R_BRACKET
+    end
+
+    it 'tests multiple query with brackets' do
+      @result = @evaluator.tokenize('(name:"name with space") or (tag:mta)')
+
+      expect(@result.length).to eq 11
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[1][1]).to eq 'name'
+      expect(@result[2][0]).to eq :COLON
+      expect(@result[3][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[3][1]).to eq '"name with space"'
+      expect(@result[4][0]).to eq :R_BRACKET
+      expect(@result[5][0]).to eq :OPERATOR_OR
+      expect(@result[6][0]).to eq :L_BRACKET
+      expect(@result[7][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[7][1]).to eq 'tag'
+      expect(@result[8][0]).to eq :COLON
+      expect(@result[9][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[9][1]).to eq 'mta'
+      expect(@result[10][0]).to eq :R_BRACKET
+
+    end
+
+    it 'tests complex query' do
+      @result = @evaluator.tokenize('(device-id:"with space" tag:mta no-quotes-id-123)'\
+                                    'or "id with quotes-5" and ( ("id with q 10" or "id with q 20")'\
+                                    'and ("id with Q 30" "id with Q 40") and not id-without-Q-50)')
+
+      expect(@result.length).to eq 27
+
+      expect(@result[0][0]).to eq :L_BRACKET
+      expect(@result[1][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[1][1]).to eq 'device-id'
+      expect(@result[2][0]).to eq :COLON
+      expect(@result[3][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[3][1]).to eq '"with space"'
+      expect(@result[4][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[4][1]).to eq 'tag'
+      expect(@result[5][0]).to eq :COLON
+      expect(@result[6][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[6][1]).to eq 'mta'
+      expect(@result[7][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[7][1]).to eq 'no-quotes-id-123'
+      expect(@result[8][0]).to eq :R_BRACKET
+
+      expect(@result[9][0]).to eq :OPERATOR_OR
+      expect(@result[10][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[10][1]).to eq '"id with quotes-5"'
+      expect(@result[11][0]).to eq :OPERATOR_AND
+
+      expect(@result[12][0]).to eq :L_BRACKET
+      expect(@result[13][0]).to eq :L_BRACKET
+      expect(@result[14][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[14][1]).to eq '"id with q 10"'
+      expect(@result[15][0]).to eq :OPERATOR_OR
+      expect(@result[16][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[16][1]).to eq '"id with q 20"'
+      expect(@result[17][0]).to eq :R_BRACKET
+
+      expect(@result[18][0]).to eq :OPERATOR_AND
+      expect(@result[19][0]).to eq :L_BRACKET
+      expect(@result[20][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[20][1]).to eq '"id with Q 30"'
+      expect(@result[21][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[21][1]).to eq '"id with Q 40"'
+      expect(@result[22][0]).to eq :R_BRACKET
+
+      expect(@result[23][0]).to eq :OPERATOR_AND
+      expect(@result[24][0]).to eq :OPERATOR_NOT
+      expect(@result[25][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[25][1]).to eq 'id-without-Q-50'
+      expect(@result[26][0]).to eq :R_BRACKET
+    end
+
+    it 'tests query with -or-, -and- and -not- words inside quoted expression' do
+      @result = @evaluator.tokenize('tag:"tag with or and not inside"')
+
+      expect(@result.length).to eq 3
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq 'tag'
+      expect(@result[1][0]).to eq :COLON
+      expect(@result[2][0]).to eq :TERM_WITH_QUOTES
+      expect(@result[2][1]).to eq '"tag with or and not inside"'
+    end
+
+    it 'tests terms without quotes that start with an operator keyword' do
+      @result = @evaluator.tokenize('order android notes')
+
+      expect(@result.length).to eq 3
+
+      expect(@result[0][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[0][1]).to eq 'order'
+      expect(@result[1][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[1][1]).to eq 'android'
+      expect(@result[2][0]).to eq :TERM_WITHOUT_QUOTES
+      expect(@result[2][1]).to eq 'notes'
+    end
+  end
+end
diff --git a/specification.rex b/specification.rex
new file mode 100644
index 0000000..f0b3375
--- /dev/null
+++ b/specification.rex
@@ -0,0 +1,41 @@
+class Query
+macro
+  L_BRACKET             \(
+  R_BRACKET             \)
+  SPACE                 \ +                          # Space char
+  OPERATOR_OR           (?i:or)(?![a-zA-Z0-9_|-])    # any case; not a prefix of a longer term
+  OPERATOR_AND          (?i:and)(?![a-zA-Z0-9_|-])   # any case; not a prefix of a longer term
+  OPERATOR_NOT          (?i:not)(?![a-zA-Z0-9_|-])   # any case; not a prefix of a longer term
+  TERM_WITH_QUOTES      "([^"]*)"
+  TERM_WITHOUT_QUOTES   [a-zA-Z0-9_|-]+              # keep in sync with the operator lookaheads
+  COLON                 \:
+
+
+
+rule
+  {SPACE}                 # No action
+  {L_BRACKET}             { return [:L_BRACKET, text] }
+  {R_BRACKET}             { return [:R_BRACKET, text] }
+  {OPERATOR_OR}           { return [:OPERATOR_OR, text] }
+  {OPERATOR_AND}          { return [:OPERATOR_AND, text] }
+  {OPERATOR_NOT}          { return [:OPERATOR_NOT, text] }
+  {TERM_WITH_QUOTES}      { return [:TERM_WITH_QUOTES, text] }
+  {TERM_WITHOUT_QUOTES}   { return [:TERM_WITHOUT_QUOTES, text] }
+  {COLON}                 { return [:COLON, text] }
+
+inner
+  # Scans +code+ and collects every token emitted by the rules above.
+  # (scan_setup and next_token are not defined here; presumably they come
+  # from the scanner code that rex generates around this section.)
+  #
+  # @param code [String] the query text to tokenize
+  # @return [Array<Array>] [type, text] pairs in the order they were matched
+  def tokenize(code)
+    scan_setup(code)
+    tokens = []
+    while (token = next_token)
+      tokens << token
+    end
+    tokens
+  end
+end