Initial commit

This commit is contained in:
Senad Uka
2020-01-28 13:31:56 +01:00
parent 7f7c6e95bc
commit 2749c53aac
56 changed files with 6516 additions and 1 deletions

26
lib/grammar.y Normal file
View File

@@ -0,0 +1,26 @@
# Racc grammar for the search-query language parsed by class Query.
# Every semantic action builds a plain Ruby Hash describing the matched node.
class Query
# Precedence table, listed from highest (prechigh) to lowest (preclow); all
# three operators are declared left-associative.
# NOTE(review): OPERATOR_NOT is given a precedence here, but no production
# below uses it, so the grammar as written has no rule that accepts a NOT token.
prechigh
left OPERATOR_NOT
left OPERATOR_AND
left OPERATOR_OR
preclow
rule
# Start symbol: a single expression, or empty input, for which the result is 0.
target: expression
| /* none */ { result = 0 }
# expression alternatives, in the order written below:
#   bare term (unquoted or quoted)  -> { :DEFAULT_COLUMN => term }
#   column COLON term               -> { column => term }
#   expr OR expr / expr AND expr    -> { :OPERATOR_OR / :OPERATOR_AND => [left, right] }
#   L_BRACKET expr R_BRACKET        -> the value of the inner expression
# There is no production for two expressions written side by side without an
# operator between them.
expression: TERM_WITHOUT_QUOTES { result = {:DEFAULT_COLUMN => val[0]} }
| TERM_WITH_QUOTES { result = {:DEFAULT_COLUMN => val[0]} }
| TERM_WITHOUT_QUOTES COLON TERM_WITHOUT_QUOTES { result = {val[0] => val[2]} }
| TERM_WITHOUT_QUOTES COLON TERM_WITH_QUOTES { result = {val[0] => val[2]} }
| expression OPERATOR_OR expression { result = {:OPERATOR_OR => [val[0], val[2]]} }
| expression OPERATOR_AND expression { result = {:OPERATOR_AND => [val[0], val[2]]} }
| L_BRACKET expression R_BRACKET { result = val[1] }
end
---- header
# Loads the lexer file that sits next to the generated parser.
require_relative 'lexer'
---- inner
# Parses +input+ and returns the value built by the grammar actions above.
# scan_str is not defined in this file; presumably it is supplied by the
# required lexer -- confirm.
def parse(input)
scan_str(input)
end

4
lib/parser-parser-part/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
.idea
lexer.rb
parser.rb

View File

@@ -0,0 +1,19 @@
# parser
### Prerequisites
* Rexical (rex)
* Racc
### Available commands
* `rake lexer` - generates `lexer.rb` file based on `specification.rex` file
* `rake parser` - generates `parser.rb` file based on `grammar.y` file
* `rake generate` - generates `lexer.rb` and `parser.rb` files
### Testing
* To run only the `lexer` tests, execute: `rspec spec/query_lexer_spec.rb`
* To run only the `parser` tests, execute: `rspec spec/query_parser_spec.rb`
* To run all tests, execute: `rake spec`

View File

@@ -0,0 +1,20 @@
require 'rspec/core/rake_task'
# Registers the RSpec rake task with colored, documentation-formatted output.
RSpec::Core::RakeTask.new do |spec_task|
  spec_task.rspec_opts = ['--color', '--format', 'documentation']
end
desc 'Generate Lexer'
task :lexer do
  # `sh` echoes the command, streams its output and aborts the task when rex
  # exits non-zero; backticks captured the output and ignored failures, so a
  # broken specification went unnoticed.
  sh 'rex specification.rex -o lexer.rb'
end
desc 'Generate Parser'
task :parser do
  # `sh` echoes the command, streams its output (including grammar conflict
  # warnings) and aborts the task when racc exits non-zero; backticks captured
  # the output and ignored failures.
  sh 'racc grammar.y -o parser.rb'
end
# Aggregate task: it has no body of its own and only depends on both generators.
desc 'Generate Lexer and Parser'
task :generate => [:lexer, :parser]

View File

@@ -0,0 +1,26 @@
# Racc grammar for the search-query language parsed by class Query.
# Every semantic action builds a plain Ruby Hash describing the matched node.
class Query
# Precedence table, listed from highest (prechigh) to lowest (preclow); all
# three operators are declared left-associative.
# NOTE(review): OPERATOR_NOT is given a precedence here, but no production
# below uses it, so the grammar as written has no rule that accepts a NOT token.
prechigh
left OPERATOR_NOT
left OPERATOR_AND
left OPERATOR_OR
preclow
rule
# Start symbol: a single expression, or empty input, for which the result is 0.
target: expression
| /* none */ { result = 0 }
# expression alternatives, in the order written below:
#   bare term (unquoted or quoted)  -> { :DEFAULT_COLUMN => term }
#   column COLON term               -> { column => term }
#   expr OR expr / expr AND expr    -> { :OPERATOR_OR / :OPERATOR_AND => [left, right] }
#   L_BRACKET expr R_BRACKET        -> the value of the inner expression
# There is no production for two expressions written side by side without an
# operator between them.
expression: TERM_WITHOUT_QUOTES { result = {:DEFAULT_COLUMN => val[0]} }
| TERM_WITH_QUOTES { result = {:DEFAULT_COLUMN => val[0]} }
| TERM_WITHOUT_QUOTES COLON TERM_WITHOUT_QUOTES { result = {val[0] => val[2]} }
| TERM_WITHOUT_QUOTES COLON TERM_WITH_QUOTES { result = {val[0] => val[2]} }
| expression OPERATOR_OR expression { result = {:OPERATOR_OR => [val[0], val[2]]} }
| expression OPERATOR_AND expression { result = {:OPERATOR_AND => [val[0], val[2]]} }
| L_BRACKET expression R_BRACKET { result = val[1] }
end
---- header
# Loads the lexer file that sits next to the generated parser.
require_relative 'lexer'
---- inner
# Parses +input+ and returns the value built by the grammar actions above.
# scan_str is not defined in this file; presumably it is supplied by the
# required lexer -- confirm.
def parse(input)
scan_str(input)
end

View File

@@ -0,0 +1,259 @@
require './lexer'
# Lexer specs: every example tokenizes one query string and compares the
# resulting token stream with a table of expected tokens.
class QueryLexerTester
  describe 'Testing the Lexer' do
    let(:lexer) { Query.new }

    # Tokenizes +input+ and checks the stream against +expected+.
    #
    # Each entry of +expected+ is either a bare token type (only the type is
    # checked) or a [type, text] pair (type and matched text are checked).
    # The token count is always checked first.
    def expect_tokens(input, expected)
      tokens = lexer.tokenize(input)
      expect(tokens.length).to eq expected.length
      expected.each_with_index do |(type, text), index|
        expect(tokens[index][0]).to eq type
        expect(tokens[index][1]).to eq text unless text.nil?
      end
    end

    it 'tests bracket expression' do
      expect_tokens('()', %i[L_BRACKET R_BRACKET])
    end

    it 'tests bracket expression with spaces' do
      expect_tokens(' ( ) ', %i[L_BRACKET R_BRACKET])
    end

    it 'tests expression with OR operator' do
      expect_tokens(
        '() or () OR ()',
        %i[L_BRACKET R_BRACKET OPERATOR_OR L_BRACKET R_BRACKET OPERATOR_OR L_BRACKET R_BRACKET]
      )
    end

    it 'tests expression with AND operator' do
      expect_tokens(
        '() AND () and ()',
        %i[L_BRACKET R_BRACKET OPERATOR_AND L_BRACKET R_BRACKET OPERATOR_AND L_BRACKET R_BRACKET]
      )
    end

    it 'tests expression with NOT OR and NOT AND operator' do
      expect_tokens(
        '() NOT or () not AND ()',
        %i[
          L_BRACKET R_BRACKET OPERATOR_NOT OPERATOR_OR
          L_BRACKET R_BRACKET OPERATOR_NOT OPERATOR_AND
          L_BRACKET R_BRACKET
        ]
      )
    end

    it 'tests search term under quotes' do
      expect_tokens('"123-456"', [[:TERM_WITH_QUOTES, '"123-456"']])
    end

    it 'tests term without quotes' do
      expect_tokens('device_id', [[:TERM_WITHOUT_QUOTES, 'device_id']])
    end

    it 'tests integer term without quotes' do
      expect_tokens('123', [[:TERM_WITHOUT_QUOTES, '123']])
    end

    it 'tests multiple terms without quotes' do
      expect_tokens(
        'device_id tag 123-456 name123',
        [
          [:TERM_WITHOUT_QUOTES, 'device_id'],
          [:TERM_WITHOUT_QUOTES, 'tag'],
          [:TERM_WITHOUT_QUOTES, '123-456'],
          [:TERM_WITHOUT_QUOTES, 'name123']
        ]
      )
    end

    it 'tests simple query with column name and search term without quotes' do
      expect_tokens(
        'name:JF',
        [
          [:TERM_WITHOUT_QUOTES, 'name'],
          :COLON,
          [:TERM_WITHOUT_QUOTES, 'JF']
        ]
      )
    end

    it 'tests simple query with two columns with name and search terms without quotes' do
      expect_tokens(
        'name:JF tag:mta',
        [
          [:TERM_WITHOUT_QUOTES, 'name'],
          :COLON,
          [:TERM_WITHOUT_QUOTES, 'JF'],
          [:TERM_WITHOUT_QUOTES, 'tag'],
          :COLON,
          [:TERM_WITHOUT_QUOTES, 'mta']
        ]
      )
    end

    it 'tests simple query with column name and search term with quotes' do
      expect_tokens(
        'name:"name with space"',
        [
          [:TERM_WITHOUT_QUOTES, 'name'],
          :COLON,
          [:TERM_WITH_QUOTES, '"name with space"']
        ]
      )
    end

    it 'tests search term with quotes containing non alphanumerical characters' do
      expect_tokens('"|*|/\()#-!=<>&$"', [[:TERM_WITH_QUOTES, '"|*|/\()#-!=<>&$"']])
    end

    it 'tests simple query in brackets' do
      expect_tokens(
        '(name:"name with space")',
        [
          :L_BRACKET,
          [:TERM_WITHOUT_QUOTES, 'name'],
          :COLON,
          [:TERM_WITH_QUOTES, '"name with space"'],
          :R_BRACKET
        ]
      )
    end

    it 'tests multiple query with brackets' do
      expect_tokens(
        '(name:"name with space") or (tag:mta)',
        [
          :L_BRACKET,
          [:TERM_WITHOUT_QUOTES, 'name'],
          :COLON,
          [:TERM_WITH_QUOTES, '"name with space"'],
          :R_BRACKET,
          :OPERATOR_OR,
          :L_BRACKET,
          [:TERM_WITHOUT_QUOTES, 'tag'],
          :COLON,
          [:TERM_WITHOUT_QUOTES, 'mta'],
          :R_BRACKET
        ]
      )
    end

    it 'tests complex query' do
      # The pieces are joined without separators on purpose: operators that
      # directly follow a closing bracket must still be recognised.
      query = '(device-id:"with space" tag:mta no-quotes-id-123)'\
              'or "id with quotes-5" and ( ("id with q 10" or "id with q 20")'\
              'and ("id with Q 30" "id with Q 40") and not id-without-Q-50)'
      expect_tokens(
        query,
        [
          :L_BRACKET,
          [:TERM_WITHOUT_QUOTES, 'device-id'],
          :COLON,
          [:TERM_WITH_QUOTES, '"with space"'],
          [:TERM_WITHOUT_QUOTES, 'tag'],
          :COLON,
          [:TERM_WITHOUT_QUOTES, 'mta'],
          [:TERM_WITHOUT_QUOTES, 'no-quotes-id-123'],
          :R_BRACKET,
          :OPERATOR_OR,
          [:TERM_WITH_QUOTES, '"id with quotes-5"'],
          :OPERATOR_AND,
          :L_BRACKET,
          :L_BRACKET,
          [:TERM_WITH_QUOTES, '"id with q 10"'],
          :OPERATOR_OR,
          [:TERM_WITH_QUOTES, '"id with q 20"'],
          :R_BRACKET,
          :OPERATOR_AND,
          :L_BRACKET,
          [:TERM_WITH_QUOTES, '"id with Q 30"'],
          [:TERM_WITH_QUOTES, '"id with Q 40"'],
          :R_BRACKET,
          :OPERATOR_AND,
          :OPERATOR_NOT,
          [:TERM_WITHOUT_QUOTES, 'id-without-Q-50'],
          :R_BRACKET
        ]
      )
    end

    it 'tests query with -or-, -and- and -not- words inside quoted expression' do
      expect_tokens(
        'tag:"tag with or and not inside"',
        [
          [:TERM_WITHOUT_QUOTES, 'tag'],
          :COLON,
          [:TERM_WITH_QUOTES, '"tag with or and not inside"']
        ]
      )
    end
  end
end

View File

@@ -0,0 +1,164 @@
require './parser'
# Parser specs: every example parses one query string and inspects the hash
# produced by the grammar actions.
class QueryParserTester
  describe 'Testing the Parser' do
    let(:parser) { Query.new }

    it 'tests query with only one search term without quotes and without column name' do
      parsed = parser.parse('-123')
      expect(parsed[:DEFAULT_COLUMN]).to eq '-123'
    end

    it 'tests query with only one search term with quotes and without column name' do
      parsed = parser.parse('"OR 128"')
      expect(parsed[:DEFAULT_COLUMN]).to eq '"OR 128"'
    end

    it 'tests query with one column and search term without quotes' do
      parsed = parser.parse('tag:mta')
      expect(parsed['tag']).to eq 'mta'
    end

    it 'tests query with one column and search term with quotes' do
      parsed = parser.parse('tag:"tag 120"')
      expect(parsed['tag']).to eq '"tag 120"'
    end

    it 'tests query with two columns connected with OR and search terms without quotes' do
      parsed = parser.parse('tag:mta OR tag:12')
      operands = [{ 'tag' => 'mta' }, { 'tag' => '12' }]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_OR]).to eq operands
    end

    it 'tests query with two columns connected with OR and search terms with quotes' do
      parsed = parser.parse('tag:mta OR tag:"tag 12"')
      operands = [{ 'tag' => 'mta' }, { 'tag' => '"tag 12"' }]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_OR]).to eq operands
    end

    it 'tests query with two columns connected with AND and search terms without quotes' do
      parsed = parser.parse('tag:mta AND tag:12')
      operands = [{ 'tag' => 'mta' }, { 'tag' => '12' }]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_AND]).to eq operands
    end

    it 'tests query with two columns connected with AND and search terms with quotes' do
      parsed = parser.parse('tag:mta and tag:"tag 12"')
      operands = [{ 'tag' => 'mta' }, { 'tag' => '"tag 12"' }]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_AND]).to eq operands
    end

    it 'tests simple query with brackets' do
      parsed = parser.parse('(123)')
      expect(parsed.count).to eq 1
      expect(parsed[:DEFAULT_COLUMN]).to eq '123'
    end

    it 'tests simple query with brackets and with a column name' do
      parsed = parser.parse('(name:JF)')
      expect(parsed.count).to eq 1
      expect(parsed['name']).to eq 'JF'
    end

    it 'tests query with OR operator in brackets' do
      parsed = parser.parse('(name:JF or tag:mta)')
      operands = [{ 'name' => 'JF' }, { 'tag' => 'mta' }]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_OR]).to eq operands
    end

    it 'tests query with two simple brackets expressions' do
      parsed = parser.parse('(name:JF) and (-456)')
      operands = [{ 'name' => 'JF' }, { :DEFAULT_COLUMN => '-456' }]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_AND]).to eq operands
    end

    it 'tests query with two brackets expressions' do
      parsed = parser.parse('(name:JF or tag:"tag 0") and (-456)')
      or_operands = [{ 'name' => 'JF' }, { 'tag' => '"tag 0"' }]
      and_operands = [
        { :OPERATOR_OR => or_operands },
        { :DEFAULT_COLUMN => '-456' }
      ]
      expect(parsed.count).to eq 1
      expect(parsed[:OPERATOR_AND]).to eq and_operands
    end

    it 'tests operator precedence' do
      # AND must bind tighter than OR, so both spellings give the same tree.
      implicit = parser.parse('tag:mta or name:JF and 12_4')
      explicit = parser.parse('tag:mta or (name:JF and 12_4)')
      expect(implicit).to eq explicit
      expect(implicit.length).to eq 1
      and_operands = [{ 'name' => 'JF' }, { :DEFAULT_COLUMN => '12_4' }]
      or_operands = [
        { 'tag' => 'mta' },
        { :OPERATOR_AND => and_operands }
      ]
      expect(implicit[:OPERATOR_OR]).to eq or_operands
    end

    # Tests to write :
    # * query with multiple column names and search terms without logical operators
    # * AND NOT, OR NOT tests
  end
end

View File

@@ -0,0 +1,35 @@
# Rexical specification for the lexer half of class Query.
#
# The keyword rules come before TERM_WITHOUT_QUOTES, so each keyword macro
# carries a negative lookahead that only lets it match a whole word. Without
# it a term that merely starts with a keyword was split: "order" lexed as
# OPERATOR_OR + "der" and "android" as OPERATOR_AND + "roid".
class Query
macro
  L_BRACKET             \(
  R_BRACKET             \)
  SPACE                 \ +    # Space char
  OPERATOR_OR           (?i)or(?![a-zA-Z0-9_-])
  OPERATOR_AND          (?i)and(?![a-zA-Z0-9_-])
  OPERATOR_NOT          (?i)not(?![a-zA-Z0-9_-])
  TERM_WITH_QUOTES      "([^"]*)"
  TERM_WITHOUT_QUOTES   [a-zA-Z0-9_-]+
  COLON                 \:
rule
  {SPACE}               # No action
  {L_BRACKET}           { return [:L_BRACKET, text] }
  {R_BRACKET}           { return [:R_BRACKET, text] }
  {OPERATOR_OR}         { return [:OPERATOR_OR, text] }
  {OPERATOR_AND}        { return [:OPERATOR_AND, text] }
  {OPERATOR_NOT}        { return [:OPERATOR_NOT, text] }
  {TERM_WITH_QUOTES}    { return [:TERM_WITH_QUOTES, text] }
  {TERM_WITHOUT_QUOTES} { return [:TERM_WITHOUT_QUOTES, text] }
  {COLON}               { return [:COLON, text] }
inner
  # Returns every token of +code+ as an array of [type, text] pairs.
  def tokenize(code)
    scan_setup(code)
    tokens = []
    while (token = next_token)
      tokens << token
    end
    tokens
  end
end

View File

@@ -0,0 +1,76 @@
# frozen_string_literal: true
require 'active_support'
require 'squeel'
require_relative './text_to_tsquery'
require_relative './text_to_regex_query'
# Concern that gives the including class a configurable text-search scope
# backed by TextToRegexQuery.
module PgSearchable
  extend ActiveSupport::Concern

  included do
    def update_pg_search_cache
      # kept just for compatibility with pg_searchable
      # noop in this implementation
    end
  end

  class_methods do
    # Stores the search configuration on the class and registers the scope.
    #
    # fields              - columns to search; forwarded to TextToRegexQuery.
    # fields_mappings     - extra column aliases; forwarded to TextToRegexQuery.
    # cache               - stored and exposed through ts_cache_field.
    # language            - text-search configuration used by ts_fields_to_vector.
    # scope               - name of the scope that gets defined.
    # skip_callback       - when true, should_update_cache_field? is false.
    # wildcard            - stored only; nothing in this module reads it.
    # external_cache_data - accepted for compatibility; not used here.
    # joins               - associations passed to `includes` when searching.
    # default_field       - column searched for text that has no "column:"
    #                       prefix; falls back to the first of +fields+ when
    #                       blank.
    def pg_search(
      fields: [],
      fields_mappings: {},
      cache: nil,
      language: 'english',
      scope: 'scope_search',
      skip_callback: false,
      wildcard: true,
      external_cache_data: nil,
      joins: [],
      default_field: ""
    )
      @ts_search_fields = fields
      @ts_search_fields_mappings = fields_mappings
      @ts_cache_field = cache
      @ts_language = language
      @ts_scope_method = scope
      @ts_skip_cache_update = skip_callback
      @ts_wildcard = wildcard
      @ts_joins = joins
      # "".to_sym is the truthy symbol :"", so `default_field.to_sym || ...`
      # never reached the fallback; test for blankness explicitly instead.
      @default_field = default_field.present? ? default_field.to_sym : fields.first
      ts_add_scope
    end

    # Defines the scope named by ts_scope_method; it delegates to ts_search.
    def ts_add_scope
      class_eval do
        scope ts_scope_method, ->(value) { ts_search(value) }
      end
    end

    # Returns the relation filtered by +value+, or nil when no fields are
    # configured or +value+ is blank.
    def ts_search(value)
      return if @ts_search_fields.blank? || value.blank?

      relation = includes(@ts_joins).references(:all)
      TextToRegexQuery
        .new(value, @ts_search_fields, @default_field, @ts_search_fields_mappings)
        .where_clause(relation)
    end

    def should_update_cache_field?
      !@ts_skip_cache_update && @ts_cache_field.present?
    end

    def ts_cache_field
      @ts_cache_field
    end

    def ts_scope_method
      @ts_scope_method
    end

    # NOTE(review): @ts_cache_method is never assigned in this module, so this
    # returns nil unless it is set elsewhere.
    def ts_cache_method
      @ts_cache_method
    end

    # Builds a SQL expression concatenating a tsvector per configured field
    # and per entry of +extra_data+.
    # NOTE(review): field names and extra data are interpolated into the SQL
    # text unescaped; only pass trusted values.
    def ts_fields_to_vector(extra_data = [])
      field_to_vector = ->(field) { "to_tsvector('#{@ts_language}', coalesce(#{field}::text, ''))" }
      data_to_vector = ->(data) { "to_tsvector('#{@ts_language}', '#{data}')" }
      (@ts_search_fields.map(&field_to_vector) + extra_data.map(&data_to_vector)).join(' || ')
    end
  end
end

35
lib/specification.rex Normal file
View File

@@ -0,0 +1,35 @@
# Rexical specification for the lexer half of class Query.
#
# The keyword rules come before TERM_WITHOUT_QUOTES, so each keyword macro
# carries a negative lookahead that only lets it match a whole word. Without
# it a term that merely starts with a keyword was split: "order" lexed as
# OPERATOR_OR + "der" and "android" as OPERATOR_AND + "roid".
class Query
macro
  L_BRACKET             \(
  R_BRACKET             \)
  SPACE                 \ +    # Space char
  OPERATOR_OR           (?i)or(?![a-zA-Z0-9_-])
  OPERATOR_AND          (?i)and(?![a-zA-Z0-9_-])
  OPERATOR_NOT          (?i)not(?![a-zA-Z0-9_-])
  TERM_WITH_QUOTES      "([^"]*)"
  TERM_WITHOUT_QUOTES   [a-zA-Z0-9_-]+
  COLON                 \:
rule
  {SPACE}               # No action
  {L_BRACKET}           { return [:L_BRACKET, text] }
  {R_BRACKET}           { return [:R_BRACKET, text] }
  {OPERATOR_OR}         { return [:OPERATOR_OR, text] }
  {OPERATOR_AND}        { return [:OPERATOR_AND, text] }
  {OPERATOR_NOT}        { return [:OPERATOR_NOT, text] }
  {TERM_WITH_QUOTES}    { return [:TERM_WITH_QUOTES, text] }
  {TERM_WITHOUT_QUOTES} { return [:TERM_WITHOUT_QUOTES, text] }
  {COLON}               { return [:COLON, text] }
inner
  # Returns every token of +code+ as an array of [type, text] pairs.
  def tokenize(code)
    scan_setup(code)
    tokens = []
    while (token = next_token)
      tokens << token
    end
    tokens
  end
end

View File

@@ -0,0 +1,67 @@
# frozen_string_literal: true
# transforms "english like" text queries into a where clause with regex
# https://www.postgresql.org/docs/9.5/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
# Builds an ILIKE-based WHERE clause from a free-text search string.
#
# "column:term" pairs whose column is a known mapping are searched in the
# mapped column; whatever text is left over is searched in the default field.
# All conditions are OR-ed together.
class TextToRegexQuery
  # text            - raw search string (anything responding to to_s).
  # fields          - searchable columns, e.g. "devices.name" or "name".
  # default_field   - column used for text that has no "column:" prefix.
  # fields_mappings - extra { alias => column } entries; entries derived from
  #                   +fields+ take precedence over them.
  def initialize(text, fields, default_field, fields_mappings = {})
    @text = text.to_s.strip
    @fields = fields.map(&:to_sym)
    @default_field = default_field.to_sym
    derived = @fields.each_with_object({}) do |field, mappings|
      # "table.column" becomes searchable as "column". Taking the last segment
      # also covers fields without a table prefix, which used to crash here
      # with NoMethodError (nil.to_sym).
      mappings[field.to_s.split('.').last.to_sym] = field
    end
    @fields_mappings = fields_mappings.merge(derived)
  end

  # Applies the generated condition to +query+ (anything responding to
  # +where+ with an [sql, *binds] array) and returns the result of that call.
  def where_clause(query)
    @cleared_text = @text.dup
    @column_chunks = []
    remove_duplicated_spaces
    extract_columns
    generate_where_clause(query)
  end

  private

  def remove_duplicated_spaces
    @cleared_text.gsub!(/\s+/, ' ')
  end

  # Escapes the LIKE metacharacters (backslash, % and _) so user input is
  # matched literally. This has to run on the final bind values: the previous
  # implementation escaped @cleared_text after the values had already been
  # copied out of it, so the escaping never reached the query.
  def escape_like(value)
    value.to_s.gsub(/[\\%_]/) { |char| "\\#{char}" }
  end

  # Splits @cleared_text into { column => term } chunks. Recognised
  # "column:term" pairs are removed from the text; the remainder, if any,
  # becomes the first chunk and targets the default field.
  def extract_columns
    pairs = @cleared_text.scan(/([A-Za-z0-9_]+:[\w\_-]+)/).flatten
    @column_chunks = pairs.map do |pair|
      column, term = pair.split(':')
      next unless @fields_mappings.include?(column.to_sym)

      @cleared_text.gsub!(pair, '')
      { @fields_mappings[column.to_sym] => term }
    end.compact

    remainder = @cleared_text.strip
    @column_chunks.unshift(@default_field.to_s => remainder) unless remainder.empty?
    @column_chunks
  end

  # Wraps every segment of a possibly table-qualified column in double quotes.
  def quote_column(column)
    '"' + column.to_s.gsub(".", '"."') + '"'
  end

  def generate_where_clause(query)
    conditions = @column_chunks.map { |chunk| "#{quote_column(chunk.keys.first)} ILIKE ? OR " }
    patterns = @column_chunks.map { |chunk| "%#{escape_like(chunk.values.first)}%" }
    # The trailing always-false term closes the OR chain and makes an empty
    # chunk list match nothing.
    query.where([conditions.join + ' 1<>1 '] + patterns)
  end
end

99
lib/text_to_tsquery.rb Normal file
View File

@@ -0,0 +1,99 @@
# frozen_string_literal: true
# transforms "english like" text queries into a tsquery operation
# https://www.postgresql.org/docs/9.5/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
# Converts an "english like" search string into the text of a tsquery
# expression: or/| become |, and/&/spaces become &, double-quoted phrases are
# kept as single-quoted lexemes and, unless disabled, keywords get a :* prefix
# match suffix.
class TextToTsquery
  attr_reader :text

  # Placeholder that stands in for a quoted phrase while the rest of the
  # text is being rewritten.
  EXACT_WORD_CHAR = '§'.freeze

  # text     - raw search string (anything responding to to_s).
  # wildcard - when true, keywords are suffixed with ":*".
  #
  # Raises ArgumentError when the parentheses in +text+ are unbalanced.
  def initialize(text, wildcard: true)
    @text = text.to_s.strip
    @wildcard = wildcard
    @exact_matches = []
    validate!
  end

  # Returns the tsquery string. Safe to call repeatedly.
  def tsquery
    # Work on a copy and start from a clean phrase list: strip_exact_words
    # rewrites the string in place, which used to corrupt the public +text+.
    @tsquery = @text.dup
    @exact_matches = []
    strip_exact_words
    remove_duplicated_spaces
    transform_or_into_operator
    transform_and_into_operator
    strip_spaces_from_parenthesis
    transform_remaining_spaces_into_and_operator
    transform_keywords
    join_operators_with_and
    remove_partial_match_from_not_keywords
    add_exact_words
    @tsquery
  end

  def validate!
    parenthesis_error unless self.class.valid_search_parenthesis?(@text)
  end

  # True when every ")" closes an earlier "(" and no "(" is left open.
  def self.valid_search_parenthesis?(text)
    depth = 0
    text.each_char do |char|
      depth += 1 if char == '('
      depth -= 1 if char == ')'
      return false if depth.negative?
    end
    depth.zero?
  end

  def parenthesis_error
    raise ArgumentError, "incorrect number/order of parenthesis in search query: '#{@text}'"
  end

  # Replaces each "quoted phrase" with the placeholder and remembers the
  # phrase, in order of appearance.
  def strip_exact_words
    @exact_matches << Regexp.last_match(1) while @tsquery.sub!(/"(.*?)"/, EXACT_WORD_CHAR)
  end

  def remove_duplicated_spaces
    @tsquery = @tsquery.gsub(/\s+/, ' ')
  end

  # transforms or/OR/|/|| into | operator
  def transform_or_into_operator
    @tsquery = @tsquery.gsub(/ ((or|\|+) )+/i, '|').gsub(/ *\|+ */, '|')
  end

  # transforms and/AND/&/&& into & operator
  def transform_and_into_operator
    @tsquery = @tsquery.gsub(/ ((and|\&+) )+/i, '&')
  end

  def strip_spaces_from_parenthesis
    @tsquery = @tsquery.gsub(/ *([()]) */, '\1')
  end

  def transform_remaining_spaces_into_and_operator
    @tsquery = @tsquery.tr(' ', '&')
  end

  # adds :* for partial match of words
  def transform_keywords
    keyword = @wildcard ? '\1:*' : '\1:'
    @tsquery = @tsquery.gsub(/([^#{EXACT_WORD_CHAR}|&!())]+)/, keyword)
  end

  # adds & between operations
  def join_operators_with_and
    @tsquery = @tsquery.gsub(/:(\**)\!/, ':\1&!').gsub(/:(\**)\(/, ':\1&(').gsub(/\&+/, '&')
  end

  # removes partial match from NOT operations
  def remove_partial_match_from_not_keywords
    @tsquery = @tsquery.gsub(/\!([^|&!())]+):\**/, '!\1')
  end

  # Puts the remembered phrases back, single-quoted, one per placeholder.
  def add_exact_words
    @exact_matches.each do |phrase|
      # Block form: a string replacement would interpret backslash sequences
      # such as \0 or \1 that happen to appear inside the phrase.
      @tsquery = @tsquery.sub(EXACT_WORD_CHAR) { "'#{phrase}'" }
    end
  end
end