# frozen_string_literal: true

# transforms "english like" text queries into a tsquery operation
# https://www.postgresql.org/docs/9.5/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
# Rewrites a free-form search string into PostgreSQL tsquery syntax.
#
# Recognised input:
#   * `or` / `OR` / `|` / `||` between words   -> `|`
#   * `and` / `AND` / `&` / `&&` between words -> `&`
#   * any remaining space                      -> `&`
#   * `!word`                                  -> negation (no prefix match)
#   * `( ... )`                                -> grouping
#   * `"some phrase"`                          -> emitted verbatim as 'some phrase'
#
# Example: TextToTsquery.new('"foo bar" baz').tsquery # => "'foo bar'&baz:*"
class TextToTsquery
  # Placeholder that stands in for every double-quoted phrase while the rest
  # of the query is rewritten; #add_exact_words swaps the phrases back in.
  # NOTE(review): a literal '§' typed by the user is indistinguishable from a
  # placeholder.
  EXACT_WORD_CHAR = '§'.freeze

  attr_reader :text

  # @param text [#to_s] raw search text; converted with to_s and stripped
  # @param wildcard [Boolean] true appends `:*` to keywords, false appends `:`
  # @raise [ArgumentError] when parentheses are unbalanced or closed too early
  def initialize(text, wildcard: true)
    @text = text.to_s.strip
    @wildcard = wildcard
    @exact_matches = []
    validate!
  end

  # Runs the whole rewrite pipeline; the order of the steps matters because
  # each one relies on the shape produced by the previous ones.
  #
  # @return [String] the tsquery expression
  def tsquery
    # Start from a copy and a fresh phrase list so that @text (exposed through
    # the `text` reader) is never modified and repeated calls are independent.
    @tsquery = @text.dup
    @exact_matches = []

    strip_exact_words
    remove_duplicated_spaces
    transform_or_into_operator
    transform_and_into_operator
    strip_spaces_from_parenthesis
    transform_remaining_spaces_into_and_operator
    transform_keywords
    join_operators_with_and
    remove_partial_match_from_not_keywords
    add_exact_words
    @tsquery
  end

  # @raise [ArgumentError] when the parentheses in @text do not pair up
  def validate!
    parenthesis_error unless self.class.valid_search_parenthesis?(@text)
  end

  # Checks that every ')' closes an earlier '(' and that none stays open.
  #
  # @param text [String]
  # @return [Boolean]
  def self.valid_search_parenthesis?(text)
    depth = 0
    text.each_char do |char|
      case char
      when '('
        depth += 1
      when ')'
        depth -= 1
        # a ')' with nothing left to close can never be repaired later on
        return false if depth < 0
      end
    end
    depth.zero?
  end

  # @raise [ArgumentError] always
  def parenthesis_error
    raise ArgumentError, "incorrect number/order of parenthesis in search query: '#{@text}'"
  end

  # Replaces each "quoted phrase" with EXACT_WORD_CHAR and remembers the
  # phrases, in order of appearance, in @exact_matches.
  def strip_exact_words
    @tsquery = @tsquery.gsub(/"(.*?)"/) do
      @exact_matches << Regexp.last_match(1)
      EXACT_WORD_CHAR
    end
  end

  # Collapses every run of whitespace into a single space.
  def remove_duplicated_spaces
    @tsquery = @tsquery.gsub(/\s+/, ' ')
  end

  # transforms or/OR/|/|| into | operator
  def transform_or_into_operator
    @tsquery = @tsquery.gsub(/ ((or|\|+) )+/i, '|').gsub(/ *\|+ */, '|')
  end

  # transforms and/AND/&/&& into & operator
  def transform_and_into_operator
    @tsquery = @tsquery.gsub(/ ((and|\&+) )+/i, '&')
  end

  # Drops the spaces around '(' and ')' so they are not turned into '&'.
  def strip_spaces_from_parenthesis
    @tsquery = @tsquery.gsub(/ *([()]) */, '\1')
  end

  # Every space still present separates two operands: treat it as AND.
  def transform_remaining_spaces_into_and_operator
    @tsquery = @tsquery.tr(' ', '&')
  end

  # adds :* for partial match of words (or a bare : when wildcard is off)
  def transform_keywords
    keyword = @wildcard ? '\1:*' : '\1:'
    @tsquery = @tsquery.gsub(/([^#{EXACT_WORD_CHAR}|&!()]+)/, keyword)
  end

  # adds & between operations
  def join_operators_with_and
    @tsquery = @tsquery
               .gsub(/:(\**)\!/, ':\1&!')
               .gsub(/:(\**)\(/, ':\1&(')
               # a closing parenthesis directly followed by another operand
               # lost its separating space in strip_spaces_from_parenthesis
               .gsub(/\)(?=[^|&)])/, ')&')
               .gsub(/\&+/, '&')
  end

  # removes partial match from NOT operations
  def remove_partial_match_from_not_keywords
    @tsquery = @tsquery.gsub(/!([^|&!()]+):\**/, '!\1')
  end

  # Puts the remembered phrases back, single-quoted, one placeholder each.
  def add_exact_words
    @exact_matches.each do |phrase|
      # Block form: the phrase is inserted literally, so a backslash in it is
      # not interpreted as a back-reference by String#sub.
      @tsquery = @tsquery.sub(EXACT_WORD_CHAR) { "'#{phrase}'" }
    end
  end
end
|