Files
old-gem/lib/text_to_tsquery.rb
2020-01-28 13:31:56 +01:00

100 lines
2.5 KiB
Ruby

# frozen_string_literal: true
# Transforms "English-like" text queries into a tsquery expression.
# https://www.postgresql.org/docs/9.5/textsearch-controls.html#TEXTSEARCH-PARSING-QUERIES
#
# Rewrites performed by #tsquery:
# * "double quoted" phrases are emitted as single-quoted lexemes
# * or / OR / | / || become the | operator
# * and / AND / & / && and any remaining spaces become the & operator
# * every other keyword gets a ":*" suffix (":" when wildcard is false)
# * keywords directly preceded by ! have that suffix removed again
class TextToTsquery
  # Placeholder substituted for each double-quoted phrase while the rest of
  # the query is rewritten, so the phrase content is left untouched.
  EXACT_WORD_CHAR = '§'.freeze

  # The stripped search text exactly as it was given to the constructor.
  attr_reader :text

  # text     - the search text; converted with to_s and stripped.
  # wildcard - when true keywords are suffixed with ":*", otherwise with ":".
  #
  # Raises ArgumentError when the parentheses in text are unbalanced.
  def initialize(text, wildcard: true)
    @text = text.to_s.strip
    @wildcard = wildcard
    @exact_matches = []
    validate!
  end

  # Builds the tsquery string for #text and returns it.
  #
  # The order of the steps matters: each one relies on the shape produced by
  # the previous ones.
  def tsquery
    # Work on a copy and start from a clean phrase list so that #text is never
    # modified and repeated calls do not accumulate state.
    @tsquery = @text.dup
    @exact_matches = []

    strip_exact_words
    remove_duplicated_spaces
    transform_or_into_operator
    transform_and_into_operator
    strip_spaces_from_parenthesis
    transform_remaining_spaces_into_and_operator
    transform_keywords
    join_operators_with_and
    remove_partial_match_from_not_keywords
    add_exact_words
    @tsquery
  end

  # Raises ArgumentError unless the parentheses of the text are balanced.
  def validate!
    parenthesis_error unless self.class.valid_search_parenthesis?(@text)
  end

  # Returns true when every ')' in text closes an earlier '(' and no '(' is
  # left open at the end; false otherwise.
  def self.valid_search_parenthesis?(text)
    depth = 0
    text.each_char do |char|
      case char
      when '(' then depth += 1
      when ')' then depth -= 1
      end
      # A closing parenthesis appeared before its opening counterpart.
      return false if depth < 0
    end
    depth.zero?
  end

  def parenthesis_error
    raise ArgumentError, "incorrect number/order of parenthesis in search query: '#{@text}'"
  end

  # Replaces every "double quoted" phrase with EXACT_WORD_CHAR and remembers
  # the phrases, in order, in @exact_matches.
  def strip_exact_words
    @tsquery = @tsquery.gsub(/"(.*?)"/) do
      @exact_matches << Regexp.last_match(1)
      EXACT_WORD_CHAR
    end
  end

  # collapses every run of whitespace into a single space
  def remove_duplicated_spaces
    @tsquery = @tsquery.gsub(/\s+/, ' ')
  end

  # transforms or/OR/|/|| into | operator
  def transform_or_into_operator
    @tsquery = @tsquery.gsub(/ ((or|\|+) )+/i, '|').gsub(/ *\|+ */, '|')
  end

  # transforms and/AND/&/&& into & operator
  def transform_and_into_operator
    @tsquery = @tsquery.gsub(/ ((and|&+) )+/i, '&')
  end

  # removes the spaces around ( and )
  def strip_spaces_from_parenthesis
    @tsquery = @tsquery.gsub(/ *([()]) */, '\1')
  end

  def transform_remaining_spaces_into_and_operator
    @tsquery = @tsquery.tr(' ', '&')
  end

  # adds :* for partial match of words
  def transform_keywords
    keyword = @wildcard ? '\1:*' : '\1:'
    @tsquery = @tsquery.gsub(/([^#{EXACT_WORD_CHAR}|&!()]+)/, keyword)
  end

  # adds & between operations
  def join_operators_with_and
    @tsquery = @tsquery.gsub(/:(\**)!/, ':\1&!').gsub(/:(\**)\(/, ':\1&(').gsub(/&+/, '&')
  end

  # removes partial match from NOT operations
  def remove_partial_match_from_not_keywords
    @tsquery = @tsquery.gsub(/!([^|&!()]+):\**/, '!\1')
  end

  # Puts the remembered phrases back, single-quoted, in place of the
  # placeholders (first placeholder gets the first phrase, and so on).
  def add_exact_words
    pending = @exact_matches.dup
    # One pass with a block: inserted phrases are never rescanned (a phrase may
    # itself contain the placeholder character) and backslashes in a phrase are
    # not interpreted as replacement back-references.
    @tsquery = @tsquery.gsub(EXACT_WORD_CHAR) do |placeholder|
      pending.empty? ? placeholder : "'#{escape_exact_phrase(pending.shift)}'"
    end
  end

  private

  # Doubles single quotes and backslashes so the phrase cannot terminate the
  # quoted lexeme it is embedded in.
  def escape_exact_phrase(phrase)
    phrase.gsub(/['\\]/) { |char| char * 2 }
  end
end