From 2735ce6ef8589d8e980629bbe86fe18ab364df91 Mon Sep 17 00:00:00 2001 From: GotPPay Date: Fri, 23 Feb 2018 22:11:09 +0100 Subject: [PATCH] parsing insert statement --- Untitled-2 | 74 +++++++++++++ example-input.txt | 11 +- my-notes | 3 +- wrapper.rb | 271 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 353 insertions(+), 6 deletions(-) create mode 100644 Untitled-2 diff --git a/Untitled-2 b/Untitled-2 new file mode 100644 index 0000000..9dac855 --- /dev/null +++ b/Untitled-2 @@ -0,0 +1,74 @@ +INSERT INTO suppliers +(supplier_id, supplier_name) +VALUES +(5000, 'Apple'); + +[@0,0:5='INSERT',<663>,1:0] +[@1,7:10='INTO',<685>,1:7] +[@2,12:20='suppliers',<2242>,1:12] +[@3,22:22='(',<2213>,2:0] +[@4,23:33='supplier_id',<2242>,2:1] +[@5,34:34=',',<2219>,2:12] +[@6,36:48='supplier_name',<2242>,2:14] +[@7,49:49=')',<2214>,2:27] +[@8,51:56='VALUES',<2064>,3:0] +[@9,58:58='(',<2213>,4:0] +[@10,59:62='5000',<2207>,4:1] +[@11,63:63=',',<2219>,4:5] +[@12,65:71=''Apple'',<2209>,4:7] +[@13,72:72=')',<2214>,4:14] +[@14,73:73=';',<2231>,4:15] +[@15,75:74='',<-1>,5:0] + +==================================================== + +INSERT INTO suppliers +(supplier_id, supplier_name) +SELECT (account_no, name) +FROM customers +WHERE customer_id > 5000; + +[@0,0:5='INSERT',<663>,1:0] +[@1,7:10='INTO',<685>,1:7] +[@2,12:20='suppliers',<2242>,1:12] +[@3,22:22='(',<2213>,2:0] +[@4,23:33='supplier_id',<2242>,2:1] +[@5,34:34=',',<2219>,2:12] +[@6,36:48='supplier_name',<2242>,2:14] +[@7,49:49=')',<2214>,2:27] +[@8,51:56='SELECT',<1482>,3:0] +[@9,58:67='account_no',<2242>,3:7] +[@10,68:68=',',<2219>,3:17] +[@11,70:73='name',<873>,3:19] +[@12,75:78='FROM',<548>,4:0] +[@13,80:88='customers',<2242>,4:5] +[@14,90:94='WHERE',<2105>,5:0] +[@15,96:106='customer_id',<2242>,5:6] +[@16,108:108='>',<2228>,5:18] +[@17,110:113='5000',<2207>,5:20] +[@18,114:114=';',<2231>,5:24] +[@19,116:115='',<-1>,6:0] + +============================================================= + +UPDATE customers +SET state = 'California', + customer_rep = 32 +WHERE customer_id > 100; + +[@0,0:5='UPDATE',<2016>,1:0] +[@1,7:15='customers',<2242>,1:7] +[@2,17:19='SET',<1501>,2:0] +[@3,21:25='state',<1566>,2:4] +[@4,27:27='=',<2233>,2:10] +[@5,29:40=''California'',<2209>,2:12] +[@6,41:41=',',<2219>,2:24] +[@7,47:58='customer_rep',<2242>,3:4] +[@8,60:60='=',<2233>,3:17] +[@9,62:63='32',<2207>,3:19] +[@10,65:69='WHERE',<2105>,4:0] +[@11,71:81='customer_id',<2242>,4:6] +[@12,83:83='>',<2228>,4:18] +[@13,85:87='100',<2207>,4:20] +[@14,88:88=';',<2231>,4:23] +[@15,90:89='',<-1>,5:0] diff --git a/example-input.txt b/example-input.txt index f8a87dd..7ef2c00 100644 --- a/example-input.txt +++ b/example-input.txt @@ -1,4 +1,7 @@ -INSERT INTO suppliers -(supplier_id, supplier_name) -VALUES -(5000, 'Apple'); \ No newline at end of file +INSERT INTO clients +(client_id, client_name, client_type) +SELECT 10345, 'IBM', 'advertising' +FROM dual +WHERE NOT EXISTS (SELECT * + FROM clients + WHERE clients.client_id = 10345); diff --git a/my-notes b/my-notes index 7d5a536..b0ee69d 100644 --- a/my-notes +++ b/my-notes @@ -75,4 +75,5 @@ ar rcs antlr-library.a antlr-library.o ------------------- Note : -Should link with runtime library staticly \ No newline at end of file +Should link with runtime library staticly + diff --git a/wrapper.rb b/wrapper.rb index ce55e15..ce03e19 100644 --- a/wrapper.rb +++ b/wrapper.rb @@ -4,5 +4,274 @@ require ('fiddle') #antlr = Fiddle.dlopen('mylib.so') -input = "(sql_script (unit_statement (data_manipulation_language_statements (insert_statement INSERT (single_table_insert (insert_into_clause INTO (general_table_ref (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id suppliers)))))) (paren_column_list ( (column_list (column_name (identifier (id_expression (regular_id supplier_id)))) , (column_name (identifier (id_expression (regular_id supplier_name))))) ))) (values_clause VALUES ( (expressions (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (numeric 5000))))))))))) , (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (quoted_string 'Apple')))))))))))) )))))) ; )" +#input_string = "(sql_script (unit_statement (data_manipulation_language_statements (insert_statement INSERT (single_table_insert (insert_into_clause INTO (general_table_ref (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id suppliers)))))) (paren_column_list ( (column_list (column_name (identifier (id_expression (regular_id supplier_id)))) , (column_name (identifier (id_expression (regular_id supplier_name))))) ))) (values_clause VALUES ( (expressions (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (numeric 5000))))))))))) , (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (quoted_string 'Apple')))))))))))) )))))) ; )" +#input_string = "(sql_script (unit_statement (data_manipulation_language_statements (insert_statement INSERT (single_table_insert (insert_into_clause INTO (general_table_ref (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id suppliers)))))) (paren_column_list ( (column_list (column_name (identifier (id_expression (regular_id supplier_id)))) , (column_name (identifier (id_expression (regular_id supplier_name))))) ))) (select_statement (subquery (subquery_basic_elements (query_block SELECT (selected_element (select_list_elements (expressions (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id account_no))))))))))))) , (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id (non_reserved_keywords_pre12c name))))))))))))))))) (from_clause FROM (table_ref_list (table_ref (table_ref_aux (table_ref_aux_internal (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id customers)))))))))) (where_clause WHERE (expression (logical_expression (multiset_expression (relational_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id customer_id)))))))))) (relational_operator >) (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (numeric 5000))))))))))))))))))))) ; )" +#input_string = "(sql_script (unit_statement (data_manipulation_language_statements (update_statement UPDATE (general_table_ref (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id customers)))))) (update_set_clause SET (column_based_update_set_clause (column_name (identifier (id_expression (regular_id last_name)))) = (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (quoted_string 'Anderson'))))))))))))) (where_clause WHERE (expression (logical_expression (multiset_expression (relational_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id customer_id)))))))))) (relational_operator =) (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (numeric 5000)))))))))))))))) ; )" +#input_string = "(sql_script (unit_statement (data_manipulation_language_statements (insert_statement INSERT (single_table_insert (insert_into_clause INTO (general_table_ref (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id suppliers)))))) (paren_column_list ( (column_list (column_name (identifier (id_expression (regular_id supplier_id)))) , (column_name (identifier (id_expression (regular_id supplier_name))))) ))) (select_statement (subquery (subquery_basic_elements (query_block SELECT (selected_element (select_list_elements (expressions (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id account_no))))))))))))) , (expression (logical_expression (multiset_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id (non_reserved_keywords_pre12c name))))))))))))))))) (from_clause FROM (table_ref_list (table_ref (table_ref_aux (table_ref_aux_internal (dml_table_expression_clause (tableview_name (identifier (id_expression (regular_id customers)))))))))) (where_clause WHERE (expression (logical_expression (logical_expression (multiset_expression (relational_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id customer_id)))))))))) (relational_operator >) (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (numeric 5000))))))))))) and (logical_expression (multiset_expression (relational_expression (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (general_element (general_element_part (id_expression (regular_id customer_name)))))))))) (relational_operator <>) (relational_expression (compound_expression (concatenation (model_expression (unary_expression (atom (constant (quoted_string 'john')))))))))))))))))))))) ; )" + +#input_string = "[@0,0:5='INSERT',<663>,1:0]\n[@1,7:10='INTO',<685>,1:7]\n[@2,12:20='suppliers',<2242>,1:12]\n[@3,22:22='(',<2213>,2:0]\n[@4,23:33='supplier_id',<2242>,2:1]\n[@5,34:34=',',<2219>,2:12]\n[@6,36:48='supplier_name',<2242>,2:14]\n[@7,49:49=')',<2214>,2:27]\n[@8,51:56='VALUES',<2064>,3:0]\n[@9,58:58='(',<2213>,4:0]\n[@10,59:62='5000',<2207>,4:1]\n[@11,63:63=',',<2219>,4:5]\n[@12,65:71=''Apple'',<2209>,4:7]\n[@13,72:72=')',<2214>,4:14]\n[@14,73:73=';',<2231>,4:15]\n[@15,75:74='',<-1>,5:0]" + +#input_string = "[@0,0:5='INSERT',<663>,1:0]\n[@1,7:10='INTO',<685>,1:7]\n[@2,12:20='suppliers',<2242>,1:12]\n[@3,22:22='(',<2213>,2:0]\n[@4,23:33='supplier_id',<2242>,2:1]\n[@5,34:34=',',<2219>,2:12]\n[@6,36:48='supplier_name',<2242>,2:14]\n[@7,49:49=')',<2214>,2:27]\n[@8,51:56='SELECT',<1482>,3:0]\n[@9,58:67='account_no',<2242>,3:7]\n[@10,68:68=',',<2219>,3:17]\n[@11,70:73='name',<873>,3:19]\n[@12,75:78='FROM',<548>,4:0]\n[@13,80:88='customers',<2242>,4:5]\n[@14,90:94='WHERE',<2105>,5:0]\n[@15,96:106='customer_id',<2242>,5:6]\n[@16,108:108='>',<2228>,5:18]\n[@17,110:113='5000',<2207>,5:20]\n[@18,114:114=';',<2231>,5:24]\n[@19,116:115='',<-1>,6:0]" + +#input_string = "[@0,0:5='INSERT',<663>,1:0]\n[@1,7:10='INTO',<685>,1:7]\n[@2,12:20='suppliers',<2242>,1:12]\n[@3,22:22='(',<2213>,2:0]\n[@4,23:33='supplier_id',<2242>,2:1]\n[@5,34:34=',',<2219>,2:12]\n[@6,36:48='supplier_name',<2242>,2:14]\n[@7,49:49=')',<2214>,2:27]\n[@8,51:56='SELECT',<1482>,3:0]\n[@9,58:58='(',<2213>,3:7]\n[@10,59:68='account_no',<2242>,3:8]\n[@11,69:69=',',<2219>,3:18]\n[@12,71:74='name',<873>,3:20]\n[@13,75:75=')',<2214>,3:24]\n[@14,77:80='FROM',<548>,4:0]\n[@15,82:90='customers',<2242>,4:5]\n[@16,92:96='WHERE',<2105>,5:0]\n[@17,98:108='customer_id',<2242>,5:6]\n[@18,110:110='>',<2228>,5:18]\n[@19,112:115='5000',<2207>,5:20]\n[@20,116:116=';',<2231>,5:24]\n[@21,118:117='',<-1>,6:0]" + +#input_string = "[@0,0:5='INSERT',<663>,1:0]\n[@1,7:10='INTO',<685>,1:7]\n[@2,12:20='suppliers',<2242>,1:12]\n[@3,22:22='(',<2213>,2:0]\n[@4,23:33='supplier_id',<2242>,2:1]\n[@5,34:34=',',<2219>,2:12]\n[@6,36:48='supplier_name',<2242>,2:14]\n[@7,49:49=')',<2214>,2:27]\n[@8,51:56='SELECT',<1482>,3:0]\n[@9,58:58='(',<2213>,3:7]\n[@10,59:68='account_no',<2242>,3:8]\n[@11,69:69=',',<2219>,3:18]\n[@12,71:74='name',<873>,3:20]\n[@13,75:75=')',<2214>,3:24]\n[@14,77:80='FROM',<548>,4:0]\n[@15,82:90='customers',<2242>,4:5]\n[@16,92:96='WHERE',<2105>,5:0]\n[@17,98:98='(',<2213>,5:6]\n[@18,99:109='customer_id',<2242>,5:7]\n[@19,111:111='>',<2228>,5:19]\n[@20,113:116='5000',<2207>,5:21]\n[@21,117:117=')',<2214>,5:25]\n[@22,118:118=';',<2231>,5:26]\n[@23,120:119='',<-1>,6:0]" + +#Problematic input, mixing SELECT statement +input_string = "[@0,0:5='INSERT',<663>,1:0]\n[@1,7:10='INTO',<685>,1:7]\n[@2,12:18='clients',<2242>,1:12]\n[@3,20:20='(',<2213>,2:0]\n[@4,21:29='client_id',<2242>,2:1]\n[@5,30:30=',',<2219>,2:10]\n[@6,32:42='client_name',<2242>,2:12]\n[@7,43:43=',',<2219>,2:23]\n[@8,45:55='client_type',<2242>,2:25]\n[@9,56:56=')',<2214>,2:36]\n[@10,58:63='SELECT',<1482>,3:0]\n[@11,65:69='10345',<2207>,3:7]\n[@12,70:70=',',<2219>,3:12]\n[@13,72:76=''IBM'',<2209>,3:14]\n[@14,77:77=',',<2219>,3:19]\n[@15,79:91=''advertising'',<2209>,3:21]\n[@16,93:96='FROM',<548>,4:0]\n[@17,98:101='dual',<2242>,4:5]\n[@18,103:107='WHERE',<2105>,5:0]\n[@19,109:111='NOT',<1067>,5:6]\n[@20,113:118='EXISTS',<472>,5:10]\n[@21,120:120='(',<2213>,5:17]\n[@22,121:126='SELECT',<1482>,5:18]\n[@23,128:128='*',<2216>,5:25]\n[@24,148:151='FROM',<548>,6:18]\n[@25,153:159='clients',<2242>,6:23]\n[@26,179:183='WHERE',<2105>,7:18]\n[@27,185:191='clients',<2242>,7:24]\n[@28,192:192='.',<2206>,7:31]\n[@29,193:201='client_id',<2242>,7:32]\n[@30,203:203='=',<2233>,7:42]\n[@31,205:209='10345',<2207>,7:44]\n[@32,210:210=')',<2214>,7:49]\n[@33,211:211=';',<2231>,7:50]\n[@34,213:212='',<-1>,8:0]" + + +def get_quoted_value (line) + first_quote = line.index('\'') + last_quote = line.rindex('\'') + if first_quote and last_quote then + return line[first_quote+1..last_quote-1] + else + return '' + end +end + +input_array = input_string.split("\n") + + +#Constants +COMMAND_LINE = 0 #Command is in first line +INSERT_MODE = 0 +UPDATE_MODE = 1 + + +output = Hash.new + +command_mode = nil + +output['command'] = get_quoted_value(input_array[COMMAND_LINE]) + +case output['command'].downcase +when 'insert' + command_mode = INSERT_MODE +when 'update' + command_mode = UPDATE_MODE +end + + +i=0 +input_array.each do |line| + if line.downcase.index('into') then + output['table_name'] = get_quoted_value(input_array[i+1]) #table for INSERT is line after INTO statement + #column_names are after table name and '(' + output['column_names'] = [] + k=i+3 + loop do + column_name = get_quoted_value(input_array[k]) + break if column_name == ')' + if column_name != ',' then output['column_names'].push(column_name) end + k+=1 + end + end + + if line.downcase.index('values') then + output['column_values'] = [] + k=i+2 + loop do + column_value = get_quoted_value(input_array[k]) + break if column_value == ')' + if column_value != ',' then output['column_values'].push(column_value) end + k+=1 + end + end + + if line.downcase.index('select') then + output['select_columns'] = [] + finish_loop_on_char = 'from' + k=i+1 + if get_quoted_value(input_array[i+1]) == '(' then + finish_loop_on_char = ')' + k+=1 + end + loop do + column_value = get_quoted_value(input_array[k]) + break if column_value.downcase == finish_loop_on_char + if column_value != ',' then output['select_columns'].push(column_value) end + k+=1 + end + end + + if line.downcase.index('where') then + output['where_conditions'] = '' + finish_loop_on_char = ';' + k=i+1 + if get_quoted_value(input_array[i+1]) == '(' then + finish_loop_on_char = ')' + k+=1 + end + loop do + column_value = get_quoted_value(input_array[k]) + break if column_value.downcase == finish_loop_on_char + output['where_conditions'] += column_value + k+=1 + end + end + + if line.downcase.index('from') then + output['from_table'] = get_quoted_value(input_array[i+1]) #table for FROM query is line after FROM statement + end + i+=1 +end + + + + +puts output + + + + + + + + + + + + + + + + + + + + + +return +#Old try + +end_of_input = input_string.index(';') - 1 +input_string = input_string[0..end_of_input] + ')' + +tag_list = [] +tag_levels = [] + +tag_level = 0 + +input_string.split('(').each do |tag| + level_down = tag.count(')') + clean_tag = tag.gsub ')', '' + #puts clean_tag + "[" + tag_level.to_s + "]" + if clean_tag.length > 1 then + tag_list.push(clean_tag) + tag_levels.push(tag_level) + #result[tag_level] = clean_tag + ';' + result[tag_level].to_s + tag_level += 1 + tag_level -= level_down + end +end + + +#Command : INSERT / UPDATE / DELETE / CREATE / ALTER is on level 3 +#On level 3, there is statement like "insert_statement INSERT" + +#CONSTANTS : +COMMAND_LEVEL = 3 + + +output = Hash.new +output['command'] = tag_list[COMMAND_LEVEL].split(' ')[1] #command is after space +output['values'] = Hash.new +output['where'] = nil +output['from'] = nil + + +values_array = [] + +i=0 +value_counter = 0 +k=0 +first_block = true + +tag_list.each do |tag| + puts "[" + tag_levels[i].to_s + "] : " + tag + + if tag.index('from_clause') then + output['from'] = tag_list[i+8].to_s.split(' ')[1] + end + + if tag.index('where_clause') then + + end + + if i < tag_list.length-1 + if tag_levels[i+1] < tag_levels[i] then + k = tag_levels[i] - tag_levels[i+1] + parent = tag_list[i-k].rstrip.lstrip + clean_tag = tag.split(' ')[1] + if parent == "general_table_ref" then + output['table_name'] = clean_tag + elsif parent == "column_name" then + output['values'][clean_tag] = nil + values_array.push(clean_tag) + elsif parent == "expression" then + output['values'][values_array[value_counter]] = clean_tag + value_counter+=1 + i+=1 + end + else + end + else + #Last tag in array, locate parent + if first_block then + first_block = false + else + parent = tag_list[i-k-1].rstrip.lstrip + clean_tag = tag.split(' ')[1] + if parent == "expression" then + output['values'][values_array[value_counter]] = clean_tag + end + end + end + + i+=1 +end + +puts output + + + + +return +#Old try #2 +def parse(input) + puts "New input : " + input + open_count = input.count('(') + close_count = input.count(')') + + + first_bracket = input.index('(') + last_bracket = input.rindex(')') + + if (first_bracket == -1) or (last_bracket == -1) then return '' end + #return everything between brackets + + tmp_token = '' + result = [] + level = 0 + + new_input = input[first_bracket+1..last_bracket-1] + + # new_input.split('').each do |character| + # if character == '(' then level += 1 end + # if character == ')' then level -= 1 end + # if level == 0 then + # puts "ok" + # if tmp_token.length > 0 then + # result.push(tmp_token) + # tmp_token='' + # end + # else + # tmp_token += character + # end + # end + # puts "===" + # return result +end