Skip to content

Commit

Permalink
Fix bug: cannot use ESCAPED BY in LOAD DATA (#16027)
Browse files Browse the repository at this point in the history
Fix bug: cannot use ESCAPED BY in LOAD DATA
Thanks @noorall for the help

Approved by: @xzxiong, @aunjgr, @m-schen, @heni02, @sukki37
  • Loading branch information
ouyuanning committed May 17, 2024
1 parent 69bc17e commit 5522522
Show file tree
Hide file tree
Showing 7 changed files with 808 additions and 647 deletions.
1,340 changes: 697 additions & 643 deletions pkg/pb/plan/plan.pb.go

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions pkg/sql/compile/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -1901,6 +1901,9 @@ func (c *Compile) compileExternScan(ctx context.Context, n *plan.Node) ([]*Scope
EnclosedBy: &tree.EnclosedBy{
Value: n.ExternScan.EnclosedBy[0],
},
EscapedBy: &tree.EscapedBy{
Value: n.ExternScan.EscapedBy[0],
},
}
param.JsonData = n.ExternScan.JsonType
}
Expand Down
13 changes: 11 additions & 2 deletions pkg/sql/plan/build_load.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,18 @@ func buildLoad(stmt *tree.Load, ctx CompilerContext, isPrepareStmt bool) (*Plan,
builder := NewQueryBuilder(plan.Query_SELECT, ctx, isPrepareStmt)
bindCtx := NewBindContext(builder, nil)
terminated := ","
enclosedBy := []byte{0}
enclosedBy := []byte("\"")
escapedBy := []byte{0}
if stmt.Param.Tail.Fields != nil {
if stmt.Param.Tail.Fields.EnclosedBy != nil {
enclosedBy = []byte{stmt.Param.Tail.Fields.EnclosedBy.Value}
if stmt.Param.Tail.Fields.EnclosedBy.Value != 0 {
enclosedBy = []byte{stmt.Param.Tail.Fields.EnclosedBy.Value}
}
}
if stmt.Param.Tail.Fields.EscapedBy != nil {
if stmt.Param.Tail.Fields.EscapedBy.Value != 0 {
escapedBy = []byte{stmt.Param.Tail.Fields.EscapedBy.Value}
}
}
if stmt.Param.Tail.Fields.Terminated != nil {
terminated = stmt.Param.Tail.Fields.Terminated.Value
Expand All @@ -113,6 +121,7 @@ func buildLoad(stmt *tree.Load, ctx CompilerContext, isPrepareStmt bool) (*Plan,
IgnoredLines: uint64(stmt.Param.Tail.IgnoredLines),
EnclosedBy: enclosedBy,
Terminated: terminated,
EscapedBy: escapedBy,
JsonType: stmt.Param.JsonData,
},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/export/etl/db/db_holder.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ func bulkInsert(ctx context.Context, sqlDb *sql.DB, records [][]string, tbl *tab

csvData := csvWriter.GetContent()

loadSQL := fmt.Sprintf("LOAD DATA INLINE FORMAT='csv', DATA='%s' INTO TABLE %s.%s", csvData, tbl.Database, tbl.Table)
loadSQL := fmt.Sprintf("LOAD DATA INLINE FORMAT='csv', DATA='%s' INTO TABLE %s.%s FIELDS TERMINATED BY ','", csvData, tbl.Database, tbl.Table)

// Use the transaction to execute the SQL command

Expand Down
1 change: 1 addition & 0 deletions proto/plan.proto
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,7 @@ message ExternScan {
bytes enclosed_by = 5;
string terminated = 6;
string json_type = 7;
bytes escaped_by = 8;
}

message LockTarget {
Expand Down
51 changes: 51 additions & 0 deletions test/distributed/cases/load_data/load_data_csv_values.result
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,54 @@ col1 text
load data inline format='jsonline', data='{"col1":"good"}' , jsontype = 'object' into table t5;
load data inline format='unknow', data='{"col1":"good"}' , jsontype = 'object' into table t5;
load format 'unknow' is not yet implemented
CREATE TABLE IF NOT EXISTS `t6`(
`str` VARCHAR(32) NOT NULL COMMENT "str column",
`int64` BIGINT DEFAULT "0" COMMENT "int64 column",
`float64` DOUBLE DEFAULT "0.0" COMMENT "float64 column",
`uint64` BIGINT UNSIGNED DEFAULT "0" COMMENT "uint64 column",
`datetime_6` Datetime(6) NOT NULL COMMENT "datetime.6 column",
`json_col` TEXT NOT NULL COMMENT "json column"
);
LOAD DATA INLINE FORMAT='csv', DATA='row5,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""insert charactor \\''""}"' INTO TABLE t6 FIELDS TERMINATED BY ',';
LOAD DATA INLINE FORMAT='csv', DATA='row5,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""insert charactor \\''""}"' INTO TABLE t6;
LOAD DATA INLINE FORMAT='csv', DATA='row7,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline: \\n""}"' INTO TABLE t6 FIELDS TERMINATED BY ',';
LOAD DATA INLINE FORMAT='csv', DATA='row7,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline: \\n""}"' INTO TABLE t6;
LOAD DATA INLINE FORMAT='csv', DATA='row\\8,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline""}"' INTO TABLE t6 FIELDS TERMINATED BY ',';
LOAD DATA INLINE FORMAT='csv', DATA='row\\8,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline""}"' INTO TABLE t6;
select * from t6 order by str;
str int64 float64 uint64 datetime_6 json_col
row5 1 1.1 1 2023-05-16 16:06:18.070277000 {"key1":"insert charactor \'"}
row5 1 1.1 1 2023-05-16 16:06:18.070277000 {"key1":"insert charactor \'"}
row7 1 1.1 1 2023-05-16 16:06:18.070277000 {"key1":"newline: \n"}
row7 1 1.1 1 2023-05-16 16:06:18.070277000 {"key1":"newline: \n"}
row\8 1 1.1 1 2023-05-16 16:06:18.070277000 {"key1":"newline"}
row\8 1 1.1 1 2023-05-16 16:06:18.070277000 {"key1":"newline"}
CREATE TABLE `rawlog` (
`raw_item` VARCHAR(1024) NOT NULL COMMENT 'raw log item',
`node_uuid` VARCHAR(36) NOT NULL COMMENT 'node uuid, which node gen this data.',
`node_type` VARCHAR(1024) NOT NULL COMMENT 'node type in MO, val in [DN, CN, LOG]',
`span_id` VARCHAR(16) DEFAULT '0' COMMENT 'span uniq id',
`trace_id` VARCHAR(36) NOT NULL COMMENT 'trace uniq id',
`logger_name` VARCHAR(1024) NOT NULL COMMENT 'logger name',
`timestamp` DATETIME NOT NULL COMMENT 'timestamp of action',
`level` VARCHAR(1024) NOT NULL COMMENT 'log level, enum: debug, info, warn, error, panic, fatal',
`caller` VARCHAR(1024) NOT NULL COMMENT 'where it log, like: package/file.go:123',
`message` TEXT NOT NULL COMMENT 'log message',
`extra` TEXT DEFAULT '{}' COMMENT 'log dynamic fields',
`err_code` VARCHAR(1024) DEFAULT '0' COMMENT 'error code info',
`error` TEXT NOT NULL COMMENT 'error message',
`stack` VARCHAR(2048) NOT NULL COMMENT 'stack info',
`span_name` VARCHAR(1024) NOT NULL COMMENT 'span name, for example: step name of execution plan, function name in code, ...',
`parent_span_id` VARCHAR(16) DEFAULT '0' COMMENT 'parent span uniq id',
`start_time` DATETIME NOT NULL COMMENT 'start time',
`end_time` DATETIME NOT NULL COMMENT 'end time',
`duration` BIGINT UNSIGNED DEFAULT '0' COMMENT 'exec time, unit: ns',
`resource` TEXT DEFAULT '{}' COMMENT 'static resource information',
`span_kind` VARCHAR(1024) NOT NULL COMMENT 'span kind, enum: internal, statement, remote',
`statement_id` VARCHAR(36) NOT NULL COMMENT 'statement id',
`session_id` VARCHAR(36) NOT NULL COMMENT 'session id'
);
LOAD DATA INLINE FORMAT='csv', DATA='log_info,7c4dccb4-4d3c-41f8-b482-5251dc7a41bf,ALL,7c4dccb44d3c41f8,,,2024-05-06 18:28:54.956415,info,compile/sql_executor.go:355,sql_executor exec,"{""sql"":""insert into mo_catalog.mo_account(\\n\\t\\t\\t\\taccount_id,\\n\\t\\t\\t\\taccount_name,\\n\\t\\t\\t\\tstatus,\\n\\t\\t\\t\\tcreated_time,\\n\\t\\t\\t\\tcomments,\\n create_version) values (0,\\""sys\\"",\\""open\\"",\\""2024-05-06 10:28:54\\"",\\""system account\\"",\\""1.2.0\\"");"",""txn-id"":""018f4d73053c7667a7fbe169f33a12d1"",""duration"":0.011872,""AffectedRows"":1}",0,,,,0,0001-01-01 00:00:00.000000,0001-01-01 00:00:00.000000,0,{},internal,,' INTO TABLE rawlog;
select raw_item from rawlog;
raw_item
log_info
45 changes: 44 additions & 1 deletion test/distributed/cases/load_data/load_data_csv_values.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,47 @@ create table t5(
col1 text
);
load data inline format='jsonline', data='{"col1":"good"}' , jsontype = 'object' into table t5;
load data inline format='unknow', data='{"col1":"good"}' , jsontype = 'object' into table t5;
load data inline format='unknow', data='{"col1":"good"}' , jsontype = 'object' into table t5;

CREATE TABLE IF NOT EXISTS `t6`(
`str` VARCHAR(32) NOT NULL COMMENT "str column",
`int64` BIGINT DEFAULT "0" COMMENT "int64 column",
`float64` DOUBLE DEFAULT "0.0" COMMENT "float64 column",
`uint64` BIGINT UNSIGNED DEFAULT "0" COMMENT "uint64 column",
`datetime_6` Datetime(6) NOT NULL COMMENT "datetime.6 column",
`json_col` TEXT NOT NULL COMMENT "json column"
);
LOAD DATA INLINE FORMAT='csv', DATA='row5,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""insert charactor \\''""}"' INTO TABLE t6 FIELDS TERMINATED BY ',';
LOAD DATA INLINE FORMAT='csv', DATA='row5,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""insert charactor \\''""}"' INTO TABLE t6;
LOAD DATA INLINE FORMAT='csv', DATA='row7,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline: \\n""}"' INTO TABLE t6 FIELDS TERMINATED BY ',';
LOAD DATA INLINE FORMAT='csv', DATA='row7,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline: \\n""}"' INTO TABLE t6;
LOAD DATA INLINE FORMAT='csv', DATA='row\\8,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline""}"' INTO TABLE t6 FIELDS TERMINATED BY ',';
LOAD DATA INLINE FORMAT='csv', DATA='row\\8,1,1.1,1,2023-05-16 16:06:18.070277,"{""key1"":""newline""}"' INTO TABLE t6;
select * from t6 order by str;
CREATE TABLE `rawlog` (
`raw_item` VARCHAR(1024) NOT NULL COMMENT 'raw log item',
`node_uuid` VARCHAR(36) NOT NULL COMMENT 'node uuid, which node gen this data.',
`node_type` VARCHAR(1024) NOT NULL COMMENT 'node type in MO, val in [DN, CN, LOG]',
`span_id` VARCHAR(16) DEFAULT '0' COMMENT 'span uniq id',
`trace_id` VARCHAR(36) NOT NULL COMMENT 'trace uniq id',
`logger_name` VARCHAR(1024) NOT NULL COMMENT 'logger name',
`timestamp` DATETIME NOT NULL COMMENT 'timestamp of action',
`level` VARCHAR(1024) NOT NULL COMMENT 'log level, enum: debug, info, warn, error, panic, fatal',
`caller` VARCHAR(1024) NOT NULL COMMENT 'where it log, like: package/file.go:123',
`message` TEXT NOT NULL COMMENT 'log message',
`extra` TEXT DEFAULT '{}' COMMENT 'log dynamic fields',
`err_code` VARCHAR(1024) DEFAULT '0' COMMENT 'error code info',
`error` TEXT NOT NULL COMMENT 'error message',
`stack` VARCHAR(2048) NOT NULL COMMENT 'stack info',
`span_name` VARCHAR(1024) NOT NULL COMMENT 'span name, for example: step name of execution plan, function name in code, ...',
`parent_span_id` VARCHAR(16) DEFAULT '0' COMMENT 'parent span uniq id',
`start_time` DATETIME NOT NULL COMMENT 'start time',
`end_time` DATETIME NOT NULL COMMENT 'end time',
`duration` BIGINT UNSIGNED DEFAULT '0' COMMENT 'exec time, unit: ns',
`resource` TEXT DEFAULT '{}' COMMENT 'static resource information',
`span_kind` VARCHAR(1024) NOT NULL COMMENT 'span kind, enum: internal, statement, remote',
`statement_id` VARCHAR(36) NOT NULL COMMENT 'statement id',
`session_id` VARCHAR(36) NOT NULL COMMENT 'session id'
);
LOAD DATA INLINE FORMAT='csv', DATA='log_info,7c4dccb4-4d3c-41f8-b482-5251dc7a41bf,ALL,7c4dccb44d3c41f8,,,2024-05-06 18:28:54.956415,info,compile/sql_executor.go:355,sql_executor exec,"{""sql"":""insert into mo_catalog.mo_account(\\n\\t\\t\\t\\taccount_id,\\n\\t\\t\\t\\taccount_name,\\n\\t\\t\\t\\tstatus,\\n\\t\\t\\t\\tcreated_time,\\n\\t\\t\\t\\tcomments,\\n create_version) values (0,\\""sys\\"",\\""open\\"",\\""2024-05-06 10:28:54\\"",\\""system account\\"",\\""1.2.0\\"");"",""txn-id"":""018f4d73053c7667a7fbe169f33a12d1"",""duration"":0.011872,""AffectedRows"":1}",0,,,,0,0001-01-01 00:00:00.000000,0001-01-01 00:00:00.000000,0,{},internal,,' INTO TABLE rawlog;
select raw_item from rawlog;

0 comments on commit 5522522

Please sign in to comment.