doris icon indicating copy to clipboard operation
doris copied to clipboard

[Bug] data error when using select into outfile format as parquet

Open luozenglin opened this issue 3 years ago • 0 comments

Search before asking

  • [X] I had searched in the issues and found no similar issues.

Version

master

What's Wrong?

When I export the data using select into outfile format as parquet and then load it into a table with the same schema, the tinyint column becomes NULL.

 set enable_vectorized_engine = false;

CREATE TABLE `test_select_into_property_test_output_format_parquet_tb` (
  `k1` tinyint(4) NOT NULL,
  `k2` smallint(6) NOT NULL,
  `k3` int(11) NOT NULL,
  `k4` bigint(20) NOT NULL,
  `k5` datetime NOT NULL,
  `v1` date REPLACE NOT NULL,
  `v2` char(1) REPLACE NOT NULL,
  `v3` varchar(4096) REPLACE NOT NULL,
  `v4` float SUM NOT NULL,
  `v5` double SUM NOT NULL,
  `v6` decimal(20, 7) SUM NOT NULL
) ENGINE=OLAP
AGGREGATE KEY(`k1`, `k2`, `k3`, `k4`, `k5`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`k1`) BUCKETS 15
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2",
"disable_auto_compaction" = "false"
);

mysql> select * from test_select_into_property_test_output_format_parquet_tb where k1 <= 5;
+------+------+------+------+---------------------+------------+------+-------------------------------+-----------+------------+-------------+
| k1   | k2   | k3   | k4   | k5                  | v1         | v2   | v3                            | v4        | v5         | v6          |
+------+------+------+------+---------------------+------------+------+-------------------------------+-----------+------------+-------------+
|    1 |   10 |  100 | 1000 | 2011-01-01 00:00:00 | 2010-01-01 | t    | ynqnzeowymt                   | 38.638844 | 180.998031 | 7395.231067 |
|    2 |   20 |  200 | 2000 | 2012-01-01 00:00:00 | 2010-01-02 | f    | hfkfwlr                       | 506.04404 | 539.922834 | 2080.504502 |
|    3 |   30 |  300 | 3000 | 2013-01-01 00:00:00 | 2010-01-03 | t    | uoclasp                       | 377.79321 | 577.044148 | 4605.253205 |
|    4 |   40 |  400 | 4000 | 2014-01-01 00:00:00 | 2010-01-04 | n    | iswngzeodfhptjzgswsddt        | 871.35455 | 919.067864 | 7291.703724 |
|    5 |   50 |  500 | 5000 | 2015-01-01 00:00:00 | 2010-01-05 | a    | sqodagzlyrmcelyxgcgcsfuxadcdt |  462.0679 | 929.660783 | 3903.906901 |
+------+------+------+------+---------------------+------------+------+-------------------------------+-----------+------------+-------------+

select k1 k_0, k2 k_1, k3 k_2, k4 k_3, k5 k_4, v1 k_5, v2 k_6, v3 k_7, v4 k_8, v5 k_9, v6 k_10 from test_select_into_property_test_output_format_parquet_tb INTO OUTFILE "hdfs://xxxx:9000/user/palo/test/data/export/test_select_into_property_test_output_format_parquet_db/label_21_04_47_49_475312_1042101013/label_21_04_47_49_475364_844373478" FORMAT AS parquet PROPERTIES ("broker.name"="ahdfs","broker.username"="xxxx","broker.password"="xxxx", "schema" = "required,int32,k_0;required,int32,k_1;required,int32,k_2;required,int64,k_3;required,int64,k_4;required,int64,k_5;required,byte_array,k_6;required,byte_array,k_7;required,float,k_8;required,double,k_9;required,byte_array,k_10");


CREATE TABLE `select_into_check_table` (
  `k_0` tinyint(4) NULL,
  `k_1` smallint(6) NULL,
  `k_2` int(11) NULL,
  `k_3` bigint(20) NULL,
  `k_4` datetime NULL,
  `k_5` date NULL,
  `k_6` char(1) NULL,
  `k_7` char(29) NULL,
  `k_8` float NULL,
  `k_9` double NULL,
  `k_10` decimal(27, 9) NULL
) ENGINE=OLAP
DUPLICATE KEY(`k_0`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`k_0`) BUCKETS 13
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2",
"disable_auto_compaction" = "false"
);


LOAD LABEL test_select_into_property_test_output_format_parquet_db.label_21_04_47_50_543709_8920444695 ( DATA INFILE(" hdfs:/xxxx:9000/user/palo/test/data/export/test_select_into_property_test_output_format_parquet_db/label_21_04_47_49_475312_1042101013/label_21_04_47_49_475364_8443734786915a56b133f4b71-a671fd00077a30b4_0.parquet") INTO TABLE `select_into_check_table` FORMAT AS "parquet") WITH BROKER "ahdfs" ("username"="xxxx", "password"="xxxx");


mysql> select * from select_into_check_table;
+------+------+------+-------+---------------------+------------+------+-------------------------------+-----------+------------+-------------+
| k_0  | k_1  | k_2  | k_3   | k_4                 | k_5        | k_6  | k_7                           | k_8       | k_9        | k_10        |
+------+------+------+-------+---------------------+------------+------+-------------------------------+-----------+------------+-------------+
| NULL | NULL | 1000 | 10000 | 2020-01-01 00:00:00 | 2010-01-10 | s    | zucprgdnlgzzfl                | 26.874739 | 155.861217 | 7996.434686 |
| NULL | NULL | 2700 | 27000 | 2037-01-01 00:00:00 | 2010-01-27 | d    | yrxspxgcwgbnjnmqkcido         |  362.2272 | 519.383701 | 9921.135045 |
| NULL | NULL | 2400 | 24000 | 2034-01-01 00:00:00 | 2010-01-24 | s    | irpbe                         | 537.90613 | 396.750845 | 3585.208809 |
| NULL | NULL |  900 |  9000 | 2019-01-01 00:00:00 | 2010-01-09 | b    | nbarqjwilbkelk                |   92.7024 |  535.28551 | 4846.735593 |
| NULL | NULL |  200 |  2000 | 2012-01-01 00:00:00 | 2010-01-02 | f    | hfkfwlr                       | 506.04404 | 539.922834 | 2080.504502 |
| NULL | NULL |  100 |  1000 | 2011-01-01 00:00:00 | 2010-01-01 | t    | ynqnzeowymt                   | 38.638844 | 180.998031 | 7395.231067 |
+------+------+------+-------+---------------------+------------+------+-------------------------------+-----------+------------+-------------+

What You Expected?

Table select_into_check_table has the same data as Table test_select_into_property_test_output_format_parquet_tb

How to Reproduce?

No response

Anything Else?

No response

Are you willing to submit PR?

  • [X] Yes I am willing to submit a PR!

Code of Conduct

luozenglin avatar Sep 22 '22 08:09 luozenglin