[ https://issues.apache.org/jira/browse/SPARK-18105?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17397585#comment-17397585 ]
Dongjoon Hyun commented on SPARK-18105: --------------------------------------- BTW, I checked that a single zip file contains 157 snappy parquet files which were generated by Apache Spark 3.0.1. {code} $ ls -al /Users/dongjoon/data/SPARK-18105/hash_import.parquet | grep parquet | wc -l 157 $ ls -al /Users/dongjoon/data/SPARK-18105/hash_import.parquet total 5040768 drwxr-xr-x 160 dongjoon staff 5120 Aug 9 02:55 . drwxr-xr-x 4 dongjoon staff 128 Aug 11 12:32 .. -rw-r--r-- 1 dongjoon staff 0 Aug 9 02:21 _SUCCESS -rw-r--r-- 1 dongjoon staff 7098577 Aug 9 02:19 part-00000-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9310705 Aug 9 02:19 part-00001-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9286598 Aug 9 02:19 part-00002-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9066660 Aug 9 02:19 part-00003-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7174124 Aug 9 02:19 part-00004-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7269784 Aug 9 02:19 part-00005-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9045563 Aug 9 02:19 part-00006-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8874048 Aug 9 02:19 part-00007-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9088055 Aug 9 02:19 part-00008-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7921907 Aug 9 02:19 part-00009-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9734530 Aug 9 02:19 part-00010-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 11430364 Aug 9 02:19 part-00011-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10106472 Aug 9 02:19 
part-00012-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 11672118 Aug 9 02:19 part-00013-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 11077873 Aug 9 02:19 part-00014-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 17459766 Aug 9 02:19 part-00015-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18000141 Aug 9 02:19 part-00016-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10688008 Aug 9 02:19 part-00017-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10876962 Aug 9 02:19 part-00018-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 6001747 Aug 9 02:19 part-00019-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13051685 Aug 9 02:19 part-00020-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 40607547 Aug 9 02:19 part-00021-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 16122066 Aug 9 02:19 part-00022-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7858387 Aug 9 02:19 part-00023-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7953870 Aug 9 02:19 part-00024-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 20777892 Aug 9 02:19 part-00025-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7785559 Aug 9 02:19 part-00026-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 6101494 Aug 9 02:19 part-00027-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14878775 Aug 9 02:19 part-00028-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8807412 Aug 9 02:19 
part-00029-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 664925 Aug 9 02:19 part-00030-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 36711578 Aug 9 02:19 part-00031-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 6376692 Aug 9 02:19 part-00032-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9121175 Aug 9 02:19 part-00033-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 12318653 Aug 9 02:19 part-00034-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18944542 Aug 9 02:20 part-00035-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 5108518 Aug 9 02:19 part-00036-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10884610 Aug 9 02:19 part-00037-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7678782 Aug 9 02:19 part-00038-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9950415 Aug 9 02:19 part-00039-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22648978 Aug 9 02:19 part-00040-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9918156 Aug 9 02:19 part-00041-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 20537613 Aug 9 02:20 part-00042-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 20121006 Aug 9 02:20 part-00043-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10919311 Aug 9 02:19 part-00044-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13743611 Aug 9 02:20 part-00045-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 20744467 Aug 9 02:20 
part-00046-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8362612 Aug 9 02:20 part-00047-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 17083828 Aug 9 02:20 part-00048-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8325658 Aug 9 02:20 part-00049-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 28616403 Aug 9 02:20 part-00050-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 27766601 Aug 9 02:20 part-00051-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7274627 Aug 9 02:20 part-00052-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18133083 Aug 9 02:20 part-00053-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 20962549 Aug 9 02:20 part-00054-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10766021 Aug 9 02:20 part-00055-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10987925 Aug 9 02:20 part-00056-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 24120006 Aug 9 02:20 part-00057-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7940774 Aug 9 02:20 part-00058-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15471824 Aug 9 02:20 part-00059-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 17041126 Aug 9 02:20 part-00060-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 4558582 Aug 9 02:20 part-00061-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 28812083 Aug 9 02:20 part-00062-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18382491 Aug 9 02:20 
part-00063-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 11464446 Aug 9 02:20 part-00064-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13522906 Aug 9 02:20 part-00065-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 12881794 Aug 9 02:20 part-00066-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14688841 Aug 9 02:20 part-00067-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13630922 Aug 9 02:20 part-00068-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15015507 Aug 9 02:20 part-00069-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21820733 Aug 9 02:20 part-00070-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9051687 Aug 9 02:20 part-00071-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14009387 Aug 9 02:20 part-00072-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8043754 Aug 9 02:20 part-00073-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15522853 Aug 9 02:20 part-00074-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22376010 Aug 9 02:20 part-00075-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22600841 Aug 9 02:20 part-00076-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 12973790 Aug 9 02:20 part-00077-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13724406 Aug 9 02:20 part-00078-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 9837992 Aug 9 02:20 part-00079-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 24946732 Aug 9 02:20 
part-00081-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21250848 Aug 9 02:20 part-00082-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22465612 Aug 9 02:20 part-00083-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15383161 Aug 9 02:20 part-00084-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10725376 Aug 9 02:20 part-00085-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 19325378 Aug 9 02:20 part-00086-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8895845 Aug 9 02:20 part-00087-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 19790382 Aug 9 02:20 part-00088-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8773449 Aug 9 02:20 part-00089-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 19144012 Aug 9 02:20 part-00090-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 23290537 Aug 9 02:20 part-00091-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 39088655 Aug 9 02:20 part-00092-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 8602985 Aug 9 02:20 part-00093-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15345549 Aug 9 02:20 part-00094-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13559709 Aug 9 02:20 part-00095-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 6852728 Aug 9 02:20 part-00096-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 29831129 Aug 9 02:20 part-00097-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 11356100 Aug 9 02:20 
part-00098-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21956322 Aug 9 02:20 part-00099-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 16915694 Aug 9 02:20 part-00100-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21229399 Aug 9 02:20 part-00101-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 3039283 Aug 9 02:20 part-00102-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 31255403 Aug 9 02:20 part-00103-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 13639185 Aug 9 02:20 part-00104-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22379210 Aug 9 02:20 part-00105-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 20522598 Aug 9 02:20 part-00106-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21562145 Aug 9 02:20 part-00107-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21461668 Aug 9 02:20 part-00108-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 27882056 Aug 9 02:20 part-00109-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14075767 Aug 9 02:20 part-00110-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 17072086 Aug 9 02:20 part-00111-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 2745029 Aug 9 02:20 part-00112-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18697825 Aug 9 02:20 part-00113-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 42039504 Aug 9 02:20 part-00114-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 7403373 Aug 9 02:20 
part-00115-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 10516182 Aug 9 02:20 part-00116-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 16325101 Aug 9 02:20 part-00117-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14799829 Aug 9 02:20 part-00118-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14694122 Aug 9 02:20 part-00119-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 24990548 Aug 9 02:20 part-00120-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 11079775 Aug 9 02:20 part-00121-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 33703475 Aug 9 02:20 part-00122-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14656154 Aug 9 02:20 part-00123-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 2180907 Aug 9 02:20 part-00124-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22524866 Aug 9 02:20 part-00125-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 23664835 Aug 9 02:20 part-00126-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 3718511 Aug 9 02:20 part-00127-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15386780 Aug 9 02:20 part-00128-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15793443 Aug 9 02:21 part-00129-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 25739876 Aug 9 02:21 part-00130-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 22014066 Aug 9 02:21 part-00131-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 26034951 Aug 9 02:21 
part-00132-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18732517 Aug 9 02:21 part-00133-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 15581656 Aug 9 02:21 part-00134-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 24107039 Aug 9 02:21 part-00135-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 617756 Aug 9 02:21 part-00136-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 17878862 Aug 9 02:21 part-00137-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 37368882 Aug 9 02:21 part-00138-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14727470 Aug 9 02:21 part-00139-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 36085625 Aug 9 02:21 part-00140-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21564288 Aug 9 02:21 part-00141-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 4207926 Aug 9 02:21 part-00142-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 26885735 Aug 9 02:21 part-00143-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 47222 Aug 9 02:21 part-00145-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 3031 Aug 9 02:21 part-00146-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 101888 Aug 9 02:21 part-00147-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 18902521 Aug 9 02:21 part-00148-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 3428 Aug 9 02:21 part-00149-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 23022202 Aug 9 02:21 
part-00150-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 2675 Aug 9 02:21 part-00155-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 21980843 Aug 9 02:21 part-00157-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 37397239 Aug 9 02:21 part-00158-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 53319037 Aug 9 02:21 part-00159-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 53852766 Aug 9 02:21 part-00160-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 52634756 Aug 9 02:21 part-00161-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 58968977 Aug 9 02:21 part-00162-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet -rw-r--r-- 1 dongjoon staff 14955676 Aug 9 02:21 part-00163-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet {code} {code} $ parquet-tools meta /Users/dongjoon/data/SPARK-18105/hash_import.parquet/part-00163-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet file: file:/Users/dongjoon/data/SPARK-18105/hash_import.parquet/part-00163-51e43836-6ce9-419d-b632-2a8ca8185b9b-c000.snappy.parquet creator: parquet-mr version 1.10.1 (build a89df8f9932b6ef6633d06069e50c9b7970bebd1) extra: org.apache.spark.version = 3.0.1 extra: org.apache.spark.sql.parquet.row.metadata = {"type":"struct","fields":[{"name":"id","type":"long","nullable":true,"metadata":{}},{"name":"pivot_hash","type":"integer","nullable":false,"metadata":{}},{"name":"full_name","type":"string","nullable":true,"metadata":{}}]} {code} > LZ4 failed to decompress a stream of shuffled data > -------------------------------------------------- > > Key: SPARK-18105 > URL: https://issues.apache.org/jira/browse/SPARK-18105 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 3.0.1, 3.1.1 > Reporter: Davies Liu > 
Priority: Major > Attachments: TestWeightedGraph.java > > > When lz4 is used to compress the shuffle files, it may fail to decompress it > as "stream is corrupt" > {code} > Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: > Task 92 in stage 5.0 failed 4 times, most recent failure: Lost task 92.3 in > stage 5.0 (TID 16616, 10.0.27.18): java.io.IOException: Stream is corrupted > at > org.apache.spark.io.LZ4BlockInputStream.refill(LZ4BlockInputStream.java:220) > at > org.apache.spark.io.LZ4BlockInputStream.available(LZ4BlockInputStream.java:109) > at java.io.BufferedInputStream.read(BufferedInputStream.java:353) > at java.io.DataInputStream.read(DataInputStream.java:149) > at com.google.common.io.ByteStreams.read(ByteStreams.java:828) > at com.google.common.io.ByteStreams.readFully(ByteStreams.java:695) > at > org.apache.spark.sql.execution.UnsafeRowSerializerInstance$$anon$3$$anon$1.next(UnsafeRowSerializer.scala:127) > at > org.apache.spark.sql.execution.UnsafeRowSerializerInstance$$anon$3$$anon$1.next(UnsafeRowSerializer.scala:110) > at scala.collection.Iterator$$anon$13.next(Iterator.scala:372) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) > at > org.apache.spark.util.CompletionIterator.next(CompletionIterator.scala:30) > at > org.apache.spark.InterruptibleIterator.next(InterruptibleIterator.scala:43) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.sort_addToSorter$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370) > at > 
org.apache.spark.sql.execution.datasources.DynamicPartitionWriterContainer.writeRows(WriterContainer.scala:397) > at > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143) > at > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand$$anonfun$run$1$$anonfun$apply$mcV$sp$1.apply(InsertIntoHadoopFsRelationCommand.scala:143) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > at org.apache.spark.scheduler.Task.run(Task.scala:86) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > {code} > https://github.com/jpountz/lz4-java/issues/89 -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org