digraph G {
0 [labelType="html" label="<b>Execute InsertIntoHadoopFsRelationCommand</b><br><br>number of written files: 1<br>written output: 11.0 KiB<br>number of output rows: 173<br>number of dynamic part: 0"];
1 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 173<br>shuffle write time total (min, med, max (stageId: taskId))<br>26 ms (0 ms, 0 ms, 5 ms (stage 60.1: task 633))<br>records read: 173<br>local bytes read: 9.7 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 8.4 KiB<br>local blocks read: 62<br>remote blocks read: 58<br>data size total (min, med, max (stageId: taskId))<br>13.5 KiB (0.0 B, 80.0 B, 320.0 B (stage 60.1: task 697))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>18.1 KiB (0.0 B, 124.0 B, 331.0 B (stage 60.1: task 697))"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n755 ms (1 ms, 2 ms, 41 ms (stage 60.1: task 633))";
3 [labelType="html" label="<br><b>Project</b><br><br>"];
4 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 173"];
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>275 ms (0 ms, 1 ms, 27 ms (stage 60.1: task 633))<br>peak memory total (min, med, max (stageId: taskId))<br>3.2 GiB (16.3 MiB, 16.3 MiB, 16.3 MiB (stage 60.1: task 631))<br>number of output rows: 484,136<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1.1 (stage 60.1: task 664))"];
}
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 242,686<br>shuffle write time total (min, med, max (stageId: taskId))<br>20 ms (0 ms, 0 ms, 20 ms (stage 59.1: task 630))<br>records read: 484,233<br>local bytes read total (min, med, max (stageId: taskId))<br>14.9 MiB (70.9 KiB, 76.1 KiB, 81.9 KiB (stage 60.1: task 706))<br>fetch wait time total (min, med, max (stageId: taskId))<br>14 ms (0 ms, 0 ms, 7 ms (stage 60.1: task 749))<br>remote bytes read total (min, med, max (stageId: taskId))<br>14.9 MiB (69.5 KiB, 76.5 KiB, 83.4 KiB (stage 60.1: task 766))<br>local blocks read: 200<br>remote blocks read: 200<br>data size total (min, med, max (stageId: taskId))<br>20.4 MiB (0.0 B, 0.0 B, 20.4 MiB (stage 59.1: task 630))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>14.9 MiB (0.0 B, 0.0 B, 14.9 MiB (stage 59.1: task 630))"];
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n253 ms (0 ms, 0 ms, 253 ms (stage 59.1: task 630))";
8 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>144 ms (0 ms, 0 ms, 144 ms (stage 59.1: task 630))<br>peak memory total (min, med, max (stageId: taskId))<br>40.0 MiB (0.0 B, 0.0 B, 40.0 MiB (stage 59.1: task 630))<br>number of output rows: 242,686<br>avg hash probe bucket list iters: 1.6"];
9 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 242,721<br>number of input batches: 60"];
}
10 [labelType="html" label="<b>Scan parquet itv024694_lending_club.loans_defaulters_delinq</b><br><br>number of files read: 2<br>scan time total (min, med, max (stageId: taskId))<br>47 ms (0 ms, 0 ms, 47 ms (stage 59.1: task 630))<br>metadata time: 0 ms<br>size of files read: 31.1 MiB<br>number of output rows: 242,721"];
1->0;
3->1;
4->3;
5->4;
6->5;
8->6;
9->8;
10->9;
}
11
Execute InsertIntoHadoopFsRelationCommand hdfs://m01.itversity.com:9000/user/itv024694/bad_data/bad_data_loan_defaulters, false, CSV, [header=true, path=/user/itv024694/bad_data/bad_data_loan_defaulters], Overwrite, [member_id]
Exchange RoundRobinPartitioning(1), REPARTITION_WITH_NUM, [id=#534]
Project [member_id#607]
Filter (total_count#605L > 1)
HashAggregate(keys=[member_id#607], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(member_id#607, 200), ENSURE_REQUIREMENTS, [id=#528]
HashAggregate(keys=[member_id#607], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet itv024694_lending_club.loans_defaulters_delinq[member_id#607] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/loan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>