digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n70 ms (2 ms, 2 ms, 11 ms (stage 47.0: task 370))";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 24"];
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>21 ms (0 ms, 0 ms, 10 ms (stage 47.0: task 370))<br>peak memory total (min, med, max (stageId: taskId))<br>471.3 MiB (16.3 MiB, 16.3 MiB, 16.3 MiB (stage 45.0: task 368))<br>number of output rows: 70,415<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 45.0: task 368))"];
}
5 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 484,233<br>shuffle write time total (min, med, max (stageId: taskId))<br>41 ms (0 ms, 20 ms, 20 ms (stage 44.0: task 365))<br>records read: 70,429<br>local bytes read total (min, med, max (stageId: taskId))<br>2.2 MiB (72.8 KiB, 76.6 KiB, 81.2 KiB (stage 49.0: task 388))<br>fetch wait time total (min, med, max (stageId: taskId))<br>9 ms (0 ms, 0 ms, 9 ms (stage 47.0: task 370))<br>remote bytes read total (min, med, max (stageId: taskId))<br>2.2 MiB (72.5 KiB, 76.0 KiB, 79.8 KiB (stage 49.0: task 385))<br>local blocks read: 29<br>remote blocks read: 29<br>data size total (min, med, max (stageId: taskId))<br>40.6 MiB (0.0 B, 20.3 MiB, 20.4 MiB (stage 44.0: task 365))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>29.8 MiB (0.0 B, 14.9 MiB, 14.9 MiB (stage 44.0: task 365))"];
subgraph cluster6 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n1.2 s (207 ms, 264 ms, 771 ms (stage 44.0: task 367))";
7 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>906 ms (107 ms, 154 ms, 645 ms (stage 44.0: task 367))<br>peak memory total (min, med, max (stageId: taskId))<br>80.3 MiB (256.0 KiB, 40.0 MiB, 40.0 MiB (stage 44.0: task 366))<br>number of output rows: 484,233<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.6, 1.6, 1.6 (stage 44.0: task 366))"];
8 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 484,310<br>number of input batches: 119"];
}
9 [labelType="html" label="<b>Scan parquet itv024694_lending_club.loans_defaulters_delinq</b><br><br>number of files read: 2<br>scan time total (min, med, max (stageId: taskId))<br>721 ms (22 ms, 56 ms, 643 ms (stage 44.0: task 367))<br>metadata time: 0 ms<br>size of files read: 31.1 MiB<br>number of output rows: 484,310"];
2->0;
3->2;
4->3;
5->4;
7->5;
8->7;
9->8;
}
10
CollectLimit 21
Project [member_id#607]
Filter (total_count#605L > 1)
HashAggregate(keys=[member_id#607], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(member_id#607, 200), ENSURE_REQUIREMENTS, [id=#381]
HashAggregate(keys=[member_id#607], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet itv024694_lending_club.loans_defaulters_delinq[member_id#607] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/loan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>