digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n44 ms (17 ms, 27 ms, 27 ms (stage 53.2: task 404))";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 33"];
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>31 ms (12 ms, 19 ms, 19 ms (stage 53.2: task 404))<br>peak memory total (min, med, max (stageId: taskId))<br>33.0 MiB (16.5 MiB, 16.5 MiB, 16.5 MiB (stage 53.2: task 404))<br>number of output rows: 22,463<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.4, 1.4, 1.4 (stage 53.2: task 404))"];
}
5 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1,151,305<br>shuffle write time total (min, med, max (stageId: taskId))<br>72 ms (0 ms, 0 ms, 72 ms (stage 52.2: task 403))<br>records read: 22,480<br>local bytes read total (min, med, max (stageId: taskId))<br>711.5 KiB (346.8 KiB, 364.7 KiB, 364.7 KiB (stage 55.0: task 405))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 53.2: task 404))<br>remote bytes read total (min, med, max (stageId: taskId))<br>699.8 KiB (349.0 KiB, 350.8 KiB, 350.8 KiB (stage 55.0: task 405))<br>local blocks read: 2<br>remote blocks read: 2<br>data size total (min, med, max (stageId: taskId))<br>96.6 MiB (0.0 B, 0.0 B, 96.6 MiB (stage 52.2: task 403))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>70.6 MiB (0.0 B, 0.0 B, 70.6 MiB (stage 52.2: task 403))"];
subgraph cluster6 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n1.1 s (0 ms, 0 ms, 1.1 s (stage 52.2: task 403))";
7 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>662 ms (0 ms, 0 ms, 662 ms (stage 52.2: task 403))<br>peak memory total (min, med, max (stageId: taskId))<br>192.0 MiB (0.0 B, 0.0 B, 192.0 MiB (stage 52.2: task 403))<br>number of output rows: 1,151,305<br>avg hash probe bucket list iters: 1.6"];
8 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 1,152,101<br>number of input batches: 282"];
}
9 [labelType="html" label="<b>Scan parquet itv024694_lending_club.loans_defaulters_detail_rec_enq</b><br><br>number of files read: 2<br>scan time total (min, med, max (stageId: taskId))<br>123 ms (0 ms, 0 ms, 123 ms (stage 52.2: task 403))<br>metadata time: 0 ms<br>size of files read: 144.3 MiB<br>number of output rows: 1,152,101"];
2->0;
3->2;
4->3;
5->4;
7->5;
8->7;
9->8;
}
10
CollectLimit 21
Project [member_id#625]
Filter (total_count#623L > 1)
HashAggregate(keys=[member_id#625], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(member_id#625, 200), ENSURE_REQUIREMENTS, [id=#425]
HashAggregate(keys=[member_id#625], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet itv024694_lending_club.loans_defaulters_detail_rec_enq[member_id#625] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/loan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>