digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n17 ms (8 ms, 9 ms, 9 ms (stage 41.0: task 363))";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 33"];
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>11 ms (5 ms, 6 ms, 6 ms (stage 41.0: task 363))<br>peak memory total (min, med, max (stageId: taskId))<br>33.0 MiB (16.5 MiB, 16.5 MiB, 16.5 MiB (stage 41.0: task 363))<br>number of output rows: 22,463<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.4, 1.4, 1.4 (stage 41.0: task 363))"];
}
5 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 2,260,197<br>shuffle write time total (min, med, max (stageId: taskId))<br>190 ms (18 ms, 24 ms, 26 ms (stage 40.0: task 355))<br>records read: 22,494<br>local bytes read total (min, med, max (stageId: taskId))<br>706.3 KiB (342.8 KiB, 363.5 KiB, 363.5 KiB (stage 41.0: task 363))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 41.0: task 363))<br>remote bytes read total (min, med, max (stageId: taskId))<br>707.4 KiB (333.2 KiB, 374.2 KiB, 374.2 KiB (stage 43.0: task 364))<br>local blocks read: 8<br>remote blocks read: 8<br>data size total (min, med, max (stageId: taskId))<br>189.7 MiB (16.8 MiB, 24.7 MiB, 25.0 MiB (stage 40.0: task 355))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>138.8 MiB (12.3 MiB, 18.1 MiB, 18.3 MiB (stage 40.0: task 355))"];
subgraph cluster6 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n3.9 s (304 ms, 512 ms, 621 ms (stage 40.0: task 355))";
7 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>3.0 s (229 ms, 399 ms, 504 ms (stage 40.0: task 355))<br>peak memory total (min, med, max (stageId: taskId))<br>304.0 MiB (24.0 MiB, 40.0 MiB, 40.0 MiB (stage 40.0: task 356))<br>number of output rows: 2,260,197<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.6, 1.6, 1.6 (stage 40.0: task 356))"];
8 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 2,260,633<br>number of input batches: 600"];
}
9 [labelType="html" label="<b>Scan parquet itv024694_lending_club.customers</b><br><br>number of files read: 200<br>scan time total (min, med, max (stageId: taskId))<br>1.9 s (145 ms, 244 ms, 334 ms (stage 40.0: task 355))<br>metadata time: 0 ms<br>size of files read: 184.9 MiB<br>number of output rows: 2,260,633"];
2->0;
3->2;
4->3;
5->4;
7->5;
8->7;
9->8;
}
10
CollectLimit 21
Project [member_id#577]
Filter (total_count#575L > 1)
HashAggregate(keys=[member_id#577], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(member_id#577, 200), ENSURE_REQUIREMENTS, [id=#337]
HashAggregate(keys=[member_id#577], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet itv024694_lending_club.customers[member_id#577] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/cust..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>