digraph G {
0 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)";
2 [labelType="html" label="<br><b>HashAggregate</b><br><br>"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 250,450<br>shuffle write time total (min, med, max (stageId: taskId))<br>28 ms (0 ms, 0 ms, 28 ms (stage 2.3: task 252))<br>data size total (min, med, max (stageId: taskId))<br>21.0 MiB (0.0 B, 0.0 B, 21.0 MiB (stage 2.3: task 252))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>15.4 MiB (0.0 B, 0.0 B, 15.4 MiB (stage 2.3: task 252))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n511 ms (0 ms, 0 ms, 511 ms (stage 2.3: task 252))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>388 ms (0 ms, 0 ms, 388 ms (stage 2.3: task 252))<br>peak memory total (min, med, max (stageId: taskId))<br>40.0 MiB (0.0 B, 0.0 B, 40.0 MiB (stage 2.3: task 252))<br>number of output rows: 250,450<br>avg hash probe bucket list iters: 1.6"];
6 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 250,488<br>number of input batches: 66"];
}
7 [labelType="html" label="<b>Scan parquet itv024694_lending_club.customers</b><br><br>number of files read: 200<br>scan time total (min, med, max (stageId: taskId))<br>269 ms (0 ms, 0 ms, 269 ms (stage 2.3: task 252))<br>metadata time: 0 ms<br>size of files read: 184.9 MiB<br>number of output rows: 250,488"];
2->0;
3->2;
5->3;
6->5;
7->6;
}
8
TakeOrderedAndProject(limit=21, orderBy=[total_count#0L DESC NULLS LAST], output=[member_id#2,total_count#50])
HashAggregate(keys=[member_id#2], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(member_id#2, 200), ENSURE_REQUIREMENTS, [id=#55]
HashAggregate(keys=[member_id#2], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet itv024694_lending_club.customers[member_id#2] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/cust..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>