// Spark SQL physical-plan DAG in Graphviz DOT format (exported from the Spark UI).
// Each numbered node is one physical operator; its HTML label carries the operator
// name plus runtime metrics (row counts, shuffle bytes/records, timings, memory).
// NOTE(review): "labelType" is a Spark-UI-specific attribute, not standard DOT.
digraph G {
// Node 0: final CollectLimit operator (root of the plan).
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
// Clusters group operators fused into a single WholeStageCodegen stage.
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (3)";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
}
// Node 3: TakeOrderedAndProject with its shuffle read/write metrics
// (5,655 records written and read; per-task min/med/max breakdowns).
3 [labelType="html" label="<b>TakeOrderedAndProject</b><br><br>shuffle records written: 5,655<br>shuffle write time total (min, med, max (stageId: taskId))<br>27 ms (0 ms, 0 ms, 0 ms (stage 21.0: task 1234))<br>records read: 5,655<br>local bytes read: 57.6 KiB<br>fetch wait time: 0 ms<br>local blocks read: 200<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>57.6 KiB (170.0 B, 294.0 B, 401.0 B (stage 21.0: task 1365))"];
// Codegen stage 2: final HashAggregate (post-shuffle merge side).
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n168 ms (0 ms, 0 ms, 12 ms (stage 21.0: task 1360))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>1 ms (0 ms, 0 ms, 1 ms (stage 21.0: task 1360))<br>peak memory total (min, med, max (stageId: taskId))<br>3.2 GiB (16.3 MiB, 16.3 MiB, 16.3 MiB (stage 21.0: task 1215))<br>number of output rows: 5,655<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 21.0: task 1215))"];
}
// Node 6: Exchange (shuffle) between the partial and final aggregations.
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 5,655<br>shuffle write time: 11 ms<br>records read: 5,655<br>local bytes read total (min, med, max (stageId: taskId))<br>59.5 KiB (174.0 B, 302.0 B, 423.0 B (stage 21.0: task 1245))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 21.0: task 1215))<br>local blocks read: 200<br>data size: 132.5 KiB<br>shuffle bytes written: 59.5 KiB"];
// Codegen stage 1: pre-shuffle pipeline — partial HashAggregate, Project, Filter.
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 155 ms";
8 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 140 ms<br>peak memory: 256.0 KiB<br>number of output rows: 5,655"];
9 [labelType="html" label="<br><b>Project</b><br><br>"];
// Filter emits 7,556 rows — same count as the scan below, so the filter
// removed nothing here (or was fully pushed down; see PushedFilters in the
// plan text appended after this graph).
10 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 7,556"];
}
// Node 11: leaf CSV scan (1 file, 2.9 MiB, 7,556 rows).
11 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 7,556<br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 2.9 MiB"];
// Edges run child -> parent: data flows from the csv Scan (11) up through
// Filter/Project/HashAggregate, across the Exchange, to CollectLimit (0).
2->0;
3->2;
5->3;
6->5;
8->6;
9->8;
10->9;
11->10;
}
12
CollectLimit 21
Project [cast(customer_id#18 as string) AS customer_id#156, cast(count#149L as string) AS count#157]
WholeStageCodegen (3)
TakeOrderedAndProject(limit=50, orderBy=[count#149L DESC NULLS LAST], output=[customer_id#18,count#149L])
HashAggregate(keys=[customer_id#18], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(customer_id#18, 200), ENSURE_REQUIREMENTS, [id=#267]
HashAggregate(keys=[customer_id#18], functions=[partial_count(1)])
Project [customer_id#18]
Filter (isnotnull(order_status#19) AND (order_status#19 = CLOSED))
WholeStageCodegen (1)
FileScan csv [customer_id#18,order_status#19] Batched: false, DataFilters: [isnotnull(order_status#19), (order_status#19 = CLOSED)], Format: CSV, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/orders_wh/orders_wh.csv], PartitionFilters: [], PushedFilters: [IsNotNull(order_status), EqualTo(order_status,CLOSED)], ReadSchema: struct<customer_id:int,order_status:string>