digraph G {
0 [labelType="html" label="<b>Execute InsertIntoHadoopFsRelationCommand</b><br><br>number of written files: 1<br>written output: 413.8 KiB<br>number of output rows: 6,519<br>number of dynamic part: 0"];
1 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 6,519<br>shuffle write time total (min, med, max (stageId: taskId))<br>60 ms (0 ms, 0 ms, 1 ms (stage 88.0: task 2659))<br>records read: 6,519<br>local bytes read: 463.2 KiB<br>fetch wait time: 0 ms<br>local blocks read: 520<br>data size total (min, med, max (stageId: taskId))<br>509.3 KiB (0.0 B, 1040.0 B, 1920.0 B (stage 88.0: task 2735))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>463.2 KiB (0.0 B, 947.0 B, 1690.0 B (stage 88.0: task 2735))"];
2 [labelType="html" label="<br><b>Union</b><br><br>"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n852 ms (0 ms, 0 ms, 12 ms (stage 88.0: task 2489))";
4 [labelType="html" label="<br><b>Project</b><br><br>"];
5 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 3,157"];
6 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>645 ms (0 ms, 0 ms, 11 ms (stage 88.0: task 2489))<br>peak memory total (min, med, max (stageId: taskId))<br>3.2 GiB (0.0 B, 0.0 B, 16.5 MiB (stage 88.0: task 2330))<br>number of output rows: 2,257,379<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.4, 1.4, 1.4 (stage 88.0: task 2330))"];
}
7 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 2,260,197<br>shuffle write time total (min, med, max (stageId: taskId))<br>185 ms (17 ms, 24 ms, 25 ms (stage 87.0: task 2328))<br>records read: 2,260,197<br>local bytes read total (min, med, max (stageId: taskId))<br>138.8 MiB (0.0 B, 0.0 B, 734.0 KiB (stage 88.0: task 2405))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 88.0: task 2330))<br>local blocks read: 1,600<br>data size total (min, med, max (stageId: taskId))<br>189.7 MiB (16.8 MiB, 24.7 MiB, 25.0 MiB (stage 87.0: task 2322))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>138.8 MiB (12.3 MiB, 18.1 MiB, 18.3 MiB (stage 87.0: task 2322))"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n3.9 s (334 ms, 506 ms, 591 ms (stage 87.0: task 2323))";
9 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>3.0 s (259 ms, 393 ms, 470 ms (stage 87.0: task 2323))<br>peak memory total (min, med, max (stageId: taskId))<br>304.0 MiB (24.0 MiB, 40.0 MiB, 40.0 MiB (stage 87.0: task 2322))<br>number of output rows: 2,260,197<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.6, 1.6, 1.6 (stage 87.0: task 2322))"];
10 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 2,260,633<br>number of input batches: 600"];
}
11 [labelType="html" label="<b>Scan parquet itv024694_lending_club.customers</b><br><br>number of files read: 200<br>scan time total (min, med, max (stageId: taskId))<br>1.9 s (176 ms, 238 ms, 327 ms (stage 87.0: task 2323))<br>metadata time: 0 ms<br>size of files read: 184.9 MiB<br>number of output rows: 2,260,633"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: total (min, med, max (stageId: taskId))\n210 ms (0 ms, 0 ms, 4 ms (stage 88.0: task 2701))";
13 [labelType="html" label="<br><b>Project</b><br><br>"];
14 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 173"];
15 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>20 ms (0 ms, 0 ms, 3 ms (stage 88.0: task 2701))<br>peak memory total (min, med, max (stageId: taskId))<br>3.2 GiB (0.0 B, 0.0 B, 16.3 MiB (stage 88.0: task 2530))<br>number of output rows: 484,136<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1.1 (stage 88.0: task 2563))"];
}
16 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 484,233<br>shuffle write time total (min, med, max (stageId: taskId))<br>49 ms (23 ms, 25 ms, 25 ms (stage 85.0: task 2318))<br>records read: 484,233<br>local bytes read total (min, med, max (stageId: taskId))<br>29.8 MiB (0.0 B, 0.0 B, 161.4 KiB (stage 88.0: task 2599))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 88.0: task 2530))<br>local blocks read: 400<br>data size total (min, med, max (stageId: taskId))<br>40.6 MiB (20.3 MiB, 20.4 MiB, 20.4 MiB (stage 85.0: task 2318))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>29.8 MiB (14.9 MiB, 14.9 MiB, 14.9 MiB (stage 85.0: task 2318))"];
subgraph cluster17 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n566 ms (271 ms, 295 ms, 295 ms (stage 85.0: task 2318))";
18 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>353 ms (167 ms, 186 ms, 186 ms (stage 85.0: task 2318))<br>peak memory total (min, med, max (stageId: taskId))<br>80.0 MiB (40.0 MiB, 40.0 MiB, 40.0 MiB (stage 85.0: task 2319))<br>number of output rows: 484,233<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.6, 1.6, 1.6 (stage 85.0: task 2319))"];
19 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 484,310<br>number of input batches: 119"];
}
20 [labelType="html" label="<b>Scan parquet itv024694_lending_club.loans_defaulters_delinq</b><br><br>number of files read: 2<br>scan time total (min, med, max (stageId: taskId))<br>70 ms (33 ms, 37 ms, 37 ms (stage 85.0: task 2319))<br>metadata time: 0 ms<br>size of files read: 31.1 MiB<br>number of output rows: 484,310"];
subgraph cluster21 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: total (min, med, max (stageId: taskId))\n782 ms (0 ms, 0 ms, 6 ms (stage 88.0: task 2787))";
22 [labelType="html" label="<br><b>Project</b><br><br>"];
23 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 3,189"];
24 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>615 ms (0 ms, 0 ms, 6 ms (stage 88.0: task 2861))<br>peak memory total (min, med, max (stageId: taskId))<br>3.2 GiB (0.0 B, 0.0 B, 16.5 MiB (stage 88.0: task 2730))<br>number of output rows: 2,257,384<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.4, 1.4, 1.4 (stage 88.0: task 2730))"];
}
25 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 2,259,065<br>shuffle write time total (min, med, max (stageId: taskId))<br>147 ms (69 ms, 77 ms, 77 ms (stage 86.0: task 2320))<br>records read: 2,259,065<br>local bytes read total (min, med, max (stageId: taskId))<br>138.5 MiB (0.0 B, 0.0 B, 732.4 KiB (stage 88.0: task 2805))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 88.0: task 2730))<br>local blocks read: 400<br>data size total (min, med, max (stageId: taskId))<br>189.6 MiB (93.0 MiB, 96.6 MiB, 96.6 MiB (stage 86.0: task 2320))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>138.5 MiB (67.9 MiB, 70.6 MiB, 70.6 MiB (stage 86.0: task 2320))"];
subgraph cluster26 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: total (min, med, max (stageId: taskId))\n2.3 s (1.1 s, 1.2 s, 1.2 s (stage 86.0: task 2320))";
27 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>1.4 s (657 ms, 724 ms, 724 ms (stage 86.0: task 2320))<br>peak memory total (min, med, max (stageId: taskId))<br>336.0 MiB (144.0 MiB, 192.0 MiB, 192.0 MiB (stage 86.0: task 2320))<br>number of output rows: 2,259,065<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.6, 1.7, 1.7 (stage 86.0: task 2321))"];
28 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 2,260,701<br>number of input batches: 553"];
}
29 [labelType="html" label="<b>Scan parquet itv024694_lending_club.loans_defaulters_detail_rec_enq</b><br><br>number of files read: 2<br>scan time total (min, med, max (stageId: taskId))<br>150 ms (75 ms, 75 ms, 75 ms (stage 86.0: task 2321))<br>metadata time: 0 ms<br>size of files read: 144.3 MiB<br>number of output rows: 2,260,701"];
1->0;
2->1;
4->2;
5->4;
6->5;
7->6;
9->7;
10->9;
11->10;
13->2;
14->13;
15->14;
16->15;
18->16;
19->18;
20->19;
22->2;
23->22;
24->23;
25->24;
27->25;
28->27;
29->28;
}
30
Execute InsertIntoHadoopFsRelationCommand hdfs://m01.itversity.com:9000/user/itv024694/bad_data/bad_data_union, false, CSV, [header=true, path=/user/itv024694/bad_data/bad_data_union], Overwrite, [member_id]
Exchange RoundRobinPartitioning(1), REPARTITION_WITH_NUM, [id=#1071]
Union
Project [member_id#577]
Filter (total_count#575L > 1)
HashAggregate(keys=[member_id#577], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(member_id#577, 200), ENSURE_REQUIREMENTS, [id=#1044]
HashAggregate(keys=[member_id#577], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet itv024694_lending_club.customers[member_id#577] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/cust..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>
Project [member_id#607]
Filter (total_count#605L > 1)
HashAggregate(keys=[member_id#607], functions=[count(1)])
WholeStageCodegen (4)
Exchange hashpartitioning(member_id#607, 200), ENSURE_REQUIREMENTS, [id=#1054]
HashAggregate(keys=[member_id#607], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (3)
FileScan parquet itv024694_lending_club.loans_defaulters_delinq[member_id#607] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/loan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>
Project [member_id#625]
Filter (total_count#623L > 1)
HashAggregate(keys=[member_id#625], functions=[count(1)])
WholeStageCodegen (6)
Exchange hashpartitioning(member_id#625, 200), ENSURE_REQUIREMENTS, [id=#1064]
HashAggregate(keys=[member_id#625], functions=[partial_count(1)])
ColumnarToRow
WholeStageCodegen (5)
FileScan parquet itv024694_lending_club.loans_defaulters_detail_rec_enq[member_id#625] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://m01.itversity.com:9000/public/trendytech/lendingclubproject/cleaned/loan..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<member_id:string>