org.apache.spark.sql.execution.OutOfMemorySparkException: Size of broadcasted table far exceeds estimates and exceeds limit of spark.driver.maxResultSize=4294967296. You can disable broadcasts for this query using set spark.sql.autoBroadcastJoinThreshold=-1
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
127 if submission:
128 submission.submit_status('In Progress', stage=stage)
--> 129 method()
130 submission.submit_status('Completed', stage=stage)
131 else:
/local_disk0/spark-d72471c9-08db-4f0d-b38c-8cb06cc4fc63/userFiles-87519a89-ae11-4133-92d0-28f1df2ea516/scripts.zip/integrated_dim_product_unharmonised.py in process()
37
38 output_df = transform(source_df, current_df, epos_product_lookup_df)
---> 39 load(output_df)
40
41 log_info(__name__, "process", "finish::success")
/local_disk0/spark-d72471c9-08db-4f0d-b38c-8cb06cc4fc63/userFiles-87519a89-ae11-4133-92d0-28f1df2ea516/scripts.zip/integrated_dim_product_unharmonised.py in load(df)
85
86 # don't write output unless df has values in
---> 87 if df.head(1):
88 df.write.mode('overwrite').parquet(output_path)
89
/databricks/spark/python/pyspark/sql/dataframe.py in head(self, n)
1744 rs = self.head(1)
1745 return rs[0] if rs else None
-> 1746 return self.take(n)
1747
1748 def first(self):
/databricks/spark/python/pyspark/sql/dataframe.py in take(self, num)
767 [Row(age=2, name='Alice'), Row(age=5, name='Bob')]
768 """
--> 769 return self.limit(num).collect()
770
771 def tail(self, num):
/databricks/spark/python/pyspark/sql/dataframe.py in collect(self)
713 # Default path used in OSS Spark / for non-DF-ACL clusters:
714 with SCCallSiteSync(self._sc) as css:
--> 715 sock_info = self._jdf.collectToPython()
    716         return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
717
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
115 def deco(*a, **kw):
116 try:
--> 117 return f(*a, **kw)
118 except py4j.protocol.Py4JJavaError as e:
119 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o690.collectToPython.
: org.apache.spark.sql.execution.OutOfMemorySparkException: Size of broadcasted table far exceeds estimates and exceeds limit of spark.driver.maxResultSize=4294967296. You can disable broadcasts for this query using set spark.sql.autoBroadcastJoinThreshold=-1
	at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.$anonfun$relationFuture$1(BroadcastExchangeExec.scala:202)
	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:968)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$4(SQLExecution.scala:448)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$3(SQLExecution.scala:448)
	at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$2(SQLExecution.scala:447)
	at org.apache.spark.sql.execution.SQLExecution$.withOptimisticTransaction(SQLExecution.scala:465)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$1(SQLExecution.scala:446)
	at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
	at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
	at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:68)
	at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:54)
	at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:101)
	at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)