HiveTableRelation 相关代码
当 relation.tableMeta.stats.isEmpty 为 true 时（且该表为 Hive 表），会调用 hiveTableWithStats 为该 relation 补充统计信息
/**
 * Rule that fills in size statistics for Hive table relations whose catalog metadata has none.
 *
 * Matching relations are replaced by copies whose `tableStats` holds a `sizeInBytes` estimate;
 * every other node of the plan is left as is.
 */
class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] {

  /** Whether `relation` is a Hive table whose catalog metadata carries no statistics. */
  private def missingStats(relation: HiveTableRelation): Boolean =
    DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty

  /**
   * Returns a copy of `relation` with `tableStats` set to a size estimate.
   *
   * The size is read from the file system only when the HDFS fallback is enabled in `conf` and
   * the relation has no partition columns. In every other case, and when the file system lookup
   * throws an `IOException`, `conf.defaultSizeInBytes` is used instead.
   */
  private def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = {
    // Partition directories of a partitioned table may live outside the table directory, which
    // makes sizing expensive (see AnalyzeTable), so only unpartitioned tables are measured.
    val measureOnFileSystem =
      conf.fallBackToHdfsForStatsEnabled && relation.partitionCols.isEmpty

    val estimatedSize =
      if (!measureOnFileSystem) {
        conf.defaultSizeInBytes
      } else {
        try {
          val hadoopConf = session.sessionState.newHadoopConf()
          val location = new Path(relation.tableMeta.location)
          val fileSystem: FileSystem = location.getFileSystem(hadoopConf)
          fileSystem.getContentSummary(location).getLength
        } catch {
          case e: IOException =>
            // Best effort: log the failure and fall back to the configured default size.
            logWarning("Failed to get table size from HDFS.", e)
            conf.defaultSizeInBytes
        }
      }

    relation.copy(tableStats = Some(Statistics(sizeInBytes = BigInt(estimatedSize))))
  }

  /** Rewrites every stats-less Hive table relation in `plan` via `hiveTableWithStats`. */
  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
    case relation: HiveTableRelation if missingStats(relation) =>
      hiveTableWithStats(relation)

    // The target table of an InsertIntoStatement is not among its children, so the case above
    // never reaches it; handle it explicitly here.
    case insert @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _, _)
        if missingStats(relation) =>
      insert.copy(table = hiveTableWithStats(relation))
  }
}
- /**
- * A `LogicalPlan` that represents a hive table.
- *
- * TODO: remove this after we completely make hive as a data source.
- */
- case class HiveTableRelation(
- tableMeta: CatalogTable,
- dataCols: Seq[AttributeReference],
- partitionCols: Seq[AttributeReference],
- tableStats: Option[Statistics] = None,
- @transient prunedPartitions: Option[Seq[CatalogTablePartition]] = None)
