abstract class Index extends Serializable with Logging
Represents an abstract index for metadata on a file.
- Alphabetic
- By Inheritance
- Index
- Logging
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
Index(params: Map[String, String], keyMetadata: Option[String], cols: String*)
- params
a map containing the index parameters, if any exist
- keyMetadata
optional key metadata for encryption
- cols
a sequence of columns associated with the index
Abstract Value Members
-
abstract
def
getMetaDataTypeClassName(): String
- returns
the (full) name of the MetaDataType class used by this index
-
abstract
def
getName: String
- returns
the name of the index
-
abstract
def
getRowMetadata(row: Row): Any
Gets a DataFrame row and extracts the raw metadata needed by the index
- row
Row a row to be indexed
- returns
raw metadata needed by the index, or null if the row contains a null value
-
abstract
def
isValid(df: DataFrame, schemaMap: Map[String, (String, DataType)]): Unit
Gets a DataFrame and checks whether it is valid for the index. There is no need to check column existence, as it is checked by the index builder.
- df
the DataFrame to be checked
- schemaMap
a map containing column names (as they appear in the object) and their data types; the key is the column name in lower case
- Exceptions thrown
[[XskipperException]]
with the reason if invalid
-
abstract
def
reduce(md1: MetadataType, md2: MetadataType): MetadataType
Same as the other reduce overload, but reduces two accumulated metadata values
- returns
updated metadata for the index
-
abstract
def
reduce(accuMetadata: MetadataType, curr: Any): MetadataType
Given an accumulated metadata and a new value, processes the new value and returns an updated accumulated metadata
- accuMetadata
accumulated metadata created by processing all values up to curr
- curr
new value to be processed
- returns
updated metadata for the index
Concrete Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
- var colsMap: Map[String, IndexField]
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
generateBaseMetadata(): MetadataType
- returns
"zero" value of the index - will be used for the first comparison to the object's rows data (by default this is null)
-
def
generateColsMap(schemaMap: Map[String, (String, StructField)]): Unit
Generates the column map according to a given schema
- schemaMap
a map containing column names (as they appear in the object) and their data types; the key is the column name in lower case
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getCols: Seq[String]
- returns
the index columns (in lower case)
-
def
getIndexCols: Iterable[IndexField]
- returns
the columns on which the index is defined
- def getKeyMetadata(): Option[String]
-
def
getParams: Map[String, String]
- returns
the index params map
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- val indexCols: Seq[String]
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def isEncrypted(): Boolean
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- var isOptimized: Boolean
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
optCollectMetaData(filePath: String, df: DataFrame, format: String, options: Map[String, String]): MetadataType
For some formats there may be an optimized way to collect the metadata. This function enables that by receiving the entire file DataFrame instead of processing it row by row (for example, in Parquet the min/max can be read from the footer).
- filePath
the path of the file that is being processed
- df
a DataFrame with the file data
- format
the format of the file
- options
the options that were used to read the file
- returns
the collected MetadataType or null if no metadata was collected
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )