class MetadataProcessor extends Logging with Serializable
A helper class which collects the indexes and uses a MetadataHandle to upload the metadata
- Alphabetic
- By Inheritance
- MetadataProcessor
- Serializable
- Serializable
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
MetadataProcessor(spark: SparkSession, uri: String, metadataHandle: MetadataHandle)
- spark
org.apache.spark.sql.SparkSession instance for processing
- uri
the URI of the dataset
- metadataHandle
a MetadataHandle instance to be used for saving the metadata
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
analyzeAndUploadMetadata(format: String, options: Map[String, String], indexes: Seq[Index], fileIds: Seq[(String, String)], schema: Option[StructType], isRefresh: Boolean = false): Unit
Collects the indexes and uploads them to the metadata store using the MetadataHandle
Collects the indexes and uploads them to the metadata store using the MetadataHandle
- format
the format to be used when reading each object
- options
the options to be used when reading each object. Note: all objects are assumed to have the same options and format.
- indexes
a sequence of indexes that will be applied on the indexed dataset
- fileIds
a sequence of (String, String) where the first string is the file name and the second is the fileID
- schema
(optional) the expected schema (since we are reading object by object the schema can be provided according to the full dataframe)
- isRefresh
indicates whether the operation is a refresh
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
-
def
collectNewFiles(files: Seq[FileStatus], isRefresh: Boolean): (Seq[(String, String)], Seq[String])
Collects the list of files that need to be indexed.
Collects the list of files that need to be indexed. A file needs to be indexed if: (1) it is a new file that was not indexed before, or (2) it is an indexed file which has changed since it was indexed.
- files
The list of files that will be compared against the existing indexed files
- isRefresh
indicates whether this is a refresh operation or not; if this is not a refresh operation, it is assumed that no indexed files exist
- returns
A pair consisting of: a sequence of (String, String) where the first string is the file name and the second is the fileID, for all new/modified files; and a sequence of files to be removed from the metadata store (since they were updated)
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def prepareForRefresh(indexes: Seq[Index]): Unit
-
def
removeMetadataForFiles(files: Seq[String]): Int
Removes the metadata for a given list of files
Removes the metadata for a given list of files
- files
the list of files to remove metadata for
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val tableIdentifier: String
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )