case class ReaderParameters(recordFormat: RecordFormat = FixedLength, isEbcdic: Boolean = true, isText: Boolean = false, ebcdicCodePage: String = "common", ebcdicCodePageClass: Option[String] = None, asciiCharset: String = "", fieldCodePage: Map[String, String] = Map.empty[String, String], isUtf16BigEndian: Boolean = true, floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM, variableSizeOccurs: Boolean = false, recordLength: Option[Int] = None, minimumRecordLength: Int = 1, maximumRecordLength: Int = Int.MaxValue, lengthFieldExpression: Option[String] = None, isRecordSequence: Boolean = false, bdw: Option[Bdw] = None, isRdwBigEndian: Boolean = false, isRdwPartRecLength: Boolean = false, rdwAdjustment: Int = 0, isIndexGenerationNeeded: Boolean = false, inputSplitRecords: Option[Int] = None, inputSplitSizeMB: Option[Int] = None, hdfsDefaultBlockSize: Option[Int] = None, startOffset: Int = 0, endOffset: Int = 0, fileStartOffset: Int = 0, fileEndOffset: Int = 0, generateRecordId: Boolean = false, generateRecordBytes: Boolean = false, schemaPolicy: SchemaRetentionPolicy = SchemaRetentionPolicy.KeepOriginal, stringTrimmingPolicy: StringTrimmingPolicy = StringTrimmingPolicy.TrimBoth, allowPartialRecords: Boolean = false, multisegment: Option[MultisegmentParameters] = None, commentPolicy: CommentPolicy = CommentPolicy(), strictSignOverpunch: Boolean = true, improvedNullDetection: Boolean = false, decodeBinaryAsHex: Boolean = false, dropGroupFillers: Boolean = false, dropValueFillers: Boolean = true, fillerNamingPolicy: FillerNamingPolicy = FillerNamingPolicy.SequenceNumbers, nonTerminals: Seq[String] = Nil, occursMappings: Map[String, Map[String, Int]] = Map(), debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug, recordHeaderParser: Option[String] = None, recordExtractor: Option[String] = None, rhpAdditionalInfo: Option[String] = None, reAdditionalInfo: String = "", inputFileNameColumn: String = "", metadataPolicy: MetadataPolicy = 
MetadataPolicy.Basic) extends Product with Serializable
These are the properties for customizing the mainframe binary data reader.
- recordFormat
Record format
- isEbcdic
If true the input data file encoding is EBCDIC, otherwise it is ASCII
- isText
If true line ending characters will be used (LF / CRLF) as the record separator
- ebcdicCodePage
Specifies what code page to use for EBCDIC to ASCII/Unicode conversions
- ebcdicCodePageClass
An optional custom code page conversion class provided by a user
- asciiCharset
A charset for ASCII data
- fieldCodePage
Specifies a mapping between a field name and the code page
- isUtf16BigEndian
If true UTF-16 strings are considered big-endian.
- floatingPointFormat
A format of floating-point numbers
- variableSizeOccurs
If true, OCCURS DEPENDING ON data size will depend on the number of elements
- recordLength
Specifies the length of the record disregarding the copybook record size. It is implied that the file has a fixed record length.
- minimumRecordLength
Minimum record length for which the record is considered valid.
- maximumRecordLength
Maximum record length for which the record is considered valid.
- lengthFieldExpression
A name of a field that contains record length. Optional. If not set the copybook record length will be used.
- isRecordSequence
Do the input files have 4-byte record length headers
- bdw
Block descriptor word (if specified), for FB and VB record formats
- isRdwPartRecLength
Does the RDW count itself as part of the record length
- rdwAdjustment
Controls a mismatch between RDW and record length
- isIndexGenerationNeeded
Specifies whether indexing of the input file before processing is requested
- inputSplitRecords
The number of records to include in each partition. Notice mainframe records may have variable size; inputSplitSizeMB is the recommended option
- inputSplitSizeMB
A partition size to target. In certain circumstances this size may not be exactly that, but the library will do the best effort to target that size
- hdfsDefaultBlockSize
Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified
- startOffset
An offset to the start of the record in each binary data block.
- endOffset
An offset from the end of the record to the end of the binary data block.
- fileStartOffset
A number of bytes to skip at the beginning of each file
- fileEndOffset
A number of bytes to skip at the end of each file
- generateRecordId
If true, a record id field will be prepended to each record.
- generateRecordBytes
Generate 'record_bytes' field containing raw bytes of the original record
- schemaPolicy
Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.
- stringTrimmingPolicy
Specifies if and how strings should be trimmed when parsed.
- allowPartialRecords
If true, partial ASCII records can be parsed (in cases when LF character is missing for example)
- multisegment
Parameters specific to reading multisegment files
- commentPolicy
A comment truncation policy
- improvedNullDetection
If true, string values that contain only zero bytes (0x0) will be considered null.
- decodeBinaryAsHex
Decode binary fields as HEX strings
- dropGroupFillers
If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
- dropValueFillers
If true the parser will drop all value FILLER fields
- fillerNamingPolicy
Specifies the strategy of renaming FILLER names to make them unique
- nonTerminals
A list of non-terminals (GROUPS) to combine and parse as primitive fields
- debugFieldsPolicy
Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
- recordHeaderParser
A parser used to parse data field record headers
- rhpAdditionalInfo
An optional additional option string passed to a custom record header parser
- inputFileNameColumn
A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function
- metadataPolicy
Specifies the policy of metadata fields to be added to the Spark schema
- Alphabetic
- By Inheritance
- ReaderParameters
- Serializable
- Serializable
- Product
- Equals
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
ReaderParameters(recordFormat: RecordFormat = FixedLength, isEbcdic: Boolean = true, isText: Boolean = false, ebcdicCodePage: String = "common", ebcdicCodePageClass: Option[String] = None, asciiCharset: String = "", fieldCodePage: Map[String, String] = Map.empty[String, String], isUtf16BigEndian: Boolean = true, floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM, variableSizeOccurs: Boolean = false, recordLength: Option[Int] = None, minimumRecordLength: Int = 1, maximumRecordLength: Int = Int.MaxValue, lengthFieldExpression: Option[String] = None, isRecordSequence: Boolean = false, bdw: Option[Bdw] = None, isRdwBigEndian: Boolean = false, isRdwPartRecLength: Boolean = false, rdwAdjustment: Int = 0, isIndexGenerationNeeded: Boolean = false, inputSplitRecords: Option[Int] = None, inputSplitSizeMB: Option[Int] = None, hdfsDefaultBlockSize: Option[Int] = None, startOffset: Int = 0, endOffset: Int = 0, fileStartOffset: Int = 0, fileEndOffset: Int = 0, generateRecordId: Boolean = false, generateRecordBytes: Boolean = false, schemaPolicy: SchemaRetentionPolicy = SchemaRetentionPolicy.KeepOriginal, stringTrimmingPolicy: StringTrimmingPolicy = StringTrimmingPolicy.TrimBoth, allowPartialRecords: Boolean = false, multisegment: Option[MultisegmentParameters] = None, commentPolicy: CommentPolicy = CommentPolicy(), strictSignOverpunch: Boolean = true, improvedNullDetection: Boolean = false, decodeBinaryAsHex: Boolean = false, dropGroupFillers: Boolean = false, dropValueFillers: Boolean = true, fillerNamingPolicy: FillerNamingPolicy = FillerNamingPolicy.SequenceNumbers, nonTerminals: Seq[String] = Nil, occursMappings: Map[String, Map[String, Int]] = Map(), debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug, recordHeaderParser: Option[String] = None, recordExtractor: Option[String] = None, rhpAdditionalInfo: Option[String] = None, reAdditionalInfo: String = "", inputFileNameColumn: String = "", metadataPolicy: MetadataPolicy = MetadataPolicy.Basic)
- recordFormat
Record format
- isEbcdic
If true the input data file encoding is EBCDIC, otherwise it is ASCII
- isText
If true line ending characters will be used (LF / CRLF) as the record separator
- ebcdicCodePage
Specifies what code page to use for EBCDIC to ASCII/Unicode conversions
- ebcdicCodePageClass
An optional custom code page conversion class provided by a user
- asciiCharset
A charset for ASCII data
- fieldCodePage
Specifies a mapping between a field name and the code page
- isUtf16BigEndian
If true UTF-16 strings are considered big-endian.
- floatingPointFormat
A format of floating-point numbers
- variableSizeOccurs
If true, OCCURS DEPENDING ON data size will depend on the number of elements
- recordLength
Specifies the length of the record disregarding the copybook record size. It is implied that the file has a fixed record length.
- minimumRecordLength
Minimum record length for which the record is considered valid.
- maximumRecordLength
Maximum record length for which the record is considered valid.
- lengthFieldExpression
A name of a field that contains record length. Optional. If not set the copybook record length will be used.
- isRecordSequence
Do the input files have 4-byte record length headers
- bdw
Block descriptor word (if specified), for FB and VB record formats
- isRdwPartRecLength
Does the RDW count itself as part of the record length
- rdwAdjustment
Controls a mismatch between RDW and record length
- isIndexGenerationNeeded
Specifies whether indexing of the input file before processing is requested
- inputSplitRecords
The number of records to include in each partition. Notice mainframe records may have variable size; inputSplitSizeMB is the recommended option
- inputSplitSizeMB
A partition size to target. In certain circumstances this size may not be exactly that, but the library will do the best effort to target that size
- hdfsDefaultBlockSize
Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified
- startOffset
An offset to the start of the record in each binary data block.
- endOffset
An offset from the end of the record to the end of the binary data block.
- fileStartOffset
A number of bytes to skip at the beginning of each file
- fileEndOffset
A number of bytes to skip at the end of each file
- generateRecordId
If true, a record id field will be prepended to each record.
- generateRecordBytes
Generate 'record_bytes' field containing raw bytes of the original record
- schemaPolicy
Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.
- stringTrimmingPolicy
Specifies if and how strings should be trimmed when parsed.
- allowPartialRecords
If true, partial ASCII records can be parsed (in cases when LF character is missing for example)
- multisegment
Parameters specific to reading multisegment files
- commentPolicy
A comment truncation policy
- improvedNullDetection
If true, string values that contain only zero bytes (0x0) will be considered null.
- decodeBinaryAsHex
Decode binary fields as HEX strings
- dropGroupFillers
If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
- dropValueFillers
If true the parser will drop all value FILLER fields
- fillerNamingPolicy
Specifies the strategy of renaming FILLER names to make them unique
- nonTerminals
A list of non-terminals (GROUPS) to combine and parse as primitive fields
- debugFieldsPolicy
Specifies if debugging fields need to be added and what should they contain (false, hex, raw).
- recordHeaderParser
A parser used to parse data field record headers
- rhpAdditionalInfo
An optional additional option string passed to a custom record header parser
- inputFileNameColumn
A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function
- metadataPolicy
Specifies the policy of metadata fields to be added to the Spark schema
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val allowPartialRecords: Boolean
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- val asciiCharset: String
- val bdw: Option[Bdw]
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- val commentPolicy: CommentPolicy
- val debugFieldsPolicy: DebugFieldsPolicy
- val decodeBinaryAsHex: Boolean
- val dropGroupFillers: Boolean
- val dropValueFillers: Boolean
- val ebcdicCodePage: String
- val ebcdicCodePageClass: Option[String]
- val endOffset: Int
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val fieldCodePage: Map[String, String]
- val fileEndOffset: Int
- val fileStartOffset: Int
- val fillerNamingPolicy: FillerNamingPolicy
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
- val floatingPointFormat: FloatingPointFormat
- val generateRecordBytes: Boolean
- val generateRecordId: Boolean
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- val hdfsDefaultBlockSize: Option[Int]
- val improvedNullDetection: Boolean
- val inputFileNameColumn: String
- val inputSplitRecords: Option[Int]
- val inputSplitSizeMB: Option[Int]
- val isEbcdic: Boolean
- val isIndexGenerationNeeded: Boolean
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- val isRdwBigEndian: Boolean
- val isRdwPartRecLength: Boolean
- val isRecordSequence: Boolean
- val isText: Boolean
- val isUtf16BigEndian: Boolean
- val lengthFieldExpression: Option[String]
- val maximumRecordLength: Int
- val metadataPolicy: MetadataPolicy
- val minimumRecordLength: Int
- val multisegment: Option[MultisegmentParameters]
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val nonTerminals: Seq[String]
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- val occursMappings: Map[String, Map[String, Int]]
- val rdwAdjustment: Int
- val reAdditionalInfo: String
- val recordExtractor: Option[String]
- val recordFormat: RecordFormat
- val recordHeaderParser: Option[String]
- val recordLength: Option[Int]
- val rhpAdditionalInfo: Option[String]
- val schemaPolicy: SchemaRetentionPolicy
- val startOffset: Int
- val strictSignOverpunch: Boolean
- val stringTrimmingPolicy: StringTrimmingPolicy
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val variableSizeOccurs: Boolean
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()