Packages

case class ReaderParameters(recordFormat: RecordFormat = FixedLength, isEbcdic: Boolean = true, isText: Boolean = false, ebcdicCodePage: String = "common", ebcdicCodePageClass: Option[String] = None, asciiCharset: String = "", fieldCodePage: Map[String, String] = Map.empty[String, String], isUtf16BigEndian: Boolean = true, floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM, variableSizeOccurs: Boolean = false, recordLength: Option[Int] = None, minimumRecordLength: Int = 1, maximumRecordLength: Int = Int.MaxValue, lengthFieldExpression: Option[String] = None, isRecordSequence: Boolean = false, bdw: Option[Bdw] = None, isRdwBigEndian: Boolean = false, isRdwPartRecLength: Boolean = false, rdwAdjustment: Int = 0, isIndexGenerationNeeded: Boolean = false, inputSplitRecords: Option[Int] = None, inputSplitSizeMB: Option[Int] = None, hdfsDefaultBlockSize: Option[Int] = None, startOffset: Int = 0, endOffset: Int = 0, fileStartOffset: Int = 0, fileEndOffset: Int = 0, generateRecordId: Boolean = false, generateRecordBytes: Boolean = false, schemaPolicy: SchemaRetentionPolicy = SchemaRetentionPolicy.KeepOriginal, stringTrimmingPolicy: StringTrimmingPolicy = StringTrimmingPolicy.TrimBoth, allowPartialRecords: Boolean = false, multisegment: Option[MultisegmentParameters] = None, commentPolicy: CommentPolicy = CommentPolicy(), strictSignOverpunch: Boolean = true, improvedNullDetection: Boolean = false, decodeBinaryAsHex: Boolean = false, dropGroupFillers: Boolean = false, dropValueFillers: Boolean = true, fillerNamingPolicy: FillerNamingPolicy = FillerNamingPolicy.SequenceNumbers, nonTerminals: Seq[String] = Nil, occursMappings: Map[String, Map[String, Int]] = Map(), debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug, recordHeaderParser: Option[String] = None, recordExtractor: Option[String] = None, rhpAdditionalInfo: Option[String] = None, reAdditionalInfo: String = "", inputFileNameColumn: String = "", metadataPolicy: MetadataPolicy = 
MetadataPolicy.Basic) extends Product with Serializable

These are properties for customizing the mainframe binary data reader.

recordFormat

Record format

isEbcdic

If true the input data file encoding is EBCDIC, otherwise it is ASCII

isText

If true line ending characters will be used (LF / CRLF) as the record separator

ebcdicCodePage

Specifies what code page to use for EBCDIC to ASCII/Unicode conversions

ebcdicCodePageClass

An optional custom code page conversion class provided by a user

asciiCharset

A charset for ASCII data

fieldCodePage

Specifies a mapping between a field name and the code page

isUtf16BigEndian

If true UTF-16 strings are considered big-endian.

floatingPointFormat

A format of floating-point numbers

variableSizeOccurs

If true, OCCURS DEPENDING ON data size will depend on the number of elements

recordLength

Specifies the length of the record disregarding the copybook record size. Implies the file has a fixed record length.

minimumRecordLength

Minimum record length for which the record is considered valid.

maximumRecordLength

Maximum record length for which the record is considered valid.

lengthFieldExpression

A name of a field that contains record length. Optional. If not set the copybook record length will be used.

isRecordSequence

Do input files have 4-byte record length headers

bdw

Block descriptor word (if specified), for FB and VB record formats

isRdwPartRecLength

Does the RDW count itself as part of the record length

rdwAdjustment

Controls a mismatch between RDW and record length

isIndexGenerationNeeded

Whether indexing of the input file before processing is requested

inputSplitRecords

The number of records to include in each partition. Notice mainframe records may have variable size, inputSplitSizeMB is the recommended option

inputSplitSizeMB

A partition size to target. In certain circumstances this size may not be exactly that, but the library will do the best effort to target that size

hdfsDefaultBlockSize

Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified

startOffset

An offset to the start of the record in each binary data block.

endOffset

An offset from the end of the record to the end of the binary data block.

fileStartOffset

A number of bytes to skip at the beginning of each file

fileEndOffset

A number of bytes to skip at the end of each file

generateRecordId

If true, a record id field will be prepended to each record.

generateRecordBytes

Generate 'record_bytes' field containing raw bytes of the original record

schemaPolicy

Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.

stringTrimmingPolicy

Specifies if and how strings should be trimmed when parsed.

allowPartialRecords

If true, partial ASCII records can be parsed (in cases when LF character is missing for example)

multisegment

Parameters specific to reading multisegment files

commentPolicy

A comment truncation policy

improvedNullDetection

If true, string values that contain only zero bytes (0x0) will be considered null.

decodeBinaryAsHex

Decode binary fields as HEX strings

dropGroupFillers

If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields

dropValueFillers

If true the parser will drop all value FILLER fields

fillerNamingPolicy

Specifies the strategy of renaming FILLER names to make them unique

nonTerminals

A list of non-terminals (GROUPS) to combine and parse as primitive fields

debugFieldsPolicy

Specifies if debugging fields need to be added and what should they contain (false, hex, raw).

recordHeaderParser

A parser used to parse data field record headers

rhpAdditionalInfo

An optional additional option string passed to a custom record header parser

inputFileNameColumn

A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function

metadataPolicy

Specifies the policy of metadata fields to be added to the Spark schema

Linear Supertypes
Serializable, Serializable, Product, Equals, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. ReaderParameters
  2. Serializable
  3. Serializable
  4. Product
  5. Equals
  6. AnyRef
  7. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new ReaderParameters(recordFormat: RecordFormat = FixedLength, isEbcdic: Boolean = true, isText: Boolean = false, ebcdicCodePage: String = "common", ebcdicCodePageClass: Option[String] = None, asciiCharset: String = "", fieldCodePage: Map[String, String] = Map.empty[String, String], isUtf16BigEndian: Boolean = true, floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM, variableSizeOccurs: Boolean = false, recordLength: Option[Int] = None, minimumRecordLength: Int = 1, maximumRecordLength: Int = Int.MaxValue, lengthFieldExpression: Option[String] = None, isRecordSequence: Boolean = false, bdw: Option[Bdw] = None, isRdwBigEndian: Boolean = false, isRdwPartRecLength: Boolean = false, rdwAdjustment: Int = 0, isIndexGenerationNeeded: Boolean = false, inputSplitRecords: Option[Int] = None, inputSplitSizeMB: Option[Int] = None, hdfsDefaultBlockSize: Option[Int] = None, startOffset: Int = 0, endOffset: Int = 0, fileStartOffset: Int = 0, fileEndOffset: Int = 0, generateRecordId: Boolean = false, generateRecordBytes: Boolean = false, schemaPolicy: SchemaRetentionPolicy = SchemaRetentionPolicy.KeepOriginal, stringTrimmingPolicy: StringTrimmingPolicy = StringTrimmingPolicy.TrimBoth, allowPartialRecords: Boolean = false, multisegment: Option[MultisegmentParameters] = None, commentPolicy: CommentPolicy = CommentPolicy(), strictSignOverpunch: Boolean = true, improvedNullDetection: Boolean = false, decodeBinaryAsHex: Boolean = false, dropGroupFillers: Boolean = false, dropValueFillers: Boolean = true, fillerNamingPolicy: FillerNamingPolicy = FillerNamingPolicy.SequenceNumbers, nonTerminals: Seq[String] = Nil, occursMappings: Map[String, Map[String, Int]] = Map(), debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug, recordHeaderParser: Option[String] = None, recordExtractor: Option[String] = None, rhpAdditionalInfo: Option[String] = None, reAdditionalInfo: String = "", inputFileNameColumn: String = "", metadataPolicy: MetadataPolicy = 
MetadataPolicy.Basic)

    recordFormat

    Record format

    isEbcdic

    If true the input data file encoding is EBCDIC, otherwise it is ASCII

    isText

    If true line ending characters will be used (LF / CRLF) as the record separator

    ebcdicCodePage

    Specifies what code page to use for EBCDIC to ASCII/Unicode conversions

    ebcdicCodePageClass

    An optional custom code page conversion class provided by a user

    asciiCharset

    A charset for ASCII data

    fieldCodePage

    Specifies a mapping between a field name and the code page

    isUtf16BigEndian

    If true UTF-16 strings are considered big-endian.

    floatingPointFormat

    A format of floating-point numbers

    variableSizeOccurs

    If true, OCCURS DEPENDING ON data size will depend on the number of elements

    recordLength

    Specifies the length of the record disregarding the copybook record size. Implies the file has a fixed record length.

    minimumRecordLength

    Minimum record length for which the record is considered valid.

    maximumRecordLength

    Maximum record length for which the record is considered valid.

    lengthFieldExpression

    A name of a field that contains record length. Optional. If not set the copybook record length will be used.

    isRecordSequence

    Do input files have 4-byte record length headers

    bdw

    Block descriptor word (if specified), for FB and VB record formats

    isRdwPartRecLength

    Does the RDW count itself as part of the record length

    rdwAdjustment

    Controls a mismatch between RDW and record length

    isIndexGenerationNeeded

    Whether indexing of the input file before processing is requested

    inputSplitRecords

    The number of records to include in each partition. Notice mainframe records may have variable size, inputSplitSizeMB is the recommended option

    inputSplitSizeMB

    A partition size to target. In certain circumstances this size may not be exactly that, but the library will do the best effort to target that size

    hdfsDefaultBlockSize

    Default HDFS block size for the HDFS filesystem used. This value is used as the default split size if inputSplitSizeMB is not specified

    startOffset

    An offset to the start of the record in each binary data block.

    endOffset

    An offset from the end of the record to the end of the binary data block.

    fileStartOffset

    A number of bytes to skip at the beginning of each file

    fileEndOffset

    A number of bytes to skip at the end of each file

    generateRecordId

    If true, a record id field will be prepended to each record.

    generateRecordBytes

    Generate 'record_bytes' field containing raw bytes of the original record

    schemaPolicy

    Specifies a policy to transform the input schema. The default policy is to keep the schema exactly as it is in the copybook.

    stringTrimmingPolicy

    Specifies if and how strings should be trimmed when parsed.

    allowPartialRecords

    If true, partial ASCII records can be parsed (in cases when LF character is missing for example)

    multisegment

    Parameters specific to reading multisegment files

    commentPolicy

    A comment truncation policy

    improvedNullDetection

    If true, string values that contain only zero bytes (0x0) will be considered null.

    decodeBinaryAsHex

    Decode binary fields as HEX strings

    dropGroupFillers

    If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields

    dropValueFillers

    If true the parser will drop all value FILLER fields

    fillerNamingPolicy

    Specifies the strategy of renaming FILLER names to make them unique

    nonTerminals

    A list of non-terminals (GROUPS) to combine and parse as primitive fields

    debugFieldsPolicy

    Specifies if debugging fields need to be added and what should they contain (false, hex, raw).

    recordHeaderParser

    A parser used to parse data field record headers

    rhpAdditionalInfo

    An optional additional option string passed to a custom record header parser

    inputFileNameColumn

    A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function

    metadataPolicy

    Specifies the policy of metadata fields to be added to the Spark schema

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. val allowPartialRecords: Boolean
  5. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  6. val asciiCharset: String
  7. val bdw: Option[Bdw]
  8. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  9. val commentPolicy: CommentPolicy
  10. val debugFieldsPolicy: DebugFieldsPolicy
  11. val decodeBinaryAsHex: Boolean
  12. val dropGroupFillers: Boolean
  13. val dropValueFillers: Boolean
  14. val ebcdicCodePage: String
  15. val ebcdicCodePageClass: Option[String]
  16. val endOffset: Int
  17. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  18. val fieldCodePage: Map[String, String]
  19. val fileEndOffset: Int
  20. val fileStartOffset: Int
  21. val fillerNamingPolicy: FillerNamingPolicy
  22. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  23. val floatingPointFormat: FloatingPointFormat
  24. val generateRecordBytes: Boolean
  25. val generateRecordId: Boolean
  26. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  27. val hdfsDefaultBlockSize: Option[Int]
  28. val improvedNullDetection: Boolean
  29. val inputFileNameColumn: String
  30. val inputSplitRecords: Option[Int]
  31. val inputSplitSizeMB: Option[Int]
  32. val isEbcdic: Boolean
  33. val isIndexGenerationNeeded: Boolean
  34. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  35. val isRdwBigEndian: Boolean
  36. val isRdwPartRecLength: Boolean
  37. val isRecordSequence: Boolean
  38. val isText: Boolean
  39. val isUtf16BigEndian: Boolean
  40. val lengthFieldExpression: Option[String]
  41. val maximumRecordLength: Int
  42. val metadataPolicy: MetadataPolicy
  43. val minimumRecordLength: Int
  44. val multisegment: Option[MultisegmentParameters]
  45. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  46. val nonTerminals: Seq[String]
  47. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  48. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  49. val occursMappings: Map[String, Map[String, Int]]
  50. val rdwAdjustment: Int
  51. val reAdditionalInfo: String
  52. val recordExtractor: Option[String]
  53. val recordFormat: RecordFormat
  54. val recordHeaderParser: Option[String]
  55. val recordLength: Option[Int]
  56. val rhpAdditionalInfo: Option[String]
  57. val schemaPolicy: SchemaRetentionPolicy
  58. val startOffset: Int
  59. val strictSignOverpunch: Boolean
  60. val stringTrimmingPolicy: StringTrimmingPolicy
  61. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  62. val variableSizeOccurs: Boolean
  63. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  64. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  65. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped