java.lang.Object
org.jcodings.Encoding
- All Implemented Interfaces:
Cloneable
- Direct Known Subclasses:
AbstractEncoding
-
Field Summary
FieldsModifier and TypeFieldDescriptionstatic final int
private Charset
private static int
private int
private int
private boolean
private boolean
private final boolean
private final boolean
protected boolean
protected boolean
protected final int
protected final int
private byte[]
static final byte
private String
-
Constructor Summary
Constructors -
Method Summary
Modifier and TypeMethodDescriptionabstract void
applyAllCaseFold
(int flag, ApplyAllCaseFoldFunction fun, Object arg) Expand case folds given a character class (used for case insensitive matching) static byte
asciiToLower
(int c) static byte
asciiToUpper
(int c) abstract CaseFoldCodeItem[]
caseFoldCodesByString
(int flag, byte[] bytes, int p, int end) Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString
) Oniguruma equivalent:get_case_fold_codes_by_str
abstract int
Oniguruma equivalent:case_map
abstract int
codeToMbc
(int code, byte[] bytes, int p) Extracts code point into its multibyte representation abstract int
codeToMbcLength
(int code) Returns character length given a code point Oniguruma equivalent:code_to_mbclen
abstract int[]
ctypeCodeRange
(int ctype, IntHolder sbOut) Returns code range for a given character type Oniguruma equivalent:get_ctype_code_range
static int
digitVal
(int code) final boolean
If this encoding is capable of being represented by a Java Charset then provide it. The name of the equivalent Java Charset for this encoding. final int
getIndex()
final byte[]
getName()
final int
hashCode()
final boolean
isAlnum
(int code) final boolean
isAlpha
(int code) static boolean
isAscii
(byte b) static boolean
isAscii
(int code) final boolean
final boolean
isBlank
(int code) final boolean
isCntrl
(int code) abstract boolean
isCodeCType
(int code, int ctype) Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods) final boolean
isDigit
(int code) final boolean
isDummy()
final boolean
final boolean
isGraph
(int code) final boolean
isLower
(int code) static boolean
isMbcAscii
(byte b) boolean
isMbcCrnl
(byte[] bytes, int p, int end) final boolean
isMbcHead
(byte[] bytes, int p, int end) final boolean
isMbcWord
(byte[] bytes, int p, int end) abstract boolean
isNewLine
(byte[] bytes, int p, int end) Returns true if bytes[p]
is a head of a new line character Oniguruma equivalent:is_mbc_newline
final boolean
isNewLine
(int code) final boolean
isPrint
(int code) final boolean
isPunct
(int code) abstract boolean
isReverseMatchAllowed
(byte[] bytes, int p, int end) Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm Oniguruma equivalent:is_allowed_reverse_match
final boolean
isSbWord
(int code) final boolean
final boolean
isSpace
(int code) final boolean
final boolean
isUpper
(int code) final boolean
isUTF8()
final boolean
isWord
(int code) static boolean
isWordGraphPrint
(int ctype) final boolean
isXDigit
(int code) abstract int
leftAdjustCharHead
(byte[] bytes, int p, int s, int end) Seeks the previous character head in a stream Oniguruma equivalent:left_adjust_char_head
abstract int
length
(byte c) Returns character length given character head: returns 1
for singlebyte encodings or performs direct length table lookup for multibyte ones. abstract int
length
(byte[] bytes, int p, int end) Returns character length given stream, character position and stream end: returns 1
for singlebyte encodings or performs sanity validations for multibyte ones and returns the character length, or the number of missing bytes in the stream otherwise. static Encoding
static Encoding
final int
Returns maximum character byte length that can appear in an encoding Oniguruma equivalent:max_enc_len
final int
Deprecated.abstract int
mbcCaseFold
(int flag, byte[] bytes, IntHolder pp, int end, byte[] to) Performs case folding for a character at bytes[pp.value]
final int
Deprecated.abstract int
mbcToCode
(byte[] bytes, int p, int end) Returns code point for a character Oniguruma equivalent:mbc_to_code
final int
Returns minimum character byte length that can appear in an encoding Oniguruma equivalent:min_enc_len
static int
odigitVal
(int code) final int
prevCharHead
(byte[] bytes, int p, int s, int end) abstract int
propertyNameToCType
(byte[] bytes, int p, int end) Returns character type given character type name (used when e.g. \p{Alpha}) (package private) Encoding
replicate
(byte[] name) final int
rightAdjustCharHead
(byte[] bytes, int p, int s, int end) final int
rightAdjustCharHeadWithPrev
(byte[] bytes, int p, int s, int end, IntHolder prev) protected final void
setDummy()
protected final void
setName
(byte[] name) protected final void
final int
step
(byte[] bytes, int p, int end, int n) final int
stepBack
(byte[] bytes, int p, int s, int end, int n) final int
strByteLengthNull
(byte[] bytes, int p, int end) abstract int
strCodeAt
(byte[] bytes, int p, int end, int index) abstract int
strLength
(byte[] bytes, int p, int end) final int
strLengthNull
(byte[] bytes, int p, int end) final int
strNCmp
(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n) byte[]
Returns lower case table if it's safe to use it directly, otherwise null.
Used for fast case insensitive matching for some singlebyte encodings. final String
toString()
final int
xdigitVal
(int code)
-
Field Details
-
CHAR_INVALID
public static final int CHAR_INVALID- See Also:
-
count
private static int count -
minLength
protected final int minLength -
maxLength
protected final int maxLength -
isFixedWidth
private final boolean isFixedWidth -
isSingleByte
private final boolean isSingleByte -
isAsciiCompatible
private boolean isAsciiCompatible -
isUnicode
protected boolean isUnicode -
isUTF8
protected boolean isUTF8 -
name
private byte[] name -
hashCode
private int hashCode -
index
private int index -
charset
-
isDummy
private boolean isDummy -
stringName
-
NEW_LINE
public static final byte NEW_LINE- See Also:
-
-
Constructor Details
-
Encoding
-
-
Method Details
-
setName
-
setName
protected final void setName(byte[] name) -
setDummy
protected final void setDummy() -
toString
-
equals
-
hashCode
public final int hashCode() -
getIndex
public final int getIndex() -
getName
public final byte[] getName() -
isDummy
public final boolean isDummy() -
isAsciiCompatible
public final boolean isAsciiCompatible() -
isUnicode
public final boolean isUnicode() -
isUTF8
public final boolean isUTF8() -
getCharset
If this encoding is capable of being represented by a Java Charset then provide it. Otherwise this will raise a CharsetNotFound error via the JDK APIs. To reduce cases like jruby/jruby#4716, we always attempt to find a charset here, and default to using the encoding name, which is never null. Either the encoding will exist in the JDK or it will fail hard, rather than propagating a null Charset. Encodings with names different from those found in the JDK can override getCharsetName to provide that name, or getCharset to return the right Charset. -
getCharsetName
The name of the equivalent Java Charset for this encoding. Defaults to the name of the encoding. Subclasses can override this to provide a different name.- Returns:
- the name of the equivalent Java Charset for this encoding
-
replicate
-
length
public abstract int length(byte c) Returns character length given character head: returns 1
for singlebyte encodings or performs direct length table lookup for multibyte ones. - Parameters:
c
- Character head Oniguruma equivalent:mbc_enc_len
To be deprecated very soon (use the length(byte[] bytes, int p, int end) version)
-
length
public abstract int length(byte[] bytes, int p, int end) Returns character length given stream, character position and stream end: returns 1
for singlebyte encodings or performs sanity validations for multibyte ones and returns the character length, or the number of missing bytes in the stream otherwise. - Returns:
- 0 Never
> 0 Valid character, length returned
-1 Illegal/malformed character
< -1 (-1 - n) Number of missing bytes for character in p...end range
Oniguruma equivalent:
mbc_enc_len
modified for 1.9 purposes,
-
maxLength
public final int maxLength() Returns maximum character byte length that can appear in an encoding. Oniguruma equivalent: max_enc_len
-
maxLengthDistance
Deprecated. -
minLength
public final int minLength() Returns minimum character byte length that can appear in an encoding. Oniguruma equivalent: min_enc_len
-
isNewLine
public abstract boolean isNewLine(byte[] bytes, int p, int end) Returns true if bytes[p]
is a head of a new line character Oniguruma equivalent:is_mbc_newline
-
mbcToCode
public abstract int mbcToCode(byte[] bytes, int p, int end) Returns code point for a character Oniguruma equivalent:mbc_to_code
-
codeToMbcLength
public abstract int codeToMbcLength(int code) Returns character length given a code point Oniguruma equivalent:code_to_mbclen
-
codeToMbc
public abstract int codeToMbc(int code, byte[] bytes, int p) Extracts code point into its multibyte representation - Returns:
- character length for the given code point
Oniguruma equivalent:
code_to_mbc
-
mbcCaseFold
Performs case folding for a character at bytes[pp.value]
- Parameters:
flag
- case fold flag pp
- an IntHolder
that points at character head to
- a buffer where to extract case folded character Oniguruma equivalent: mbc_case_fold
-
toLowerCaseTable
public byte[] toLowerCaseTable() Returns lower case table if it's safe to use it directly, otherwise null.
Used for fast case insensitive matching for some singlebyte encodings. - Returns:
- lower case table
-
applyAllCaseFold
Expand case folds given a character class (used for case insensitive matching) - Parameters:
flag
- case fold flag fun
- case folding functor (look at: ApplyCaseFold
) arg
- case folding functor argument (look at: ApplyCaseFoldArg
) Oniguruma equivalent: apply_all_case_fold
-
caseFoldCodesByString
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString
) Oniguruma equivalent:get_case_fold_codes_by_str
-
propertyNameToCType
public abstract int propertyNameToCType(byte[] bytes, int p, int end) Returns character type given character type name (used when e.g. \p{Alpha}) Oniguruma equivalent:property_name_to_ctype
-
isCodeCType
public abstract boolean isCodeCType(int code, int ctype) Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods) - Parameters:
code
- a code point of a character ctype
- a character type to check against Oniguruma equivalent: is_code_ctype
-
ctypeCodeRange
Returns code range for a given character type Oniguruma equivalent:get_ctype_code_range
-
leftAdjustCharHead
public abstract int leftAdjustCharHead(byte[] bytes, int p, int s, int end) Seeks the previous character head in a stream Oniguruma equivalent:left_adjust_char_head
- Parameters:
bytes
- byte stream p
- position s
- stop end
- end
-
isReverseMatchAllowed
public abstract boolean isReverseMatchAllowed(byte[] bytes, int p, int end) Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm Oniguruma equivalent:is_allowed_reverse_match
-
caseMap
public abstract int caseMap(IntHolder flagP, byte[] bytes, IntHolder pp, int end, byte[] to, int toP, int toEnd) Oniguruma equivalent:case_map
-
rightAdjustCharHead
public final int rightAdjustCharHead(byte[] bytes, int p, int s, int end) -
rightAdjustCharHeadWithPrev
-
prevCharHead
public final int prevCharHead(byte[] bytes, int p, int s, int end) -
stepBack
public final int stepBack(byte[] bytes, int p, int s, int end, int n) -
step
public final int step(byte[] bytes, int p, int end, int n) -
strLength
public abstract int strLength(byte[] bytes, int p, int end) -
strCodeAt
public abstract int strCodeAt(byte[] bytes, int p, int end, int index) -
strLengthNull
public final int strLengthNull(byte[] bytes, int p, int end) -
strByteLengthNull
public final int strByteLengthNull(byte[] bytes, int p, int end) -
strNCmp
public final int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n) -
isNewLine
public final boolean isNewLine(int code) -
isGraph
public final boolean isGraph(int code) -
isPrint
public final boolean isPrint(int code) -
isAlnum
public final boolean isAlnum(int code) -
isAlpha
public final boolean isAlpha(int code) -
isLower
public final boolean isLower(int code) -
isUpper
public final boolean isUpper(int code) -
isCntrl
public final boolean isCntrl(int code) -
isPunct
public final boolean isPunct(int code) -
isSpace
public final boolean isSpace(int code) -
isBlank
public final boolean isBlank(int code) -
isDigit
public final boolean isDigit(int code) -
isXDigit
public final boolean isXDigit(int code) -
isWord
public final boolean isWord(int code) -
isMbcWord
public final boolean isMbcWord(byte[] bytes, int p, int end) -
isSbWord
public final boolean isSbWord(int code) -
isMbcHead
public final boolean isMbcHead(byte[] bytes, int p, int end) -
isMbcCrnl
public boolean isMbcCrnl(byte[] bytes, int p, int end) -
digitVal
public static int digitVal(int code) -
odigitVal
public static int odigitVal(int code) -
xdigitVal
public final int xdigitVal(int code) -
isMbcAscii
public static boolean isMbcAscii(byte b) -
isAscii
public static boolean isAscii(int code) -
isAscii
public static boolean isAscii(byte b) -
asciiToLower
public static byte asciiToLower(int c) -
asciiToUpper
public static byte asciiToUpper(int c) -
isWordGraphPrint
public static boolean isWordGraphPrint(int ctype) -
mbcodeStartPosition
Deprecated. -
isSingleByte
public final boolean isSingleByte() -
isFixedWidth
public final boolean isFixedWidth() -
load
-
load
-