guess1(p1)
public
Returns the guessed encoding of str as an integer.
Algorithm described in: Ken Lunde, "Understanding Japanese Information
Processing", Sebastopol, CA: O'Reilly & Associates.
case NKF.guess1(input)
when NKF::JIS
"ISO-2022-JP"
when NKF::SJIS
"Shift_JIS"
when NKF::EUC
"EUC-JP"
when NKF::UNKNOWN
"UNKNOWN(ASCII)"
when NKF::BINARY
"BINARY"
end
Show source
/*
* call-seq:
* NKF.guess1(str) -> integer
*
* Returns guessed encoding of _str_ as integer.
*
* Algorithm described in:
* Ken Lunde. `Understanding Japanese Information Processing'
* Sebastopol, CA: O'Reilly & Associates.
*
* case NKF.guess1(input)
* when NKF::JIS
* "ISO-2022-JP"
* when NKF::SJIS
* "Shift_JIS"
* when NKF::EUC
* "EUC-JP"
* when NKF::UNKNOWN
* "UNKNOWN(ASCII)"
* when NKF::BINARY
* "BINARY"
* end
*/
static VALUE
rb_nkf_guess1(obj, src)
VALUE obj, src;
{
unsigned char *p;
unsigned char *pend;
int sequence_counter = 0;
StringValue(src);
p = (unsigned char *)RSTRING(src)->ptr;
pend = p + RSTRING(src)->len;
if (p == pend) return INT2FIX(_UNKNOWN);
p++;\
if (p==pend) return INT2FIX(_UNKNOWN);\
sequence_counter++;\
if (sequence_counter % 2 == 1 && *p != 0xa4)\
sequence_counter = 0;\
if (6 <= sequence_counter) {\
sequence_counter = 0;\
return INT2FIX(_EUC);\
}\
} while (0)
if (*p == 0xa4)
sequence_counter = 1;
while (p<pend) {
if (*p == '\033') {
return INT2FIX(_JIS);
}
if (*p < '\006' || *p == 0x7f || *p == 0xff) {
return INT2FIX(_BINARY);
}
if (0x81 <= *p && *p <= 0x8d) {
return INT2FIX(_SJIS);
}
if (0x8f <= *p && *p <= 0x9f) {
return INT2FIX(_SJIS);
}
if (*p == 0x8e) { /* SS2 */
INCR;
if ((0x40 <= *p && *p <= 0x7e) ||
(0x80 <= *p && *p <= 0xa0) ||
(0xe0 <= *p && *p <= 0xfc))
return INT2FIX(_SJIS);
}
else if (0xa1 <= *p && *p <= 0xdf) {
INCR;
if (0xf0 <= *p && *p <= 0xfe)
return INT2FIX(_EUC);
if (0xe0 <= *p && *p <= 0xef) {
while (p < pend && *p >= 0x40) {
if (*p >= 0x81) {
if (*p <= 0x8d || (0x8f <= *p && *p <= 0x9f)) {
return INT2FIX(_SJIS);
}
else if (0xfd <= *p && *p <= 0xfe) {
return INT2FIX(_EUC);
}
}
INCR;
}
}
else if (*p <= 0x9f) {
return INT2FIX(_SJIS);
}
}
else if (0xf0 <= *p && *p <= 0xfe) {
return INT2FIX(_EUC);
}
else if (0xe0 <= *p && *p <= 0xef) {
INCR;
if ((0x40 <= *p && *p <= 0x7e) ||
(0x80 <= *p && *p <= 0xa0)) {
return INT2FIX(_SJIS);
}
if (0xfd <= *p && *p <= 0xfe) {
return INT2FIX(_EUC);
}
}
INCR;
}
return INT2FIX(_UNKNOWN);
}