typedef unsigned char u8;
typedef unsigned short u16;
#define ARABIC_SIN 0
#define ARABIC_END 1
#define ARABIC_1ST 2
#define ARABIC_MID 3
/*
 * ArabicEncode - pick the contextual presentation form of one Arabic letter.
 *
 * arab    Points at the letter to shape.  When the letter is LAM (U+0644)
 *         and pos is neither ARABIC_END nor ARABIC_SIN, arab[1] is read as
 *         well, so it must be readable in that case.
 * pos     Position of the letter in its word: ARABIC_SIN, ARABIC_1ST,
 *         ARABIC_MID or ARABIC_END.
 * stepit  Out (may be NULL only if no ligature can occur): set to TRUE when
 *         arab[0] and arab[1] were merged into one LAM-ALEF ligature, which
 *         means the caller must skip arab[1]; set to FALSE otherwise, so a
 *         value left over from an earlier call can never be misread.
 * is2set  In/out: on entry TRUE means the previous character does not join
 *         to this one; on return it describes this character for the next
 *         call.
 *
 * Returns the code to display.  Codes that are not in the letter table are
 * returned unchanged instead of being turned into a meaningless small
 * value (the table lookup used to leave the result at 0 for them).
 */
u16 ArabicEncode(u16 *arab, u8 pos, BOOL* stepit, BOOL* is2set)
{
    u16 ucode = 0;          /* isolated presentation form of *arab */
    BOOL nowChar = FALSE;   /* TRUE: letter has only isolated/final forms */
    BOOL mapped = TRUE;     /* FALSE: *arab is not in the table below */

    if(stepit != NULL) *stepit = FALSE;

    switch(*arab)
    {
    /* letters with two forms (isolated, final): they never join forward */
    case 0x0622: ucode = 0xFE81; nowChar = TRUE; break;
    case 0x0623: ucode = 0xFE83; nowChar = TRUE; break;
    case 0x0624: ucode = 0xFE85; nowChar = TRUE; break;
    case 0x0625: ucode = 0xFE87; nowChar = TRUE; break;
    case 0x0627: ucode = 0xFE8D; nowChar = TRUE; break;
    case 0x0629: ucode = 0xFE93; nowChar = TRUE; break;
    case 0x062F: ucode = 0xFEA9; nowChar = TRUE; break;
    case 0x0630: ucode = 0xFEAB; nowChar = TRUE; break;
    case 0x0631: ucode = 0xFEAD; nowChar = TRUE; break;
    case 0x0632: ucode = 0xFEAF; nowChar = TRUE; break;
    case 0x0648: ucode = 0xFEED; nowChar = TRUE; break;
    case 0x0649: ucode = 0xFEEF; nowChar = TRUE; break;
    /* letters with four forms (isolated, final, initial, medial) */
    case 0x0626: ucode = 0xFE89; break;
    case 0x0628: ucode = 0xFE8F; break;
    case 0x062A: ucode = 0xFE95; break;
    case 0x062B: ucode = 0xFE99; break;
    case 0x062C: ucode = 0xFE9D; break;
    case 0x062D: ucode = 0xFEA1; break;
    case 0x062E: ucode = 0xFEA5; break;
    case 0x0633: ucode = 0xFEB1; break;
    case 0x0634: ucode = 0xFEB5; break;
    case 0x0635: ucode = 0xFEB9; break;
    case 0x0636: ucode = 0xFEBD; break;
    case 0x0637: ucode = 0xFEC1; break;
    case 0x0638: ucode = 0xFEC5; break;
    case 0x0639: ucode = 0xFEC9; break;
    case 0x063A: ucode = 0xFECD; break;
    case 0x0641: ucode = 0xFED1; break;
    case 0x0642: ucode = 0xFED5; break;
    case 0x0643: ucode = 0xFED9; break;
    case 0x0644: ucode = 0xFEDD; break;
    case 0x0645: ucode = 0xFEE1; break;
    case 0x0646: ucode = 0xFEE5; break;
    case 0x0647: ucode = 0xFEE9; break;
    case 0x064A: ucode = 0xFEF1; break;
    default: mapped = FALSE; break;
    }

    if(mapped == FALSE)
    {
        /*
         * Not a shapeable letter (hamza, punctuation, digits, marks, or a
         * non-Arabic code): show it as it is.
         *  - tatweel (U+0640) joins on both sides, so the next letter may
         *    still connect to it;
         *  - the vowel marks U+064B..U+0652 sit on the previous letter and
         *    do not interrupt joining, so the state is left untouched;
         *  - anything else breaks the join.
         */
        if(*arab == 0x0640)
            *is2set = FALSE;
        else if(*arab < 0x064B || *arab > 0x0652)
            *is2set = TRUE;
        return *arab;
    }

    /* LAM followed by an ALEF variant collapses into a single ligature. */
    if((*arab == 0x0644) && (pos != ARABIC_END) && (pos != ARABIC_SIN))
    {
        u16 ligature = 0;   /* isolated form of the ligature, 0 if none */

        switch(*(arab+1))
        {
        case 0x0622: ligature = 0xFEF5; break;
        case 0x0623: ligature = 0xFEF7; break;
        case 0x0625: ligature = 0xFEF9; break;
        case 0x0627: ligature = 0xFEFB; break;
        default: break;
        }
        if(ligature != 0)
        {
            *stepit = TRUE;
            /*
             * A ligature has only an isolated and a final form.  The final
             * form is needed only when a joining letter precedes it, which
             * cannot happen at the start of a word.
             */
            if((pos == ARABIC_MID) && (*is2set != TRUE))
                ligature += ARABIC_END;
            /* The ALEF inside the ligature never joins to what follows. */
            *is2set = TRUE;
            return ligature;
        }
    }

    switch(pos)
    {
    case ARABIC_1ST:
        /* Nothing precedes the letter: isolated or initial form. */
        if(nowChar == TRUE) ucode = *arab;
        else ucode += ARABIC_1ST;
        break;
    case ARABIC_MID:
        if(nowChar == TRUE) /* two-form letter */
        {
            if(*is2set == TRUE) ucode = *arab;
            else ucode += ARABIC_END;
        }
        else /* four-form letter */
        {
            if(*is2set == TRUE) ucode += ARABIC_1ST;
            else ucode += ARABIC_MID;
        }
        break;
    case ARABIC_END:
        /* Nothing follows: isolated if the previous letter does not join, final otherwise. */
        if(*is2set == TRUE) ucode = *arab;
        else ucode += ARABIC_END;
        break;
    default:
        /* ARABIC_SIN and unknown positions keep the isolated presentation form. */
        break;
    }
    *is2set = nowChar;
    return ucode;
}
/*
 * isarabicstr - report whether a string contains an Arabic character.
 *
 * Scans the first u16StrLength codes of pu16String and returns TRUE as
 * soon as one of them lies in the range 0x060C..0x064B (inclusive);
 * returns FALSE when none does, including for an empty string.
 */
BOOL isarabicstr(u16 *pu16String, u16 u16StrLength)
{
    u16 pos = 0;

    while(pos != u16StrLength)
    {
        const u16 code = pu16String[pos];

        pos++;
        /* Skip anything outside the Arabic range checked by this function. */
        if(code < 0x060C || code > 0x064B)
            continue;
        return TRUE;
    }
    return FALSE;
}
/*
 * Arabic_automata - shape an Arabic string and store it in visual order.
 *
 * target_str  Receives the result.  It must have room for u16Length + 1
 *             codes: when no ligature is formed a terminating 0 is written
 *             at target_str[u16Length]; otherwise the unused tail up to
 *             target_str[u16Length - 1] is filled with 0.  It may be the
 *             same buffer as src_str, because the result is built in a
 *             temporary buffer first.
 * src_str     Input in logical (typing) order, u16Length codes long.
 * u16Length   Number of codes in src_str.
 *
 * The input is split into three kinds of runs:
 *  - codes >= 0x060C form an "Arabic" word; each letter is shaped with
 *    ArabicEncode() and the word is written mirrored (right to left);
 *  - a single space (0x0020) is copied to its mirrored position;
 *  - any other run of codes is placed at its mirrored position but keeps
 *    its own left-to-right order.
 * Every LAM-ALEF ligature reported by ArabicEncode() shortens the result by
 * one code.  If the temporary buffer cannot be allocated, target_str is set
 * to the empty string.  NULL arguments are ignored.
 */
void Arabic_automata(u16 *target_str, u16 *src_str,u16 u16Length)
{
    u16 *temp_str = NULL;
    u16 u16Index = 0;       /* next unread position in src_str */
    u16 u16offset = 0;      /* number of ligatures formed so far */

    if(target_str == NULL || src_str == NULL) return;

    temp_str = malloc(((unsigned long)u16Length + 1UL) * sizeof *temp_str);
    if(temp_str == NULL)
    {
        target_str[0] = 0;
        return;
    }

    while(u16Index < u16Length)
    {
        u16 u16Start = u16Index;    /* first position of the current run */

        if(src_str[u16Index] >= 0x060C) /* Arabic word */
        {
            /* Check the bound first so the code past the end is never read. */
            while(u16Index < u16Length && src_str[u16Index] >= 0x060C) u16Index++;

            if(u16Index - u16Start == 1) /* stand-alone letter: copied unshaped */
            {
                temp_str[u16Length - 1 - u16Start + u16offset] = src_str[u16Start];
            }
            else /* word of two or more letters */
            {
                u16 u16Last = u16Index - 1;
                u16 u16Cur = u16Start;
                BOOL b8is2set = FALSE;  /* ignored by ArabicEncode() for the first letter */

                while(u16Cur <= u16Last)
                {
                    /* Fresh flag for every call so no state leaks between letters or words. */
                    BOOL b8ArabSpe = FALSE;
                    u8 u8Pos = ARABIC_MID;

                    if(u16Cur == u16Start) u8Pos = ARABIC_1ST;
                    else if(u16Cur == u16Last) u8Pos = ARABIC_END;

                    temp_str[u16Length - 1 - u16Cur + u16offset] =
                        ArabicEncode(src_str + u16Cur, u8Pos, &b8ArabSpe, &b8is2set);

                    if(b8ArabSpe == TRUE)
                    {
                        /*
                         * LAM and the following ALEF became one code: skip
                         * the ALEF, shift the rest of the output by one
                         * slot, and remember that ALEF does not join to the
                         * letter after it.
                         */
                        ++u16offset;
                        ++u16Cur;
                        b8is2set = TRUE;
                    }
                    ++u16Cur;
                }
            }
        }
        else if(src_str[u16Index] == 0x0020) /* single space */
        {
            temp_str[u16Length - 1 - u16Index + u16offset] = src_str[u16Index];
            u16Index++;
        }
        else /* non-Arabic run, kept left to right */
        {
            u16 u16Run = 0;

            /*
             * The run takes every code that belongs to neither of the two
             * branches above, so the loop always consumes at least one code
             * and the outer loop cannot stall on control characters or on
             * codes in 0x00FF..0x060B.
             */
            while(u16Index < u16Length && src_str[u16Index] < 0x060C && src_str[u16Index] != 0x0020) u16Index++;

            for(u16Run = 0; u16Run < u16Index - u16Start; u16Run++)
            {
                temp_str[u16Length - u16Index + u16Run + u16offset] = src_str[u16Start + u16Run];
            }
        }
    }

    if(u16offset > 0)
    {
        /* Ligatures left the first u16offset slots unused: drop them and zero the tail. */
        memcpy(target_str, temp_str + u16offset, (u16Length - u16offset) * sizeof *temp_str);
        memset(target_str + (u16Length - u16offset), 0x00, u16offset * sizeof *temp_str);
    }
    else
    {
        memcpy(target_str, temp_str, u16Length * sizeof *temp_str);
        target_str[u16Length] = 0;
    }
    free(temp_str);
}
/* Blog author's aside after the listing: a complaint about the blog's syntax highlighter. */
/*
 * '모종의 음모 > 아랍어 오토마타' 카테고리의 다른 글
 * | 아랍어 조합 방법 - unicode (5) | 2008.11.16 |
 * |---|
 */
