#include <stdlib.h> /* malloc, free */
#include <string.h> /* memcpy, memmove, memset */

typedef unsigned char u8;
typedef unsigned short u16;

/*
 * NOTE(review): BOOL / TRUE / FALSE are used below but are not defined
 * anywhere in this listing.  The fallbacks only take effect when the build
 * has not already provided them as macros; the `int` choice is an assumption
 * to be confirmed against the project's own definition.
 */
#ifndef BOOL
#define BOOL int
#endif
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

/* Position of a letter inside its word; passed as `pos` to ArabicEncode(). */
#define ARABIC_SIN 0 /* standalone letter (Arabic_automata never passes this) */
#define ARABIC_END 1 /* last letter of a word  */
#define ARABIC_1ST 2 /* first letter of a word */
#define ARABIC_MID 3 /* any letter in between  */

enum {
    ARABIC_FIRST_CP = 0x060C, /* code units >= this are handled as Arabic word material */
    ARABIC_LAST_CP  = 0x064B, /* upper bound used by isarabicstr() only */
    ARABIC_SPACE    = 0x0020,
    ARABIC_LAM      = 0x0644,

    /* Offsets from a letter's isolated presentation form to its other forms. */
    ARABIC_FORM_FINAL   = 1,
    ARABIC_FORM_INITIAL = 2,
    ARABIC_FORM_MEDIAL  = 3
};

/*
 * Look up the isolated presentation form of an Arabic letter.
 *
 * Returns the presentation-form code of `cp`, or 0 when `cp` is not in the
 * table.  *rightJoinOnly is set to TRUE for letters that have only two forms
 * (isolated/final) and to FALSE otherwise.
 */
static u16 ArabicBaseForm(u16 cp, BOOL *rightJoinOnly)
{
    u16 base;

    /* Letters with two forms: they never connect to the letter after them. */
    switch (cp) {
    case 0x0622: base = 0xFE81; break;
    case 0x0623: base = 0xFE83; break;
    case 0x0624: base = 0xFE85; break;
    case 0x0625: base = 0xFE87; break;
    case 0x0627: base = 0xFE8D; break;
    case 0x0629: base = 0xFE93; break;
    case 0x062F: base = 0xFEA9; break;
    case 0x0630: base = 0xFEAB; break;
    case 0x0631: base = 0xFEAD; break;
    case 0x0632: base = 0xFEAF; break;
    case 0x0648: base = 0xFEED; break;
    case 0x0649: base = 0xFEEF; break;
    default:     base = 0;      break;
    }
    if (base != 0) {
        *rightJoinOnly = TRUE;
        return base;
    }

    /* Letters with four forms: isolated, final, initial, medial. */
    *rightJoinOnly = FALSE;
    switch (cp) {
    case 0x0626: return 0xFE89;
    case 0x0628: return 0xFE8F;
    case 0x062A: return 0xFE95;
    case 0x062B: return 0xFE99;
    case 0x062C: return 0xFE9D;
    case 0x062D: return 0xFEA1;
    case 0x062E: return 0xFEA5;
    case 0x0633: return 0xFEB1;
    case 0x0634: return 0xFEB5;
    case 0x0635: return 0xFEB9;
    case 0x0636: return 0xFEBD;
    case 0x0637: return 0xFEC1;
    case 0x0638: return 0xFEC5;
    case 0x0639: return 0xFEC9;
    case 0x063A: return 0xFECD;
    case 0x0641: return 0xFED1;
    case 0x0642: return 0xFED5;
    case 0x0643: return 0xFED9;
    case 0x0644: return 0xFEDD;
    case 0x0645: return 0xFEE1;
    case 0x0646: return 0xFEE5;
    case 0x0647: return 0xFEE9;
    case 0x064A: return 0xFEF1;
    default:     return 0;
    }
}

/*
 * Choose the contextual form of the letter at *arab.
 *
 * arab    points at the letter.  arab[1] is read only when *arab is LAM and
 *         pos is neither ARABIC_END nor ARABIC_SIN, so it must be readable in
 *         that case.
 * pos     ARABIC_1ST / ARABIC_MID / ARABIC_END; any other value (including
 *         ARABIC_SIN) yields the isolated presentation form.
 * stepit  set to TRUE when LAM and the following ALEF variant were merged
 *         into one ligature, i.e. the caller must skip arab[1].  It is never
 *         cleared here; the caller resets it before each call.
 * is2set  in:  TRUE when the previous letter does not connect forward
 *              (read for ARABIC_MID and ARABIC_END only).
 *         out: the same property for the glyph just produced.
 *
 * Returns the code to emit.  A letter that connects to neither neighbour is
 * returned as its original code point, except with an unrecognised pos, where
 * the isolated presentation form is returned.  Code points missing from the
 * table are returned unchanged; *is2set is then cleared, as before.
 */
u16 ArabicEncode(u16 *arab, u8 pos, BOOL* stepit, BOOL* is2set)
{
    BOOL rightJoinOnly = FALSE;
    const u16 base = ArabicBaseForm(*arab, &rightJoinOnly);
    u16 ucode = base;

    if (base == 0) {
        /* Not in the table: pass the code point through instead of turning it
           into 0x0001..0x0003 by adding a form offset to zero. */
        *is2set = FALSE;
        return *arab;
    }

    if ((*arab == ARABIC_LAM) && (pos != ARABIC_END) && (pos != ARABIC_SIN)) {
        u16 ligature = 0;

        switch (arab[1]) {
        case 0x0622: ligature = 0xFEF5; break;
        case 0x0623: ligature = 0xFEF7; break;
        case 0x0625: ligature = 0xFEF9; break;
        case 0x0627: ligature = 0xFEFB; break;
        default: break;
        }

        /* Decide from this call's own lookup, not from a possibly stale
           *stepit left over from an earlier call. */
        if (ligature != 0) {
            /* The final form is used only when a previous letter connects to
               the ligature; at the start of a word nothing precedes it. */
            if (pos == ARABIC_MID && *is2set != TRUE) {
                ligature = (u16)(ligature + ARABIC_FORM_FINAL);
            }
            *stepit = TRUE;
            /* The ligature ends in ALEF, which never connects forward. */
            *is2set = TRUE;
            return ligature;
        }
    }

    switch (pos) {
    case ARABIC_1ST:
        if (rightJoinOnly == TRUE) {
            ucode = *arab;
        } else {
            ucode = (u16)(base + ARABIC_FORM_INITIAL);
        }
        break;

    case ARABIC_MID:
        if (rightJoinOnly == TRUE) { /* two-form letter */
            ucode = (*is2set == TRUE) ? *arab : (u16)(base + ARABIC_FORM_FINAL);
        } else {                     /* four-form letter */
            ucode = (*is2set == TRUE) ? (u16)(base + ARABIC_FORM_INITIAL)
                                      : (u16)(base + ARABIC_FORM_MEDIAL);
        }
        break;

    case ARABIC_END:
        /* Same rule for two-form and four-form letters. */
        ucode = (*is2set == TRUE) ? *arab : (u16)(base + ARABIC_FORM_FINAL);
        break;

    default:
        break;
    }

    *is2set = rightJoinOnly;
    return ucode;
}

/*
 * Report whether the first u16StrLength code units of pu16String contain at
 * least one value in the range 0x060C..0x064B.
 */
BOOL isarabicstr(u16 *pu16String, u16 u16StrLength)
{
    u16 index;

    for (index = 0; index < u16StrLength; index++) {
        if (pu16String[index] >= ARABIC_FIRST_CP && pu16String[index] <= ARABIC_LAST_CP) {
            return TRUE;
        }
    }
    return FALSE;
}

/*
 * Shape and reverse a UTF-16 string for display.
 *
 * The source is split into three kinds of segments:
 *   - runs of code units >= 0x060C ("words"): written in reversed order, each
 *     letter replaced by the form ArabicEncode() selects; a one-letter word is
 *     copied unchanged;
 *   - a single space (0x0020): copied;
 *   - runs of anything else: copied with their internal order preserved.
 * Segments themselves are laid out from the end of the buffer towards the
 * start, so the overall segment order is reversed.
 *
 * target_str  receives the result.  It needs room for u16Length + 1 code
 *             units; the tail freed by ligatures is zero-filled, and when no
 *             ligature was formed target_str[u16Length] is set to 0.
 * src_str     u16Length code units of input; only read.  It may be the same
 *             buffer as target_str.
 *
 * If the scratch buffer cannot be allocated, the source is copied to the
 * target unshaped and terminated.
 */
void Arabic_automata(u16 *target_str, u16 *src_str,u16 u16Length)
{
    const unsigned len = u16Length;
    unsigned absorbed = 0;      /* source letters swallowed by ligatures */
    unsigned i = 0;
    BOOL prevBlocksJoin = FALSE;
    u16 *reversed = malloc(((size_t)len + 1) * sizeof *reversed);

    if (reversed == NULL) {
        memmove(target_str, src_str, (size_t)len * sizeof *target_str);
        target_str[len] = 0;
        return;
    }

    /* Source index s lands at reversed[len - 1 - s + absorbed]. */
    while (i < len) {
        if (src_str[i] >= ARABIC_FIRST_CP) {            /* Arabic word */
            const unsigned start = i;
            unsigned end;
            unsigned p;

            /* Bounds check comes first so src_str[len] is never read. */
            while (i < len && src_str[i] >= ARABIC_FIRST_CP) {
                i++;
            }
            end = i;

            if (end - start == 1) {                     /* standalone letter */
                reversed[len - 1 - start + absorbed] = src_str[start];
                continue;
            }

            p = start;
            while (p < end) {
                /* Fresh flag per letter: a ligature in one place must not
                   leak into the handling of later letters or words. */
                BOOL ligature = FALSE;
                u8 pos;

                if (p == start) {
                    pos = ARABIC_1ST;
                } else if (p + 1 == end) {
                    pos = ARABIC_END;
                } else {
                    pos = ARABIC_MID;
                }

                reversed[len - 1 - p + absorbed] =
                    ArabicEncode(src_str + p, pos, &ligature, &prevBlocksJoin);

                if (ligature == TRUE) {
                    /* Two source letters became one glyph. */
                    absorbed++;
                    p += 2;
                } else {
                    p++;
                }
            }
        } else if (src_str[i] == ARABIC_SPACE) {
            reversed[len - 1 - i + absorbed] = src_str[i];
            i++;
        } else {                                        /* non-Arabic run */
            const unsigned start = i;
            unsigned k;

            /* Always consume at least one code unit so that control
               characters and code units in 0x00FF..0x060B cannot stall the
               scan. */
            do {
                i++;
            } while (i < len && src_str[i] < ARABIC_FIRST_CP && src_str[i] != ARABIC_SPACE);

            for (k = 0; k < i - start; k++) {
                reversed[len - i + k + absorbed] = src_str[start + k];
            }
        }
    }

    if (absorbed > 0) {
        /* The first `absorbed` slots were never written; skip them and
           zero-fill the tail of the target instead. */
        memcpy(target_str, reversed + absorbed, (len - absorbed) * sizeof *reversed);
        memset(target_str + (len - absorbed), 0x00, absorbed * sizeof *reversed);
    } else {
        memcpy(target_str, reversed, len * sizeof *reversed);
        memset(target_str + len, 0x00, sizeof *reversed);
    }

    free(reversed);
}
아놔 syntax highlighter 왜 이따구야 ㅠ.ㅠ
'모종의 음모 > 아랍어 오토마타' 카테고리의 다른 글
아랍어 조합 방법 - unicode (5) | 2008.11.16 |
---|