/*
    writer : Opera Wang (Wang Wei)
    E-Mail : wangvisual AT sohu DOT com
    License: GPL
*/

#include <dirent.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>         // for strlen, strcmp, memchr etc
#include <strings.h>
#include <sys/mman.h>
#include <limits.h>         // for PATH_MAX etc
#include <libgen.h>         // for basename
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <unistd.h>
#include "regdef.h"
#include "getini.h"
#include "libs.h"

extern char sExecDir[];
extern char IniFileName[];

// Returns true when ch, read as an unsigned byte, is greater than 160 (0xA0).
// Elsewhere in this file it is used to recognise the bytes of double-byte
// (GB / Big5) text.
bool bIsChinese(char ch)
{
    const unsigned char cByte = (unsigned char)ch;
    return cByte > 160;
}

// Case-insensitive test for the five ASCII vowels A, E, I, O and U.
inline bool bIsVowel(char inputchar)
{
    switch ( mytoupper(inputchar) )
    {
        case 'A':
        case 'E':
        case 'I':
        case 'O':
        case 'U':
            return true;
        default:
            return false;
    }
}

// Locale-independent upper-casing: maps ASCII 'a'..'z' onto 'A'..'Z' and
// returns every other byte value unchanged.
inline unsigned char mytoupper(unsigned char c)
{
    const bool bLower = ( c>='a' && c<='z' );
    return bLower ? (unsigned char)(c - ('a' - 'A')) : c;
}

// Locale-independent replacement for isalpha(): only ASCII letters count.
// Returns '\1' for 'a'..'z' and 'A'..'Z', '\0' for any other byte.
inline char myisalpha(unsigned char c)
{
    const bool bLower = ( c>='a' && c<='z' );
    const bool bUpper = ( c>='A' && c<='Z' );
    return ( bLower || bUpper ) ? '\1' : '\0';
}

// Locale-independent replacement for isdigit().
// Returns '\1' for '0'..'9', '\0' for any other byte.
inline char myisdigit(unsigned char c)
{
    const bool bDigit = ( c>='0' && c<='9' );
    return bDigit ? '\1' : '\0';
}

/*
int mystrcmp(const char * str1, const char * str2)
{   
    assert(str1!=NULL && str2!=NULL);
    const unsigned char * s1 = (const unsigned char *) str1;
    const unsigned char * s2 = (const unsigned char *) str2;
    
    int i;
    for (i=0;s1[i] && s2[i];i++)
    {
        if ( s1[i] > s2[i] )
            return 1;
        else if ( s2[i] > s1[i] )
            return -1;
    }
    if (s1[i])
        return 1;
    else if (s2[i])
        return -1;
    return 0;
}
*/
        
// Byte-wise string comparison that ignores ASCII case (via mytoupper).
// Bytes are compared as unsigned values.
// Returns  1 when s1 sorts after s2,
//         -1 when s1 sorts before s2,
//          0 when both strings are equal.
// Both pointers must be non-NULL (asserted).
int mystrcmp_ignorecase(const char * s1, const char * s2)
{
    assert(s1!=NULL && s2!=NULL);
    const unsigned char * p1 = (const unsigned char *)s1;
    const unsigned char * p2 = (const unsigned char *)s2;

    for ( ; *p1 && *p2; ++p1, ++p2 )
    {
        const unsigned char u1 = mytoupper(*p1);
        const unsigned char u2 = mytoupper(*p2);
        if ( u1 != u2 )
            return ( u1 > u2 ) ? 1 : -1;
    }

    // At least one string is exhausted: the longer one sorts last.
    if ( *p1 )
        return 1;
    if ( *p2 )
        return -1;
    return 0;
}

#if WORDS_BIGENDIAN == 0
// Reverses the byte order of the value at *input in place.
// Like the previous implementation this swaps exactly four bytes, i.e. it
// assumes unsigned int is 4 bytes wide.
// The bytes are exchanged through an unsigned char pointer, which is a
// well-defined way to access an object's representation; the earlier code
// wrote one union member and read another, which ISO C++ leaves undefined.
void vConvertEndian(unsigned int * input)
{
    unsigned char * pBytes = (unsigned char *)input;
    unsigned char cTemp;

    cTemp = pBytes[0];  pBytes[0] = pBytes[3];  pBytes[3] = cTemp;
    cTemp = pBytes[1];  pBytes[1] = pBytes[2];  pBytes[2] = cTemp;
}

// Swaps the first two bytes of the WORD at *input in place.
// Like the previous implementation this assumes WORD is 2 bytes wide.
// The bytes are exchanged through an unsigned char pointer, which is a
// well-defined way to access an object's representation; the earlier code
// wrote one union member and read another, which ISO C++ leaves undefined.
void vConvertEndian(WORD * input)
{
    unsigned char * pBytes = (unsigned char *)input;
    const unsigned char cTemp = pBytes[0];

    pBytes[0] = pBytes[1];
    pBytes[1] = cTemp;
}
#endif

// Starts with no table file attached; Init() opens it.
MapTable::MapTable()
    : file(NULL)
{
}

// Opens the mapping table <sExecDir><TabFileName> for binary reading.
//
// TabFileName: file name relative to sExecDir; NULL is rejected.
// Returns true when the file was opened, false otherwise (a message is
// printed when the path is too long or the file cannot be opened).
//
// The combined path length is checked before copying, so an over-long
// sExecDir can no longer overflow the buffer and a long name is rejected
// instead of being silently truncated.  A file left open by an earlier
// Init() is closed first.
bool MapTable::Init(const char * TabFileName)
{
    if (TabFileName==NULL)
        return false;

    char sFullTableFileName[PATH_MAX+1];
    if ( strlen(sExecDir)+strlen(TabFileName) > PATH_MAX )
    {
        printf("Convert tab file name %s is too long\n",TabFileName);
        return false;
    }
    strcpy(sFullTableFileName,sExecDir);
    strcat(sFullTableFileName,TabFileName);

    if (file)
    {
        fclose(file);
        file = NULL;
    }

    file=fopen(sFullTableFileName,"rb");
    if (file==NULL)
    {
        printf("Can't open convert tab file %s\n",sFullTableFileName);
        return false;
    }
    return true;
}

// Closes the table file if Init() opened one.
MapTable::~MapTable()
{
    if (file != NULL)
    {
        fclose(file);
        file = NULL;
    }
}

// Looks up the double-byte character (ch1,ch2) in the table file.
//
// output is first set to ch1*256+ch2, so it holds the unmapped code when the
// lookup fails before anything is read.  On success it receives the WORD
// read from the table (byte-swapped when WORDS_BIGENDIAN == 0).
//
// The table holds 157 entries per lead byte starting at lead byte 161:
// 63 for trail bytes 0x40..0x7e followed by 94 for trail bytes 0xa1..0xfe.
// NOTE(review): this looks like a Big5 code layout - confirm.
//
// Returns false when ch2 is outside both trail ranges or the seek/read fails.
// Init() must have succeeded beforehand (asserted).
bool MapTable::vMap(BYTE ch1,BYTE ch2,WORD &output)
{
    assert(file);
    output = ch1*256 + ch2;

    const bool bHighTrail = (ch2>=0xa1) && (ch2<=0xfe);
    const bool bLowTrail  = (ch2>=0x40) && (ch2<=0x7e);
    if ( !bHighTrail && !bLowTrail )
        return false;

    long offset;
    if ( bHighTrail )
        offset =  ((ch1-161)*157 + (ch2-161) + 63)*sizeof(WORD);
    else
        offset = ((ch1-161)*157 + (ch2-64))* sizeof(WORD);

    // Fetch the mapped code from the table file.
    if ( fseek(file,offset,SEEK_SET) != 0 )
        return false;
    if ( fread(&output,sizeof(WORD),1,file) == 0 )
        return false;
#if WORDS_BIGENDIAN == 0
    vConvertEndian(&output);
#endif
    return true;
}

// Converts a NUL-terminated string in place through vMap().
// Bytes below 0x81 are copied through unchanged; any other byte is taken as
// the lead byte of a two-byte character and replaced, together with the
// byte that follows it, by the high and low byte of the mapped code.
void MapTable::vMapString(BYTE * sString)
{
    BYTE * const pEnd = sString+strlen((char*)sString);
    BYTE * pCur = sString;

    while ( pCur < pEnd )
    {
        if ( *pCur < 0x81 )
        {
            pCur++;             // single-byte character
            continue;
        }

        assert( pCur<=pEnd-1 );
        WORD wMapped;
        vMap(pCur[0],pCur[1],wMapped);  // wMapped is always assigned by vMap
        pCur[0] = (BYTE)(wMapped/256);
        pCur[1] = (BYTE)wMapped;
        pCur += 2;
    }
}

// Creates an empty table: no entries and no pair array until Init() runs.
ConvertTable::ConvertTable()
{
    iListCount = 0;
    List = NULL;
}

bool ConvertTable::Init(const char * TabFileName)
{
    if (TabFileName==NULL)
        return false;
    char sFullTableFileName[PATH_MAX+1];
    strcpy(sFullTableFileName,sExecDir);
    strncat( sFullTableFileName,TabFileName,PATH_MAX-strlen(sFullTableFileName) );
    FILE * file;
    file=fopen(sFullTableFileName,"rb");
    if (file==NULL)
    {
        printf("Can't open convert tab file %s\n",sFullTableFileName);
        return false;
    }
    fread(&iListCount,sizeof(iListCount),1,file);
#if WORDS_BIGENDIAN == 0
    vConvertEndian(&iListCount);
#endif
    List = new WORD[iListCount*2];
    if (List==NULL) return false;    
    fread(List,sizeof(WORD),iListCount*2,file);
#if WORDS_BIGENDIAN == 0
    for (int i=iListCount*2-1;i;i--)
    vConvertEndian(&List[i]);
#endif
    fclose(file);
    return true;
}

// Releases the pair array allocated by Init(), if any.
ConvertTable::~ConvertTable()
{
    delete [] List;     // deleting a null pointer is a no-op
    List = NULL;
}

// Converts the double-byte code (cHigh,cLow) through the loaded table.
//
// List is searched as an array of (key, value) WORD pairs using a binary
// search on the keys, so the keys must be in ascending order.
//
// Returns 1 and stores the paired value in output when the key is found;
// otherwise returns 0 and stores the two-byte code "??" in output.
int ConvertTable::iConvert(BYTE cHigh,BYTE cLow,WORD &output)
{
    const WORD wKey = cHigh*256 + cLow;
    int iLow = 0;
    int iHigh = iListCount-1;

    while ( iLow <= iHigh )
    {
        const int iMid = (iLow+iHigh)>>1;
        const WORD wCandidate = List[iMid*2];
        if ( wCandidate == wKey )
        {
            output = List[iMid*2+1];
            return 1;
        }
        if ( wCandidate < wKey )
            iLow = iMid+1;
        else
            iHigh = iMid-1;
    }

    output = (WORD) ('?'*256 + '?');
    return 0;
}

// Wraps an existing C string.  Only the pointer is stored; the text is not
// copied here.
MyString::MyString(char *str)
    : sContent(str)
{
}

// Creates a MyString that refers to no text (sContent is NULL).
MyString::MyString()
    : sContent(NULL)
{
}

// Case-insensitive equality against a C string.
// Returns 1 when equal, 0 otherwise.
int MyString::operator==(const char *sString)
{
    const int iOrder = mystrcmp_ignorecase(sContent,sString);
    return iOrder == 0;
}

// Case-insensitive "sorts before" test against a C string.
// Returns 1 when this string sorts before sString, 0 otherwise.
int MyString::operator<(const char *sString)
{
    const int iOrder = mystrcmp_ignorecase(sContent,sString);
    return iOrder < 0;
}

// Case-insensitive equality of two MyString values.
// Returns 1 when equal, 0 otherwise.
int operator==(const MyString &oLItem,const MyString &oRItem)
{
    const int iOrder = mystrcmp_ignorecase(oLItem.sContent,oRItem.sContent);
    return iOrder == 0;
}

// Case-insensitive ordering of two MyString values.
// Returns 1 when oLItem sorts before oRItem, 0 otherwise.
int operator<(const MyString &oLItem,const MyString &oRItem)
{
    const int iOrder = mystrcmp_ignorecase(oLItem.sContent,oRItem.sContent);
    return iOrder < 0;
}

// Builds a dictionary entry from its three text fields.  Each pointer is
// handed to a MyString, which stores the pointer without copying the text.
// sNewMark may be NULL (bGetVersion1Lib passes NULL when the library has no
// mark field).
WordItem::WordItem(char *sNewWord,char *sNewMeaning,char *sNewMark)
    : msWord(sNewWord),
      msMeaning(sNewMeaning),
      msMark(sNewMark)
{
}

// Compares this entry's word with sString through MyString's comparison.
// Returns non-zero when they are equal.
int WordItem::operator==(const char* sString)
{
    const int iSame = ( msWord==sString );
    return iSame;
}

// Orders this entry's word against sString through MyString's comparison.
// Returns non-zero when the word sorts before sString.
int WordItem::operator<(const char* sString)
{
    const int iBefore = ( msWord<sString );
    return iBefore;
}

// Returns the entry's word text as exposed by MyString::data().
const char *WordItem::sGetWord() const
{
    const char * const sText = msWord.data();
    return sText;
}

// Returns the entry's meaning text as exposed by MyString::data().
const char *WordItem::sGetMeaning() const
{
    const char * const sText = msMeaning.data();
    return sText;
}

// Returns the entry's mark text as exposed by MyString::data().
const char *WordItem::sGetMark() const
{
    const char * const sText = msMark.data();
    return sText;
}

// Two entries are equal when their words compare equal.
int operator==(const WordItem &oLItem,const WordItem &oRItem)
{
    const MyString & oLeft  = oLItem.msWord;
    const MyString & oRight = oRItem.msWord;
    return ( oLeft==oRight );
}

// Entries are ordered by their words.
int operator<(const WordItem &oLItem,const WordItem &oRItem)
{
    const MyString & oLeft  = oLItem.msWord;
    const MyString & oRight = oRItem.msWord;
    return ( oLeft<oRight );
}

// Creates an empty library: no entries, no mapped file, no name, and all
// four type bytes cleared.
Lib::Lib()
{
    aWordLib = NULL;
    pFileMem = NULL;
    sDictName = NULL;
    iLength = 0;
    cIndex = 0;
    cWord = 0;
    cMeaning = 0;
    cMark = 0;
}

/********************************************************************/
/*
        read words from dictionary file.
*/

// Loads a dictionary, choosing the loader from the file name:
//   name ends in ".dic"     -> bGetVersion1Lib()
//   name contains ".idx"    -> bGetVersion2Lib()
// Returns the loader's result, or false for NULL or any other name.
//
// The version-1 test is a suffix match.  The earlier substring test also
// matched names such as "foo.dict" and "foo.dict.dz" (the data files that
// bGetVersion2Lib looks for), which were then parsed as version-1 files.
bool Lib::bGetLib(const char *sDictionaryFile)
{
    if ( sDictionaryFile == NULL )
        return false;

    static const char sV1Suffix[] = ".dic";
    const size_t iSuffixLen = sizeof(sV1Suffix)-1;
    const size_t iNameLen = strlen(sDictionaryFile);

    if ( iNameLen >= iSuffixLen
         && strcmp(sDictionaryFile+iNameLen-iSuffixLen,sV1Suffix) == 0 )
        return bGetVersion1Lib(sDictionaryFile);

    if ( strstr(sDictionaryFile,".idx") )
        return bGetVersion2Lib(sDictionaryFile);

    return false;
}
 
bool Lib::bGetVersion1Lib(const char *sDictionaryFile)
{
    int fd=open(sDictionaryFile,O_RDONLY);
    if(fd==-1)
    {
        printf("can't open %s !\n",sDictionaryFile);
        return(false);
    }

    // get length of dicfile.
    struct stat stStat;
    if(fstat(fd,&stStat)!=0)
    {
        printf("can't get dictionary stat!\n");
        close(fd);
        return(false);
    }
    iFileSize=stStat.st_size;
    
    // get item count
    lseek(fd,0-sizeof(int)*2,SEEK_END);
    unsigned int iCapacity,iStyle;
    read(fd,&iCapacity,sizeof(int));
    read(fd,&iStyle,sizeof(int));
#if WORDS_BIGENDIAN == 0
    vConvertEndian(&iCapacity);
    vConvertEndian(&iStyle);
#endif
    if ( (int)iCapacity < 0 )
    {
        printf("%s file format error!\n",sDictionaryFile);
        close(fd);
        return(false);
    }
    cIndex=(unsigned char)(iStyle>>24);
    cWord=(unsigned char)(iStyle>>16);
    cMeaning=(unsigned char)(iStyle>>8);
    cMark=(unsigned char)iStyle;
    aWordLib=new (WordItem*[iCapacity]);
    
    printf("Loading %s ( %d words ) ...",basename((char*)sDictionaryFile),iCapacity);
    fflush(stdout);
    
    // mmap the file to memory
    pFileMem=(char *)mmap( NULL,iFileSize-sizeof(int)*2,
                            PROT_READ,MAP_SHARED|MAP_NORESERVE,fd,0 );
    if(pFileMem==MAP_FAILED)
    {
        printf("mmap error!\n");
        close(fd);
        return(false);
    }

    // begin to read items.
    char * p=pFileMem;
    char * pMeaning, *pMark;
    while(p<pFileMem+iFileSize-sizeof(int)*2 && iLength<iCapacity)
    {
        pMeaning=p+strlen(p)+1;
        if ( !cMark )   // no Mark field, eg py2gb
            pMark = NULL;
        else
            pMark = pMeaning+strlen(pMeaning)+1;
        WordItem *pWordItem=new WordItem(p,pMeaning,pMark);
        aWordLib[iLength++]=pWordItem;
        if ( !cMark )
            p = pMeaning+strlen(pMeaning)+1;
        else
            p = pMark+strlen(pMark)+1;
    }

    close(fd);

    char * sBaseName = basename((char*)sDictionaryFile);
    char temp[PATH_MAX+1];
    sDictName = new char[strlen(sBaseName)+15];
    strcpy(temp,sBaseName);
    char * test;
    if ( ( test = strrchr(temp,'.') ) )
        *test = '\0';
#ifdef NO_SNPRINTF
    sprintf(sDictName,"<---- %s ---->\n",temp);
#else
    snprintf(sDictName,strlen(sBaseName)+15,"<---- %s ---->\n",temp);
#endif

    m_iPriority = GetIntFromIni(IniFileName,basename((char*)sDictionaryFile),NULL,1);

    printf(" OK\n");
    iVersion = 1;
    return(true);
}

// Entry point for version-2 (".idx") dictionaries.
//
// The active code only derives the data file name (the index name with its
// last ".idx..." tail replaced by ".dict.dz", falling back to ".dict"),
// checks that the data file exists, reads the priority from the ini file
// and marks the library as version 2.
// NOTE(review): no entries are loaded yet - the actual index/data parsing is
// the commented-out draft below, so a Lib that "succeeds" here stays empty.
//
// Returns false when sDictionaryFile is NULL, contains no ".idx", yields a
// data file name that does not fit the buffer, or no data file is found.
//
// Fixes over the previous version:
//  - the data file name is built with an explicit length check (strncpy
//    could leave the buffer unterminated and the ".dict.dz" replacement
//    could write past its end);
//  - a name without ".idx" is rejected at run time instead of only being
//    asserted;
//  - the last ".idx" in the path is used, so a directory whose name
//    contains ".idx" no longer confuses the replacement.
bool Lib::bGetVersion2Lib(const char *sDictionaryFile)
{
    static const char sDataSuffix[] = ".dict.dz";
    char sDataFileName[PATH_MAX];

    if ( sDictionaryFile == NULL )
        return(false);

    // find the last ".idx" in the path
    const char * pIdx = NULL;
    for ( const char * q = strstr(sDictionaryFile,".idx"); q; q = strstr(q+1,".idx") )
        pIdx = q;
    if ( pIdx == NULL )
    {
        printf("%s file format error!\n",sDictionaryFile);
        return(false);
    }

    const size_t iStemLen = (size_t)(pIdx-sDictionaryFile);
    if ( iStemLen+sizeof(sDataSuffix) > sizeof(sDataFileName) )  // sizeof counts the NUL
    {
        printf("Data file name for %s is too long !\n",sDictionaryFile);
        return(false);
    }
    strncpy( sDataFileName, sDictionaryFile, iStemLen );
    strcpy( sDataFileName+iStemLen, sDataSuffix );

    struct stat stStat;
    if ( stat(sDataFileName,&stStat) !=0 )
    {
        sDataFileName[strlen( sDataFileName ) - 3] = '\0';    // cut ".dz"
        if ( stat(sDataFileName,&stStat) !=0 )
        {
            printf("Can't get data file stat, check file %s !\n",sDataFileName);
            return(false);
        }
    }

/*    int idxfile = open(sDictionaryFile,O_RDONLY);
    if( idxfile == -1 )
    {
        printf("Can't open index file %s !\n",sDictionaryFile);
        return(false);
    }
    char * buffer = NULL;
    if ( !strstr(sDictionaryFile,".idx.gz") )
    {
        lseek(idxfile,0L,SEEK_END);
        long idxsize = tell(idxfile);
        buffer = new char [idxsize+1];  // include end NULL
        if ( read(idxfile,buffer,idxsize) != idxsize )
        {
            printf("Warning: read index file error.\n");
            delete [] buffer, buffer = NULL;
            close(idxfile);
            return false;
        }
        buffer[idxsize+1] = '\0';
        close(idxfile);
    } else  // compressed file
    {
        // get uncompressed size to avoid realloc, I'm sure the index file less than 2G :-)
        lseek(idxfile,4L,SEEK_END);
        int len;
        read(idxfile,&len,sizeof(len));
        printf("Len1:%d",len);
        len = ntohl(len);
        printf("Len2:%d \n",len);
        close(idxfile);
        
        buffer = new char * [len+1];    // include end NULL
        gzFile in;  
        in = gzopen(sDictionaryFile,"rb");
        if (in == NULL)
        {
            printf("Can't open index file %s !\n",sDictionaryFile);
            delete [] buffer, buffer = NULL;
            gzclose(in);
            return false;
        }
        if ( gzread(in,buffer,sizeof(len)) != len )
        {
            printf("gzread error.\n");
            delete [] buffer, buffer = NULL;
            gzclose(in);
            return false;
        }
        buffer[len] = '\0';
        if (gzclose(in) != Z_OK)
        {
            printf("gzclose failed!\n");
            delete [] buffer, buffer = NULL;
            return false;
        }
    }
    
    const char * const StarDictID = "StarDict's idx file\nversion=2.1.0\n";
    const char * const BookName = "\nbookname=";
    const char * const SameTypeSequence = "\nsametypesequence=";
    const char * const Begin = "\nBEGIN:\n";
    
    if ( strstr( buffer, StarDictID ) != buffer )
    {
        printf( "Bad idx file, skipped!\n" );
        delete [] buffer, buffer = NULL;
        return false;
    }
    
    char *p1 = buffer + strlen( StarDictID ) - 1 - 1;
    char *p2, *p3;
    p2 = strstr( p1, BookName );
    if ( !p2 )
    {
        printf( "Can't get bookname, skipped\n");
        delete [] buffer, buffer = NULL;
        return false;
    }
    p3 = strchr( p2 + strlen( BookName ) - 1, '\n' );
    *p3 = '\0';
    sDictName = new char * [p3-p2-strlen(BookName)+15];
#ifdef NO_SNPRINTF
    sprintf(sDictName,"<---- %s ---->\n",p2+strlen(BookName)-1);
#else
    snprintf(sDictname,p3-p2-strlen(BookName)+15,"<---- %s ---->\n",p2+strlen(BookName)-1);
#endif
    *p3 = '\n';

    char sametypesequen[256];
    p2 = strstr( p1, SameTypeSequence );
    if ( p2 )
    {
        p3 = strchr( p2 + strlen( SameTypeSequence ) - 1, '\n' );
        *p3 = '\0';
        strcpy(sametypesequence,p2+strlen(SameTypeSequence)-1);
        *p3 = '\n';
    }

    p2 = strstr( p1, Begin );
    if ( !p2 )
    {
        printf("Can't get book data, skipped\n");
        delete [] buffer, buffer = NULL;
        return false;
    }
    p1 = p2 + strlen( Begin ) - 1;   // begin wordcount.

    long tmpglong;
    unsigned int iCapacity;
    memcpy( &tmpglong, p1, sizeof( long ) );
    iCapacity = ntohl( tmpglong );
    printf( "bookname: %s wordcount %ld\n", sDictName, iCapacity );
    aWordLib=new WordItem* [iCapacity];
    p1 += sizeof( long );          // begin word data.
    
    // open data file
    int datafile = open(sDataFileName,O_RDONLY);
    if( datafile == -1 )
    {
        printf("Can't open data file %s !\n",sDataFileName);
        delete [] buffer, buffer = NULL;
        return(false);
    }
    if ( !strstr(sDataFileName,".dict.dz") )
    {
        lseek(datafile,0L,SEEK_END);
        long datafilesize = tell(datafile);
        
        buffer = new char *[idxsize+1];  // include end NULL
        if ( read(idxfile,buffer,idxsize) != idxsize )
        {
            printf("Warning: read index file error.\n");
            delete [] buffer, buffer = NULL;
            close(idxfile);
            return false;
        }
        buffer[idxsize+1] = '\0';
        close(idxfile);
    } else  // compressed file
    {
        // get uncompressed size to guess need size, I'm sure the data file also less than 2G :-)
        lseed(datafilesize,4L,SEEK_END);
        int len;
        read(datafilesize,&len,sizeof(len));
        printf("Len3:%d",len);
        len = ntohl(len);
        printf("Len4:%d \n",len);
        close(datafile), datafile = -1;
        
        buffer = new char * [len+1];    // include end NULL
        gzFile in;  
        in = gzopen(sDataFileName,"rb");
    }

    // foreach item
    //      set word
    //      get offset and size
    //      read in data file
    //      analyze meaning, get yinbiao or pinyin
    //      union code ?
    //      set meaning, yinbiao or pinyin
    
    pFileMem = (char *) malloc(iCapacity*sizeof);
    idxbuffer = ( gchar * ) g_malloc( wordcount * sizeof( idxItem ) );
    int len;
    for ( int i = 0; i < wordcount; i++ )
    {
        len = strlen( p1 ) + 1;
        ( ( idxItem * ) ( ( idxbuffer + i * ( sizeof( idxItem ) ) ) ) )->word =
            ( gchar * ) g_memdup( p1, len );
        p1 += len;
        memcpy( &( tmpglong ), p1, sizeof( glong ) );
        ( ( idxItem * ) ( ( idxbuffer + i * ( sizeof( idxItem ) ) ) ) )->offset = g_ntohl( tmpglong );
        p1 += sizeof( glong );
        memcpy( &( tmpglong ), p1, sizeof( glong ) );
        ( ( idxItem * ) ( ( idxbuffer + i * ( sizeof( idxItem ) ) ) ) )->size = g_ntohl( tmpglong );
        p1 += sizeof( glong );
    }
    
    delete [] buffer, buffer = NULL;

    read(fd,&iStyle,sizeof(int));
#if WORDS_BIGENDIAN == 0
    vConvertEndian(&iCapacity);
    vConvertEndian(&iStyle);
#endif
    if ( (int)iCapacity < 0 )
    {
        printf("%s file format error!\n",sDictionaryFile);
        close(fd);
        return(false);
    }
    cIndex=(unsigned char)(iStyle>>24);
    cWord=(unsigned char)(iStyle>>16);
    cMeaning=(unsigned char)(iStyle>>8);
    cMark=(unsigned char)iStyle;
    aWordLib=new (WordItem*[iCapacity]);
    
    printf("Loading %s ( %d words ) ...",basename((char*)sDictionaryFile),iCapacity);
    
    // mmap the file to memory
    pFileMem=(char *)mmap( NULL,iFileSize-sizeof(int)*2,
                            PROT_READ,MAP_SHARED|MAP_NORESERVE,fd,0 );
    if(pFileMem==MAP_FAILED)
    {
        printf("mmap error!\n");
        close(fd);
        return(false);
    }

    // begin to read items.
    char * p=pFileMem;
    char * pMeaning, *pMark;
    while(p<pFileMem+iFileSize-sizeof(int)*2 && iLength<iCapacity)
    {
        pMeaning=p+strlen(p)+1;
        if ( !cMark )   // no Mark field, eg py2gb
            pMark = NULL;
        else
            pMark = pMeaning+strlen(pMeaning)+1;
        WordItem *pWordItem=new WordItem(p,pMeaning,pMark);
        aWordLib[iLength++]=pWordItem;
        if ( !cMark )
            p = pMeaning+strlen(pMeaning)+1;
        else
            p = pMark+strlen(pMark)+1;
    }

    close(fd);

   */
    m_iPriority = GetIntFromIni(IniFileName,basename((char*)sDictionaryFile),NULL,1);

    printf(" OK\n");
    iVersion = 2;
    return(true);
}

// Returns the display name stored in sDictName (NULL when none was set).
const char * Lib::sGetDictName(void)
{
    const char * const sName = sDictName;
    return sName;
}

int Lib::iGetPriority()
{
    return m_iPriority;
}

/********************************************************************/
// Binary-searches the library for sWord (comparison is WordItem's, i.e.
// case-insensitive).
//
// Returns true and stores the matching index in *pIndex when the word is
// found.  For STRING_TYPE_PY the index is moved back to the first of any run
// of equal entries, because that kind of index is not unique.
//
// Returns false when the word is absent; *pIndex then names a neighbouring
// position from which similar words can be listed:
//   iSimularDirection > 0  -> the next position (may equal length()),
//   iSimularDirection < 0  -> the previous position (may be -1),
//   iSimularDirection == 0 -> the last position probed (asserted against).
// An empty library yields *pIndex == 0 and false.
bool Lib::bLookup(const char* sWord,int *pIndex,int iSimularDirection,int iStringType)
{
    assert(length()!=0);
    if ( length() == 0 )
    {
        *pIndex = 0;
        return false;
    }

    int iMid = 0;
    int iLow = 0;
    int iHigh = length()-1;
    bool bHit = false;
    while ( iLow <= iHigh )
    {
        iMid = (iLow+iHigh)>>1;
        if ( *aWordLib[iMid]==sWord )
        {
            bHit = true;
            break;
        }
        if ( *aWordLib[iMid]<sWord )
            iLow = iMid+1;
        else
            iHigh = iMid-1;
    }

    // callers are expected to ask for a direction
    assert (iSimularDirection!=0);
    if ( bHit )
    {
        if ( iStringType ==  STRING_TYPE_PY )
        {
            // step back to the first entry with this key
            while ( iMid-1>=0 && *aWordLib[iMid-1]==sWord )
                iMid--;
        }
        *pIndex = iMid;
    }
    else if ( iSimularDirection==0 )
        *pIndex = iMid;
    else if ( iSimularDirection>0 )
        *pIndex = iLow;     // next
    else
        *pIndex = iHigh;    // previous

    return bHit;
}

// Collects the indexes of all entries whose word is matched in full by rule.
//
// aIndex receives the matching indexes in ascending order followed by a -1
// terminator; at most iBuffLen-1 indexes are stored.
// Returns true when at least one entry matched.  An empty library yields
// aIndex[0] == -1 and false.
bool Lib::bLookupWithRule(const MSARegExp &rule,int *aIndex,int iBuffLen)
{
    assert(length());
    if ( length() == 0 )
    {
        aIndex[0] = -1;
        return false;
    }

    int iHits = 0;
    for ( int iWord = 0; iWord<length() && iHits<iBuffLen-1; ++iWord )
    {
        if ( !rule.bIsEntireMatch(aWordLib[iWord]->sGetWord()) )
            continue;
        aIndex[iHits] = iWord;
        ++iHits;
    }
    aIndex[iHits] = -1;     // -1 marks the end of the list

    return iHits > 0;
}

// Reverse lookup: collects the indexes of entries whose meaning contains
// sWord.
//
// aiIndexes receives the matching indexes in ascending order followed by a
// -1 terminator; at most iLen-1 indexes are stored.
// Returns true when at least one entry matched.  An empty library yields
// aiIndexes[0] == -1 and false.
//
// A plain strstr() hit is refined according to the library's meaning type:
//  - LIB_MEANING_ENGLISH: the hit must be a whole word, i.e. not preceded
//    or followed by an ASCII letter;
//  - LIB_MEANING_GB / LIB_MEANING_BIG5: the hit must start on a character
//    boundary, not on the second byte of a double-byte character.
// For any other meaning type the strstr() hit is accepted as is.
bool Lib::bLookupWithMeaning(const char * sWord,int * aiIndexes, int iLen)
{
    assert(length());
    if (!length())
    {
        aiIndexes[0] = -1;
        return false;
    }
    int iIndexCount=0;
    int i;
    const char * sMatchMeaning;
    char * sSearch, * sMeaningEnd;
    char * sBegin, * sEnd;
    bool bCannotAddToList;
    for(i=0;i<length() && iIndexCount<iLen-1;i++)
    {
        sMatchMeaning = GetWordItem(i).sGetMeaning();
        // cheap pre-filter: skip entries that do not contain sWord at all
        if ( strstr(sMatchMeaning,sWord) )
        {
            bCannotAddToList = false;
            // skip error findings, maybe got "glove" in searching "love".
            if ( cMeaning == LIB_MEANING_ENGLISH )
//                 || (cMeaning==LIB_MEANING_CHINESE_AND_ENGLISH && !bIsChinese(*sWord)) )
            {
                sSearch = (char *)sMatchMeaning;
                sMeaningEnd = (char *)(sMatchMeaning + strlen(sMatchMeaning));
                // try every occurrence until one is a whole word
                while ( sSearch < sMeaningEnd )
                {
                    sBegin = strstr(sSearch,sWord);
                    if ( !sBegin )
                    {
                        bCannotAddToList = true;
                        break;   // try next word
                    }
                    if ( sBegin == sMatchMeaning
                         || ( sBegin>sMatchMeaning && !myisalpha(*(sBegin-1)) )
                       )    // Begin is OK
                    {
                        sEnd = sBegin + strlen(sWord);
                        if ( *sEnd == '\0'
                             || ( *sEnd != '\0' && !myisalpha(*sEnd) )
                           )    // End is OK
                            break;  // OK
                    }
                    sSearch = sBegin +1;
                }
                // the search ran off the end without an acceptable hit
                if ( sSearch >= sMeaningEnd )
                    bCannotAddToList = true;
            }   // end of STRING_TYPE_ASCII
            else if ( cMeaning == LIB_MEANING_GB || cMeaning == LIB_MEANING_BIG5 )
//                      || (cMeaning==LIB_MEANING_CHINESE_AND_ENGLISH && bIsChinese(*sWord)) )
            {
                // get the start position of each character
                // cStartBitList[k] is '\1' when byte k of the meaning starts a
                // character and '\0' when it is the second byte of one.
                char * cStartBitList = new char [strlen(sMatchMeaning)];
                assert(cStartBitList);
                if ( !cStartBitList ) exit(-1);
                sSearch = (char *)sMatchMeaning;
                sBegin = cStartBitList;
                while( *sSearch )
                {
                    if ( bIsChinese(*sSearch) )
                    {
                        if ( bIsChinese(*(sSearch+1)) )
                        {
                            *sBegin++ = '\1';
                            *sBegin++ = '\0';
                            sSearch += 2;
                        }
                        else    // half chinese character
                        {
                            assert(0);
                            *sBegin++ = '\0';
                            sSearch++;
                        }
                    }
                    else
                    {
                        *sBegin++ = '\1';
                        sSearch++;
                    }
                }
                
                sSearch = (char *)sMatchMeaning;
                sMeaningEnd = (char *)(sMatchMeaning + strlen(sMatchMeaning));
                // try every occurrence until one starts on a character boundary
                while ( sSearch < sMeaningEnd )
                {
                    sBegin = strstr(sSearch,sWord);
                    if ( !sBegin )
                    {
                        bCannotAddToList = true;
                        break;   // try next word
                    }
                    if ( *(cStartBitList+(sBegin-sMatchMeaning)) )    // start position
                        break;  // OK
                    sSearch = sBegin +1;
                }
                // the search ran off the end without an acceptable hit
                if ( sSearch >= sMeaningEnd )
                    bCannotAddToList = true;
                delete [] cStartBitList;
            }   // end of STRING_TYPE_CHINESE
            if ( bCannotAddToList )
                continue;   // try next word, skip current one.
            aiIndexes[iIndexCount++]=i;
        }
    }
    aiIndexes[iIndexCount]= -1; // -1 is the end.
    return(iIndexCount>0);
}

// Returns the entry stored at index i.  The index is not range-checked.
WordItem &Lib::GetWordItem(int i)
{
    WordItem * const pItem = aWordLib[i];
    return *pItem;
}

// Releases everything the loader acquired: the WordItem objects, the pointer
// array, the file mapping and the display name.
//
// Fixes over the previous version:
//  - the individual WordItem objects created by bGetVersion1Lib() are
//    deleted; only the pointer array used to be freed, leaking one object
//    per dictionary entry;
//  - munmap() is skipped when pFileMem holds MAP_FAILED, which a failed
//    load could leave behind.
// NOTE(review): this assumes destroying a WordItem does not free the text it
// points at (the text lives in the mapping) - confirm against the class
// declarations.
Lib::~Lib()
{
    if ( aWordLib )
    {
        // entries are destroyed before the mapping their text lives in
        for ( int i = 0; i < (int)iLength; i++ )
            delete aWordLib[i];
        delete [] aWordLib;
        aWordLib=NULL;
    }
    iLength = 0;
    if ( pFileMem != NULL && pFileMem != (char *)MAP_FAILED )
        munmap(pFileMem,iFileSize-sizeof(int)*2);
    pFileMem=NULL;
    if ( sDictName )
    {
        delete [] sDictName;
        sDictName = NULL;
    }
}

//===================================================================
// Starts with an empty set of libraries.
Libs::Libs()
{
    m_iTotalLibs = 0;
    oLib = NULL;
}

// Destroys every loaded library, last to first, then frees the pointer
// array (which is grown with realloc, hence free()).
Libs::~Libs()
{
    if ( oLib )
    {
        for ( ; m_iTotalLibs; --m_iTotalLibs )
            delete oLib[m_iTotalLibs-1];
    }
    free(oLib);
    oLib = NULL;
}

const char * Libs::sGetLibName(int iLib)
{
    return ( oLib[iLib]->sGetDictName() );
}

int Libs::iLength(int iLib)
{
    return(oLib[iLib]->length());
}

// Returns how many libraries are currently loaded.
int Libs::iGetTotalLibs()
{
    const int iTotal = m_iTotalLibs;
    return iTotal;
}

// Pattern lookup in library iLib.
//
// sWord is turned into a regular expression by bMakeRule(); the indexes of
// all matching entries are written to aiIndexes (see Lib::bLookupWithRule
// for the list format, iLen is the capacity of aiIndexes).
//
// Returns true when at least one entry matched.  Otherwise returns false and
// copies a message into sErrorMessage: RULE_ERR_S when the pattern could not
// be built or compiled, NOTFOUND_S when nothing matched.
bool Libs::bGetWordsWithRule(const char* sWord,int* aiIndexes,int iLen,int iLib,char * sErrorMessage)
{
    char sRule[MAX_STR_LEN+1];

    // change rule to regular exp.
    if ( bMakeRule(sWord,sRule,MAX_STR_LEN+1) )
    {
        MSARegExp rule(sRule);
        if ( rule.iStatus()==MSARegExp::OK )
        {
            if ( oLib[iLib]->bLookupWithRule(rule,aiIndexes,iLen) )
                return(true);

            strcpy(sErrorMessage,NOTFOUND_S);
            return(false);
        }
    }

    // either bMakeRule() or the regular expression rejected the pattern
    strcpy(sErrorMessage,RULE_ERR_S);
    return(false);
}

bool Libs::bGetWordsWithMeaning(const char * sWord,int * aiIndexes, int iLen, int iLib)
{
    return oLib[iLib]->bLookupWithMeaning(sWord,aiIndexes,iLen);
}

void Libs::vLoadDir(const char * path)
{
    assert( path && strlen(path) && path[strlen(path)-1]=='/' );
    
    DIR * dirp;
    dirent * dp;
    char sFullPath[PATH_MAX+1];
    dirp = opendir(path);
    if ( dirp )
    {
        while ( ( dp = readdir( dirp ) ) != NULL )
        {
            struct stat FileStat;
#ifdef NO_SNPRINTF
            sprintf(sFullPath,"%s%s",path,dp->d_name);
#else
            snprintf(sFullPath,sizeof(sFullPath),"%s%s",path,dp->d_name);
#endif
            if ( stat(sFullPath,&FileStat)==0 && FileStat.st_mode==S_IFDIR )
                vLoadDir(sFullPath);
            else if ( strstr( dp->d_name, ".dic" ) || strstr( dp->d_name, ".idx" ) )
            {
                if ( GetIntFromIni(IniFileName,basename((char*)sFullPath),NULL,1) == 0 )
                {
#ifndef NDEBUG
                    fprintf(stderr,"Skip dict library: %s\n",sFullPath);
#endif
                } else
                {
                    Lib * newLib = new Lib;
                    if ( newLib->bGetLib(sFullPath) )
                    {
                        m_iTotalLibs++;
                        oLib = (Lib **)realloc((void*)oLib,m_iTotalLibs*sizeof(Lib *));
                        oLib[m_iTotalLibs-1] = newLib;
                    } else
                    {
                        delete newLib;
                        newLib = NULL;
                    }
                }
            }
        }
        closedir( dirp );
    } else
    {
#ifndef NDEBUG
        printf("Can't open dir %s !\n",path);
#endif
    }
}

// Loads all dictionaries and returns how many were loaded.
//
// Two directories are scanned with vLoadDir():
//   <sExecDir>dic/           the system dictionaries, and
//   <home>/.stardict/dict/   the user's dictionaries, where <home> comes
//                            from the password database or, failing that,
//                            from $HOME.
// Afterwards the libraries are sorted by ascending priority.
//
// Fixes over the previous version:
//  - pUserHome is initialised, so a failed getpwuid() no longer reads an
//    indeterminate pointer;
//  - the trailing-slash test looks at the last character of the home path
//    (it used to look at the terminating NUL);
//  - the user path is built with a length check rather than an unbounded
//    strcpy() followed by snprintf() from a buffer into itself;
//  - when no home directory is known the user directory is skipped; the
//    sub-path used to be appended to the system directory instead.
int Libs::iGetLibs()
{
    assert( sExecDir && strlen(sExecDir) && sExecDir[strlen(sExecDir)-1]=='/' );
    char sDictDir[PATH_MAX+1];
#ifdef NO_SNPRINTF
    sprintf(sDictDir,"%s%s",sExecDir,"dic/");
#else
    snprintf(sDictDir,sizeof(sDictDir),"%s%s",sExecDir,"dic/");
#endif
    vLoadDir(sDictDir);

    const char * pUserHome = NULL;
    struct passwd *pw;
    setpwent();
    pw = getpwuid(getuid());
    endpwent();

    if (pw)
        pUserHome=pw->pw_dir;
    if (!pUserHome || !*pUserHome)
        pUserHome=getenv("HOME");

    static const char sUserDictSubDir[] = ".stardict/dict/";
    if ( pUserHome!=NULL && *pUserHome )
    {
        const size_t iHomeLen = strlen(pUserHome);
        // home + optional '/' + sub-path (sizeof counts the NUL)
        if ( iHomeLen+1+sizeof(sUserDictSubDir) <= sizeof(sDictDir) )
        {
            strcpy(sDictDir,pUserHome);
            if ( sDictDir[iHomeLen-1]!='/' )
                strcat(sDictDir,"/");
            strcat(sDictDir,sUserDictSubDir);
            vLoadDir(sDictDir);
        }
    }

    // sort with priority
    if ( oLib && m_iTotalLibs > 1 )
        qsort((void *)oLib,m_iTotalLibs,sizeof(Lib *),compare);

    return iGetTotalLibs();
}

// qsort() comparator for the oLib array: orders libraries by ascending
// priority.  p1 and p2 point at Lib* elements.
int Libs::compare(const void * p1, const void * p2)
{
    const int iLeft  = (*(Lib **)p1)->iGetPriority();
    const int iRight = (*(Lib **)p2)->iGetPriority();
    // yields 1, -1 or 0
    return ( iLeft > iRight ) - ( iLeft < iRight );
}

// Returns a pointer to entry iIndex of library iLib.  Neither index is
// range-checked.
const WordItem* Libs::poGetWordItem(int iIndex,int iLib)
{
    WordItem & oItem = oLib[iLib]->GetWordItem(iIndex);
    return &oItem;
}

// Returns one of the four type bytes of library iLib, selected by cIndex:
//   LIB_TYPE_RESERVED -> cIndex,  LIB_TYPE_WORD -> cWord,
//   LIB_TYPE_MEANING  -> cMeaning, LIB_TYPE_MARK -> cMark.
// Any other selector yields 0 without touching the library.
const BYTE Libs::cGetLibType(const int iLib,const BYTE cIndex) const
{
    BYTE cResult = 0;

    if ( cIndex == LIB_TYPE_RESERVED )
        cResult = oLib[iLib]->cIndex;
    else if ( cIndex == LIB_TYPE_WORD )
        cResult = oLib[iLib]->cWord;
    else if ( cIndex == LIB_TYPE_MEANING )
        cResult = oLib[iLib]->cMeaning;
    else if ( cIndex == LIB_TYPE_MARK )
        cResult = oLib[iLib]->cMark;

    return cResult;
}

// Forward step of a merge over all libraries.
//
// iCurrent holds one cursor per library (INVALID_INDEX excludes a library;
// negative cursors are clamped to 0).  The smallest word (case-insensitive)
// among the entries under the cursors is returned and that library's cursor
// is advanced.  Cursors of later libraries whose entry equals the candidate
// at the time they are examined are advanced as well, skipping the
// duplicate.
//
// cType, when non-NULL, receives the word type (cWord) of the library the
// returned entry comes from.  It used to be written only for the first
// candidate, so it named the wrong library whenever a later library
// supplied the smallest word.
//
// Returns NULL, leaving *cType untouched, when every cursor is excluded or
// past the end of its library.
const WordItem* Libs::poGetNextWordItem(int * iCurrent,BYTE * cType)
{
    const WordItem * poCurrent = (const WordItem *)NULL;
    int iCurrentLib=0;

    for (int iLib=0;iLib<m_iTotalLibs;iLib++)
    {
        if (iCurrent[iLib]==INVALID_INDEX)
            continue;
        if (iCurrent[iLib]<0)
            iCurrent[iLib]=0;
        if ( iCurrent[iLib]>=iLength(iLib) )
            continue;       // this library is exhausted

        const WordItem * poCandidate = poGetWordItem(iCurrent[iLib],iLib);
        if ( poCurrent == (const WordItem *)NULL )
        {
            poCurrent = poCandidate;
            iCurrentLib = iLib;
            continue;
        }

        int iResult = mystrcmp_ignorecase(poCurrent->sGetWord(),poCandidate->sGetWord());
        if (  iResult > 0 )
        {
            // this library holds a smaller word
            poCurrent = poCandidate;
            iCurrentLib = iLib;
        }
        else if (iResult == 0 )
        {
            // same word as the current candidate: skip the duplicate
            iCurrent[iLib]++;
        }
    }

    if (poCurrent)
    {
        if (cType)
            *cType = oLib[iCurrentLib]->cWord;
        iCurrent[iCurrentLib]++;
    }
    return poCurrent;
}

// Backward step of a merge over all libraries.
//
// iCurrent holds one cursor per library (INVALID_INDEX excludes a library;
// cursors past the end are clamped to the last entry).  The largest word
// (case-insensitive) among the entries under the cursors is returned and
// that library's cursor is moved back.  Cursors of later libraries whose
// entry equals the candidate at the time they are examined are moved back
// as well, skipping the duplicate.
//
// cType, when non-NULL, receives the word type (cWord) of the library the
// returned entry comes from.  It used to be written only for the first
// candidate, so it named the wrong library whenever a later library
// supplied the largest word.
//
// Returns NULL, leaving *cType untouched, when every cursor is excluded or
// before the start of its library.
const WordItem* Libs::poGetPreWordItem(int * iCurrent,BYTE * cType)
{
    const WordItem * poCurrent = (const WordItem *)NULL;
    int iCurrentLib=0;

    for (int iLib=0;iLib<m_iTotalLibs;iLib++)
    {
        if ( iCurrent[iLib] == INVALID_INDEX )
            continue;
        if ( iCurrent[iLib]>=iLength(iLib) )
            iCurrent[iLib]=iLength(iLib)-1;
        if ( iCurrent[iLib] < 0 )
            continue;       // this library is exhausted

        const WordItem * poCandidate = poGetWordItem(iCurrent[iLib],iLib);
        if ( poCurrent == (const WordItem *)NULL )
        {
            poCurrent = poCandidate;
            iCurrentLib = iLib;
            continue;
        }

        int iResult = mystrcmp_ignorecase(poCurrent->sGetWord(),poCandidate->sGetWord());
        if (  iResult < 0 )
        {
            // this library holds a larger word
            poCurrent = poCandidate;
            iCurrentLib = iLib;
        }
        else if (iResult == 0 )
        {
            // same word as the current candidate: skip the duplicate
            iCurrent[iLib]--;
        }
    }

    if (poCurrent)
    {
        if (cType)
            *cType = oLib[iCurrentLib]->cWord;
        iCurrent[iCurrentLib]--;
    }
    return poCurrent;
}

// Helper for bSimpleGetWord(): remember iSearchIndex as the best partial match
// when iMatchedLen is longer than the longest match recorded so far.
static void vKeepLongerPyMatch(int iMatchedLen,int iSearchIndex,int& iMaxMatchedLen,int& iMaxMatchedIndex)
{
    if ( iMatchedLen > iMaxMatchedLen )
    {
        iMaxMatchedLen = iMatchedLen;
        iMaxMatchedIndex = iSearchIndex;
    }
}

// Look up sWord in library iLib; when the literal lookup misses, retry with
// spellings derived from sWord.
//
//  sWord             - NUL-terminated word to look for.
//  iWordIndex        - [out] index written by the last bLookup() that was
//                      attempted; for STRING_TYPE_PY it may instead be the
//                      entry with the longest partial match found while
//                      scanning.
//  iLib              - position of the library inside oLib[].
//  iSimularDirection - passed through unchanged to every bLookup() call.
//  iStringType       - STRING_TYPE_* value; STRING_TYPE_ASCII enables the
//                      English suffix stripping, STRING_TYPE_PY enables the
//                      tone-digit expansion, any other value disables the
//                      fallbacks.
//
// Returns true when sWord or one of its derived forms was found.
//
// A word that does not fit the MAX_STR_LEN work buffers (before or after the
// STRING_TYPE_PY expansion) is never rewritten: the function then reports
// "not found" with the index produced by the initial lookup.
bool Libs::bSimpleGetWord(const char* sWord,int& iWordIndex,int iLib,int iSimularDirection,int iStringType)
{
    int iIndex;
    char sNewWord[MAX_STR_LEN+1];
    bool bFound=false;
    if(oLib[iLib]->bLookup(sWord,&iIndex,iSimularDirection,iStringType))
    {
        bFound=true;
    }

    // Every fallback below copies sWord into a MAX_STR_LEN+1 byte buffer, so
    // an over-long word must not reach them.
    if ( !bFound && strlen(sWord)>(size_t)(MAX_STR_LEN) )
    {
        iWordIndex = iIndex;
        return(false);
    }

    if ( !bFound && iStringType==STRING_TYPE_ASCII )
    {
        // If not Found , try other status of sWord.
        const int iWordLen=(int)strlen(sWord);

        //cut one char "s" or "d"
        if( !bFound && iWordLen>1
            && ( mytoupper((unsigned char)sWord[iWordLen-1])=='S'
                 || !strncasecmp(&sWord[iWordLen-2],"ed",2)) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-1]='\0'; // cut "s" or "d"
            if(oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                bFound=true;
        }

        //cut "ly"
        if(!bFound && iWordLen>2 &&
            !strncasecmp(&sWord[iWordLen-2],"ly",2) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-2]='\0';  // cut "ly"
            // vowel + doubled consonant in front of the suffix: first try
            // the stem with one of the two consonants removed
            if ( iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])
                 && !bIsVowel(sNewWord[iWordLen-4]) && bIsVowel(sNewWord[iWordLen-5]) )   //doubled
            {
                sNewWord[iWordLen-3]='\0';
                if( oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType) )
                    bFound=true;
                else
                    sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore
            }
            if( !bFound && oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                bFound=true;
        }

        //cut "ing"
        if(!bFound && iWordLen>3 &&
            !strncasecmp(&sWord[iWordLen-3],"ing",3) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-3]='\0';
            if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5])
                 && !bIsVowel(sNewWord[iWordLen-5]) && bIsVowel(sNewWord[iWordLen-6]) )   //doubled
            {
                sNewWord[iWordLen-4]='\0';
                if (oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                    bFound=true;
                else
                    sNewWord[iWordLen-4]=sNewWord[iWordLen-5];  //restore
            }
            if( !bFound && oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType) )
                bFound=true;
            if(!bFound)
            {
                // three chars were cut above, so one more always fits
                strcat(sNewWord,"e"); // add a char "e"
                if(oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                    bFound=true;
            }
        }

        //cut two char "es"
        if(!bFound && iWordLen>3 &&
            (!strncasecmp(&sWord[iWordLen-3],"ses",3) ||
            !strncasecmp(&sWord[iWordLen-3],"oes",3) ||
            !strncasecmp(&sWord[iWordLen-3],"xes",3)) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-2]='\0';
            if(oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                bFound=true;
        }

        //cut "ed"
        if( !bFound && iWordLen>3 &&
            !strncasecmp(&sWord[iWordLen-2],"ed",2) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-2]='\0';
            if ( iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4])
                 && !bIsVowel(sNewWord[iWordLen-4]) && bIsVowel(sNewWord[iWordLen-5]) )   //doubled
            {
                sNewWord[iWordLen-3]='\0';
                if( oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType) )
                    bFound=true;
                else
                    sNewWord[iWordLen-3]=sNewWord[iWordLen-4];  //restore
            }
            if( !bFound && oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                bFound=true;
        }

        // cut "ied" , add "y".
        if(!bFound && iWordLen>3 &&
            !strncasecmp(&sWord[iWordLen-3],"ied",3) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-3]='\0';
            strcat(sNewWord,"y"); // add a char "y"
            if(oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                bFound=true;
        }

        // cut "ies" , add "y".
        if(!bFound && iWordLen>3 &&
            !strncasecmp(&sWord[iWordLen-3],"ies",3) )
        {
            strcpy(sNewWord,sWord);
            sNewWord[iWordLen-3]='\0';
            strcat(sNewWord,"y"); // add a char "y"
            if(oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType))
                bFound=true;
        }
    }   // end of STRING_TYPE_ASCII
    else if ( !bFound && iStringType==STRING_TYPE_PY )
    {
        // A letter directly followed by a blank or by the end of the string
        // carries no digit.  Build three expansions of sWord that insert one
        // character after every such letter:
        //   sNewWord    gets '1'  (start of the range to scan)
        //   sNewWordEnd gets '5'  (end of the range to scan)
        //   sRuleWord   gets '?'  (pattern: '?' accepts any digit)
        char sNewWordEnd[MAX_STR_LEN+1],sRuleWord[MAX_STR_LEN+1];
        int i = 0;
        for ( const char * p = sWord; *p; p++ )
        {
            const bool bNeedsDigit = ( *(p+1)==' ' || *(p+1)=='\0' )
                                     && ( (*p<='Z'&&*p>='A') || (*p<='z'&&*p>='a') );
            // The expansion may be longer than sWord; give up rather than
            // write past the end of the work buffers.
            if ( i + (bNeedsDigit ? 2 : 1) > MAX_STR_LEN )
            {
                iWordIndex = iIndex;
                return(false);
            }
            sRuleWord[i] = sNewWordEnd[i] = sNewWord[i] = *p;
            i++;
            if ( bNeedsDigit )
            {
                sNewWord[i] = '1';
                sNewWordEnd[i] = '5';
                sRuleWord[i] = '?';
                i++;
            }
        }
        sRuleWord[i] = sNewWordEnd[i] = sNewWord[i] = '\0';

        // find begin and end index
        int iEndIndex,iSearchIndex,iChar;
        const char * sSearch;
        bool bIgnoreASCII,bDifferentASCII,bBreak=false;
        oLib[iLib]->bLookup(sNewWord,&iIndex,iSimularDirection,iStringType);
        oLib[iLib]->bLookup(sNewWordEnd,&iEndIndex,iSimularDirection,iStringType);
        int iMaxMatchedLen=0,iMaxMatchedIndex=iIndex;
        for ( iSearchIndex=iIndex;!bFound && iSearchIndex<oLib[iLib]->length() && iSearchIndex<=iEndIndex;iSearchIndex++)
        {
            sSearch = oLib[iLib]->GetWordItem(iSearchIndex).sGetWord();
            // compare sSearch and sRuleWord;
            // bIgnoreASCII   : a '?' has already accepted a digit of this entry
            // bDifferentASCII: two non-digit characters differed
            // bBreak         : stop scanning further entries altogether
            for (iChar=0,bDifferentASCII=bIgnoreASCII=false;sSearch[iChar] && sRuleWord[iChar];iChar++ )
            {
                if ( sRuleWord[iChar]=='?' && myisdigit(sSearch[iChar]) )
                {
                    bIgnoreASCII = true;
                    if ( !sRuleWord[iChar+1] )
                        vKeepLongerPyMatch(iChar-1,iSearchIndex,iMaxMatchedLen,iMaxMatchedIndex);
                    continue;                               // compare next char
                }
                if ( myisdigit(sRuleWord[iChar]) )
                {
                    if ( myisdigit(sSearch[iChar]) )          // both are digital
                    {
                        if ( sSearch[iChar] > sRuleWord[iChar])  // over
                        {
                            vKeepLongerPyMatch(iChar-1,iSearchIndex,iMaxMatchedLen,iMaxMatchedIndex);
                            if ( !bIgnoreASCII )
                                bBreak = true;
                            break;
                        }
                    }
                    else                                    // search is next word, over
                    {
                        vKeepLongerPyMatch(iChar-1,iSearchIndex,iMaxMatchedLen,iMaxMatchedIndex);
                        if ( !bIgnoreASCII )
                            bBreak = true;
                        break;
                    }
                }
                else if ( myisdigit(sSearch[iChar]) )         // not arrive,next search
                {
                    vKeepLongerPyMatch(iChar-1,iSearchIndex,iMaxMatchedLen,iMaxMatchedIndex);
                    break;
                }
                else                                        // both are ascii
                {
                    // tolower() is only defined for unsigned char values (or
                    // EOF), so plain char must be converted before the call.
                    const int iSearchLower = tolower((unsigned char)sSearch[iChar]);
                    const int iRuleLower = tolower((unsigned char)sRuleWord[iChar]);
                    if ( iSearchLower != iRuleLower )
                    {
                        vKeepLongerPyMatch(iChar-1,iSearchIndex,iMaxMatchedLen,iMaxMatchedIndex);
                        bDifferentASCII = true;
                        if ( iSearchLower > iRuleLower )
                        {
                            if ( !bIgnoreASCII )
                                bBreak = true;
                        }
                        break;
                    }
                }   // end of compare byte
            }   // end of for bytes
            if  ( bBreak )
                break;
            // Entry and pattern ended together with no mismatch: exact hit.
            if ( !bDifferentASCII && sSearch[iChar]=='\0' && sRuleWord[iChar]=='\0' )
            {
                bFound = true;
                iIndex = iSearchIndex;
                break;
            }
            vKeepLongerPyMatch(iChar-1,iSearchIndex,iMaxMatchedLen,iMaxMatchedIndex);
        }   // Search Index
        // No exact hit: report the entry with the longest partial match.
        if ( !bFound && iMaxMatchedLen )
        {
            iIndex = iMaxMatchedIndex;
        }
    }   // end of STRING_TYPE_PY

    iWordIndex = iIndex;

    return(bFound);
}

// change "*" to "[!-~]*" and "?" to "[!-~]"
// Translate the wildcard word sWord into the pattern sRule: every '?' becomes
// "[!-~]", every '*' becomes "[!-~]*", any other character is copied as is.
//
//  sWord   - NUL-terminated input word.
//  sRule   - destination buffer; always NUL-terminated on return as long as
//            it is non-NULL and iLength is at least 1.
//  iLength - size of sRule in bytes, including room for the terminator.
//
// Returns true when the whole pattern fitted.  Returns false when sRule is
// NULL, iLength is smaller than 1, sWord is NULL, or the pattern had to be
// cut short; in the last case sRule holds the part produced before the first
// piece that did not fit.
bool Libs::bMakeRule(const char* sWord,char* sRule,int iLength)
{
    // String literals are const; keep them in const arrays so their lengths
    // are compile-time constants.
    static const char sOneAnyChar[]="[!-~]";
    static const char sAnyChar[]="[!-~]*";
    const int iOneAnyCharLen=(int)sizeof(sOneAnyChar)-1;
    const int iAnyCharLen=(int)sizeof(sAnyChar)-1;

    // Without room for at least the terminator nothing may be written.
    if ( sRule==NULL || iLength<1 )
        return(false);
    if ( sWord==NULL )
    {
        sRule[0]='\0';
        return(false);
    }

    int iUseLen=0;
    bool bRuleOK=true;
    for(int i=0;sWord[i]!='\0' && bRuleOK;i++)
    {
        const char* sAddString;
        int iAddLen;

        if(sWord[i]=='?')
        {
            sAddString=sOneAnyChar;
            iAddLen=iOneAnyCharLen;
        }
        else if(sWord[i]=='*')
        {
            sAddString=sAnyChar;
            iAddLen=iAnyCharLen;
        }
        else
        {
            sAddString=sWord+i;
            iAddLen=1;
        }

        // Keep one byte in reserve for the terminating NUL.
        if(iAddLen+iUseLen<=iLength-1)
        {
            memcpy(sRule+iUseLen,sAddString,iAddLen);
            iUseLen+=iAddLen;
        }
        else
        {
            bRuleOK=false;
        }
    }
    sRule[iUseLen]='\0';

    return(bRuleOK);
}

// Decide whether library iLib can be searched for a string of type iType.
//
//  iLib         - position of the library inside oLib[].
//  iType        - STRING_TYPE_* value describing the search string.
//  cSearch_Type - LIB_SEARCH_TYPE_INDEX compares iType against the library's
//                 cWord field, LIB_SEARCH_TYPE_MEANING compares it against
//                 the library's cMeaning field.
//
// Returns false for an empty library, for an unknown cSearch_Type and for any
// iType that has no entry in the selected table below.
bool Libs::bIsOKForSearch(const int iLib,const int iType,const BYTE cSearch_Type)
{
    if ( !oLib[iLib]->length() )
        return false;

    if ( cSearch_Type == LIB_SEARCH_TYPE_INDEX )
    {
        switch ( iType )
        {
            case STRING_TYPE_ASCII:
                return ( oLib[iLib]->cWord == LIB_WORD_ENGLISH );
            case STRING_TYPE_CHINESE:           // either Chinese encoding will do
                return ( oLib[iLib]->cWord == LIB_WORD_GB
                         || oLib[iLib]->cWord == LIB_WORD_BIG5 );
            case STRING_TYPE_CHINESE_GB:
                return ( oLib[iLib]->cWord == LIB_WORD_GB );
            case STRING_TYPE_CHINESE_BIG5:
                return ( oLib[iLib]->cWord == LIB_WORD_BIG5 );
            case STRING_TYPE_PY:
                return ( oLib[iLib]->cWord == LIB_WORD_PY );
            default:
                return false;
        }
    }
    else if ( cSearch_Type == LIB_SEARCH_TYPE_MEANING )
    {
        // LIB_MEANING_CHINESE_AND_ENGLISH is currently not accepted by any
        // of the cases below.
        switch ( iType )
        {
            case STRING_TYPE_ASCII:
                return ( oLib[iLib]->cMeaning == LIB_MEANING_ENGLISH );
            case STRING_TYPE_CHINESE:           // either Chinese encoding will do
                return ( oLib[iLib]->cMeaning == LIB_MEANING_GB
                         || oLib[iLib]->cMeaning == LIB_MEANING_BIG5 );
            case STRING_TYPE_CHINESE_GB:
                return ( oLib[iLib]->cMeaning == LIB_MEANING_GB );
            case STRING_TYPE_CHINESE_BIG5:
                return ( oLib[iLib]->cMeaning == LIB_MEANING_BIG5 );
            default:
                return false;
        }
    }

    return false;
}

// end of file
