• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • kdelibs
  • Sitemap
  • Contact Us
 

KDECore

JpCntx.h

Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
00002 /*  -*- C++ -*-
00003 *  Copyright (C) 1998 <developer@mozilla.org>
00004 *
00005 *
00006 *  Permission is hereby granted, free of charge, to any person obtaining
00007 *  a copy of this software and associated documentation files (the
00008 *  "Software"), to deal in the Software without restriction, including
00009 *  without limitation the rights to use, copy, modify, merge, publish,
00010 *  distribute, sublicense, and/or sell copies of the Software, and to
00011 *  permit persons to whom the Software is furnished to do so, subject to
00012 *  the following conditions:
00013 *
00014 *  The above copyright notice and this permission notice shall be included 
00015 *  in all copies or substantial portions of the Software.
00016 *
00017 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00018 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00019 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00020 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00021 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00022 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00023 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00024 */
00025 
00026 #ifndef __JPCNTX_H__
00027 #define __JPCNTX_H__
00028 
00029 #include "kdemacros.h"
00030 
00031 #define NUM_OF_CATEGORY 6 //number of category counters held in mRelSample
00032 
00033 #define ENOUGH_REL_THRESHOLD  100 //GotEnoughData() is true once mTotalRel exceeds this
00034 #define MAX_REL_THRESHOLD     1000 //HandleOneChar() stops counting once mTotalRel exceeds this
00035 namespace kencodingprober {
00036 //Hiragana frequency category table, indexed as [order of previous char][order of current char]
00037 extern const char jp2CharContext[83][83];
00038 
00039 class KDE_NO_EXPORT JapaneseContextAnalysis
00040 {
00041 public:
00042   JapaneseContextAnalysis() {Reset();};
00043   virtual ~JapaneseContextAnalysis() {};
00044 
00045   void HandleData(const char* aBuf, unsigned int aLen);
00046 
00047   void HandleOneChar(const char* aStr, unsigned int aCharLen)
00048   {
00049     int order;
00050 
00051     //if we received enough data, stop here   
00052     if (mTotalRel > MAX_REL_THRESHOLD)   mDone = true;
00053     if (mDone)       return;
00054      
00055     //Only 2-bytes characters are of our interest
00056     order = (aCharLen == 2) ? GetOrder(aStr) : -1;
00057     if (order != -1 && mLastCharOrder != -1)
00058     {
00059       mTotalRel++;
00060       //count this sequence to its category counter
00061       mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++;
00062     }
00063     mLastCharOrder = order;
00064   };
00065 
00066   float GetConfidence();
00067   void      Reset(void);
00068   void      SetOpion(){};
00069   bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;};
00070 
00071 protected:
00072   virtual int GetOrder(const char* str, unsigned int *charLen) = 0;
00073   virtual int GetOrder(const char* str) = 0;
00074 
00075   //category counters, each interger counts sequence in its category
00076   unsigned int mRelSample[NUM_OF_CATEGORY];
00077 
00078   //total sequence received
00079   unsigned int mTotalRel;
00080   
00081   //The order of previous char
00082   int  mLastCharOrder;
00083 
00084   //if last byte in current buffer is not the last byte of a character, we
00085   //need to know how many byte to skip in next buffer.
00086   unsigned int mNeedToSkipCharNum;
00087 
00088   //If this flag is set to true, detection is done and conclusion has been made
00089   bool   mDone;
00090 };
00091 
00092 
00093 class KDE_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
00094 {
00095   //SJISContextAnalysis(){};
00096 protected:
00097   int GetOrder(const char* str, unsigned int *charLen);
00098 
00099   int GetOrder(const char* str)
00100   {
00101     //We only interested in Hiragana, so first byte is '\202'
00102     if (*str == '\202' && 
00103           (unsigned char)*(str+1) >= (unsigned char)0x9f && 
00104           (unsigned char)*(str+1) <= (unsigned char)0xf1)
00105       return (unsigned char)*(str+1) - (unsigned char)0x9f;
00106     return -1;
00107   };
00108 };
00109 
00110 class KDE_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
00111 {
00112 protected:
00113   int GetOrder(const char* str, unsigned int *charLen);
00114   int GetOrder(const char* str)
00115     //We only interested in Hiragana, so first byte is '\244'
00116   {
00117     if (*str == '\244' &&
00118           (unsigned char)*(str+1) >= (unsigned char)0xa1 &&
00119           (unsigned char)*(str+1) <= (unsigned char)0xf3)
00120       return (unsigned char)*(str+1) - (unsigned char)0xa1;
00121     return -1;
00122   };
00123 };
00124 }
00125 #endif /* __JPCNTX_H__ */
00126 

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal