KDECore
CharDistribution.cpp
Go to the documentation of this file.00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* -*- C++ -*- 00003 * Copyright (C) 1998 <developer@mozilla.org> 00004 * 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining 00007 * a copy of this software and associated documentation files (the 00008 * "Software"), to deal in the Software without restriction, including 00009 * without limitation the rights to use, copy, modify, merge, publish, 00010 * distribute, sublicense, and/or sell copies of the Software, and to 00011 * permit persons to whom the Software is furnished to do so, subject to 00012 * the following conditions: 00013 * 00014 * The above copyright notice and this permission notice shall be included 00015 * in all copies or substantial portions of the Software. 00016 * 00017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00019 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00020 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 00021 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00022 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00023 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00024 */ 00025 00026 00027 #include "CharDistribution.h" 00028 00029 #include "tables/JISFreq.tab" 00030 #include "tables/Big5Freq.tab" 00031 #include "tables/EUCKRFreq.tab" 00032 #include "tables/EUCTWFreq.tab" 00033 #include "tables/GB2312Freq.tab" 00034 00035 #define SURE_YES 0.99f 00036 #define SURE_NO 0.01f 00037 00038 namespace kencodingprober { 00039 //return confidence base on received data 00040 float CharDistributionAnalysis::GetConfidence() 00041 { 00042 //if we didn't receive any character in our consideration range, return negative answer 00043 if (mTotalChars <= 0) 00044 return SURE_NO; 00045 00046 if (mTotalChars != mFreqChars) { 00047 float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio); 00048 00049 if (r < SURE_YES) 00050 return r; 00051 } 00052 //normalize confidence, (we don't want to be 100% sure) 00053 return SURE_YES; 00054 } 00055 00056 EUCTWDistributionAnalysis::EUCTWDistributionAnalysis() 00057 { 00058 mCharToFreqOrder = EUCTWCharToFreqOrder; 00059 mTableSize = EUCTW_TABLE_SIZE; 00060 mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO; 00061 } 00062 00063 EUCKRDistributionAnalysis::EUCKRDistributionAnalysis() 00064 { 00065 mCharToFreqOrder = EUCKRCharToFreqOrder; 00066 mTableSize = EUCKR_TABLE_SIZE; 00067 mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; 00068 } 00069 00070 GB2312DistributionAnalysis::GB2312DistributionAnalysis() 00071 { 00072 mCharToFreqOrder = GB2312CharToFreqOrder; 00073 mTableSize = GB2312_TABLE_SIZE; 00074 mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; 00075 } 00076 00077 Big5DistributionAnalysis::Big5DistributionAnalysis() 00078 { 00079 mCharToFreqOrder = Big5CharToFreqOrder; 00080 mTableSize = BIG5_TABLE_SIZE; 00081 mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; 00082 } 00083 00084 SJISDistributionAnalysis::SJISDistributionAnalysis() 00085 { 00086 mCharToFreqOrder = JISCharToFreqOrder; 00087 mTableSize = JIS_TABLE_SIZE; 00088 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 00089 } 00090 00091 EUCJPDistributionAnalysis::EUCJPDistributionAnalysis() 00092 { 00093 mCharToFreqOrder = JISCharToFreqOrder; 00094 mTableSize = JIS_TABLE_SIZE; 00095 mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO; 00096 } 00097 } 00098