2024-03-19 17:45:12 +08:00

314 lines
9.8 KiB
C++

//
// Created by kaishiqi on 14-8-5.
// Copyright (c) 2014 kaishiqi. All rights reserved.
//
#include "MO.h"
#include <cstring>
#include <fstream>
#include <sstream>
#include <vector>
NS_I18N_BEGIN
namespace {

/**
 * Scans the half-open range [first, last) for the first byte equal to `value`.
 *
 * @return a pointer to the first matching byte, or `last` when no byte matches
 *         (which includes the case of an empty range).
 */
inline const char* findChar(const char* first, const char* last, const char value) {
    const char* cursor = first;
    while (cursor < last && *cursor != value) {
        ++cursor;
    }
    return (cursor < last) ? cursor : last;
}

/** Returns the larger of the two integers (`b` when they are equal). */
inline int maxInt(int a, int b) {
    if (a > b) {
        return a;
    }
    return b;
}

/** Returns the smaller of the two integers (`b` when they are equal). */
inline int minInt(int a, int b) {
    if (a < b) {
        return a;
    }
    return b;
}

}
//--------------------------------------------------------------------
//
// MO Class
//
//--------------------------------------------------------------------
// Byte 0x04 separates the context from the original string.
const char MO::CONTEXT_SEPARATOR = '\x04';
// Byte 0x00 separates the singular from the plural in an original string,
// and the individual plural forms in a translation.
const char MO::PLURAL_SEPARATOR = '\0';
/** Defaults implementing the common English logic: two forms, the second one for every n other than 1. */
const unsigned int MO::DEFAULT_NPLURALS = 2u;
const std::function<int(int n)> MO::DEFAULT_PLURAL_EXPRESSION = [](int n) -> int {
    return (1 == n) ? 0 : 1;
};
/**
 * Creates a catalogue and, when a path is supplied, loads it right away.
 *
 * @param filePath            path of the *.mo file; an empty path skips loading.
 * @param onPluralExpression  callback mapping a count to a plural-form index.
 * @param nplurals            number of plural forms of the target language.
 */
MO::MO(std::string filePath/*=""*/, std::function<int(int n)> onPluralExpression/*=DEFAULT_PLURAL_EXPRESSION*/, unsigned int nplurals/*=DEFAULT_NPLURALS*/)
    : _isInited(false)
    , nplurals(nplurals)
    , onPluralExpression(onPluralExpression)
{
    // Nothing to load yet; loadMoFile() can still be called later.
    if (filePath.empty()) {
        return;
    }
    loadMoFile(filePath);
}
/**
 * Creates a catalogue from an in-memory *.mo image.
 *
 * @param bytes               start of the image; a null pointer skips parsing.
 * @param onPluralExpression  callback mapping a count to a plural-form index.
 * @param nplurals            number of plural forms of the target language.
 */
MO::MO(unsigned char* bytes, std::function<int(int n)> onPluralExpression/*=DEFAULT_PLURAL_EXPRESSION*/, unsigned int nplurals/*=DEFAULT_NPLURALS*/)
    : _isInited(false)
    , nplurals(nplurals)
    , onPluralExpression(onPluralExpression)
{
    // No image supplied; parseMoFile() can still be called later.
    if (nullptr == bytes) {
        return;
    }
    parseMoFile(bytes);
}
/** Nothing to release explicitly here; the members clean up after themselves. */
MO::~MO() = default;
/**
 * Reads a *.mo file from disk and parses it into this catalogue.
 *
 * @param filePath path of the *.mo file to load.
 * @return true when the file was read completely and parseMoFile() accepted it;
 *         false when this object is already initialised, the path is empty,
 *         the file cannot be opened or read, it is smaller than an MO header,
 *         or the parser rejected it.
 */
bool MO::loadMoFile(std::string filePath)
{
    if (_isInited || filePath.empty()) {
        return false;
    }

    std::ifstream fin(filePath, std::ifstream::in | std::ifstream::binary);
    if (!fin || !fin.is_open()) {
        return false;
    }

    // Determine the file size. tellg() yields -1 on failure, so keep the value
    // signed instead of letting it wrap into a huge allocation size.
    fin.seekg(0, std::ifstream::end);
    const std::streamoff fileSize = fin.tellg();
    fin.seekg(0, std::ifstream::beg);

    // The MO header is seven 32-bit words (magic, revision, N, O, T, S, H).
    // parseMoFile() reads it without knowing the buffer length, so anything
    // shorter must be rejected here.
    const std::streamoff MO_HEADER_SIZE = 28;
    if (!fin || fileSize < MO_HEADER_SIZE) {
        return false;
    }

    // The vector owns the buffer, so it is released on every exit path,
    // including an exception thrown while parsing.
    std::vector<unsigned char> buffer(static_cast<size_t>(fileSize));
    fin.read(reinterpret_cast<char*>(buffer.data()), static_cast<std::streamsize>(fileSize));
    if (fin.gcount() != static_cast<std::streamsize>(fileSize)) {
        // Short read: the image would be partly uninitialised.
        return false;
    }
    fin.close();

    // parse *.mo File
    this->parseMoFile(buffer.data());

    // parseMoFile() sets _isInited only when it accepted the image.
    return _isInited;
}
/**
 * Parses an in-memory image of a GNU gettext MO file (format revision 0).
 *
 * On success the header entry is stored in _headersInfo, every other entry is
 * stored in _entryMap under entry.getKey(), and _isInited is set. The call does
 * nothing when `bytes` is null, when this object is already initialised, when
 * the magic number is not recognised, or when the revision is not 0.
 *
 * NOTE(review): the length of the image is not part of this interface, so the
 * offsets and lengths read from the string tables cannot be bounds-checked
 * here. Only complete, trusted images should be passed in.
 *
 * @param bytes start of the MO image; it is only read, never modified or freed.
 * @see http://www.gnu.org/savannah-checkouts/gnu/gettext/manual/html_node/MO-Files.html
 */
void MO::parseMoFile(unsigned char* bytes)
{
    if (!bytes || _isInited) {
        return;
    }

    typedef unsigned long (*PeekWord)(const unsigned char* data, unsigned long offs);

    // Reads a 32-bit word stored least-significant byte first. Every byte is
    // widened before shifting: plain int arithmetic overflows (undefined
    // behaviour) as soon as the most significant byte is 0x80 or larger.
    const PeekWord peekLsbFirst = [](const unsigned char* data, unsigned long offs) -> unsigned long {
        return  static_cast<unsigned long>(data[offs])
             | (static_cast<unsigned long>(data[offs + 1]) << 8)
             | (static_cast<unsigned long>(data[offs + 2]) << 16)
             | (static_cast<unsigned long>(data[offs + 3]) << 24);
    };

    // Reads a 32-bit word stored most-significant byte first.
    const PeekWord peekMsbFirst = [](const unsigned char* data, unsigned long offs) -> unsigned long {
        return (static_cast<unsigned long>(data[offs])     << 24)
             | (static_cast<unsigned long>(data[offs + 1]) << 16)
             | (static_cast<unsigned long>(data[offs + 2]) << 8)
             |  static_cast<unsigned long>(data[offs + 3]);
    };

    // Copies [first, last) into a string, stopping early at the first NUL byte.
    const auto copyUntilNul = [](const char* first, const char* last) -> std::string {
        return std::string(first, findChar(first, last, '\0'));
    };

    /**
     * magic: the first four bytes are read most-significant byte first. Seeing
     * the documented value 0x950412de means the file stores its words in that
     * order; seeing the byte-swapped value means they are stored
     * least-significant byte first.
     */
    const unsigned long MAGIC_MSB_FIRST_FILE = 0x950412de;
    const unsigned long MAGIC_LSB_FIRST_FILE = 0xde120495;
    const unsigned long magicNumber = peekMsbFirst(bytes, 0);

    PeekWord peekLong = nullptr;
    if (MAGIC_LSB_FIRST_FILE == magicNumber) {
        peekLong = peekLsbFirst;
    } else if (MAGIC_MSB_FIRST_FILE == magicNumber) {
        peekLong = peekMsbFirst;
    } else {
        //no valid mo-file
        return;
    }

    /** revision: only revision 0 of the MO format is supported */
    if (0 != peekLong(bytes, 4)) {
        //unsupported version
        return;
    }

    /**
     * N = number of strings, O = offset of the original-string table,
     * T = offset of the translation table. The hash table fields (S at
     * offset 20, H at offset 24) are not needed and therefore not read.
     */
    const unsigned long WORD_SIZE = 4;
    const unsigned long TABLE_ENTRY_SIZE = 2 * WORD_SIZE;  // length word + offset word
    const unsigned long moN = peekLong(bytes, 8);
    unsigned long moO = peekLong(bytes, 12);
    unsigned long moT = peekLong(bytes, 16);

    // The strings are read in place; no per-entry heap copy is required.
    const char* const image = reinterpret_cast<const char*>(bytes);

    /** traverse and get strings */
    for (unsigned long i = 0; i < moN; ++i, moO += TABLE_ENTRY_SIZE, moT += TABLE_ENTRY_SIZE) {
        const unsigned long originalLength = peekLong(bytes, moO);
        const char* const original = image + peekLong(bytes, moO + WORD_SIZE);
        const char* const originalEnd = original + originalLength;

        const unsigned long translationLength = peekLong(bytes, moT);
        const char* const translation = image + peekLong(bytes, moT + WORD_SIZE);
        const char* const translationEnd = translation + translationLength;

        /** mo file headers info: the first entry with an empty original string */
        if (0 == originalLength && _headersInfo.empty()) {
            _headersInfo = copyUntilNul(translation, translationEnd);
            continue;
        }

        /**
         * Build a TranslateEntry from the original string and its translations.
         * The original might contain 0x04 as context separator and 0x00 as
         * singular/plural separator; the translation might contain 0x00 as
         * separator between plural forms.
         */
        TranslateEntry entry;

        // look for context
        const char* singularBegin = original;
        const char* const contextEnd = findChar(original, originalEnd, CONTEXT_SEPARATOR);
        if (contextEnd != originalEnd) {
            entry.setContext(copyUntilNul(original, contextEnd));
            singularBegin = contextEnd + sizeof(CONTEXT_SEPARATOR);
        }

        // look for singular original
        const char* const singularEnd = findChar(singularBegin, originalEnd, PLURAL_SEPARATOR);
        entry.setSingular(copyUntilNul(singularBegin, singularEnd));

        // look for plural original: present only when bytes follow the separator.
        // The plural spans exactly the rest of the original string; taking one
        // byte more would read past the end of the string.
        if ((originalEnd - singularEnd) > 1) {
            entry.setPlural(copyUntilNul(singularEnd + sizeof(PLURAL_SEPARATOR), originalEnd));
        }

        // plural translations are also separated by \0
        std::vector<std::string> translations;
        const char* formBegin = translation;
        for (;;) {
            const char* const formEnd = findChar(formBegin, translationEnd, PLURAL_SEPARATOR);
            translations.push_back(copyUntilNul(formBegin, formEnd));
            // Stop when nothing follows the separator (or no separator was found).
            if ((translationEnd - formEnd) <= 1) {
                break;
            }
            formBegin = formEnd + sizeof(PLURAL_SEPARATOR);
        }
        entry.setTranslations(translations);

        // save
        _entryMap[entry.getKey()] = entry;
    }

    _isInited = true;
}
/**
 * Maps a count to the index of the plural form to use.
 *
 * The index comes from onPluralExpression when one is set, otherwise from
 * DEFAULT_PLURAL_EXPRESSION, and is then limited to the range [0, nplurals-1].
 *
 * @param n the count the plural form is selected for.
 * @return the plural-form index after limiting.
 */
unsigned int MO::selectPluralForm(int n)
{
    int formIndex;
    if (onPluralExpression) {
        formIndex = onPluralExpression(n);
    } else {
        formIndex = MO::DEFAULT_PLURAL_EXPRESSION(n);
    }

    // Cap at the last available form first, then raise negatives to zero.
    const int capped = minInt(formIndex, nplurals-1);
    return maxInt(0, capped);
}
/**
 * Looks up the translation of a singular string.
 *
 * @param singular the original string.
 * @param context  optional context that is part of the lookup key.
 * @return the first stored translation of the matching entry, or `singular`
 *         itself when no entry matches.
 */
std::string MO::translate(std::string singular, std::string context/*=""*/)
{
    const auto found = _entryMap.find(Entry::createKey(singular, context));
    if (found == _entryMap.end()) {
        // Untranslated: fall back to the original text.
        return singular;
    }
    return found->second.getTranslations().at(0);
}
/**
 * Looks up the translation of a string for a given count.
 *
 * @param singular the original singular string; also part of the lookup key.
 * @param plural   the original plural string, used only as a fallback.
 * @param count    the count that selects the plural form.
 * @param context  optional context that is part of the lookup key.
 * @return the stored translation for the selected plural form; when there is no
 *         matching entry or it has no such form, `singular` for a count of 1
 *         and `plural` otherwise.
 */
std::string MO::translatePlural(std::string singular, std::string plural, int count, std::string context/*=""*/)
{
    const auto found = _entryMap.find(Entry::createKey(singular, context));
    const auto formIndex = this->selectPluralForm(count);

    if (found != _entryMap.end()) {
        const auto& forms = found->second.getTranslations();
        if (formIndex < forms.size()) {
            return forms.at(formIndex);
        }
    }

    // Fallback: choose between the untranslated originals.
    if (1 == count) {
        return singular;
    }
    return plural;
}
NS_I18N_END