一种不太完善的OpenStreetMap字典汉化方法
来源:互联网 发布:在哪购买淘宝小号靠谱 编辑:程序博客网 时间:2024/06/06 01:43
根据世界地名词典,对OpenStreetMap的地名进行汉化,使用了下面的代码。该方法还不太完善,这里仅贴出来供参考。
地名字典可在我的资源《世界地名大词典》中下载。
#include <algorithm>

#include <QCoreApplication>
#include <QDebug>
#include <QFile>
#include <QHash>
#include <QMap>
#include <QRegExp>
#include <QString>
#include <QStringList>
#include <QSqlDatabase>
#include <QSqlError>
#include <QSqlQuery>
#include <QTextStream>
#include <QVector>

/// Translation dictionary.
///   key   : normalized (upper-cased, '_'-joined) prefix of a foreign place name
///   value : map from a ranking number to the candidate Chinese names stored
///           under that number. The ranking number is
///           (number of suffixes stripped from the Chinese name) - (index of
///           the last word of the prefix); prepareToTranslate() picks the
///           entry with the smallest ranking number.
typedef QHash<QString, QMap<int, QVector<QString> > > PlaceDictionary;

PlaceDictionary make_dictionary(QSqlDatabase db);
void outputDictionary(const PlaceDictionary &dict);
void prepareToTranslate(const PlaceDictionary &dict,
                        QSqlDatabase db,
                        const QString &tableName,
                        QVector<qint64> &vec_osmid,
                        QVector<QString> &vec_rawName,
                        QVector<QString> &vec_TransName);

/// Normalizes a place name into its list of key words: upper-case, trim, turn
/// the separator characters (blanks, commas, '-', '.') into '_' and split on
/// '_'. Empty parts are kept, exactly as the dictionary builder stored them.
static QStringList splitIntoKeyWords(const QString &name)
{
    QString upperKey = name.toUpper().trimmed();
    upperKey.replace(QRegExp(QString::fromUtf8("[ ,, ]")), "_");
    upperKey.replace("-", "_");
    upperKey.replace(".", "_");
    return upperKey.split("_");
}

/// Joins the first @p count words of @p words with '_'.
static QString joinPrefix(const QStringList &words, int count)
{
    QString key;
    for (int j = 0; j < count; ++j) {
        if (j)
            key += "_";
        key += words.at(j);
    }
    return key;
}

/// Repeatedly removes a known generic suffix from the end of @p value, as long
/// as something non-empty remains. Suffixes are tried in list order and the
/// scan restarts from the top after every removal.
/// @return the number of suffixes removed.
static int stripKnownTails(QString &value, const QVector<QString> &tails)
{
    int removed = 0;
    bool strippedOne = true;
    while (strippedOne) {
        strippedOne = false;
        for (int k = 0; k < tails.size(); ++k) {
            const QString &tail = tails.at(k);
            // Never strip a suffix that would leave an empty name.
            if (value.endsWith(tail) && value.length() > tail.length()) {
                value.chop(tail.length());
                ++removed;
                strippedOne = true;
                break;
            }
        }
    }
    return removed;
}

/// Builds the dictionary from the place-name table, dumps it to dict.txt, then
/// rewrites the untranslated names of the four OSM tables inside a single
/// transaction. Every applied translation is also logged to trans.txt.
/// @return 0 on success, 1 on any failure.
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
    QTextStream Stdout(stdout, QIODevice::WriteOnly);

    // NOTE(review): connection parameters and credentials are hard-coded.
    QSqlDatabase db = QSqlDatabase::addDatabase("QPSQL");
    if (!db.isValid())
        return 1;
    db.setHostName("127.0.0.1");
    db.setDatabaseName("gis");
    db.setUserName("archosm");
    db.setPassword("archosm");
    if (!db.open()) {
        Stdout << db.lastError().text() << "\n";
        qDebug() << db.lastError().text();
        return 1;
    }

    int exitCode = 0;
    try {
        const PlaceDictionary dict = make_dictionary(db);
        outputDictionary(dict);

        // Start to translate.
        const QString tableNames[4] = {
            QString("planet_osm_line"),
            QString("planet_osm_point"),
            QString("planet_osm_polygon"),
            QString("planet_osm_roads")
        };

        // Log of every translation that gets applied.
        QFile fpDict(QCoreApplication::applicationDirPath() + "/trans.txt");
        if (!fpDict.open(QIODevice::WriteOnly)) {
            db.close();
            return 1;
        }
        QTextStream stout(&fpDict);

        QSqlQuery queryUpdate(db);
        queryUpdate.setForwardOnly(true);
        if (!db.transaction())
            throw db.lastError().text();

        for (int i = 0; i < 4; ++i) {
            QVector<qint64> vec_osmid;
            QVector<QString> vec_rawName;
            QVector<QString> vec_TransName;
            prepareToTranslate(dict, db, tableNames[i], vec_osmid, vec_rawName, vec_TransName);

            // Collapse duplicates: one UPDATE per distinct raw name.
            QMap<QString, QString> map_trans;
            const int nTransed = vec_osmid.size();
            for (int j = 0; j < nTransed; ++j)
                map_trans[vec_rawName[j]] = vec_TransName[j];

            // The statement only depends on the table, so prepare it once
            // per table instead of once per row.
            if (!queryUpdate.prepare(
                    QString("update %1 set name = ? , trans_name_chs = ? where name = ? "
                            "and trans_name_chs is null;").arg(tableNames[i])))
                throw queryUpdate.lastError().text();

            for (QMap<QString, QString>::const_iterator it = map_trans.constBegin();
                 it != map_trans.constEnd(); ++it) {
                const QString &str_rawName = it.key();
                const QString &strTransName = it.value();
                stout << tableNames[i] << "," << str_rawName << "," << strTransName << "\n";

                // New name is "<raw>,<translation>"; the translation alone is
                // also stored in trans_name_chs.
                queryUpdate.bindValue(0, str_rawName + "," + strTransName);
                queryUpdate.bindValue(1, strTransName);
                queryUpdate.bindValue(2, str_rawName);
                if (!queryUpdate.exec())
                    throw queryUpdate.lastError().text();

                // Flush per row so the log shows progress during a long run.
                stout.flush();
                fpDict.flush();
            }
        }

        if (!db.commit())
            throw db.lastError().text();
        fpDict.close();
    } catch (const QString &errMessage) {
        db.rollback();
        Stdout << "Error!" << errMessage << "\n";
        qDebug() << "Error!" << errMessage;
        exitCode = 1;
    }

    db.close();
    Stdout << "Finished!\n";
    qDebug() << "Finished!";
    // Return normally (instead of exit(0)) so that local destructors run and
    // the buffered Stdout stream is flushed. No event loop is needed.
    return exitCode;
}

/// Preprocesses the raw rows of table national_place_names and builds the
/// dictionary.
///
/// For every row, the first segment of place_name (after removing the markup
/// tags and splitting on the separator characters) is normalized into words;
/// every leading prefix of those words becomes a key. The first segment of
/// trans_name, with generic suffixes stripped, is stored under each key.
///
/// @param db an open database connection.
/// @return the dictionary.
/// @throws QString the database error text if the query fails.
PlaceDictionary make_dictionary(QSqlDatabase db)
{
    // Removing these suffixes yields more usable word stems. Order matters:
    // longer suffixes come before the shorter ones they contain.
    static const char *const kTails[] = {
        "国家野生动物保护区", "国家森林公园", "野生动物保护区", "森林公园",
        "国家公园", "深海平原", "海底峡谷", "断裂带", "自治区",
        "裂口", "盐湖", "内湖", "海岭", "环礁", "大区", "机场", "山口",
        "公园", "半岛", "冰川", "沙漠", "峡谷", "山谷", "海沟", "水道",
        "水库", "大坝", "神庙", "干河", "平原", "海岸", "群岛", "火山",
        "浅滩", "大桥", "洼地", "瀑布", "海峡", "熔岩",
        "岛", "湖", "湾", "山", "河", "滩", "村", "市", "坝", "港",
        "区", "县", "省", "礁", "角", "峰", "站", "岭"
    };
    QVector<QString> lst_tails;
    const int tailCount = static_cast<int>(sizeof(kTails) / sizeof(kTails[0]));
    lst_tails.reserve(tailCount);
    for (int t = 0; t < tailCount; ++t)
        lst_tails.push_back(QString::fromUtf8(kTails[t]));

    QSqlQuery query(db);
    query.setForwardOnly(true);
    if (!query.exec("select * from national_place_names"))
        throw query.lastError().text();

    PlaceDictionary hash_dict;
    while (query.next()) {
        const QString raw_name = query.value("place_name").toString()
                                     .replace("<u>", "")
                                     .replace("</u>", "")
                                     .replace("<rt>", "")
                                     .replace("</rt>", "")
                                     .replace("<ruby>", "")
                                     .replace("</ruby>", "");
        const QString raw_trans = query.value("trans_name").toString();

        // Split on the separator characters, including "见" ("see"), and keep
        // only the first segment.
        const QStringList lst_raw_name =
            raw_name.split(QRegExp(QString::fromUtf8("[〈〉见,()]")), QString::SkipEmptyParts);
        if (lst_raw_name.isEmpty())
            continue;

        // The Chinese value depends only on the row, so compute it once
        // instead of once per prefix.
        const QStringList chslists =
            raw_trans.split(QRegExp(QString::fromUtf8("[()(),;。]")), QString::SkipEmptyParts);
        if (chslists.isEmpty())
            continue;
        QString chs_value = chslists.first();
        const int deleted = stripKnownTails(chs_value, lst_tails);

        const QStringList listWordsKey = splitIntoKeyWords(lst_raw_name.first());
        const int n = listWordsKey.size();
        QString finalKey;
        for (int i = 0; i < n; ++i) {
            // Grow the key by one word: W0, W0_W1, W0_W1_W2, ...
            if (i)
                finalKey += "_";
            finalKey += listWordsKey.at(i);
            hash_dict[finalKey][deleted - i].push_back(chs_value);
        }
    }
    return hash_dict;
}

/// Writes the dictionary to "dict.txt" in the application directory, one key
/// per line with keys sorted, in the form
///   KEY:rank={name,name,rank}; rank={...};
/// Returns silently if the file cannot be opened.
void outputDictionary(const PlaceDictionary &dict)
{
    QFile fpDict(QCoreApplication::applicationDirPath() + "/dict.txt");
    if (!fpDict.open(QIODevice::WriteOnly))
        return;
    QTextStream stout(&fpDict);

    QList<QString> words = dict.keys();
    std::sort(words.begin(), words.end());
    for (int w = 0; w < words.size(); ++w) {
        const QString &word = words.at(w);
        stout << word << ":";
        const PlaceDictionary::const_iterator entry = dict.constFind(word);
        const QMap<int, QVector<QString> > &vals = entry.value();
        for (QMap<int, QVector<QString> >::const_iterator it = vals.constBegin();
             it != vals.constEnd(); ++it) {
            const int simrt = it.key();
            const QVector<QString> &transs = it.value();
            stout << simrt << "={";
            for (int i = 0; i < transs.size(); ++i)
                stout << transs[i] << ",";
            stout << simrt << "}; ";
        }
        stout << "\n";
    }
    fpDict.close();
}

/// Reads (osm_id, name) from @p tableName and looks every name up in @p dict.
///
/// A name is normalized into words; prefixes are tried from the longest down
/// to two words (a one-word prefix is only tried for one-word names). The
/// first prefix found in the dictionary ends the search, and its translation
/// is used only if that key is longer than 3 characters.
///
/// NOTE: only the matched prefix is translated; the remaining words of the
/// name are not looked up.
///
/// @param dict          dictionary built by make_dictionary().
/// @param db            an open database connection.
/// @param tableName     table to scan; inserted into the SQL text verbatim.
/// @param vec_osmid     out: osm_id of every row that got a translation.
/// @param vec_rawName   out: original name, parallel to @p vec_osmid.
/// @param vec_TransName out: translated name, parallel to @p vec_osmid.
/// @throws QString the database error text if the query fails.
void prepareToTranslate(const PlaceDictionary &dict,
                        QSqlDatabase db,
                        const QString &tableName,
                        QVector<qint64> &vec_osmid,
                        QVector<QString> &vec_rawName,
                        QVector<QString> &vec_TransName)
{
    QSqlQuery query(db);
    query.setForwardOnly(true);
    if (!query.exec(QString("select osm_id,name from %1 where name > ' ';").arg(tableName)))
        throw query.lastError().text();

    while (query.next()) {
        const qint64 osmid = query.value(0).toLongLong();
        const QString strRawName = query.value(1).toString();
        QString transName;

        if (strRawName.size() > 1) {
            const QStringList listWordsKey = splitIntoKeyWords(strRawName);
            for (int i = listWordsKey.size() - 1; i >= 0; --i) {
                const QString finalKey = joinPrefix(listWordsKey, i + 1);
                const PlaceDictionary::const_iterator hit = dict.constFind(finalKey);
                if (hit != dict.constEnd()) {
                    // Very short keys are too ambiguous to trust.
                    if (finalKey.size() > 3)
                        transName += hit.value().first().first();
                    break;
                }
                if (i < 2)
                    break;
            }
        }

        if (transName.size()) {
            vec_osmid.push_back(osmid);
            vec_rawName.push_back(strRawName);
            vec_TransName.push_back(transName);
        }
    }
}
阅读全文
0 0
- 一种不太完善的OpenStreetMap字典汉化方法
- 获取国内任意一个城市的OpenStreetMap数据的一种方法
- 一种没有语料字典的分词方法
- 一种没有语料字典的分词方法
- 一个不太完善的ASP整站静态生成程序
- 一个不太完善的ASP整站静态生成程序
- 一个不太完善的ASP整站静态生成程序
- 87.54中OC与js交互不太完善,这里有全面的解析
- 一种图片不变形的方法
- boj 1333 想法十分简单。。不多说了。。复杂的我不会。。这个题给的不太完善感觉
- android 联动listview 的一种不太健康的实现方式
- pg_ctl 加载启动参数文件一种不太常见的写法
- 人民币金额拆分——很笨的一种方法(没完善)
- 【PB】PowerBuilder中的一些不太常用的方法
- PowerBuilder中的一些不太常用的方法
- 【PB】PowerBuilder中的一些不太常用的方法
- 使文本框不可编辑[不太常用的方法]
- 【PB】PowerBuilder中的一些不太常用的方法
- TX2(1)--Jetson TX2 刷机并安装JetPack3.0
- list去除所有为null元素
- hdu 6103 Kirinriki(尺取法)
- node.js学习
- Java中出现[Ljava.lang.String的问题
- 一种不太完善的OpenStreetMap字典汉化方法
- 聊一聊分布式锁的设计
- 网络命令
- 结构体中的深拷贝和浅拷贝
- vim命令
- 百度云盘分享:MySQL零基础入门视频教程
- 调用浏览器中的前进和后退
- js判断两个时间段是否有重合部分(是否冲突)
- 创建vue cli项目