文字匹配解析例子(读取Json 格式配置文件)
来源:互联网 发布:电视定时开关机软件 编辑:程序博客网 时间:2024/05/16 12:24
目标:实现一个文字匹配解析的例子(本例通过职位 title 解析出对应的级别 band,从而得到上下级关系)。
功能:
1)Json 格式配置文件读取;
2)通过配置文件灵活控制解析规则,如:filter(标题包含该词即命中)、bypass(标题包含该词则跳过该级别)、filter_reg(标题整体匹配该正则即命中)、bypass_reg(标题整体匹配该正则则跳过该级别)。
e.g. LangRecogUtils
public class LangRecogUtils {static final Logger logger = LoggerFactory.getLogger(LangRecogUtils.class);public static int getBand(String title) {final String PATH = "band.dic";final int DEFAULT_BAND = 0;// default: no bandint band = DEFAULT_BAND;String allLines = "";boolean continueScanFlag = true;boolean completeFlag = false;if (title == null || title.equalsIgnoreCase("NULL")) {return band;}String titleInLowerCase = title.toLowerCase();allLines = build(PATH);ObjectMapper mapper = new ObjectMapper();JsonNode rootNode;try {rootNode = mapper.readValue(allLines.getBytes(), 0, allLines.getBytes().length, JsonNode.class);if (rootNode == null) {return band;}Iterator<JsonNode> jsonItr_1 = rootNode.getElements();logger.debug("LangRecogUtils - rootNode Size: {}", rootNode.size());while (jsonItr_1.hasNext()) {JsonNode subNode = jsonItr_1.next();continueScanFlag = true;// get the band from the dictionaryint tempBand = DEFAULT_BAND;if (subNode.get("band") != null) {tempBand = subNode.get("band").getIntValue();logger.debug("LangRecogUtils - temp band: {}", tempBand);} else {logger.error("The \"band\" session is madatory.");}// get the bypass list from the dictionaryif (subNode.get("bypass_list") != null) {Iterator<JsonNode> jsonItr_3 = subNode.get("bypass_list").getElements();while (continueScanFlag && jsonItr_3.hasNext()) {JsonNode filterNode = jsonItr_3.next();if (filterNode.get("bypass") != null) {String bypass = filterNode.get("bypass").getTextValue();logger.debug("LangRecogUtils - bypass: {}", bypass);if (titleInLowerCase.contains(bypass)) {band = DEFAULT_BAND;continueScanFlag = false;// bypass coming scanning until next band session}}if (filterNode.get("bypass_reg") != null) {String bypassReg = filterNode.get("bypass_reg").getTextValue();logger.debug("LangRecogUtils - bypass reg: {}", bypassReg);Pattern bypassPattern = Pattern.compile(bypassReg);Matcher bypassMatcher = bypassPattern.matcher(titleInLowerCase);if (bypassMatcher.matches()) {band = 
DEFAULT_BAND;continueScanFlag = false;// bypass coming scanning until next band session}}}}// get the filter list from the dictionaryif (subNode.get("filter_list") != null) {Iterator<JsonNode> jsonItr_2 = subNode.get("filter_list").getElements();while (continueScanFlag && jsonItr_2.hasNext()) {JsonNode filterNode = jsonItr_2.next();if (filterNode.get("filter") != null) {String filter = filterNode.get("filter").getTextValue();logger.debug("LangRecogUtils - filter: {}", filter);if (titleInLowerCase.contains(filter)) {band = tempBand;completeFlag = true;}}if (filterNode.get("filter_reg") != null) {String filterReg = filterNode.get("filter_reg").getTextValue();logger.debug("LangRecogUtils - filter reg: {}", filterReg);Pattern filterPattern = Pattern.compile(filterReg);Matcher filterMatcher = filterPattern.matcher(titleInLowerCase);if (filterMatcher.matches()) {band = tempBand;completeFlag = true;}}}}if (completeFlag) {return band;}}} catch (JsonParseException e) {logger.error(e.getMessage(), e);} catch (JsonMappingException e) {logger.error(e.getMessage(), e);} catch (IOException e) {logger.error(e.getMessage(), e);}return band;}public static String build(String dictName) { BufferedReader reader = null; int i = 0; String allLines = ""; try { reader = new BufferedReader( new InputStreamReader(Util.getInputStream(dictName), "utf-8")); String line = reader.readLine(); while (line != null && !line.trim().equals("")) { i++; allLines += line; line = reader.readLine(); } logger.debug("LangRecogUtils - total read lines: {}", i); } catch (Exception e) { logger.error(e.getMessage(), e); } finally { if (reader != null) { try { reader.close(); } catch (Exception e) { logger.error(e.getMessage(), e); } } } return allLines.toLowerCase(); }public static void main(String[] args) {//String testStr = "Human Resources Manager";//System.out.println("LangRecogUtils - band:" + getBand(testStr));String[] testStrs = {"Executive Officer & Chief of Staff for the CIO","Senior Program 
Manager-Public Key Infrastructure (PKI)","Deputy Director of Intelligence (Deployment)","Human Resources Manager","Overt Debriefing Team Chief","lead Security Contractor","Assistant Project Manager","Senior Watch Officer","Naval Attaché","Operations Officer","Executive Admin Assistant - E4","Engineer Intern"};/* * Expectation: * 5,10,10,15,20,20,25,25,30,30,35,35 */for (int i = 0; i < testStrs.length; i++) {System.out.println("LangRecogUtils - Str:" + (i+1) + ", band:" + getBand(testStrs[i]));}}}
配置文件,band.dic:
[ { "band": 5, "filter_list": [ { "filter": "chief" } ], "bypass_list": [ { "bypass": "team chief" }, { "bypass": "Colonel" } ] }, { "band": 10, "filter_list": [ { "filter": "director" }, { "filter_reg": "senior(.*?)manager(.*)" }, { "filter_reg": "senior(.*?)strategist(.*)" } ] }, { "band": 15, "filter_list": [ { "filter": "manager" }, { "filter": "strategist" }, { "filter": "Subject Matter Expert" }, { "filter": "Consultant" } ], "bypass_list": [ { "bypass_reg": "senior(.*?)manager(.*)" }, { "bypass_reg": "senior(.*?)strategist(.*)" }, { "bypass_reg": "Assistant(.*?)manager(.*)" } ] }, { "band": 20, "filter_list": [ { "filter": "instructor" }, { "filter": "Lead" }, { "filter": "Team Chief" }, { "filter": "Superintendent" }, { "filter": "Supervisor" }, { "filter": "Specialist" } ] }, { "band": 25, "filter_list": [ { "filter_reg": "Assistant(.*?)manager(.*)" }, { "filter_reg": "senior(.*)" } ], "bypass_list": [ { "bypass_reg": "senior(.*?)manager(.*)" }, { "bypass_reg": "senior(.*?)strategist(.*)" } ] }, { "band": 35, "filter_list": [ { "filter": "Administrative Assistant" }, { "filter": "Admin Assistant" }, { "filter": "Coordinator" }, { "filter": "Contractor" }, { "filter": "Internship" }, { "filter": "Intern" }, { "filter": "Student" }, { "filter": "Trainee" }, { "filter": "Security Guard" }, { "filter": "Part-Time" }, { "filter": "Volunteer" } ] }, { "band": 30, "filter_list": [ { "filter_reg": "(.*)" } ] }]
0 0
- 文字匹配解析例子(读取Json 格式配置文件)
- DOS批处理:读取配置文件,格式 与 程序,例子
- 读取五种格式的配置文件(xml(两种方式),txt,excel,csv,json)
- [Android] Json格式解析和文字图片传输
- go语言导入文件(固定长解析)(二) 增加读取json配置文件
- php读取web服务并解析json一例子
- JSON格式配置文件
- Unity读取 JSon配置文件
- Unity读取 JSon配置文件
- 读取Json配置文件问题
- java读取文件内容,解析Json格式数据
- Json特殊格式解析(动态解析)
- jquery读取json格式
- jquery读取json格式
- js读取json格式
- Tomcat源码解析(九):配置文件读取
- perl读取解析配置文件
- 配置文件读取和解析
- HeapAlloc与malloc的区别
- 虚析构函数
- HttpClient用POST上传文件
- linux 安装配置java环境
- Annotation自定义注解
- 文字匹配解析例子(读取Json 格式配置文件)
- TCP Incast学习之中遇到的问题
- 遗传算法解决TSP问题
- 聊天登陆之注册界面
- isodata算法确定k均值聚类的k值
- Objective-C中的instancetype和id区别
- 神秘的js执行顺序
- xshell配置
- NSIS脚本详解