文字匹配解析例子(读取 JSON 格式配置文件)

来源:互联网 发布:电视定时开关机软件 编辑:程序博客网 时间:2024/05/16 12:24

目标:做一个文字匹配解析例子(这里这个例子是通过职位 title 得到真正级别 band,从而获取上下级关系)。

功能:

1)JSON 格式配置文件读取;

2)通过配置文件灵活控制解析功能,如:filter(title 包含该子串即匹配)、bypass(title 包含该子串则跳过当前 band)、filter_reg(正则整串匹配即匹配)、bypass_reg(正则整串匹配则跳过当前 band)。

e.g. LangRecogUtils

public class LangRecogUtils {static final Logger logger = LoggerFactory.getLogger(LangRecogUtils.class);public static int getBand(String title) {final String PATH = "band.dic";final int DEFAULT_BAND = 0;// default: no bandint band = DEFAULT_BAND;String allLines = "";boolean continueScanFlag = true;boolean completeFlag = false;if (title == null || title.equalsIgnoreCase("NULL")) {return band;}String titleInLowerCase = title.toLowerCase();allLines = build(PATH);ObjectMapper mapper = new ObjectMapper();JsonNode rootNode;try {rootNode = mapper.readValue(allLines.getBytes(), 0, allLines.getBytes().length, JsonNode.class);if (rootNode == null) {return band;}Iterator<JsonNode> jsonItr_1 = rootNode.getElements();logger.debug("LangRecogUtils - rootNode Size: {}", rootNode.size());while (jsonItr_1.hasNext()) {JsonNode subNode = jsonItr_1.next();continueScanFlag = true;// get the band from the dictionaryint tempBand = DEFAULT_BAND;if (subNode.get("band") != null) {tempBand = subNode.get("band").getIntValue();logger.debug("LangRecogUtils - temp band: {}", tempBand);} else {logger.error("The \"band\" session is madatory.");}// get the bypass list from the dictionaryif (subNode.get("bypass_list") != null) {Iterator<JsonNode> jsonItr_3 = subNode.get("bypass_list").getElements();while (continueScanFlag && jsonItr_3.hasNext()) {JsonNode filterNode = jsonItr_3.next();if (filterNode.get("bypass") != null) {String bypass = filterNode.get("bypass").getTextValue();logger.debug("LangRecogUtils - bypass: {}", bypass);if (titleInLowerCase.contains(bypass)) {band = DEFAULT_BAND;continueScanFlag = false;// bypass coming scanning until next band session}}if (filterNode.get("bypass_reg") != null) {String bypassReg = filterNode.get("bypass_reg").getTextValue();logger.debug("LangRecogUtils - bypass reg: {}", bypassReg);Pattern bypassPattern = Pattern.compile(bypassReg);Matcher bypassMatcher = bypassPattern.matcher(titleInLowerCase);if (bypassMatcher.matches()) {band = 
DEFAULT_BAND;continueScanFlag = false;// bypass coming scanning until next band session}}}}// get the filter list from the dictionaryif (subNode.get("filter_list") != null) {Iterator<JsonNode> jsonItr_2 = subNode.get("filter_list").getElements();while (continueScanFlag && jsonItr_2.hasNext()) {JsonNode filterNode = jsonItr_2.next();if (filterNode.get("filter") != null) {String filter = filterNode.get("filter").getTextValue();logger.debug("LangRecogUtils - filter: {}", filter);if (titleInLowerCase.contains(filter)) {band = tempBand;completeFlag = true;}}if (filterNode.get("filter_reg") != null) {String filterReg = filterNode.get("filter_reg").getTextValue();logger.debug("LangRecogUtils - filter reg: {}", filterReg);Pattern filterPattern = Pattern.compile(filterReg);Matcher filterMatcher = filterPattern.matcher(titleInLowerCase);if (filterMatcher.matches()) {band = tempBand;completeFlag = true;}}}}if (completeFlag) {return band;}}} catch (JsonParseException e) {logger.error(e.getMessage(), e);} catch (JsonMappingException e) {logger.error(e.getMessage(), e);} catch (IOException e) {logger.error(e.getMessage(), e);}return band;}public static String build(String dictName) {        BufferedReader reader = null;        int i = 0;        String allLines = "";        try {            reader = new BufferedReader( new InputStreamReader(Util.getInputStream(dictName), "utf-8"));            String line = reader.readLine();            while (line != null && !line.trim().equals("")) {            i++;            allLines += line;            line = reader.readLine();            }            logger.debug("LangRecogUtils - total read lines: {}", i);        } catch (Exception e) {            logger.error(e.getMessage(), e);        } finally {            if (reader != null) {                try {                    reader.close();                } catch (Exception e) {                logger.error(e.getMessage(), e);                }            }        }        return 
allLines.toLowerCase();    }public static void main(String[] args) {//String testStr = "Human Resources Manager";//System.out.println("LangRecogUtils - band:" + getBand(testStr));String[] testStrs = {"Executive Officer & Chief of Staff for the CIO","Senior Program Manager-Public Key Infrastructure (PKI)","Deputy Director of Intelligence (Deployment)","Human Resources Manager","Overt Debriefing Team Chief","lead Security Contractor","Assistant Project Manager","Senior Watch Officer","Naval Attaché","Operations Officer","Executive Admin Assistant - E4","Engineer Intern"};/* * Expectation: * 5,10,10,15,20,20,25,25,30,30,35,35 */for (int i = 0; i < testStrs.length; i++) {System.out.println("LangRecogUtils - Str:" + (i+1) + ", band:" + getBand(testStrs[i]));}}}

配置文件,band.dic:

[    {        "band": 5,        "filter_list": [            {                "filter": "chief"            }        ],        "bypass_list": [            {                "bypass": "team chief"            },            {                "bypass": "Colonel"            }        ]    },    {        "band": 10,        "filter_list": [            {                "filter": "director"            },            {                "filter_reg": "senior(.*?)manager(.*)"            },            {                "filter_reg": "senior(.*?)strategist(.*)"            }        ]    },    {        "band": 15,        "filter_list": [            {                "filter": "manager"            },            {                "filter": "strategist"            },            {                "filter": "Subject Matter Expert"            },            {                "filter": "Consultant"            }        ],        "bypass_list": [            {                "bypass_reg": "senior(.*?)manager(.*)"            },            {                "bypass_reg": "senior(.*?)strategist(.*)"            },            {                "bypass_reg": "Assistant(.*?)manager(.*)"            }        ]    },    {        "band": 20,        "filter_list": [            {                "filter": "instructor"            },            {                "filter": "Lead"            },            {                "filter": "Team Chief"            },            {                "filter": "Superintendent"            },            {                "filter": "Supervisor"            },            {                "filter": "Specialist"            }        ]    },    {        "band": 25,        "filter_list": [            {                "filter_reg": "Assistant(.*?)manager(.*)"            },            {                "filter_reg": "senior(.*)"            }        ],        "bypass_list": [            {                "bypass_reg": "senior(.*?)manager(.*)"            },            {                "bypass_reg": 
"senior(.*?)strategist(.*)"            }        ]    },    {        "band": 35,        "filter_list": [            {                "filter": "Administrative Assistant"            },            {                "filter": "Admin Assistant"            },            {                "filter": "Coordinator"            },            {                "filter": "Contractor"            },            {                "filter": "Internship"            },            {                "filter": "Intern"            },            {                "filter": "Student"            },            {                "filter": "Trainee"            },            {                "filter": "Security Guard"            },            {                "filter": "Part-Time"            },            {                "filter": "Volunteer"            }        ]    },    {        "band": 30,        "filter_list": [            {                "filter_reg": "(.*)"            }        ]    }]



0 0
原创粉丝点击