regular expression for CEDICT


Published in: Java
Save to your folder(s)



Copy this code and paste it in your HTML
  1. //http://cc-cedict.org/wiki/start
  2.  
  3. Pattern line_pattern = Pattern.compile("([^\\s]+)\\s([^\\s]+)\\s(\\[.+\\])\\s(/.+/)");
  4.  
  5. Matcher matcher = line_pattern.matcher(line);
  6. boolean matchFound = matcher.find();
  7. while(matchFound) {
  8. System.out.println(matcher.start() + "-" + matcher.end());
  9. for(int i = 0; i <= matcher.groupCount(); i++) {
  10. String groupStr = matcher.group(i);
  11. System.out.println(i + ":" + groupStr);
  12. }
  13. if(matcher.end() + 1 <= line.length()) {
  14. matchFound = matcher.find(matcher.end());
  15. }
  16. else{
  17. break;
  18. }
  19. }
  20.  
  21. T字帳 T字帐 [T zi4 zhang4] /T-account (accounting)/
  22. 0-47
  23. 0:T字帳 T字帐 [T zi4 zhang4] /T-account (accounting)/
  24. 1:T字帳
  25. 2:T字帐
  26. 3:[T zi4 zhang4]
  27. 4:/T-account (accounting)/

Report this snippet


Comments

RSS Icon Subscribe to comments

You need to login to post a comment.