본문 바로가기
개발/IR_ML_NLP

아리랑 형태소 분석기에서 형태소 정보 추출

by 로그인시러 2016. 8. 2.

형태소 분석 결과 클래스 AnalysisOutput에서 각 형태소의 stem과 pos를 가져오기



 
	/**
	 * A single morpheme: a string ({@code stem}) paired with a one-character tag ({@code pos}).
	 *
	 * <p>Instances are plain mutable holders. Use {@link #valueOf(AnalysisOutput)} to expand one
	 * {@code AnalysisOutput} into a list of these.
	 */
	public static class _MORPH {
		private String stem;
		private char pos;

		/** Creates a morpheme with both fields left at their Java default values. */
		public _MORPH() {
		}

		/**
		 * Creates a morpheme.
		 *
		 * @param stem the morpheme string
		 * @param pos  the one-character tag for {@code stem}
		 */
		public _MORPH(String stem, char pos) {
			this.stem = stem;
			this.pos = pos;
		}

		/** @return the morpheme string */
		public String getStem() {
			return stem;
		}

		/** @param stem the morpheme string */
		public void setStem(String stem) {
			this.stem = stem;
		}

		/** @return the one-character tag */
		public char getPos() {
			return pos;
		}

		/** @param pos the one-character tag */
		public void setPos(char pos) {
			this.pos = pos;
		}

		@Override
		public String toString() {
			return "_MORPH [stem=" + stem + ", pos=" + pos + "]";
		}

		/**
		 * Expands one analysis result into its morphemes, in the order the branches below append them.
		 *
		 * <p>The list always begins with {@code o.getStem()} tagged with {@code o.getPos()}, followed by
		 * {@code o.getNsfx()} tagged {@code POS_SFX_N} when it is non-null. What follows depends on
		 * {@code o.getPatn()}; a pattern that matches none of the branches contributes nothing further.
		 *
		 * @param o the analysis result to expand; must not be {@code null}
		 * @return a newly created, mutable list holding at least one morpheme
		 */
		public static List<_MORPH> valueOf(AnalysisOutput o) {
			List<_MORPH> morphs = new ArrayList<_MORPH>();
			morphs.add(new _MORPH(o.getStem(), o.getPos()));

			if (o.getNsfx() != null) {
				morphs.add(new _MORPH(o.getNsfx(), PatternConstants.POS_SFX_N));
			}

			// Read the pattern once; every branch below compares against the same value.
			final int patn = o.getPatn();

			if (patn == PatternConstants.PTN_NJ || patn == PatternConstants.PTN_ADVJ) {
				// josa
				morphs.add(new _MORPH(o.getJosa(), PatternConstants.POS_JOSA));
			} else if (patn == PatternConstants.PTN_NSM) {
				// vsfx, [pomi], eomi
				morphs.add(new _MORPH(o.getVsfx(), PatternConstants.POS_SFX_V));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getEomi(), PatternConstants.POS_EOMI));
			} else if (patn == PatternConstants.PTN_NSMJ) {
				// vsfx, [pomi], elist(0), josa
				morphs.add(new _MORPH(o.getVsfx(), PatternConstants.POS_SFX_V));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_NEOMI));
				morphs.add(new _MORPH(o.getJosa(), PatternConstants.POS_JOSA));
			} else if (patn == PatternConstants.PTN_NSMXM) {
				// vsfx, elist(0), xverb, [pomi], eomi
				morphs.add(new _MORPH(o.getVsfx(), PatternConstants.POS_SFX_V));
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_COPULA));
				morphs.add(new _MORPH(o.getXverb(), PatternConstants.POS_XVERB));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getEomi(), PatternConstants.POS_EOMI));
			} else if (patn == PatternConstants.PTN_NJCM) {
				// josa, elist(0), [pomi], eomi
				morphs.add(new _MORPH(o.getJosa(), PatternConstants.POS_JOSA));
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_SFX_V));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getEomi(), PatternConstants.POS_EOMI));
			} else if (patn == PatternConstants.PTN_NSMXMJ) {
				// vsfx, elist(1), xverb, [pomi], elist(0), josa
				morphs.add(new _MORPH(o.getVsfx(), PatternConstants.POS_SFX_V));
				morphs.add(new _MORPH(o.getElist().get(1), PatternConstants.POS_COPULA));
				morphs.add(new _MORPH(o.getXverb(), PatternConstants.POS_XVERB));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_NEOMI));
				morphs.add(new _MORPH(o.getJosa(), PatternConstants.POS_JOSA));
			} else if (patn == PatternConstants.PTN_VM) {
				// [pomi], eomi
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getEomi(), PatternConstants.POS_EOMI));
			} else if (patn == PatternConstants.PTN_VMJ) {
				// [pomi], elist(0), josa
				// pomi used to be dropped in this branch only; it is now emitted in the same
				// position as in the PTN_NSMJ branch so a non-null pomi is no longer lost.
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_NEOMI));
				morphs.add(new _MORPH(o.getJosa(), PatternConstants.POS_JOSA));
			} else if (patn == PatternConstants.PTN_VMCM) {
				// elist(0), elist(1), [pomi], eomi
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_NEOMI));
				morphs.add(new _MORPH(o.getElist().get(1), PatternConstants.POS_SFX_N));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getEomi(), PatternConstants.POS_EOMI));
			} else if (patn == PatternConstants.PTN_VMXM) {
				// elist(0), xverb, [pomi], eomi
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_COPULA));
				morphs.add(new _MORPH(o.getXverb(), PatternConstants.POS_XVERB));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getEomi(), PatternConstants.POS_EOMI));
			} else if (patn == PatternConstants.PTN_VMXMJ) {
				// elist(1), xverb, [pomi], elist(0), josa
				morphs.add(new _MORPH(o.getElist().get(1), PatternConstants.POS_COPULA));
				morphs.add(new _MORPH(o.getXverb(), PatternConstants.POS_XVERB));
				addPomi(morphs, o);
				morphs.add(new _MORPH(o.getElist().get(0), PatternConstants.POS_NEOMI));
				morphs.add(new _MORPH(o.getJosa(), PatternConstants.POS_JOSA));
			}

			return morphs;
		}

		/** Appends {@code o.getPomi()} tagged {@code POS_PEOMI} to {@code morphs} when it is non-null. */
		private static void addPomi(List<_MORPH> morphs, AnalysisOutput o) {
			String pomi = o.getPomi();
			if (pomi != null) {
				morphs.add(new _MORPH(pomi, PatternConstants.POS_PEOMI));
			}
		}
	}


'개발 > IR_ML_NLP' 카테고리의 다른 글

spyder 사용법  (0) 2017.01.08
검색개론  (0) 2016.12.13
elasticsearch vs solr  (0) 2016.12.02
Penn treebank Tagset  (0) 2016.07.28
elasticsearch 2.3.3 bulk index 예제  (0) 2016.06.14

댓글