/* pos_tran.cpp
 *
 * Translate the Moby part-of-speech list into a Prolog lexicon.
 *
 *   data: ftp://ftp.dcs.shef.ac.uk/share/ilash/Moby/moby.tar.Z
 *   info: http://www.clres.com/dict.html
 *
 * Input records have the form  <word> 0xD7 <pos-codes> CR.  For every
 * part-of-speech code of a record one Prolog fact  pos([word]).  is written.
 * The source is kept valid as both C (C99 or later) and C++.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

enum {
    WORD_CAPACITY = 256,  /* size of the word buffer, terminator included   */
    FIELD_DELIM   = 0xD7, /* byte separating the word from its POS codes    */
    RECORD_END    = 13    /* carriage return: terminates one input record   */
};

/*
 * Map a Moby part-of-speech code to the Prolog predicate name used in the
 * lexicon.  Returns NULL when the code is not one of the known codes.
 */
static const char *PosPredicate(char code)
{
    switch (code) {
    case 'N': return "n";
    case 'p': return "n_pl";
    case 'h': return "np";
    case 'V': return "v";
    case 't': return "v_tr";
    case 'i': return "v_in";
    case 'A': return "adj";
    case 'v': return "adv";
    case 'C': return "conj";   /* conjunction */
    case 'P': return "prep";
    case '!': return "intj";   /* interjection */
    case 'r': return "prn";    /* pronoun */
    case 'D': return "art_d";  /* definite article */
    case 'I': return "art_i";  /* indefinite article */
    case 'o': return "nom";    /* nominative */
    default:  return NULL;
    }
}

/*
 * Write one lexicon entry for word W with Moby part-of-speech code POS_M.
 *
 *   out        - stream the Prolog text is written to
 *   W          - the word, already formatted as a Prolog list body
 *   POS_M      - single-character Moby part-of-speech code
 *   ParseError - when true the fact is emitted inside a Prolog comment so
 *                that it is visible in the output but not loaded
 *
 * An unknown code is written with the predicate name "error", followed by
 * a comment line that reports the offending code.
 */
void WritePos(FILE* out, char * W, char POS_M, bool ParseError)
{
    const char *known = PosPredicate(POS_M);
    const char *predicate = (known != NULL) ? known : "error";

    if (!ParseError) {
        fprintf(out, "%s([%s]).\n", predicate, W);
    } else {
        fprintf(out, "/*PARSEERROR:%s([%s]).*/\n", predicate, W);
    }

    if (known == NULL) {
        fprintf(out, "/*The invalid part-of-speech code for %s is %c*/\n", W, POS_M);
    }
}

/*
 * Usage: pos_tran inputfile outputfile
 *
 * Reads the Moby part-of-speech file byte by byte and writes the Prolog
 * lexicon.  Returns 0 on success (and after printing the usage text) and
 * 1 when a file cannot be opened or writing the output fails.
 */
int main(int argc, char **argv)
{
    FILE *infile;
    FILE *outfile;
    char word[WORD_CAPACITY];
    size_t length = 0;        /* characters currently stored in word */
    bool in_word = true;      /* in the text of the word vs. in the POS codes */
    bool problem_word = false;/* odd character seen -- entry is commented out */
    int c;                    /* int, not char, so EOF is distinguishable */
    int status = 0;

    if (argc != 3) {
        printf("\tusage: %s inputfile outputfile\n", argv[0]);
        printf("\tThis program translates the moby part-of-speech file into a Prolog lexicon\n");
        return 0;
    }

    /* Binary mode: the parser relies on the raw CR and 0xD7 bytes. */
    infile = fopen(argv[1], "rb");
    if (infile == NULL) {
        printf("invalid input file\n");
        return 1;
    }
    outfile = fopen(argv[2], "w");
    if (outfile == NULL) {
        printf("invalid output file\n");
        fclose(infile);
        return 1;
    }

    word[0] = '\0';
    while ((c = fgetc(infile)) != EOF) {
        if (c == RECORD_END) {
            /* End of record: start collecting the next word. */
            length = 0;
            word[0] = '\0';
            in_word = true;
            problem_word = false;
            continue;
        }
        if (c == FIELD_DELIM) {
            in_word = false;
            continue;
        }
        if (!in_word) {
            /* Every byte after the delimiter is one part-of-speech code. */
            WritePos(outfile, word, (char)c, problem_word);
            continue;
        }

        /* Word text: fold ASCII upper case to lower case. */
        if (c >= 'A' && c <= 'Z') {
            c |= 0x20;
        }
        /* Apostrophes, control characters and non-ASCII bytes would not
         * form a valid Prolog atom, so the whole word is flagged. */
        if (c == '\'' || c < ' ' || c > 0x7F) {
            problem_word = true;
        }
        if (c == '\0') {
            continue; /* a NUL cannot be stored in a C string */
        }
        if (length + 1 < WORD_CAPACITY) {
            /* Blanks inside a phrase separate the elements of the list. */
            word[length++] = (c == ' ') ? ',' : (char)c;
            word[length] = '\0';
        } else {
            /* Word does not fit: keep the truncated text but flag it. */
            problem_word = true;
        }
    }

    if (ferror(infile) || ferror(outfile)) {
        status = 1;
    }
    fclose(infile);
    if (fclose(outfile) != 0) {
        status = 1;
    }
    if (status != 0) {
        fprintf(stderr, "I/O error while translating\n");
    }
    return status;
}