/*
 * 710_tran.cpp
 *
 * Translates text710.txt into a Prolog lexicon.  The input dictionary is
 * taken from the Oxford Text Archive:
 *     ftp://ota.ox.ac.uk/pub/ota/public/dicts/
 *
 * The input is read as fixed-width records of RecSize bytes.  The first
 * WORD_LEN bytes of a record hold the blank-padded headword; one-letter
 * part-of-speech codes are read from offsets POS_FIRST, POS_FIRST +
 * POS_STRIDE, ... (below POS_END), stopping at the first blank code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define RecSize 129

enum {
    WORD_LEN   = 23, /* width of the headword field at the start of a record */
    POS_FIRST  = 46, /* offset of the first part-of-speech code */
    POS_END    = 70, /* part-of-speech codes are read below this offset */
    POS_STRIDE = 4   /* distance between consecutive part-of-speech codes */
};

/*
 * Maps a one-letter part-of-speech code ('G'..'Z') to the Prolog functor
 * used in the lexicon.  Functors starting with a comment opener mark
 * categories that are written to the output commented out.
 * Returns NULL for a code outside 'G'..'Z'.
 */
static const char *pos_functor(char code)
{
    switch (code) {
    case 'G': return "v";                 /* anomalous verb */
    case 'H': return "v_tr";
    case 'I': return "v_in";
    case 'J': return "v";                 /* transitive & intransitive verb */
    case 'K': return "n";                 /* countable noun */
    case 'L': return "n";                 /* uncountable noun */
    case 'M': return "n";                 /* both countable & uncountable */
    case 'N': return "n";                 /* proper noun */
    case 'O': return "adj";
    case 'P': return "/*adv";
    case 'Q': return "prn";               /* pronoun */
    case 'R': return "art_d";             /* definite article */
    case 'S': return "art_i";             /* indefinite article */
    case 'T': return "prep";
    case 'U': return "/*prefix";
    case 'V': return "conj";              /* conjunction */
    case 'W': return "/*intj";            /* interjection */
    case 'X': return "/*particle";
    case 'Y': return "/*abbreviation";
    case 'Z': return "/*not classified";
    default:  return NULL;
    }
}

/*
 * Writes one lexicon line for word W with part-of-speech code POS_M.
 *
 *   out        - stream the Prolog text is written to
 *   W          - the word, already formatted as the contents of a Prolog list
 *   POS_M      - one-letter part-of-speech code from the dictionary record
 *   ParseError - when true the whole entry is written inside a comment,
 *                prefixed with PARSEERROR:
 *
 * Categories whose functor begins with a comment opener are closed with a
 * comment terminator so the entry stays commented out.  An unrecognised
 * code is written with the functor "error", followed by a comment line
 * naming the offending code.
 * NOTE(review): for an unrecognised code with ParseError false, the
 * "error([...])." line is emitted as a live (uncommented) fact.
 */
void WritePos(FILE *out, const char *W, char POS_M, bool ParseError)
{
    const char *functor = pos_functor(POS_M);
    bool unknown_code = (functor == NULL);

    if (unknown_code) {
        functor = "error";
    }

    if (ParseError) {
        fprintf(out, "/*PARSEERROR:%s([%s]).*/\n", functor, W);
    } else {
        const char *closer = (functor[0] == '/') ? "*/" : "";
        fprintf(out, "%s([%s]).%s\n", functor, W, closer);
    }

    if (unknown_code) {
        fprintf(out, "/*The invalid part-of-speech code for %s is %c*/\n",
                W, POS_M);
    }
}

/*
 * Trims trailing blanks from a fixed-width field, in place.
 *
 * Scans str[length] down to str[0] for the last byte that is neither a
 * space nor NUL and writes a terminator right after it.  If the field holds
 * nothing but spaces/NULs the result is the empty string (the original code
 * left such a field untouched, i.e. still full of blanks).
 *
 * The buffer must be large enough for the terminator: if str[length] itself
 * is non-blank the terminator is written to str[length + 1].
 */
void terminatestr(char *str, int length)
{
    int i;

    for (i = length; i >= 0; i--) {
        if (str[i] != ' ' && str[i] != 0) {
            str[i + 1] = 0;
            return;
        }
    }
    if (length >= 0) {
        str[0] = 0; /* nothing but blanks: make the string empty */
    }
}

/*
 * Translates one dictionary record into zero or more lexicon lines.
 * line must hold RecSize bytes.
 */
static void translate_record(FILE *out, const char *line)
{
    char word[WORD_LEN + 1];
    bool problem_word = false; /* set when the word contains a character we cannot emit */
    size_t len, j;
    int pos;

    /* strncpy is intentional: the source is a fixed-width field that is not
       NUL-terminated; the terminator is supplied explicitly below. */
    strncpy(word, line, WORD_LEN);
    word[WORD_LEN] = '\0';
    terminatestr(word, WORD_LEN);

    if (word[0] == '\0') {
        return; /* blank headword field: nothing sensible to emit */
    }

    len = strlen(word);
    for (j = 0; j < len; j++) {
        char t = word[j];

        if (t >= 'A' && t <= 'Z') {
            word[j] = (char)(t | 0x20); /* ASCII lowercase */
        }
        if (t == '\'' || t == '`' || t == '"' || t == '.' || t == '~') {
            problem_word = true;
        }
        if (t == ' ') {
            word[j] = ','; /* multi-word entries become a list of words */
        }
    }

    for (pos = POS_FIRST; pos < POS_END; pos += POS_STRIDE) {
        if (line[pos] == ' ') {
            break; /* a blank code ends the list for this record */
        }
        WritePos(out, word, line[pos], problem_word);
    }
}

/*
 * Usage: 710_tran inputfile outputfile
 *
 * Reads the dictionary file record by record and writes the Prolog lexicon.
 * Returns 0 on success (and after printing the usage text), 1 on failure.
 */
int main(int argc, char **argv)
{
    FILE *infile, *outfile;
    char line[RecSize];
    int status = EXIT_SUCCESS;

    if (argc != 3) {
        printf("\tusage: %s inputfile outputfile\n",
               (argc > 0 && argv[0] != NULL) ? argv[0] : "710_tran");
        printf("\tThis program translates the COMPUTER-USABLE DICTIONARY FILE BASED ON\n"
               "THE OXFORD ADVANCED LEARNER'S DICTIONARY OF CURRENT ENGLISH into a Prolog lexicon\n");
        return EXIT_SUCCESS;
    }

    if ((infile = fopen(argv[1], "r")) == NULL) {
        fprintf(stderr, "invalid input file\n");
        return EXIT_FAILURE;
    }
    if ((outfile = fopen(argv[2], "w")) == NULL) {
        fprintf(stderr, "invalid output file\n");
        fclose(infile);
        return EXIT_FAILURE;
    }

    for (;;) {
        size_t got = fread(line, 1, sizeof line, infile);

        if (got == 0) {
            break; /* end of file or read error */
        }
        if (got < sizeof line) {
            /* Short final record: blank-pad it so no bytes left over from
               the previous record are interpreted. */
            memset(line + got, ' ', sizeof line - got);
        }
        translate_record(outfile, line);
    }

    if (ferror(infile)) {
        fprintf(stderr, "error reading input file\n");
        status = EXIT_FAILURE;
    }
    fclose(infile);

    if (ferror(outfile)) {
        fprintf(stderr, "error writing output file\n");
        status = EXIT_FAILURE;
    }
    if (fclose(outfile) != 0) {
        fprintf(stderr, "error closing output file\n");
        status = EXIT_FAILURE;
    }

    return status;
}