; docformat = 'idldoc'

;+
; Parses a file given a pattern of delimiters and produces tokens one at a
; time.
;-


;+
; Restores the tokenizer to the state/location it was in when the given
; memento was produced.
;
; @param memento {in}{required}{type=structure} memento produced by the
;        savePos method
;-
pro mgfftokenizer::restorePos, memento
  compile_opt strictarr

  self.lineNumber = memento.lineNumber
  *self.tokens = memento.tokens
  *self.tokenLength = memento.tokenLength
  self.tokenCounter = memento.tokenCounter
  self.line = memento.line
end


;+
; Saves the current state/location of the tokenizer in a memento structure.
; The memento holds copies of the current line, its token start/length
; arrays, the line number and the token counter.
;
; NOTE(review): the token arrays are dereferenced here, so this presumably
; requires that at least one line has already been tokenized (i.e. `done` or
; `next` has been called) -- confirm against callers.
;
; @returns structure
;-
function mgfftokenizer::savePos
  compile_opt strictarr

  memento = { lineNumber   : self.lineNumber, $
              tokens       : *self.tokens, $
              tokenLength  : *self.tokenLength, $
              tokenCounter : self.tokenCounter, $
              line         : self.line $
            }

  return, memento
end


;+
; Returns the current line of the tokenized file.
;
; @returns string
; @keyword number {out}{optional}{type=long} line number of returned line
;          (1-based, i.e. the internal 0-based line index plus one)
;-
function mgfftokenizer::getCurrentLine, number=number
  compile_opt strictarr

  number = self.lineNumber + 1L
  return, self.line
end


;+
; Returns the next token of the file. When there are no more tokens, an empty
; string is returned and all output keywords are set to their "empty" values.
;
; @returns string
; @keyword pre_delim {out}{optional}{type=string} delimiter before the
;          returned token
; @keyword post_delim {out}{optional}{type=string} delimiter after the
;          returned token
; @keyword newline {out}{optional}{type=boolean} true if token is first on a
;          new line (never true for the first line of the file)
;-
function mgfftokenizer::next, pre_delim=pre_delim, post_delim=post_delim, $
                              newline=newline
  compile_opt strictarr

  ; done() also loads and tokenizes the next line when the current one is
  ; exhausted, so it must be called before touching the token arrays
  if (self->done()) then begin
    pre_delim = ''
    post_delim = ''
    newline = 0B
    return, ''
  endif

  token_start = (*self.tokens)[self.tokenCounter]
  token_length = (*self.tokenLength)[self.tokenCounter]
  token = strmid(self.line, token_start, token_length)

  newline = self.tokenCounter eq 0 and self.lineNumber gt 0

  if (arg_present(pre_delim)) then begin
    if (self.tokenCounter eq 0) then begin
      ; first token on the line: delimiter is whatever precedes it (if
      ; anything)
      pre_delim = ''
      if ((*self.tokens)[0] ne 0) then begin
        pre_delim = strmid(self.line, 0, (*self.tokens)[0])
      endif
    endif else begin
      ; text between the end of the previous token and the start of this one
      delim_start = (*self.tokens)[self.tokenCounter - 1L] $
                      + (*self.tokenLength)[self.tokenCounter - 1L]
      delim_length = (*self.tokens)[self.tokenCounter] - delim_start
      pre_delim = strmid(self.line, delim_start, delim_length)
    endelse
  endif

  if (arg_present(post_delim)) then begin
    ; the post-delimiter always starts right after the current token
    delim_start = token_start + token_length

    if (self.tokenCounter eq n_elements(*self.tokens) - 1) then begin
      ; last token on the line: everything remaining on the line is the
      ; delimiter; compare against the full length so that a single trailing
      ; delimiter character is not dropped
      post_delim = ''
      if (delim_start lt strlen(self.line)) then begin
        post_delim = strmid(self.line, delim_start)
      endif
    endif else begin
      delim_length = (*self.tokens)[self.tokenCounter + 1L] - delim_start
      post_delim = strmid(self.line, delim_start, delim_length)
    endelse
  endif

  ++self.tokenCounter

  return, token
end


;+
; Returns whether there are any more tokens in the file. Parses a new line of
; the file if necessary.
;
; @returns 1B if no more tokens or 0B otherwise
;-
function mgfftokenizer::done
  compile_opt strictarr

  ; already have more tokens in hand, so not done
  if (self.tokenCounter lt n_elements(*self.tokens)) then return, 0B

  ; handle: EOF, no tokens
  if (self.lineNumber ge self.nlines - 1L) then return, 1B

  ; advance to the next line of the file (no blank-line skipping is done
  ; here; every line is handed to STRSPLIT)
  self.line = (*self.data)[++self.lineNumber]

  ; new tokens
  *self.tokens = strsplit(self.line, self.pattern, /regex, length=len)
  *self.tokenLength = len
  self.tokenCounter = 0L

  return, 0B
end


;+
; Resets the tokenizer to the beginning of the tokenized file. The first line
; is loaded and tokenized immediately.
;-
pro mgfftokenizer::reset
  compile_opt strictarr

  ptr_free, self.tokens, self.tokenLength

  self.lineNumber = -1L
  self.tokenCounter = 0L
  self.tokens = ptr_new(/allocate_heap)
  self.tokenLength = ptr_new(/allocate_heap)

  ; called for its side effect of loading the first line; result is unused
  check = self->done()
end


;+
; Frees resources.
;-
pro mgfftokenizer::cleanup
  compile_opt strictarr

  ptr_free, self.tokens, self.tokenLength, self.data
end


;+
; Creates a tokenizer for a given file with a given pattern. Creating the
; tokenizer reads the entire file into memory; the file is closed again
; before this method returns.
;
; @returns 1 if successful, 0 otherwise
; @param filename {in}{required}{type=string} filename of the file to be
;        tokenized
; @keyword pattern {in}{optional}{type=string}{default=space} regular
;          expression (as in STRSPLIT) to split the text of the file into
;          tokens
;-
function mgfftokenizer::init, filename, pattern=pattern
  compile_opt strictarr
  on_error, 2

  if (n_params() ne 1) then message, 'filename parameter required'

  self.pattern = n_elements(pattern) eq 0 ? '[[:space:]]' : pattern

  file_present = file_test(filename)
  if (~file_present) then message, 'file not found: ' + filename

  ; read the entire file
  self.nlines = file_lines(filename)
  if (self.nlines gt 0L) then begin
    data = strarr(self.nlines)
    openr, lun, filename, /get_lun
    readf, lun, data
    free_lun, lun
    self.data = ptr_new(data)
  endif else begin
    ; empty file: there is nothing to read (and a zero-length string array
    ; cannot be created); done() returns 1B before ever dereferencing data
    self.data = ptr_new(/allocate_heap)
  endelse

  self.tokens = ptr_new(/allocate_heap)
  self.tokenLength = ptr_new(/allocate_heap)

  self.tokenCounter = 0L
  self.lineNumber = -1L

  return, 1
end


;+
; Define instance variables.
;
; @field data contents of file to be tokenized
; @field pattern regular expression to split lines on
; @field lineNumber indicates the line number in the file of line (starts at
;        0)
; @field nlines number of lines in file to be tokenized
; @field line current line read by tokenizer
; @field tokens pointer to long array which indicates the beginnings of the
;        tokens in line
; @field tokenLength pointer to long array which indicates the length of the
;        tokens in line
; @field tokenCounter index of the next token in tokens and tokenLength
;
; @requires IDL 6.0
;
; @categories input/output
;
; @author Michael Galloy
;-
pro mgfftokenizer__define
  compile_opt strictarr

  define = { MGffTokenizer, $
             data: ptr_new(), $
             pattern: '', $
             lineNumber: 0L, $
             nlines: 0L, $
             line: '', $
             tokens: ptr_new(), $
             tokenLength: ptr_new(), $
             tokenCounter: 0L $
           }
end