1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- import os
- import sys
-
class readsample:
    """Sequential reader for text files holding one numeric record per line.

    The file is opened on construction, ``skip`` header lines are discarded,
    and each later call to :meth:`next` parses one more line.  Records are
    returned either as a list of floats or, when ``hash_keys`` is given, as
    a dict mapping each key to the float found at the same position.
    """

    def __init__(self, file, hash_keys=None, delimeter=None, skip=2):
        """Open ``file`` for reading and skip its header.

        file      -- path of the file to read.
        hash_keys -- optional sequence of field names; when truthy, records
                     are returned as dicts keyed by these names.
        delimeter -- field separator handed to ``str.split``; ``None`` means
                     split on runs of whitespace.
        skip      -- number of header lines to discard before the data.
        """
        self.fd = open(file, "r")
        self.delimeter = delimeter
        self.hash_keys = hash_keys
        try:
            self.read_head(skip)
        except Exception:
            # Do not leak the descriptor when the header cannot be skipped.
            self.fd.close()
            raise

    def next(self):
        """Read one line and return it as a parsed record.

        Returns ``None`` at end of file.  Otherwise returns a dict when
        ``hash_keys`` was supplied, or a list of floats when it was not.
        ``float`` raises ValueError for any field that is not numeric.
        """
        line = self.fd.readline()
        if not line:
            return None

        # Use the separator stored on the instance: a bare ``delimeter``
        # is not defined in this scope and raised NameError.
        if self.delimeter is not None:
            fields = line.split(self.delimeter)
        else:
            fields = line.split()

        if self.hash_keys:
            return self.createHash(fields, self.hash_keys)
        return [float(field) for field in fields]

    def readAll(self):
        """Read the remaining records and return them as a list.

        Reading stops at the first falsy record.  That is end of file
        (``None``), but also a line with no fields when splitting on
        whitespace, because such a line parses to an empty list.
        """
        sample_list = []
        sample = self.next()
        while sample:
            sample_list.append(sample)
            sample = self.next()
        return sample_list

    def read_head(self, skip=2):
        """Discard the file header by reading ``skip`` lines.

        Files with a different header layout need this method adapted;
        most of the time changing the number of skipped lines is enough.
        """
        for _ in range(skip):
            self.fd.readline()

    def createHash(self, s_line, hash_keys):
        """Build a dict from an already split record.

        Pairs ``hash_keys[i]`` with ``float(s_line[i])``.  Fields beyond
        ``len(hash_keys)`` are ignored; a record with fewer fields than
        keys raises IndexError.
        """
        hash_res = {}
        for position, key in enumerate(hash_keys):
            hash_res[key] = float(s_line[position])
        return hash_res

    def close(self):
        """Close the underlying file."""
        self.fd.close()
-
-
def main():
    """Exercise the library from the command line.

    ``sys.argv[1]`` is read record by record as dicts keyed by "peak" and
    "intensity", using the default two-line header skip.  ``sys.argv[2]``
    is read in one go as lists of floats with no header skipped.  Raises
    IndexError when either argument is missing.
    """
    samples = readsample(sys.argv[1], ["peak", "intensity"])
    try:
        value = samples.next()
        while value:
            # Single-argument print(...) prints the same on Python 2 and 3.
            print(value)
            value = samples.next()
    finally:
        # Release the file even when a record fails to parse.
        samples.close()

    samples = readsample(sys.argv[2], None, None, 0)
    try:
        print(samples.readAll())
    finally:
        # The second reader was previously never closed.
        samples.close()
-
# Run the self-test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|