12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- import numpy
- import os
- import sys
- import csv
- import scipy.io
- import pickle
- import time
- import datetime
- import glob
- import helperFunctions
- ###
def readData(configFile):
    """Load the configured dataset and sanity-check it before returning.

    The dataset name is obtained through ``helperFunctions.getConfig`` using
    the ``'data'`` / ``'dataset'`` lookup; only ``'uci'`` is handled here, in
    which case loading is delegated to ``readUCIcsv``.

    Args:
        configFile: Configuration handle forwarded unchanged to
            ``helperFunctions.getConfig`` and ``readUCIcsv``.

    Returns:
        The ``(x, y)`` pair produced by the dataset reader.

    Raises:
        Exception: If the dataset name is not recognised, if ``x`` and ``y``
            differ in their first dimension, or if either contains a
            non-finite value.
    """
    dataset = helperFunctions.getConfig(configFile, 'data', 'dataset', None, 'str', True)

    # Guard clause: reject anything we do not know how to load.
    if dataset != 'uci':
        raise Exception('Unknown dataset %s!'%dataset)
    x, y = readUCIcsv(configFile)

    # Every sample needs exactly one label.
    numData = x.shape[0]
    numLabels = y.shape[0]
    if numData != numLabels:
        raise Exception('#data = {} != #labels = {}'.format(numData, numLabels))

    # NaN / inf anywhere in the features or the labels is treated as fatal.
    finiteChecks = (
        (x, 'not numpy.all(numpy.isfinite(x))'),
        (y, 'not numpy.all(numpy.isfinite(y))'),
    )
    for values, message in finiteChecks:
        if not numpy.all(numpy.isfinite(values)):
            raise Exception(message)

    return x, y
- ###
def _parseUCIRows(rows, labelCol, forbiddenCols, firstDataRowNumber):
    """Split already-tokenised CSV rows into feature rows and label values.

    Args:
        rows: Iterable of rows, each a sequence of cell strings (e.g. a
            ``csv.reader``).
        labelCol: Zero-based index of the column holding the label.
        forbiddenCols: Iterable of zero-based column indices to drop, or
            ``None`` to keep every column.
        firstDataRowNumber: Number of leading rows to skip (header lines).

    Returns:
        A ``(featureRows, labelValues)`` pair: a list of lists of floats and
        a list of floats, one entry per data row.

    Raises:
        Exception: If a data row provides no value for the label column.
        ValueError: If a retained cell cannot be converted to ``float``.
    """
    # Build the lookup once; membership is tested for every cell.
    skipCols = frozenset(forbiddenCols) if forbiddenCols is not None else frozenset()
    featureRows = []
    labelValues = []
    for rowIdx, row in enumerate(rows):
        if rowIdx < firstDataRowNumber:
            continue
        if not row:
            # csv.reader yields [] for blank lines; they carry no data.
            continue
        features = []
        label = None
        for colIdx, cell in enumerate(row):
            if colIdx in skipCols:
                continue
            if colIdx == labelCol:
                label = float(cell)
            else:
                features.append(float(cell))
        if label is None:
            # Without this check the previous row's label would be silently
            # reused (or a NameError raised on the first data row).
            raise Exception('No label found in column {} of row {} ({} columns)'.format(
                labelCol, rowIdx, len(row)))
        featureRows.append(features)
        labelValues.append(label)
    return featureRows, labelValues


def readUCIcsv(configFile):
    """Read a UCI-style CSV file into a feature matrix and a label column.

    All settings are fetched with ``helperFunctions.getConfig`` under the
    ``'data'`` lookup: ``dataFileName``, ``labelCol``, ``forbiddenCols``,
    ``delimiter`` (``','`` passed as fallback), ``quoteChar`` (``'|'``),
    ``firstDataRowNumber`` (``1``) and ``normalizeFeatures`` (``-1``).
    NOTE(review): the exact semantics of the trailing ``getConfig`` arguments
    are defined in ``helperFunctions`` and are not visible here.

    Args:
        configFile: Configuration handle forwarded to
            ``helperFunctions.getConfig``.

    Returns:
        ``(x, y)`` where ``x`` is an ``n x d`` ``numpy.matrix`` of features
        (after optional normalisation by ``helperFunctions``) and ``y`` is an
        ``n x 1`` ``numpy.matrix`` of labels. If the file contains no data
        rows, ``x`` and ``y`` are ``None`` before normalisation is applied.

    Raises:
        Exception: If a data row has no value in the label column.
        ValueError: If a cell cannot be parsed as ``float``, if rows have
            inconsistent lengths, or if ``normalizeFeatures`` is neither
            ``'uci'`` nor convertible to ``int``.
    """
    dataFileName = helperFunctions.getConfig(configFile, 'data', 'dataFileName', None, 'str', True)
    labelCol = helperFunctions.getConfig(configFile, 'data', 'labelCol', None, 'int', True)
    forbiddenCols = helperFunctions.getConfig(configFile, 'data', 'forbiddenCols', None, 'intList', True)
    delimiter = helperFunctions.getConfig(configFile, 'data', 'delimiter', ',', 'str', True)
    quoteChar = helperFunctions.getConfig(configFile, 'data', 'quoteChar', '|', 'str', True)
    firstDataRowNumber = helperFunctions.getConfig(configFile, 'data', 'firstDataRowNumber', 1, 'int', True)
    normalizeFeatures = helperFunctions.getConfig(configFile, 'data', 'normalizeFeatures', -1, 'str', True)

    # An empty delimiter setting stands for a single space.
    if delimiter == '':
        delimiter = ' '

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        openKwargs = {'mode': 'rb'}
    else:
        openKwargs = {'mode': 'r', 'newline': ''}

    # The context manager closes the file even when parsing fails.
    with open(dataFileName, **openKwargs) as csvFile:
        csvReader = csv.reader(csvFile, delimiter=delimiter, quotechar=quoteChar)
        featureRows, labelValues = _parseUCIRows(
            csvReader, labelCol, forbiddenCols, firstDataRowNumber)

    # Build both matrices in one go instead of re-allocating per row. The
    # builtin float replaces the numpy.float alias removed from NumPy.
    if featureRows:
        x = numpy.asmatrix(featureRows, dtype=float)
        y = numpy.asmatrix(labelValues, dtype=float).T
    else:
        x = None
        y = None

    # 'uci' selects the UCI normalisation; otherwise the setting is read as
    # an integer and only a positive value triggers normalizeLP.
    if normalizeFeatures == 'uci':
        x = helperFunctions.normalizeUCI(x)
    elif int(normalizeFeatures) > 0:
        x = helperFunctions.normalizeLP(x, int(normalizeFeatures))
    return x, y
|