data_io.py - For pandas >= 10.1 this will trigger the colum…

/data_io.py

https://github.com/benhamner/JobSalaryPrediction · Python · 46 lines · 37 code · 8 blank · 1 comment · 2 complexity · dea7861b6509badf0524c386244c4577 MD5 · raw file


import csv
import json
import numpy as np
import os
import pandas as pd
import pickle

def get_paths():
    """Load the path settings from ``Settings.json``.

    The file is read from the current working directory and must contain a
    JSON object. Every value is passed through ``os.path.expandvars`` so
    entries may reference environment variables (e.g. ``$DATA_DIR/x.csv``);
    references to undefined variables are left unchanged.

    Returns:
        A dict mapping each setting name to its expanded value.
    """
    # The context manager closes the settings file deterministically
    # instead of leaving the handle to the garbage collector.
    with open("Settings.json") as settings_file:
        paths = json.load(settings_file)
    return {key: os.path.expandvars(value) for key, value in paths.items()}

def identity(x):
    """Return the argument unchanged (a pass-through function)."""
    return x

# Each free-text column is mapped to the pass-through `identity` converter.
# Per the original note, for pandas >= 10.1 (presumably 0.10.1 -- TODO
# confirm) this triggers the columns to be parsed as strings.
converters = {
    column: identity
    for column in (
        "FullDescription",
        "Title",
        "LocationRaw",
        "LocationNormalized",
    )
}

def get_train_df():
    """Read the training CSV into a pandas DataFrame.

    The file location is the "train_data_path" entry returned by
    ``get_paths()``; the module-level ``converters`` mapping is handed to
    the CSV parser.
    """
    settings = get_paths()
    return pd.read_csv(settings["train_data_path"], converters=converters)

def get_valid_df():
    """Read the validation CSV into a pandas DataFrame.

    The file location is the "valid_data_path" entry returned by
    ``get_paths()``; the module-level ``converters`` mapping is handed to
    the CSV parser.
    """
    settings = get_paths()
    return pd.read_csv(settings["valid_data_path"], converters=converters)

def save_model(model):
    """Pickle ``model`` to the file named by the "model_path" setting.

    Args:
        model: Any picklable object.

    Any existing file at that path is overwritten.
    """
    out_path = get_paths()["model_path"]
    # Pickle data is a byte stream: binary mode is required on Python 3 and
    # prevents newline translation on Windows. The context manager
    # guarantees the data is flushed and the handle closed.
    with open(out_path, "wb") as model_file:
        pickle.dump(model, model_file)

def load_model():
    """Unpickle and return the object stored at the "model_path" setting.

    NOTE: unpickling can execute arbitrary code, so only load model files
    that come from a trusted source.
    """
    in_path = get_paths()["model_path"]
    # Pickle data must be read as bytes: text mode fails on Python 3 and is
    # subject to newline translation on Windows.
    with open(in_path, "rb") as model_file:
        return pickle.load(model_file)

def write_submission(predictions):
    """Write the predictions as a two-column CSV submission file.

    The output goes to the "prediction_path" setting. It has the header
    ``Id,SalaryNormalized`` followed by one row per pair of validation
    ``Id`` and flattened prediction value.

    Args:
        predictions: Object providing ``flatten()`` (presumably a numpy
            array with one prediction per validation row, in the same
            order as the validation file -- verify against the caller).
    """
    prediction_path = get_paths()["prediction_path"]
    # Build the rows before opening the output, so a failure while reading
    # the validation data or flattening the predictions cannot leave a
    # truncated submission file behind.
    valid = get_valid_df()
    # NOTE: zip stops at the shorter input, so a length mismatch between
    # the ids and the predictions silently drops the extra entries.
    rows = zip(valid["Id"], predictions.flatten())
    # The context manager flushes and closes the file once all rows are
    # written.
    with open(prediction_path, "w") as submission_file:
        writer = csv.writer(submission_file, lineterminator="\n")
        writer.writerow(("Id", "SalaryNormalized"))
        writer.writerows(rows)

Tech Fingerprint

Alerts (11)

'def' Ensure functions have docstrings for documentation
8 14 24 28 32 36 40
'open(' Use 'with open()' to ensure Files are properly closed
9 34 38 42