PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/data_io.py

https://github.com/benhamner/JobSalaryPrediction
Python | 46 lines | 37 code | 8 blank | 1 comment | 2 complexity | dea7861b6509badf0524c386244c4577 MD5 | raw file
Possible License(s): BSD-2-Clause
  1. import csv
  2. import json
  3. import numpy as np
  4. import os
  5. import pandas as pd
  6. import pickle
  7. def get_paths():
  8. paths = json.loads(open("Settings.json").read())
  9. for key in paths:
  10. paths[key] = os.path.expandvars(paths[key])
  11. return paths
  12. def identity(x):
  13. return x
  14. # For pandas >= 10.1 this will trigger the columns to be parsed as strings
  15. converters = { "FullDescription" : identity
  16. , "Title": identity
  17. , "LocationRaw": identity
  18. , "LocationNormalized": identity
  19. }
  20. def get_train_df():
  21. train_path = get_paths()["train_data_path"]
  22. return pd.read_csv(train_path, converters=converters)
  23. def get_valid_df():
  24. valid_path = get_paths()["valid_data_path"]
  25. return pd.read_csv(valid_path, converters=converters)
  26. def save_model(model):
  27. out_path = get_paths()["model_path"]
  28. pickle.dump(model, open(out_path, "w"))
  29. def load_model():
  30. in_path = get_paths()["model_path"]
  31. return pickle.load(open(in_path))
  32. def write_submission(predictions):
  33. prediction_path = get_paths()["prediction_path"]
  34. writer = csv.writer(open(prediction_path, "w"), lineterminator="\n")
  35. valid = get_valid_df()
  36. rows = [x for x in zip(valid["Id"], predictions.flatten())]
  37. writer.writerow(("Id", "SalaryNormalized"))
  38. writer.writerows(rows)