PageRenderTime 46ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/2010_12/questionare1.r

http://github.com/rssh/dou_pl_questionare
R | 311 lines | 229 code | 37 blank | 45 comment | 12 complexity | 8008f51cef83815cc83e17a512d905f3 MD5 | raw file
  1. # R scripts for cleaning up and producing some statistics for the DOU questionnaire.
  2. # (see report on http://www.developers.org.ua/archives/rssh/2010/12/14/programming-languages-rating-2010/ )
  3. # The statistics are incomplete: some steps (such as calculating the right
  4. # bounds for statistically corrected data) are not included.
  5. #
  6. # Use this at your own risk.
  7. #
  8. # You can use and redistribute these scripts under the terms of the
  9. # GNU General Public License version 2 or later.
  10. #
  11. #
  12. # (C) Ruslan Shevchenko <ruslan@shevchenko.kiev.ua> 2010
  13. #
  14. # Note that this file contains Cyrillic comments in UTF-8 encoding.
  # Configuration: the charts below are only written to PNG files when this
  # flag is TRUE.
  drawNow <- TRUE

  # Load the raw survey answers; the first CSV row holds the column names.
  qs <- read.csv(file = "questionare1.csv", header = TRUE, sep = ",")
  # normalize data
  #
  # Map free-text language names onto one canonical spelling per language.
  #
  # x: character vector of language names, one name per element.
  # Returns a character vector of the same length; entries that a rule maps
  # to nothing become "".
  #
  # The rules run strictly top to bottom and later rules rely on the output
  # of earlier ones (e.g. "Flex" is first turned into "JavaScript" and the
  # trailing-text clean-up for "JavaScript" is then repeated), so they must
  # not be reordered.
  normalizeLanguage <- function(x) {
    # Strip all surrounding whitespace, not just a single space per side.
    x <- gsub("^[[:space:]]+|[[:space:]]+$", "", x)

    # ActionScript / Flash family is folded into JavaScript. The short
    # aliases are matched as whole words only: unanchored, "AS" also hit
    # names such as "ASM" or "BASIC".
    x <- gsub("(ActionScript(.+)$)|(Action *Script.*$)|\\b(AS3|AS|as3)\\b",
              "JavaScript", x)
    x <- gsub("actionscript|Actionscript|ActionScript|actionScript|(Adobe Fl.*)",
              "JavaScript", x)
    x <- gsub("JavaScript(.*)", "JavaScript", x)

    x <- gsub("groovy", "Groovy", x)
    x <- gsub("Groovy / Grails at back-end", "Groovy", x)
    x <- gsub("(Groovy.*)|& Groovy", "Groovy", x)
    x <- gsub("Android Java|Android", "Java", x)
    x <- gsub("xslt", "XSLT", x)
    x <- gsub("(built-in.*)", "Other", x)
    x <- gsub("Delphi.*", "Delphi", x)
    x <- gsub("(erlang)|(Erlang \\(server side\\))", "Erlang", x)
    x <- gsub("Flash|Flex", "JavaScript", x)
    x <- gsub("Google Go", "Go", x)
    x <- gsub("Shell.*|bash|UNIX shell|unix shell|Shell.*Bash|shell|Bash|ksh",
              "Shell", x)
    x <- gsub("(Shell.scripts)|(Shell-scripting)", "Shell", x)
    x <- gsub("FoxPro", "DBase-????????", x)
    x <- gsub("Matlab", "MatLab", x)
    x <- gsub("Ocaml|ocaml", "OCaml", x)
    x <- gsub("(VBScript)|(VB.Net)|(VB.NET)|VBS|PureBasic", "Basic", x)
    # Repeated on purpose: the Flash/Flex rule above can create new
    # "JavaScript..." entries with trailing text.
    x <- gsub("JavaScript(.*)$", "JavaScript", x)
    x <- gsub("MXML", "", x)
    x <- gsub("(^XML$)|(^xml$)", "", x)
    x <- gsub("^XSL$", "XSLT", x)
    x <- gsub("Qt", "", x)
    x <- gsub("English", "", x)
    # Whole-word match only: a bare "sh" pattern also rewrote the inside of
    # unrelated words (e.g. "c sharp").
    x <- gsub("\\b(c|tc|z)?sh\\b", "Shell", x)
    x <- gsub("php", "PHP", x)
    # Whole-word match only: a bare "go" pattern turned "Algol" into "AlGol".
    x <- gsub("\\bgo\\b!?", "Go", x)
    x <- gsub("TCL|tcl", "Tcl", x)
    x <- gsub("TurboProlog", "Prolog", x)
    x <- gsub("CPL(.*)", "CPL", x)
    x <- gsub("D \\(client side\\)", "D", x)
    x <- gsub("jruby", "Ruby", x)
    x <- gsub("clojure", "Clojure", x)
    x <- gsub("f#", "F#", x)
    x <- gsub("^c$", "C/C++", x)
    # NOTE(review): the text of this pattern appears to have been lost to an
    # encoding error; only "?" placeholders remain. Read as a regular
    # expression the placeholders are quantifiers rather than letters, so the
    # rule is matched literally (fixed = TRUE) to keep it inert. Drop
    # fixed = TRUE again once the original text is restored.
    x <- gsub("??? ?????(.*)", "", x, fixed = TRUE)
    x <- gsub("PL-SQL|pl/sql|pl/pgsql", "PL/SQL", x)
    x <- gsub("^sql$", "SQL", x)
    x
  }
  65. #
  # Collect every language mentioned in one multi-valued survey column.
  #
  # cname: name of a column of the global data frame `qs` whose cells hold
  #        comma-separated language names.
  # Returns a factor with one element per mentioned language, each passed
  # through normalizeLanguage().
  languagesColumn <- function(cname) {
    cells <- strsplit(as.character(qs[, cname]), ",")
    # Normalise cell by cell and flatten once, instead of growing the result
    # with c() on every iteration.
    factor(unlist(lapply(cells, normalizeLanguage), use.names = FALSE))
  }
  # Flattened, normalised contents of the AdditionalLanguages and
  # PetProjectsLanguages columns.
  al <- languagesColumn("AdditionalLanguages")
  pl <- languagesColumn("PetProjectsLanguages")
  # Clean up the answers of a single-choice language column.
  #
  # x: character vector (or factor) of answers.
  # Returns a character vector of the same length with surrounding
  # whitespace removed and the parenthesised pattern below replaced.
  normalizeFixLanguage <- function(x) {
    # Strip all surrounding whitespace, not just a single space per side.
    x <- gsub("^[[:space:]]+|[[:space:]]+$", "", x)
    # NOTE(review): the pattern and its replacement appear to have lost
    # their original text to an encoding error ("?" placeholders); restore
    # them from the original file before relying on this rule.
    # The result is returned visibly; the original ended on an assignment.
    gsub("\\(???????? ?? ??????\\)", "??????", x)
  }
  # Clean the three single-choice language columns and store them as factors.
  qs[c("NowLanguage", "NextLanguage", "FirstLanguage")] <- lapply(
    qs[c("NowLanguage", "NextLanguage", "FirstLanguage")],
    function(answers) factor(normalizeFixLanguage(answers))
  )

  # Overwrite the Comment and Timestamp columns with zeros.
  qs$Comment <- 0
  qs$Timestamp <- 0
  # Answer counts for the FirstLanguage column, smallest first.
  sfl <- summary(qs[["FirstLanguage"]])
  sfl <- sfl[order(sfl)]
  # Drop the "??????" answer. A logical mask is used because negative
  # indexing with match() collapses the vector to a single NA when that
  # answer is absent.
  sfl <- sfl[!(names(sfl) %in% "??????")]
  if (drawNow) {
    png(filename = "sfl.png")
    # Only answers given more than 10 times are charted.
    dotchart(sfl[sfl > 10])
    title("?? ????? ????? ?? ???????? ????????.")
    dev.off()
  }
  # Answer counts for the NowLanguage column, smallest first.
  snl <- summary(qs[["NowLanguage"]])
  snl <- snl[order(snl)]
  # Drop the "??????" answer. A logical mask is used because negative
  # indexing with match() collapses the vector to a single NA when that
  # answer is absent.
  snl <- snl[!(names(snl) %in% "??????")]
  if (drawNow) {
    png(filename = "snl.png")
    dotchart(snl)
    title("?? ????? ????? ?? ?????? ??? ?????? ?????? ")
    dev.off()
  }
  # Answer counts for the NextLanguage column, largest first.
  sxl <- summary(qs[["NextLanguage"]])
  sxl <- sxl[rev(order(sxl))]
  # Drop the "??????" answer. A logical mask is used because negative
  # indexing with match() collapses the vector to a single NA when that
  # answer is absent.
  sxl <- sxl[!(names(sxl) %in% "??????")]
  if (drawNow) {
    png(filename = "sxl.png")
    dotchart(sxl)
    title("???? ?? ?? ???????? ?????? ???????????? ?????? \n ? ? ??? ????-?? ??????? ?????? ...")
    dev.off()
  }
  # Per NowLanguage value: share of "???" answers among the "???" and "??"
  # answers in the Change column.
  lct <- table(qs$NowLanguage, qs$Change)
  lci <- lct[, "???"] / (lct[, "??"] + lct[, "???"])

  # Keep only languages with more than 15 NowLanguage answers, ascending.
  lci <- lci[names(snl)[snl > 15]]
  lci <- lci[order(lci)]

  if (drawNow) {
    png(filename = "lci.png")
    barplot(lci, las = 2)
    title("?????? ?????????????? ? ?????? ...")
    dev.off()
  }

  # Cross-tabulation of NowLanguage against NextLanguage; languageAfter()
  # reads it.
  lct <- table(qs$NowLanguage, qs$NextLanguage)
  # Shares of NextLanguage answers among respondents whose NowLanguage is
  # `name`.
  #
  # name: row label of the global table `lct` (also a name in `snl`).
  # Returns (invisibly) the row of `lct` divided by snl[name], restricted to
  # shares above 0.01 and ordered from largest to smallest.
  languageAfter <- function(name) {
    share <- lct[name, ] / snl[name]
    share <- share[share > 0.01]
    ranking <- rev(order(share))
    invisible(share[ranking])
  }
  # Print the languageAfter() shares for four NowLanguage values.
  # The header typo ("mirgation") is fixed and the C# header now ends in ":"
  # like the other three.
  for (lang in c("PHP", "Delphi", "Java", "C#")) {
    print(paste0("migration from ", lang, ":"))
    print(languageAfter(lang))
  }
  # Cross-tabulation of NowLanguage against the Experience answers.
  lct <- table(qs$NowLanguage, qs$Experience)

  # Rows `x` of the global table `lct` divided by the matching totals in the
  # global `snl`. Both globals are read at call time.
  langPercentage <- function(x) {
    counts <- lct[x, ]
    counts / snl[x]
  }

  # Keep languages with more than 50 NowLanguage answers, put the experience
  # columns in a fixed order and shorten the last label.
  lct <- lct[names(snl)[snl > 50], ]
  lct <- lct[, c(as.character(0:9), "10 ? ??????")]
  colnames(lct) <- c(as.character(0:9), ">10")
  plct <- langPercentage(rownames(lct))
  if (drawNow) {
    # Chart 1: experience counts for C#, Java and C/C++ side by side.
    # (A percentage variant based on plct was left disabled in the original.)
    names <- c("C#", "Java", "C/C++")
    png(filename = "experience1.png")
    barplot(lct[names, ], beside = TRUE, col = rainbow(3))
    legend("topleft", names, fill = rainbow(3))
    title("???? ??????: C#, Java, C/C++")
    dev.off()

    # Chart 2: the same for PHP, Python and Ruby.
    names <- c("PHP", "Python", "Ruby")
    png(filename = "experience2.png")
    barplot(lct[names, ], beside = TRUE, col = rainbow(3))
    title("???? ??????: ??????-?????")
    legend("topleft", names, fill = rainbow(3))
    dev.off()
  }
  # Counts of the additional languages in `al`, ascending, keeping only
  # those mentioned more than 10 times.
  sal <- summary(al)
  sal <- sal[order(sal)]
  sal <- sal[sal > 10]
  if (drawNow) {
    png(filename = "sal.png")
    dotchart(sal)
    title("????? ?????????????? ????? ?? ??????????? ??? ?????? ?")
    dev.off()
  }
  # Counts of the pet-project languages in `pl`, ascending, keeping only
  # those mentioned more than 10 times.
  spl <- summary(pl)
  spl <- spl[order(spl)]
  spl <- spl[spl > 10]
  if (drawNow) {
    png(filename = "spl.png")
    dotchart(spl)
    title("???? ?? ? ??? ???? pet-projects ?\n???? ????, ?? ?? ????? ?????? ?")
    dev.off()
  }
  ## Print how many PetProjectsLanguages cells are not the empty string.
  ppl <- qs[["PetProjectsLanguages"]]
  cat(
    "?-???? ?????????????, ? ???? ???? ???? ???????:",
    length(ppl[ppl != ""]),
    "\n"
  )
  ## Answer counts for the column selected by `qs$Choos`.
  # NOTE(review): `$` on a data frame matches column names by prefix, so
  # "Choos" presumably abbreviates a longer column name; confirm against the
  # CSV header.
  scl <- summary(qs$Choos)
  # Replaces the labels positionally, so this assumes exactly five answers
  # in this order.
  names(scl) <- c(
    "??????? \n?? ???????",
    "??????????",
    "????????????",
    "??? ???????????\n ???????????????? ?? ???? ?????",
    "?????????"
  )
  if (drawNow) {
    png(filename = "scl.png")
    barplot(scl)
    title("??? ??????????? ??????? ? ?????? ????? ?")
    # Two labels are drawn again by hand at fixed plot coordinates.
    text(3, 1, labels = c("????????????"))
    text(5.5, 1, labels = c("?????????"))
    dev.off()
  }
  ## Cross-tabulation of the inUA answers against the Experience answers.
  stu <- table(qs$inUA, qs$Experience)
  # Fix the column order and shorten the last label.
  stu <- stu[, c(as.character(0:9), "10 ? ??????")]
  colnames(stu) <- c(as.character(0:9), ">10")
  # Keep only the "??" and "???" rows, in that order.
  stu <- stu[c("??", "???"), ]
  # Convert each row of a two-way table or matrix of counts into proportions
  # of that row's total.
  #
  # tbl: numeric matrix or two-way table.
  # Returns an object with the same dimensions and dimnames in which every
  # row sums to 1 (a row whose total is 0 becomes NaN, as before).
  #
  # The original looped over rownames(tbl), which silently did nothing for
  # input without row names; prop.table() works on row positions.
  normalizePercents <- function(tbl) {
    prop.table(tbl, margin = 1)
  }
  # Turn the counts in each row of `stu` into shares of that row.
  stu <- normalizePercents(stu)

  # Grouped bar chart of the two rows, with a legend built from the row names.
  if (drawNow) {
    png(filename = "expUA.png")
    barplot(stu, beside = TRUE, legend.text = TRUE)
    dev.off()
  }
  # Cross-tabulation of NowLanguage against the inUA answers.
  stu <- table(qs$NowLanguage, qs$inUA)

  # Language shares within the "??" answers: ascending, above 3% only.
  linUA <- stu[, "??"] / sum(stu[, "??"])
  linUA <- linUA[order(linUA)]
  linUA <- linUA[linUA > 0.03]

  # Language shares within the "???" answers: ascending, above 5% only.
  linNotUA <- stu[, "???"] / sum(stu[, "???"])
  linNotUA <- linNotUA[order(linNotUA)]
  linNotUA <- linNotUA[linNotUA > 0.05]

  # Put six languages side by side; a language that was filtered out above
  # shows up as NA.
  names <- c("C#", "Java", "PHP", "C/C++", "Python", "Ruby")
  lnua <- cbind("in UA" = linUA[names], "not in UA" = linNotUA[names])

  if (drawNow) {
    png(filename = "lnUA.png")
    barplot(lnua, beside = TRUE, col = rainbow(6))
    legend("topright", names, fill = rainbow(6))
    dev.off()
  }
  # now do summary of languages.
  # NOTE(review): this hand-picked list is overwritten by names(snl) a few
  # lines below before it is ever read; confirm which of the two is intended.
  names <- c(
    "C#", "Java", "C/C++", "PHP", "Python", "Ruby", "Objective-C", "Scala",
    "Delphi", "JavaScript", "Perl", "Haskell", "Lisp", "1?", "Basic",
    "DBase-????????", "Asm", "Lua", "Fortran", "Cobol", "Groovy"
  )

  # Add Groovy (count taken from the additional-language counts) and Cobol
  # (zero) to the current-language counts, then sort largest first.
  snl[c("Groovy", "Cobol")] <- c(sal["Groovy"], 0)
  snl <- snl[rev(order(snl))]
  names <- names(snl)
  rxsnl <- snl[names]

  # Give the first-language and next-language counts the same two entries
  # and bring them into the same order.
  sfl[c("Cobol", "Groovy")] <- c(0, NA)
  rxsfl <- sfl[names]
  sxl[c("Cobol", "Groovy")] <- c(0, NA)
  rxsxl <- sxl[names]

  # Unfiltered pet-project and additional-language counts, same order.
  splf <- summary(pl)
  rxspl <- splf[names]
  salf <- summary(al)
  rxsal <- salf[names]

  # Current-language counts as percentages of their total.
  rxpsnl <- rxsnl / sum(rxsnl) * 100
  res <- cbind(rxpsnl, rxsnl, rxsxl, rxsal, rxspl)