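Read the files with read.csv(filename, header=FALSE) or read.table(filename, sep=",") so that the commas are treated as field separators and don't become part of the values in the R data.frame. (read.csv() assumes a header row by default, and your files have none.) With the default whitespace separator, every field keeps its trailing comma, so column V13 is read as a factor with levels such as "0," and "1," and a comparison like x == 0 never matches.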
Bill Dunlap TIBCO Software wdunlap tibco.com On Tue, Apr 21, 2020 at 10:17 AM Helen Sawaya <helensaw...@hotmail.com> wrote: > Thank you for your patience. > > This is the output of dput(head(d, 10)) > > structure(list(V1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L), .Label = "9.9761E+11,", class = "factor"), V2 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "threat,", class = > "factor"), > V3 = structure(c(1L, 28L, 37L, 48L, 55L, 63L, 73L, 88L, 2L, > 20L), .Label = c("1,", "10,", "100,", "101,", "102,", "104,", > "107,", "108,", "109,", "110,", "111,", "112,", "113,", "114,", > "115,", "116,", "117,", "118,", "119,", "12,", "13,", "14,", > "15,", "16,", "17,", "18,", "19,", "2,", "20,", "21,", "22,", > "23,", "24,", "27,", "28,", "29,", "3,", "30,", "31,", "32,", > "33,", "34,", "35,", "36,", "37,", "38,", "39,", "4,", "42,", > "44,", "46,", "47,", "48,", "49,", "5,", "50,", "52,", "53,", > "54,", "55,", "57,", "59,", "6,", "60,", "61,", "62,", "63,", > "64,", "65,", "66,", "68,", "69,", "7,", "71,", "74,", "75,", > "76,", "78,", "81,", "82,", "83,", "84,", "85,", "86,", "87,", > "88,", "89,", "9,", "90,", "91,", "92,", "94,", "95,", "96,", > "97,", "98,"), class = "factor"), V4 = structure(c(1L, 2L, > 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("1,", "2,"), class = > "factor"), > V5 = structure(c(2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L), .Label = > c("1,", > "2,"), class = "factor"), V6 = structure(c(2L, 1L, 2L, 2L, > 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("1,", "2,"), class = "factor"), > V7 = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L), .Label = > c("1,", > "2,"), class = "factor"), V8 = structure(c(41L, 92L, 63L, > 36L, 2L, 81L, 12L, 14L, 23L, 33L), .Label = c("abduction,", > "abortion,", "abuse,", "accident,", "addicted,", "agony,", > "anger,", "angry,", "anguish,", "assault,", "bankrupt,", > "bullet,", "burial,", "cancer,", "cemetery,", "coffin,", > "corpse,", "crash,", "crisis,", "cruel,", "death,", "defeated,", > 
"depressed,", "deserted,", "despair,", "destroy,", "disaster,", > "disloyal,", "distress,", "dreadful,", "drown,", "dull,", > "dump,", "emaciated,", "failure,", "fatigue,", "fault,", > "feeble,", "fever,", "filth,", "forlorn,", "germs,", "gloomy,", > "hardship,", "hell,", "helpless,", "horror,", "hostage,", > "hostile,", "hurt,", "idiot,", "infest,", "injury,", "irritable,", > "jail,", "killer,", "lonely,", "malaria,", "messy,", "misery,", > "mistake,", "morbid,", "murder,", "mutilate,", "pain,", "panic,", > "poison,", "prison,", "pus,", "rape,", "rat,", "rejected,", > "sad,", "scum,", "shame,", "sick,", "slap,", "snake,", "spider,", > "suicide,", "surgery,", "terrible,", "tormented,", "trash,", > "trauma,", "ugly,", "ulcer,", "unease,", "unhappy,", "useless,", > "victim,", "wasp,", "weep,", "worm,", "wound,"), class = "factor"), > V9 = structure(c(24L, 90L, 73L, 10L, 92L, 33L, 84L, 96L, > 70L, 57L), .Label = c("alley,", "ankle,", "appliance,", "audience,", > "bandage,", "bathroom,", "bookcase,", "border,", "branch,", > "cabinet,", "category,", "clean,", "cliff,", "cold,", "consider,", > "consoled,", "context,", "country,", "crop,", "dentist,", > "detail,", "dinner,", "doctor,", "dynamic,", "easygoing,", > "elbow,", "energetic,", "farm,", "faucet,", "flat,", "flowing,", > "fork,", "freezer,", "glass,", "grass,", "guess,", "humble,", > "icebox,", "industry,", "invisible,", "jug,", "lighting,", > "lion,", "listen,", "little,", "machine,", "metal,", "month,", > "mushroom,", "napkin,", "news,", "noisy,", "north,", "nudge,", > "number,", "numerous,", "obey,", "odd,", "oval,", "plant,", > "possible,", "pot,", "public,", "puzzled,", "quarter,", "rational,", > "ready,", "reflect,", "reliable,", "repentant,", "sand,", > "school,", "secret,", "series,", "shark,", "shoe,", "shop,", > "shortened,", "skyline,", "stable,", "storm,", "stove,", > "table,", "theory,", "tower,", "truck,", "upgrade,", "upright,", > "utensil,", "vest,", "vision,", "volcano,", "walk,", "watchful,", > 
"window,", "winter,"), class = "factor"), V10 = structure(c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "NA,", class = > "factor"), > V11 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = > "NA,", class = "factor"), > V12 = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L), .Label = > c("203,", > "205,"), class = "factor"), V13 = structure(c(1L, 1L, 1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "1,", class = "factor"), > V14 = c(4063L, 4914L, 1508L, 1819L, 1228L, 992L, 1898L, 1174L, > 1294L, 1417L)), row.names = c(NA, 10L), class = "data.frame”) > > When I use the following: > > all.files <- list.files(".") > txt.files <- grep("threat.txt",all.files,value=T) > > for(i in txt.files) { > d<-read.table(i, header=FALSE) > d[] <- lapply(d, function(x) {is.na(x) <- x == 0; x}) > write.table(d,paste0(i, "trial.txt"), quote=FALSE, row.names=FALSE)} > > I get this (an example of one of the output files with zeros in V13): > > V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 > 3.17903E+11, threat, 1, 1, 2, 2, 1, useless, flowing, NA, NA, 203, 1, 949 > 3.17903E+11, threat, 3, 2, 2, 1, 1, hostage, skyline, NA, NA, 203, 1, 1116 > 3.17903E+11, threat, 4, 1, 1, 1, 2, messy, ready, NA, NA, 205, 1, 1277 > 3.17903E+11, threat, 6, 2, 1, 2, 2, emaciated, shortened, NA, NA, 205, 1, > 691 > 3.17903E+11, threat, 7, 1, 1, 1, 1, abuse, plant, NA, NA, 203, 1, 660 > 3.17903E+11, threat, 8, 2, 1, 2, 2, tormented, easygoing, NA, NA, 205, 1, > 812 > 3.17903E+11, threat, 9, 1, 2, 2, 2, hurt, sand, NA, NA, 205, 1, 917 > 3.17903E+11, threat, 10, 1, 1, 1, 1, surgery, freezer, NA, NA, 203, 1, 1829 > 3.17903E+11, threat, 12, 2, 2, 1, 2, accident, category, NA, NA, 205, 1, > 821 > 3.17903E+11, threat, 13, 2, 1, 2, 2, terrible, energetic, NA, NA, 205, 1, > 783 > 3.17903E+11, threat, 14, 1, 2, 2, 1, wound, storm, NA, NA, 203, 1, 813 > 3.17903E+11, threat, 15, 1, 1, 1, 2, victim, utensil, NA, NA, 205, 1, 1132 > 3.17903E+11, threat, 16, 2, 2, 1, 2, bankrupt, lighting, NA, NA, 203, 0, > 
1510 > 3.17903E+11, threat, 17, 1, 1, 1, 2, anguish, country, NA, NA, 203, 0, 811 > 3.17903E+11, threat, 18, 2, 2, 1, 1, snake, table, NA, NA, 203, 1, 805 > 3.17903E+11, threat, 19, 1, 1, 1, 2, slap, crop, NA, NA, 205, 1, 1180 > 3.17903E+11, threat, 20, 2, 1, 2, 2, scum, shoe, NA, NA, 205, 1, 792 > 3.17903E+11, threat, 21, 1, 2, 2, 1, weep, shop, NA, NA, 203, 1, 870 > 3.17903E+11, threat, 23, 2, 1, 2, 1, spider, border, NA, NA, 203, 1, 871 > > str(d) gives me the following: > > 'data.frame': 96 obs. of 14 variables: > $ V1 : Factor w/ 1 level "9.9761E+11,": 1 1 1 1 1 1 1 1 1 1 ... > $ V2 : Factor w/ 1 level "threat,": 1 1 1 1 1 1 1 1 1 1 ... > $ V3 : Factor w/ 96 levels "1,","10,","100,",..: 1 28 37 48 55 63 73 88 2 > 20 ... > $ V4 : Factor w/ 2 levels "1,","2,": 1 2 1 2 2 2 2 2 1 1 ... > $ V5 : Factor w/ 2 levels "1,","2,": 2 2 2 1 2 1 1 2 2 2 ... > $ V6 : Factor w/ 2 levels "1,","2,": 2 1 2 2 1 2 2 1 2 2 ... > $ V7 : Factor w/ 2 levels "1,","2,": 2 1 2 2 2 2 1 2 1 2 ... > $ V8 : Factor w/ 95 levels "abduction,","abortion,",..: 41 92 63 36 2 81 > 12 14 23 33 ... > $ V9 : Factor w/ 96 levels "alley,","ankle,",..: 24 90 73 10 92 33 84 96 > 70 57 ... > $ V10: Factor w/ 1 level "NA,": 1 1 1 1 1 1 1 1 1 1 ... > $ V11: Factor w/ 1 level "NA,": 1 1 1 1 1 1 1 1 1 1 ... > $ V12: Factor w/ 2 levels "203,","205,": 2 1 2 2 2 2 1 2 1 2 ... > $ V13: Factor w/ 1 level "1,": 1 1 1 1 1 1 1 1 1 1 ... > $ V14: int 4063 4914 1508 1819 1228 992 1898 1174 1294 1417 … > > When I use this: > > for(i in txt.files) { > d<-read.table(i, header=FALSE) > d2<-d[d$V13==1,] > write.table(d2,sub("[.]",".trial.",i),quote=FALSE,row.names=FALSE) > } > > I get empty files: > > str(d2) > 'data.frame': 0 obs. 
of 14 variables: > $ V1 : Factor w/ 1 level "9.9761E+11,": > $ V2 : Factor w/ 1 level "threat,": > $ V3 : Factor w/ 96 levels "1,","10,","100,",..: > $ V4 : Factor w/ 2 levels "1,","2,": > $ V5 : Factor w/ 2 levels "1,","2,": > $ V6 : Factor w/ 2 levels "1,","2,": > $ V7 : Factor w/ 2 levels "1,","2,": > $ V8 : Factor w/ 95 levels "abduction,","abortion,",..: > $ V9 : Factor w/ 96 levels "alley,","ankle,",..: > $ V10: Factor w/ 1 level "NA,": > $ V11: Factor w/ 1 level "NA,": > $ V12: Factor w/ 2 levels "203,","205,": > $ V13: Factor w/ 1 level "1,": > $ V14: int > > When I use as.integer to change V13 to an integer, the output of this > column is replaced by 1s and 2s.. > > > > On Apr 21, 2020, at 1:14 AM, Rui Barradas <ruipbarra...@sapo.pt> wrote: > > > > Hello, > > > > Thanks for the data. But since the replacements still do not work, > please post the output of > > > > dput(head(d, 10)) > > > > > > in order for us to have an *exact* copy of the data structure. > > I had asked for 20 or 30 rows but given your post 10 are enough. > > With a way to exactly reproduce what you have, it will be much easier to > try code and find a solution. I, and I believe most R users, will run > > > > str(d) > > > > as one of the first steps to know what is in that problem column. And go > from there. > > > > > > Hope this helps, > > > > Rui Barradas > > > > Às 04:52 de 21/04/20, Helen Sawaya escreveu: > >> Thank you all for your input. 
> >> This is an example of one data file (I have 74 data files): > >> 2.90546E+11, threat, 1, 2, 1, 2, 1, death, stove, > NA, NA, 205, 0, 394 > >> 2.90546E+11, threat, 2, 2, 2, 1, 1, emaciated, shortened, > NA, NA, 205, 0, 502 > >> 2.90546E+11, threat, 3, 1, 1, 1, 2, mutilate, consider, > NA, NA, 205, 1, 468 > >> 2.90546E+11, threat, 6, 1, 2, 2, 1, weep, shop, > NA, NA, 203, 1, 345 > >> 2.90546E+11, threat, 9, 2, 1, 2, 2, tormented, easygoing, > NA, NA, 205, 1, 373 > >> 2.90546E+11, threat, 10, 1, 2, 2, 2, snake, table, > NA, NA, 205, 1, 343 > >> 2.90546E+11, threat, 11, 2, 2, 1, 1, crisis, faucet, > NA, NA, 203, 1, 437 > >> 2.90546E+11, threat, 12, 1, 1, 1, 1, victim, utensil, > NA, NA, 203, 1, 343 > >> 2.90546E+11, threat, 14, 1, 2, 2, 1, depressed, repentant, > NA, NA, 203, 1, 441 > >> 2.90546E+11, threat, 15, 2, 2, 1, 2, scum, shoe, > NA, NA, 205, 1, 475 > >> Column 13 has values of 0s and 1s which my cognitive task outputted. > Column 14 is the reaction time (ms) data. I want to get rid of the rows > that contain zeros so I thought I'd first replace zeros with NAs then use > complete.cases function to get rid of the NAs. I also wanted to apply other > functions so I included them all in a loop. All work fine except for the > one where I try to turn the zeros to NAs. > >> Jim when I tried your mockdata example, it worked fine. But when I > translated it to my data, I still get zeros in the output. Can you identify > any mistranslations I'm doing? 
> >> txt.files<-list.files(".",pattern="dotprobe") #all my data files are > text files in one folder > >> for(tf in txt.files) { > >> d<-read.table(tf) > >> d[,13][d[,13]==0]<-NA #column 13 contains zeros > >> d<-d[ ,-c(10,11)] #get rid of columns 10 and 11 > >> write.table(d,sub("[.]",".tlbs.",tf),quote=FALSE, row.names=FALSE) > >> } > >> That's an example of one of the output I get: > >> V1 V2 V3 V4 V5 V6 V7 V8 V9 V12 V13 V14 > >> 2.90546E+11, threat, 1, 2, 1, 2, 1, death, stove, 205, 0, 394 > >> 2.90546E+11, threat, 2, 2, 2, 1, 1, emaciated, shortened, 205, 0, 502 > >> 2.90546E+11, threat, 3, 1, 1, 1, 2, mutilate, consider, 205, 1, 468 > >> 2.90546E+11, threat, 6, 1, 2, 2, 1, weep, shop, 203, 1, 345 > >> 2.90546E+11, threat, 9, 2, 1, 2, 2, tormented, easygoing, 205, 1, 373 > >> 2.90546E+11, threat, 10, 1, 2, 2, 2, snake, table, 205, 1, 343 > >> Columns 10 and 11 were deleted. But zeros were not replaced by NAs. > >> After all the data cleaning, the functions I'm interested in including > in the loop are: get_tlbs and summarize_bias (and these also work fine in > my loop). > >> Thanks again 🙂 > >> Sincerely > >> Helen > >> ------------------------------------------------------------------------ > >> *From:* Jim Lemon <drjimle...@gmail.com> > >> *Sent:* Tuesday, April 21, 2020 2:52 AM > >> *To:* Rui Barradas <ruipbarra...@sapo.pt> > >> *Cc:* Helen Sawaya <helensaw...@hotmail.com>; Michael Dewey < > li...@dewey.myzen.co.uk>; r-help@R-project.org <r-help@r-project.org> > >> *Subject:* Re: [R] NA command in a 'for' loop > >> Hi Helen, > >> Your problem may lie in using row.names=TRUE. I was puzzled when an > >> extra column kept popping up in the output files. 
For reading in and > >> replacing zeros with NAs, this seems to work: > >> for(mockdata in 1:3) { > >> mdf<-data.frame(sample(2:20,10),sample(2:20,10),sample(0:1,10,TRUE)) > >> write.table(mdf,file=paste0("threat",mockdata,".txt"),quote=FALSE, > >> row.names=FALSE,col.names=FALSE) > >> } > >> txt.files<-list.files(".",pattern="threat[1-3]") > >> for(tf in txt.files) { > >> d<-read.table(tf) > >> d[,3][d[,3]==0]<-NA > >> write.table(d,sub("[.]",".tbls.",tf),quote=FALSE,row.names=FALSE) > >> } > >> Jim > >> On Tue, Apr 21, 2020 at 7:57 AM Rui Barradas <ruipbarra...@sapo.pt> > wrote: > >>> > >>> Hello, > >>> > >>> I believe the only way we have to see what is happening is for you to > >>> post the output of > >>> > >>> > >>> dput(head(d, 20)) # or 30 > >>> > >>> > >>> or, with d2 a subset of d that includes zeros, > >>> > >>> > >>> dput(head(d2, 20)) > >>> > >>> > >>> Hope this helps, > >>> > >>> Rui Barradas > >>> > >>> Às 17:48 de 20/04/20, Helen Sawaya escreveu: > >>> > I have one column that represents correct response versus error > (correct > >>> > is coded as 1 and error is coded as 0). Nowhere else in the dataset > are > >>> > there values of 0. The vector is treated as an integer. > >>> > > ------------------------------------------------------------------------ > >>> > *From:* Michael Dewey <li...@dewey.myzen.co.uk> > >>> > *Sent:* Monday, April 20, 2020 7:35 PM > >>> > *To:* Helen Sawaya <helensaw...@hotmail.com>; Rui Barradas > >>> > <ruipbarra...@sapo.pt>; r-help@R-project.org <r-help@R-project.org> > >>> > *Subject:* Re: [R] NA command in a 'for' loop > >>> > Just a thought Helen but is x being treated as a real and what you > think > >>> > are zero and are printed as zero are in fact some very small number? > If > >>> > so you need to alter your test appropriately. > >>> > > >>> > Michael > >>> > > >>> > On 20/04/2020 17:25, Helen Sawaya wrote: > >>> >> Thank you for your reply. 
> >>> >> > >>> >> I tried d[] <- lapply(d, function(x) {is.na(x) <- x == 0; x}) > >>> >> but I am still getting zeros instead of NAs in my output.. > >>> >> > >>> >> I wonder if the problem is that some of my data files don't have > any zeros (participants made no errors).. > >>> >> ________________________________ > >>> >> From: Rui Barradas <ruipbarra...@sapo.pt> > >>> >> Sent: Monday, April 20, 2020 9:05 AM > >>> >> To: Helen Sawaya <helensaw...@hotmail.com>; r-help@R-project.org > <r-help@R-project.org> > >>> >> Subject: Re: [R] NA command in a 'for' loop > >>> >> > >>> >> Hello, > >>> >> > >>> >> Instead of > >>> >> > >>> >> d[d == 0] <- NA > >>> >> > >>> >> try > >>> >> > >>> >> d[] <- lapply(d, function(x) {is.na(x) <- x == 0; x}) > >>> >> > >>> >> > >>> >> Also, in the first for loop > >>> >> > >>> >> paste(i, sep = "") > >>> >> > >>> >> does nothing, it's the same as i. > >>> >> And the same for > >>> >> > >>> >> (d2$V4 == 1) == TRUE > >>> >> > >>> >> Since (d2$V4 == 1) already is FALSE/TRUE there is no need for > >>> >> > >>> >> (.) == TRUE > >>> >> > >>> >> > >>> >> Hope this helps, > >>> >> > >>> >> Rui Barradas > >>> >> > >>> >> > >>> >> > >>> >> Às 20:52 de 19/04/20, Helen Sawaya escreveu: > >>> >>> Dear R experts, > >>> >>> > >>> >>> I am using a 'for' loop to apply commands to multiple datasets > (each file is one participant). The only one not working is the command > that identifies zeros in my datasets and changes them to NAs. But when I > look at the output, zeros ("0") are still present. Surprisingly, the > functions work fine when I apply them to a single > >>> > dataset (outside the loop). 
I've tried: > >>> >>> > >>> >>> all.files <- list.files(".") > >>> >>> txt.files <- grep("threat.txt",all.files,value=T) > >>> >>> > >>> >>> for(i in txt.files){ > >>> >>> d <- read.table(paste(i,sep=""),header=F) > >>> >>> d[d==0] <- NA #replace zeros with NA > >>> >>> write.table(d, paste0(i,".tlbs.txt"), quote=FALSE, > row.names=TRUE)} > >>> >>> d<-d[ ,-c(10,11)] > >>> >>> d2<-d[complete.cases(d), ] > >>> >>> d2$V4<-as.numeric(d2$V4) > >>> >>> congruent <- (d2$V4 == 1) == TRUE > >>> >>> x <- get_tlbs(d2$V14, congruent, prior_weights = NULL, method > = "weighted", fill_gaps = FALSE) > >>> >>> write.table(x, paste0(i,".tlbs.txt"), quote=FALSE, > row.names=TRUE)} > >>> >>> > >>> >>> I've also tried: > >>> >>> > >>> >>> for(i in txt.files){ > >>> >>> d <- read.table(paste(i,sep=""),header=F) > >>> >>> if (0 %in% d) > >>> >>> {replace_with_na(d,replace = list(x = 0))} # replace zeros > with NA > >>> >>> d<-d[ ,-c(10,11)] > >>> >>> d2<-d[complete.cases(d), ] > >>> >>> d2$V4<-as.numeric(d2$V4) > >>> >>> congruent <- (d2$V4 == 1) == TRUE > >>> >>> x <- get_tlbs(d2$V14, congruent, prior_weights = NULL, method > = "weighted", fill_gaps = FALSE) > >>> >>> write.table(x, paste0(i,".summaryoutput.txt"), quote=FALSE, > row.names=TRUE)} > >>> >>> > >>> >>> Thank you for your help. > >>> >>> Sincerely > >>> >>> Helen > >>> >>> > >>> >>> [[alternative HTML version deleted]] > >>> >>> > >>> >>> ______________________________________________ > >>> >>> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > >>> >>> https://stat.ethz.ch/mailman/listinfo/r-help > >>> >>> PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > >>> >>> and provide commented, minimal, self-contained, reproducible code. 
> >>> >>> > >>> >> > >>> >> [[alternative HTML version deleted]] > >>> >> > >>> >> ______________________________________________ > >>> >> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > >>> >> https://stat.ethz.ch/mailman/listinfo/r-help > >>> >> PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > >>> >> and provide commented, minimal, self-contained, reproducible code. > >>> >> > >>> >> > >>> > > >>> > -- > >>> > Michael > >>> > http://www.dewey.myzen.co.uk/home.html > >>> > >>> ______________________________________________ > >>> R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > >>> https://stat.ethz.ch/mailman/listinfo/r-help > >>> PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > >>> and provide commented, minimal, self-contained, reproducible code. > > ______________________________________________ > R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.