Re: [R] NA command in a 'for' loop

Rui Barradas Tue, 21 Apr 2020 11:39:14 -0700

Hello,

Much better. You have "," at the end of your data elements, so nothing is working.


The following 3 instructions will:

1. remove those commas,
2. create a logical vector trying to guess which columns are numeric
3. coerce those columns to numeric.


d[] <- lapply(d, function(x){sub(",$", "", x)})
not_num <- sapply(d, function(x) all(is.na(as.numeric(as.character(x)))))
d[!not_num] <- lapply(d[!not_num], function(x) as.numeric(as.character(x)))



Then, if you want just d$V13 == 0 to become NA, this will do it.


is.na(d[["V13"]]) <- d[["V13"]] == 0


If you want to do this to all numeric columns, try


d[!not_num] <- lapply(d[!not_num], function(x){
  is.na(x) <- x == 0
  x
})


Hope this helps,

Rui Barradas


Às 18:11 de 21/04/20, Helen Sawaya escreveu:

Thank you for your patience.

This is the output of dput(head(d, 10))

structure(list(V1 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "9.9761E+11,", class = "factor"), V2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "threat,", class = "factor"),
     V3 = structure(c(1L, 28L, 37L, 48L, 55L, 63L, 73L, 88L, 2L,
     20L), .Label = c("1,", "10,", "100,", "101,", "102,", "104,",
     "107,", "108,", "109,", "110,", "111,", "112,", "113,", "114,",
     "115,", "116,", "117,", "118,", "119,", "12,", "13,", "14,",
     "15,", "16,", "17,", "18,", "19,", "2,", "20,", "21,", "22,",
     "23,", "24,", "27,", "28,", "29,", "3,", "30,", "31,", "32,",
     "33,", "34,", "35,", "36,", "37,", "38,", "39,", "4,", "42,",
     "44,", "46,", "47,", "48,", "49,", "5,", "50,", "52,", "53,",
     "54,", "55,", "57,", "59,", "6,", "60,", "61,", "62,", "63,",
     "64,", "65,", "66,", "68,", "69,", "7,", "71,", "74,", "75,",
     "76,", "78,", "81,", "82,", "83,", "84,", "85,", "86,", "87,",
     "88,", "89,", "9,", "90,", "91,", "92,", "94,", "95,", "96,",
     "97,", "98,"), class = "factor"), V4 = structure(c(1L, 2L,
     1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("1,", "2,"), class = "factor"),
     V5 = structure(c(2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L), .Label = c("1,",
     "2,"), class = "factor"), V6 = structure(c(2L, 1L, 2L, 2L,
     1L, 2L, 2L, 1L, 2L, 2L), .Label = c("1,", "2,"), class = "factor"),
     V7 = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L), .Label = c("1,",
     "2,"), class = "factor"), V8 = structure(c(41L, 92L, 63L,
     36L, 2L, 81L, 12L, 14L, 23L, 33L), .Label = c("abduction,",
     "abortion,", "abuse,", "accident,", "addicted,", "agony,",
     "anger,", "angry,", "anguish,", "assault,", "bankrupt,",
     "bullet,", "burial,", "cancer,", "cemetery,", "coffin,",
     "corpse,", "crash,", "crisis,", "cruel,", "death,", "defeated,",
     "depressed,", "deserted,", "despair,", "destroy,", "disaster,",
     "disloyal,", "distress,", "dreadful,", "drown,", "dull,",
     "dump,", "emaciated,", "failure,", "fatigue,", "fault,",
     "feeble,", "fever,", "filth,", "forlorn,", "germs,", "gloomy,",
     "hardship,", "hell,", "helpless,", "horror,", "hostage,",
     "hostile,", "hurt,", "idiot,", "infest,", "injury,", "irritable,",
     "jail,", "killer,", "lonely,", "malaria,", "messy,", "misery,",
     "mistake,", "morbid,", "murder,", "mutilate,", "pain,", "panic,",
     "poison,", "prison,", "pus,", "rape,", "rat,", "rejected,",
     "sad,", "scum,", "shame,", "sick,", "slap,", "snake,", "spider,",
     "suicide,", "surgery,", "terrible,", "tormented,", "trash,",
     "trauma,", "ugly,", "ulcer,", "unease,", "unhappy,", "useless,",
     "victim,", "wasp,", "weep,", "worm,", "wound,"), class = "factor"),
     V9 = structure(c(24L, 90L, 73L, 10L, 92L, 33L, 84L, 96L,
     70L, 57L), .Label = c("alley,", "ankle,", "appliance,", "audience,",
     "bandage,", "bathroom,", "bookcase,", "border,", "branch,",
     "cabinet,", "category,", "clean,", "cliff,", "cold,", "consider,",
     "consoled,", "context,", "country,", "crop,", "dentist,",
     "detail,", "dinner,", "doctor,", "dynamic,", "easygoing,",
     "elbow,", "energetic,", "farm,", "faucet,", "flat,", "flowing,",
     "fork,", "freezer,", "glass,", "grass,", "guess,", "humble,",
     "icebox,", "industry,", "invisible,", "jug,", "lighting,",
     "lion,", "listen,", "little,", "machine,", "metal,", "month,",
     "mushroom,", "napkin,", "news,", "noisy,", "north,", "nudge,",
     "number,", "numerous,", "obey,", "odd,", "oval,", "plant,",
     "possible,", "pot,", "public,", "puzzled,", "quarter,", "rational,",
     "ready,", "reflect,", "reliable,", "repentant,", "sand,",
     "school,", "secret,", "series,", "shark,", "shoe,", "shop,",
     "shortened,", "skyline,", "stable,", "storm,", "stove,",
     "table,", "theory,", "tower,", "truck,", "upgrade,", "upright,",
     "utensil,", "vest,", "vision,", "volcano,", "walk,", "watchful,",
     "window,", "winter,"), class = "factor"), V10 = structure(c(1L,
     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "NA,", class = "factor"),
     V11 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "NA,", class = 
"factor"),
     V12 = structure(c(2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L), .Label = 
c("203,",
     "205,"), class = "factor"), V13 = structure(c(1L, 1L, 1L,
     1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "1,", class = "factor"),
     V14 = c(4063L, 4914L, 1508L, 1819L, 1228L, 992L, 1898L, 1174L,
     1294L, 1417L)), row.names = c(NA, 10L), class = "data.frame")

When I use the following:

all.files <- list.files(".")
txt.files <- grep("threat.txt",all.files,value=T)

for(i in txt.files) {
   d<-read.table(i, header=FALSE)
   d[] <- lapply(d, function(x) {is.na(x) <- x == 0; x})
   write.table(d,paste0(i, "trial.txt"), quote=FALSE, row.names=FALSE)}

I get this (an example of one of the output files with zeros in V13):

V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
3.17903E+11, threat, 1, 1, 2, 2, 1, useless, flowing, NA, NA, 203, 1, 949
3.17903E+11, threat, 3, 2, 2, 1, 1, hostage, skyline, NA, NA, 203, 1, 1116
3.17903E+11, threat, 4, 1, 1, 1, 2, messy, ready, NA, NA, 205, 1, 1277
3.17903E+11, threat, 6, 2, 1, 2, 2, emaciated, shortened, NA, NA, 205, 1, 691
3.17903E+11, threat, 7, 1, 1, 1, 1, abuse, plant, NA, NA, 203, 1, 660
3.17903E+11, threat, 8, 2, 1, 2, 2, tormented, easygoing, NA, NA, 205, 1, 812
3.17903E+11, threat, 9, 1, 2, 2, 2, hurt, sand, NA, NA, 205, 1, 917
3.17903E+11, threat, 10, 1, 1, 1, 1, surgery, freezer, NA, NA, 203, 1, 1829
3.17903E+11, threat, 12, 2, 2, 1, 2, accident, category, NA, NA, 205, 1, 821
3.17903E+11, threat, 13, 2, 1, 2, 2, terrible, energetic, NA, NA, 205, 1, 783
3.17903E+11, threat, 14, 1, 2, 2, 1, wound, storm, NA, NA, 203, 1, 813
3.17903E+11, threat, 15, 1, 1, 1, 2, victim, utensil, NA, NA, 205, 1, 1132
3.17903E+11, threat, 16, 2, 2, 1, 2, bankrupt, lighting, NA, NA, 203, 0, 1510
3.17903E+11, threat, 17, 1, 1, 1, 2, anguish, country, NA, NA, 203, 0, 811
3.17903E+11, threat, 18, 2, 2, 1, 1, snake, table, NA, NA, 203, 1, 805
3.17903E+11, threat, 19, 1, 1, 1, 2, slap, crop, NA, NA, 205, 1, 1180
3.17903E+11, threat, 20, 2, 1, 2, 2, scum, shoe, NA, NA, 205, 1, 792
3.17903E+11, threat, 21, 1, 2, 2, 1, weep, shop, NA, NA, 203, 1, 870
3.17903E+11, threat, 23, 2, 1, 2, 1, spider, border, NA, NA, 203, 1, 871

str(d) gives me the following:

'data.frame':   96 obs. of  14 variables:
  $ V1 : Factor w/ 1 level "9.9761E+11,": 1 1 1 1 1 1 1 1 1 1 ...
  $ V2 : Factor w/ 1 level "threat,": 1 1 1 1 1 1 1 1 1 1 ...
  $ V3 : Factor w/ 96 levels "1,","10,","100,",..: 1 28 37 48 55 63 73 88 2 20 ...
  $ V4 : Factor w/ 2 levels "1,","2,": 1 2 1 2 2 2 2 2 1 1 ...
  $ V5 : Factor w/ 2 levels "1,","2,": 2 2 2 1 2 1 1 2 2 2 ...
  $ V6 : Factor w/ 2 levels "1,","2,": 2 1 2 2 1 2 2 1 2 2 ...
  $ V7 : Factor w/ 2 levels "1,","2,": 2 1 2 2 2 2 1 2 1 2 ...
  $ V8 : Factor w/ 95 levels "abduction,","abortion,",..: 41 92 63 36 2 81 12 14 23 33 ...
  $ V9 : Factor w/ 96 levels "alley,","ankle,",..: 24 90 73 10 92 33 84 96 70 57 ...
  $ V10: Factor w/ 1 level "NA,": 1 1 1 1 1 1 1 1 1 1 ...
  $ V11: Factor w/ 1 level "NA,": 1 1 1 1 1 1 1 1 1 1 ...
  $ V12: Factor w/ 2 levels "203,","205,": 2 1 2 2 2 2 1 2 1 2 ...
  $ V13: Factor w/ 1 level "1,": 1 1 1 1 1 1 1 1 1 1 ...
  $ V14: int  4063 4914 1508 1819 1228 992 1898 1174 1294 1417 ...

When I use this:

for(i in txt.files) {
   d<-read.table(i, header=FALSE)
   d2<-d[d$V13==1,]
   write.table(d2,sub("[.]",".trial.",i),quote=FALSE,row.names=FALSE)
}

I get empty files:

str(d2)
'data.frame':   0 obs. of  14 variables:
  $ V1 : Factor w/ 1 level "9.9761E+11,":
  $ V2 : Factor w/ 1 level "threat,":
  $ V3 : Factor w/ 96 levels "1,","10,","100,",..:
  $ V4 : Factor w/ 2 levels "1,","2,":
  $ V5 : Factor w/ 2 levels "1,","2,":
  $ V6 : Factor w/ 2 levels "1,","2,":
  $ V7 : Factor w/ 2 levels "1,","2,":
  $ V8 : Factor w/ 95 levels "abduction,","abortion,",..:
  $ V9 : Factor w/ 96 levels "alley,","ankle,",..:
  $ V10: Factor w/ 1 level "NA,":
  $ V11: Factor w/ 1 level "NA,":
  $ V12: Factor w/ 2 levels "203,","205,":
  $ V13: Factor w/ 1 level "1,":
  $ V14: int

When I use as.integer to change V13 to an integer, the output of this column is 
replaced by 1s and 2s..

On Apr 21, 2020, at 1:14 AM, Rui Barradas <ruipbarra...@sapo.pt> wrote:

Hello,

Thanks for the data. But since the replacements still do not work, please post 
the output of

dput(head(d, 10))


in order for us to have an *exact* copy of the data structure.
I had asked for 20 or 30 rows but given your post 10 are enough.
With a way to exactly reproduce what you have, it will be much easier to try 
code and find a solution. I, and I believe most R users, will run

str(d)

as one of the first steps to know what is in that problem column. And go from 
there.


Hope this helps,

Rui Barradas

Às 04:52 de 21/04/20, Helen Sawaya escreveu:

Thank you all for your input.
This is an example of one data file (I have 74 data files):
2.90546E+11, threat,    1, 2, 1, 2, 1,        death,        stove,            NA,           NA,  205,    0,  394
2.90546E+11, threat,    2, 2, 2, 1, 1,    emaciated,    shortened,            NA,           NA,  205,    0,  502
2.90546E+11, threat,    3, 1, 1, 1, 2,     mutilate,     consider,            NA,           NA,  205,    1,  468
2.90546E+11, threat,    6, 1, 2, 2, 1,         weep,         shop,            NA,           NA,  203,    1,  345
2.90546E+11, threat,    9, 2, 1, 2, 2,    tormented,    easygoing,            NA,           NA,  205,    1,  373
2.90546E+11, threat,   10, 1, 2, 2, 2,        snake,        table,            NA,           NA,  205,    1,  343
2.90546E+11, threat,   11, 2, 2, 1, 1,       crisis,       faucet,            NA,           NA,  203,    1,  437
2.90546E+11, threat,   12, 1, 1, 1, 1,       victim,      utensil,            NA,           NA,  203,    1,  343
2.90546E+11, threat,   14, 1, 2, 2, 1,    depressed,    repentant,            NA,           NA,  203,    1,  441
2.90546E+11, threat,   15, 2, 2, 1, 2,         scum,         shoe,            NA,           NA,  205,    1,  475
Column 13 has values of 0s and 1s which my cognitive task outputted. Column 14 
is the reaction time (ms) data. I want to get rid of the rows that contain 
zeros so I thought I'd first replace zeros with NAs then use complete.cases 
function to get rid of the NAs. I also wanted to apply other functions so I 
included them all in a loop. All work fine except for the one where I try to 
turn the zeros to NAs.
Jim when I tried your mockdata example, it worked fine. But when I translated 
it to my data, I still get zeros in the output. Can you identify any 
mistranslations I'm doing?
txt.files<-list.files(".",pattern="dotprobe") #all my data files are text files 
in one folder
for(tf in txt.files) {
   d<-read.table(tf)
   d[,13][d[,13]==0]<-NA #column 13 contains zeros
   d<-d[ ,-c(10,11)] #get rid of columns 10 and 11
   write.table(d,sub("[.]",".tlbs.",tf),quote=FALSE, row.names=FALSE)
}
That's an example of one of the output I get:
V1 V2 V3 V4 V5 V6 V7 V8 V9 V12 V13 V14
2.90546E+11, threat, 1, 2, 1, 2, 1, death, stove, 205, 0, 394
2.90546E+11, threat, 2, 2, 2, 1, 1, emaciated, shortened, 205, 0, 502
2.90546E+11, threat, 3, 1, 1, 1, 2, mutilate, consider, 205, 1, 468
2.90546E+11, threat, 6, 1, 2, 2, 1, weep, shop, 203, 1, 345
2.90546E+11, threat, 9, 2, 1, 2, 2, tormented, easygoing, 205, 1, 373
2.90546E+11, threat, 10, 1, 2, 2, 2, snake, table, 205, 1, 343
Columns 10 and 11 were deleted. But zeros were not replaced by NAs.
After all the data cleaning, the functions I'm interested in including in the 
loop are: get_tlbs and summarize_bias (and these also work fine in my loop).
Thanks again 🙂
Sincerely
Helen
------------------------------------------------------------------------
*From:* Jim Lemon <drjimle...@gmail.com>
*Sent:* Tuesday, April 21, 2020 2:52 AM
*To:* Rui Barradas <ruipbarra...@sapo.pt>
*Cc:* Helen Sawaya <helensaw...@hotmail.com>; Michael Dewey 
<li...@dewey.myzen.co.uk>; r-help@R-project.org <r-help@r-project.org>
*Subject:* Re: [R] NA command in a 'for' loop
Hi Helen,
Your problem may lie in using row.names=TRUE. I was puzzled when an
extra column kept popping up in the output files. For reading in and
replacing zeros with NAs, this seems to work:
for(mockdata in 1:3) {
  mdf<-data.frame(sample(2:20,10),sample(2:20,10),sample(0:1,10,TRUE))
  write.table(mdf,file=paste0("threat",mockdata,".txt"),quote=FALSE,
   row.names=FALSE,col.names=FALSE)
}
txt.files<-list.files(".",pattern="threat[1-3]")
for(tf in txt.files) {
  d<-read.table(tf)
  d[,3][d[,3]==0]<-NA
  write.table(d,sub("[.]",".tbls.",tf),quote=FALSE,row.names=FALSE)
}
Jim
On Tue, Apr 21, 2020 at 7:57 AM Rui Barradas <ruipbarra...@sapo.pt> wrote:


Hello,

I believe the only way we have to see what is happening is for you to
post the output of


dput(head(d, 20))  # or 30


or, with d2 a subset of d that includes zeros,


dput(head(d2, 20))


Hope this helps,

Rui Barradas

Às 17:48 de 20/04/20, Helen Sawaya escreveu:

I have one column that represents correct response versus error (correct
is coded as 1 and error is coded as 0). Nowhere else in the dataset are
there values of 0. The vector is treated as an integer.
------------------------------------------------------------------------
*From:* Michael Dewey <li...@dewey.myzen.co.uk>
*Sent:* Monday, April 20, 2020 7:35 PM
*To:* Helen Sawaya <helensaw...@hotmail.com>; Rui Barradas
<ruipbarra...@sapo.pt>; r-help@R-project.org <r-help@R-project.org>
*Subject:* Re: [R] NA command in a 'for' loop
Just a thought, Helen, but is x being treated as a real, so that what you think
are zeros (and what are printed as zeros) are in fact very small numbers? If
so, you need to alter your test appropriately.

Michael

On 20/04/2020 17:25, Helen Sawaya wrote:

Thank you for your reply.

I tried d[] <- lapply(d, function(x) {is.na(x) <- x == 0; x})
but I am still getting zeros instead of NAs in my output..

I wonder if the problem is that some of my data files don't have any zeros 
(participants made no errors)..
________________________________
From: Rui Barradas <ruipbarra...@sapo.pt>
Sent: Monday, April 20, 2020 9:05 AM
To: Helen Sawaya <helensaw...@hotmail.com>; r-help@R-project.org 
<r-help@R-project.org>
Subject: Re: [R] NA command in a 'for' loop

Hello,

Instead of

d[d == 0] <- NA

try

d[] <- lapply(d, function(x) {is.na(x) <- x == 0; x})


Also, in the first for loop

paste(i, sep = "")

does nothing, it's the same as i.
And the same for

(d2$V4 == 1) == TRUE

Since (d2$V4 == 1)  already is FALSE/TRUE there is no need for

(.) == TRUE


Hope this helps,

Rui Barradas



Às 20:52 de 19/04/20, Helen Sawaya escreveu:

Dear R experts,

I am using a 'for' loop to apply commands to multiple datasets (each file is one 
participant). The only one not working is the command that identifies zeros in my 
datasets and changes them to NAs. But when I look at the output, zeros ("0") 
are still present. Surprisingly, the functions work fine when I apply them to a single
dataset (outside the loop). I've tried:


all.files <- list.files(".")
txt.files <- grep("threat.txt",all.files,value=T)

for(i in txt.files){
     d <- read.table(paste(i,sep=""),header=F)
     d[d==0] <- NA #replace zeros with NA
     write.table(d, paste0(i,".tlbs.txt"), quote=FALSE, row.names=TRUE)}
     d<-d[ ,-c(10,11)]
     d2<-d[complete.cases(d), ]
     d2$V4<-as.numeric(d2$V4)
     congruent <- (d2$V4 == 1) == TRUE
     x <- get_tlbs(d2$V14, congruent, prior_weights = NULL, method = 
"weighted", fill_gaps = FALSE)
     write.table(x, paste0(i,".tlbs.txt"), quote=FALSE, row.names=TRUE)}

I've also tried:

for(i in txt.files){
     d <- read.table(paste(i,sep=""),header=F)
     if (0 %in% d)
     {replace_with_na(d,replace = list(x = 0))} # replace zeros with NA
     d<-d[ ,-c(10,11)]
     d2<-d[complete.cases(d), ]
     d2$V4<-as.numeric(d2$V4)
     congruent <- (d2$V4 == 1) == TRUE
     x <- get_tlbs(d2$V14, congruent, prior_weights = NULL, method = 
"weighted", fill_gaps = FALSE)
     write.table(x, paste0(i,".summaryoutput.txt"), quote=FALSE, 
row.names=TRUE)}

Thank you for your help.
Sincerely
Helen

         [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


        [[alternative HTML version deleted]]

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


--
Michael
http://www.dewey.myzen.co.uk/home.html


______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.


______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Re: [R] NA command in a 'for' loop

Reply via email to