I have a really long functions, and at the end of the function, I am using a if statement to tag certain keywords based on whether they have certain values contained in them.
However, the if statement doesn't seem to work. When I had split up the commands into various functions, it worked fine, but I'm not sure what going on now that it's combined into a single function. myfunc <- function(lst) { options(max.print=100000) setwd("~/Desktop/RStuff") state <- c("Alabama", "Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut", "Deleware", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tenessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming", "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY") inscompany <- c("21st Century", "AAA", "Alliance United", "Allied", "American Automobile Association", "AARP", "AIG", "American International Group", "Allstate", "All State", "All-state", "American States", "American Income", "AMICA", "American Family", "ANPAC", "American National Property and Casualty", "AutoOne", "Auto One", "Auto-One", "Auto-Owners", "Auto Owners", "AutoOwners", "Balboa", "Chubb Corporation", "Commerce", "Conseco", "Country Financial", "DeerBrook", "Eastwood", "East Wood", "East-Wood", "Encompass", "Erie", "Esurance", "E-surance", "Evergreen", "Farmers", "Geico", "General", "GMAC", "Hanover", "Hartford", "Infinity", "Kemper", "Liberty Mutual", "Loya", "Mercury", "MetLife", "Met Life", "Met-Life", "Mid-Century", "Mid Century", "Nationwide", "OldAmerican", "Old-American", "Old American", "Pemco", "Progressive", "Regence Group", "Reliance", "Response", "Safe", "Safe Auto", "SafeAuto", "Safe-Auto", "Safeco", "SafeCo", "Safeway", "Santa Fe", "Santa-Fe", "SantaFe", "Sentry", "Shelter", "Standard", "State Farm", "StateFarm", "State-Farm", "Titan", "Travelers", "Unitrin", "USAA", "Wells Fargo", "Western", "Westfield", "West Coast", "West-Coast", "WestCoast") agency <- c("Eastwood", "Tompkins", "ABC", "United", "Trusted Choice") city = c("New York City", "Los Angeles", "Chicago", "Houston", "Philadelphia", "Phoenix", "San Diego", "San Antonio", "Dallas", "Detroit", "San Jose", "Indianapolis", "Jacksonville", "San Francisco", "Columbus", "Austin", "Memphis", "Baltimore", "Milwaukee", "Fort Worth", "Charlotte", "El Paso", "Boston", "Seattle", "Washington DC", "Denver", "Nashville", "Portland", "Oklahoma City", "Las Vegas", "Tucson", "Long Beach", "Albuquerque", "New Orleans", "Cleveland", "Fresno", "Sacramento", "Kansas City", "Virginia Beach", "Mesa", "Atlanta", "Omaha", "Oakland", "Tulsa", "Honolulu", "Miami", "Minneapolis", "Colorado Springs", "Arlington", "Wichita", "Santa Ana", "Anaheim", "St. Louis", "Pittsburgh", "Tampa", "Cincinnati", "Raleigh", "Toledo", "Aurora", "Buffalo", "Riverside", "St. Paul", "Corpus Christi", "Newark", "Stockton", "Bakersfield", "Anchorage", "Lexington", "Louisville", "St. Petersburg", "Plano", "Norfolk", "Birmingham", "Lincoln", "Glendale", "Greensboro", "Hialeah", "Baton Rouge", "Fort Wayne", "Madison", "Garland", "Scottsdale", "Rochester", "Henderson", "Akron", "Chandler", "Chesapeake", "Modesto", "Lubbock", "Fremont", "Glendale", "Montgomery", "Orlando", "Chula Vista", "Durham", "Shreveport", "Laredo", "Yonkers", "Tacoma", "Anaconda", "Butte", "Suffolk", "Buckeye", "Augusta", "Cusseta", "Huntsville", "Boulder City", "Goodyear", "Hibbing", "Norman", "Sierra Vista", "Georgetown", "Carson City", "Chattanooga", "Lynchburg", "Columbia", "Mobile", "Athens", "Little Rock", "Yuma", "Babbitt", "Cape Coral", "Abilene", "Palmdale", "Jackson", "Plymouth", "Clarksville", "Palm Springs", "Lancaster", "Ellsworth", "Knoxville", "Amarillo", "Dothan", "Oak Ridge", "Edmond", "Beaumont", "Waco", "Port Arthur", "Toledo", "Brownsville", "El Reno", "Henderson", "Jonesboro", "Caribou", "Ellsworth", "Fort Wayne", "Independence", "Des Moines", "Lawton", "Rome", "North Port", "Savannah", "Lincoln", "Apple Valley", "Springfield", "Victorville", "Marana", "Eloy", "Sarasota", "Concord", "Grand Rapids", "Mission Viejo", "New Haven", "McAllen", "Worcester", "Syracuse", "Scranton", "Flint", "Harrisburg", "Poughkeepsie", "Spokane", "Cape Coral", "Fort Wayne", "Santa Rosa", "Ann Arbor", "South Bend", "Daytona Beach", "Peoria", "Atlantic City", "Antioch", "Thousand Oaks") cityst = c("New York City, NY", "Los Angeles, CA", "Chicago, IL", "Houston, TX", "Philadelphia, PA", "Phoenix, AR", "San Diego, CA", "San Antonio, TX", "Dallas, TX", "Detroit, MI", "San Jose, CA", "Indianapolis, IN", "Jacksonville, FL", "San Francisco, CA", "Columbus, OH", "Austin, TX", "Memphis, TN", "Baltimore, MD", "Milwaukee, WI", "Fort Worth, TX", "Charlotte, NC", "El Paso, TX", "Boston, MA", "Seattle, WA", "Washington DC", "Denver, CO", "Nashville, TN", "Portland, OR", "Oklahoma City, OK", "Las Vegas, NV", "Tucson, AR", "Long Beach, CA", "Albuquerque, NM", "New Orleans, LA", "Cleveland, OH", "Fresno, CA", "Sacramento, CA", "Kansas City, MO", "Virginia Beach, VA", "Mesa, CA", "Atlanta, GA", "Omaha, NE", "Oakland, CA", "Tulsa, OK", "Honolulu, HI", "Miami, FL", "Minneapolis, MN", "Colorado Springs, CO", "Arlington, TX", "Wichita, KS", "Santa Ana, CA", "Anaheim, CA", "St. Louis, MO", "Pittsburgh, PA", "Tampa, FL", "Cincinnati, OH", "Raleigh, NC", "Toledo, OH", "Aurora, CO", "Buffalo, NY", "Riverside, CA", "St. Paul, MN", "Corpus Christi, TX", "Newark, NJ", "Stockton, NJ", "Bakersfield, CA", "Anchorage, AK", "Lexington, KY", "Louisville, TN", "St. Petersburg, FL", "Plano, TX", "Norfolk, VA", "Birmingham, AL", "Lincoln, NE", "Glendale, AR", "Greensboro, NC", "Hialeah, FL", "Baton Rouge, LA", "Fort Wayne, TX", "Madison, WI", "Garland, TX", "Scottsdale, AR", "Rochester, NY", "Henderson, NV ", "Akron, OH", "Chandler, AR", "Chesapeake, VA", "Modesto, CA", "Lubbock, TX", "Fremont, CA", "Glendale, AR", "Montgomery, AL", "Orlando, FL", "Chula Vista, CA", "Durham, NC", "Shreveport, LA", "Laredo, TX", "Yonkers, NY", "Tacoma, WA", "Anaconda, MT", "Butte, MT", "Suffolk, VA", "Buckeye, AR", "Augusta, GA", "Cusseta, GA", "Huntsville, AL", "Boulder City, NV", "Goodyear, AZ", "Hibbing, MN", "Norman, OK", "Sierra Vista, AZ", "Georgetown, GA", "Carson City, NV", "Chattanooga, TN", "Lynchburg, TN", "Columbia, SC", "Kansas City, KS", "Mobile, AL", "Athens, GA", "Little Rock, AR", "Yuma, AZ", "Babbitt, MN", "Cape Coral, FL", "Abilene, TX", "Palmdale, CA", "Jackson, MS", "Plymouth, MA", "Clarksville, TN", "Palm Springs, CA", "Lancaster, CA", "Ellsworth, ME", "Knoxville, TN", "Amarillo, TX", "Dothan, AL", "Oak Ridge, TN", "Edmond, OK", "Beaumont, TX", "Waco, TX", "Port Arthur, TX", "Toledo, OH", "Brownsville, TX", "El Reno, OK", "Henderson, NV", "Jonesboro, AR", "Caribou, ME", "Ellsworth, ME", "Fort Wayne, IN", "Independence, MO", "Des Moines, IA", "Lawton, OK", "Rome, NY", "North Port, FL", "Savannah, GA", "Lincoln, NE", "Apple Valley, CA", "Springfield, MO", "Victorville, CA", "Marana, AZ", "Eloy, AZ", "Sarasota, FL", "Concord, CA", "Grand Rapids, MI", "Mission Viejo, CA", "New Haven, CT", "McAllen, TX", "Worcester, MA", "Syracuse, NY", "Scranton, PA", "Flint, MI", "Harrisburg, PA", "Poughkeepsie, NY", "Augusta, CA", "Spokane, WA", "Cape Coral, FL", "Fort Wayne, IN", "Santa Rosa, CA", "Ann Arbor, MI", "South Bend, IN", "Daytona Beach, FL", "Peoria, IL", "Atlantic City, NJ", "Antioch, CA", "Thousand Oaks, CA") lst <- list(roots = c("car insurance", "auto insurance"), roots2 = c("insurance"), prefix = c("cheap", "budget"), prefix2 = c("low cost"), suffix = c("quote", "quotes"), suffix2 = c("rate", "rates"), suffix3 = c("comparison"), state = c(state), inscompany = c(inscompany), city=c(city), cityst = c(cityst)) myone <- function(x, y) { m1 <- do.call(paste, expand.grid(lst[[x]], lst[[y]])) } mytwo <- function(x, y, z){ m2 <- do.call(paste, expand.grid(lst[[x]], lst[[y]], lst[[z]])) } d1 = mytwo("prefix", "roots", "suffix") d2 = mytwo("prefix", "roots", "suffix2") d3 = mytwo("prefix", "roots", "suffix3") d4 = mytwo("prefix2", "roots", "suffix") d5 = mytwo("prefix2", "roots", "suffix2") d6 = mytwo("prefix2", "roots", "suffix3") d7 = mytwo("prefix", "roots2", "suffix") d8 = mytwo("prefix", "roots2", "suffix2") d9 = mytwo("prefix", "roots2", "suffix3") d10 = mytwo("prefix2", "roots2", "suffix") d11 = mytwo("prefix2", "roots2", "suffix2") d12 = mytwo("prefix2", "roots2", "suffix3") d13 = myone("prefix", "roots") d14 = myone("prefix2", "roots") d15 = myone("prefix", "roots2") d16 = myone("prefix2", "roots2") d17 = myone("roots", "suffix") d18 = myone("roots", "suffix2") d19 = myone("roots", "suffix3") d20 = myone("roots2", "suffix") d21 = myone("roots2", "suffix2") d22 = myone("roots2", "suffix3") d23 = myone("state", "roots") d24 = myone("city", "roots") d25 = myone("cityst", "roots") d26 = myone("inscompany", "roots") d27 = myone("state", "roots2") d28 = myone("city", "roots2") d29 = myone("cityst", "roots2") d30 = myone("inscompany", "roots2") d31 = mytwo("state", "roots", "suffix") d32 = mytwo("city", "roots", "suffix") d33 = mytwo("cityst", "roots", "suffix") d34 = mytwo("inscompany", "roots", "suffix") d35 = mytwo("state", "roots", "suffix2") d36 = mytwo("city", "roots", "suffix2") d37 = mytwo("cityst", "roots", "suffix2") d38 = mytwo("inscompany", "roots", "suffix2") d39 = mytwo("state", "roots", "suffix3") d40 = mytwo("city", "roots", "suffix3") d41 = mytwo("cityst", "roots", "suffix3") d42 = mytwo("inscompany", "roots", "suffix3") d43 = mytwo("state", "roots2", "suffix") d44 = mytwo("city", "roots2", "suffix") d45 = mytwo("cityst", "roots2", "suffix") d46 = mytwo("inscompany", "roots2", "suffix") d47 = mytwo("state", "roots2", "suffix2") d48 = mytwo("city", "roots2", "suffix2") d49 = mytwo("cityst", "roots2", "suffix2") d50 = mytwo("inscompany", "roots2", "suffix2") d51 = mytwo("state", "roots2", "suffix3") d52 = mytwo("city", "roots2", "suffix3") d53 = mytwo("cityst", "roots2", "suffix3") d54 = mytwo("inscompany", "roots2", "suffix3") d55 = mytwo("prefix", "state", "roots") d56 = mytwo("prefix", "city", "roots") d57 = mytwo("prefix", "cityst", "roots") d58 = mytwo("prefix", "inscompany", "roots") d59 = mytwo("prefix2", "state", "roots") d60 = mytwo("prefix2", "city", "roots") d61 = mytwo("prefix2", "cityst", "roots") d62 = mytwo("prefix2", "inscompany", "roots") d63 = mytwo("prefix", "state", "roots2") d64 = mytwo("prefix", "city", "roots2") d65 = mytwo("prefix", "cityst", "roots2") d66 = mytwo("prefix", "inscompany", "roots2") d67 = mytwo("prefix2", "state", "roots2") d68 = mytwo("prefix2", "city", "roots2") d69 = mytwo("prefix2", "cityst", "roots2") d70 = mytwo("prefix2", "inscompany", "roots2") mydf <- rbind(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, d15, d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31, d32, d33, d34, d35, d36, d37, d38, d39, d40, d41, d42, d43, d44, d45, d46, d47, d48, d49, d50, d51, d52, d53, d54, d55, d56, d57, d58, d59, d60, d61, d62, d63, d64, d65, d66, d67, d68, d69, d70) library(stringr) inscompany_match <- str_c(inscompany, collapse = "|") state_match <- str_c(state, collapse = "|") city_match <- str_c(city, collapse = "|") mydf$inscompany <- as.numeric(str_detect(mydf$keyword, inscompany_match)) mydf$state <- as.numeric(str_detect(mydf$keyword, state_match)) mydf$city <- as.numeric(str_detect(mydf$keyword, city_match)) for (i in 1:nrow(mydf)) { Words = strsplit(as.character(mydf[i, "keyword"]), " ")[[1]] if(any(Words == "Colorado")){ if(Words[which(Words == "Colorado") + 1] == "Springs") mydf[i, "state"] <- 0 } if(any(Words == "Virginia")){ if(Words[which(Words == "Virginia") + 1] == "Beach") mydf[i, "state"] <- 0 } if(any(Words == "Oklahoma")){ if(Words[which(Words == "Oklahoma") + 1] == "City") mydf[i, "state"] <- 0 } if(any(Words == "Kansas")){ if(Words[which(Words == "Kansas") + 1] == "City") mydf[i, "state"] <- 0 } if(any(Words == "Washington")){ if(Words[which(Words == "Washington") + 1] == "DC") mydf[i, "state"] <- 0 } if(any(Words == "York")){ if(Words[which(Words == "York") + 1] == "City") mydf[i, "state"] <- 0 } if(any(Words == "Indianapolis")){ mydf[i, 'state'] <- 0 } if(any(Words == "AARP")){ mydf[i, 'state'] <- 0 } if(any(Words == "ANPAC")){ mydf[i, 'state'] <- 0 } if(any(Words == "AMICA")){ mydf[i, 'state'] <- 0 } if(any(Words == "GMAC")){ mydf[i, 'state'] <- 0 } if(any(Words == "USAA")){ mydf[i, 'state'] <- 0 } return(mydf) } } newdf <- myfunc(lst) newdf Help, Abraham WebRep Overall rating [[alternative HTML version deleted]] ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.