Hi, here is a script you can use in a Jupyter notebook or Colab:
!pip install biopython
import os
import shutil

import requests
from Bio import PDB
from google.colab import files

# Create the folder if it doesn't exist (exist_ok avoids a separate
# existence check and makes re-running the cell harmless).
os.makedirs('pdb_files', exist_ok=True)
# Function to download PDB file
def download_pdb(pdb_id, save_path):
    """Download one PDB entry from RCSB and store it in ``save_path``.

    The file is fetched from ``https://files.rcsb.org/download/<pdb_id>.pdb``
    and written to ``<save_path>/<pdb_id>.pdb``.

    Args:
        pdb_id: PDB identifier of the entry to fetch, e.g. ``"1TUP"``.
        save_path: Directory that receives the file; created if missing.

    Returns:
        True if the file was downloaded and written, False if the request
        failed (a message is printed in that case).
    """
    url = f"https://files.rcsb.org/download/{pdb_id}.pdb"
    try:
        # A timeout keeps one unresponsive request from stalling the batch.
        response = requests.get(url, timeout=30)
    except requests.RequestException as err:
        print(f"Failed to download {pdb_id}: {err}")
        return False

    if response.status_code != 200:
        print(f"Failed to download {pdb_id}")
        return False

    os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, f"{pdb_id}.pdb"), 'w') as f:
        f.write(response.text)
    return True
# Function to extract a specific chain from a PDB file and save it
def save_chain_from_pdb(pdb_id, chain_id, save_path):
    """Extract one chain from a downloaded PDB file and save it separately.

    Reads ``<save_path>/<pdb_id>.pdb`` and writes the selected chain to
    ``<save_path>/<pdb_id>_<chain_id>.pdb``.

    Args:
        pdb_id: PDB identifier; also used as the structure name.
        chain_id: ID of the chain to keep, e.g. ``"A"``.
        save_path: Directory holding the input file and receiving the output.
    """
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(
        pdb_id, os.path.join(save_path, f"{pdb_id}.pdb")
    )

    # If no chain matches, the selector rejects everything and the output
    # file would contain no atoms; report it instead of writing that file.
    if not any(chain.id == chain_id for chain in structure.get_chains()):
        print(f"Chain {chain_id} not found in {pdb_id}")
        return

    io = PDB.PDBIO()
    io.set_structure(structure)
    # Save only the specified chain
    io.save(
        os.path.join(save_path, f"{pdb_id}_{chain_id}.pdb"),
        select=ChainSelector(chain_id),
    )
# Class to select a specific chain
class ChainSelector(PDB.Select):
    """Selection filter for ``PDB.PDBIO.save`` that keeps a single chain."""

    def __init__(self, chain_id):
        """Store the ID of the chain that should be written out."""
        self.chain_id = chain_id

    def accept_chain(self, chain):
        """Return True only for the chain whose ID equals ``self.chain_id``."""
        return chain.id == self.chain_id
# Define the folder where you want to save the files.
# Make sure to run this Python script with sufficient permissions to
# write to this directory.
save_path = os.path.expanduser('/content/pdb_files')
os.makedirs(save_path, exist_ok=True)

# List of PDB IDs and the chain IDs you are interested in
pdb_chain_list = [
    {"pdb": "1TUP", "chain": "A"},
    {"pdb": "2HRK", "chain": "B"},
    {"pdb": "1LCD", "chain": "A"},
]

# Download PDB files and then extract the specific chain
for item in pdb_chain_list:
    pdb_id = item["pdb"]
    chain_id = item["chain"]
    print(f"Processing {pdb_id} chain {chain_id}")
    # Download PDB file
    download_pdb(pdb_id, save_path)
    # Skip extraction when the download did not leave a file behind,
    # so one failed entry does not abort the rest of the batch.
    if not os.path.exists(os.path.join(save_path, f"{pdb_id}.pdb")):
        continue
    # Extract and save the chain
    save_chain_from_pdb(pdb_id, chain_id, save_path)

# Create a Zip file (archive) from the folder the files were written to
shutil.make_archive('pdb_files', 'zip', save_path)
# Download Zip file
files.download('pdb_files.zip')
Br, Georg.
Am 07.09.2023 um 18:17 schrieb Whitley, Matthew (NIH/NCI) [C]:
Hello all,
I have a script I use from time to time to batch download multiple PDB
files using curl.
I have certain use cases in which I only need a specific chain from a
PDB file that contains multiple chains. At present, I download the
complete PDB file and then use pdbtools to pull out the specific chain
I want.
Does anyone know a way to batch download /specific chains/ from the
PDB? I reviewed the “File Download Services” page at the RCSB PDB,
but I didn’t find anything about downloading specific chains, so I
don’t know if this functionality exists.
Thanks for your suggestions.
Matthew
------------------------------------------------------------------------
To unsubscribe from the CCP4BB list, click the following link:
https://www.jiscmail.ac.uk/cgi-bin/WA-JISC.exe?SUBED1=CCP4BB&A=1
<https://www.jiscmail.ac.uk/cgi-bin/WA-JISC.exe?SUBED1=CCP4BB&A=1>
########################################################################
To unsubscribe from the CCP4BB list, click the following link:
https://www.jiscmail.ac.uk/cgi-bin/WA-JISC.exe?SUBED1=CCP4BB&A=1
This message was issued to members of www.jiscmail.ac.uk/CCP4BB, a mailing list
hosted by www.jiscmail.ac.uk, terms & conditions are available at
https://www.jiscmail.ac.uk/policyandsecurity/