From 5ea857b0e948b687785b8e55e08866c6171fb715 Mon Sep 17 00:00:00 2001 From: Jonathan Dieter Date: Thu, 29 Mar 2007 19:43:26 +0300 Subject: Split server and client Signed-off-by: Jonathan Dieter --- COPYING | 339 ------------------ ChangeLog | 31 -- Makefile | 17 - README | 27 -- createprestorepo/Makefile | 6 + createprestorepo/createprestorepo.py | 308 +++++++++++++++++ createprestorepo/dumpMetadata.py | 456 ++++++++++++++++++++++++ createprestorepo/genprestometadata.py | 496 ++++++++++++++++++++++++++ makerepo/createprestorepo.py | 308 ----------------- makerepo/dumpMetadata.py | 456 ------------------------ makerepo/genprestometadata.py | 496 -------------------------- presto.conf | 9 - presto.py | 149 -------- shared/deltarpm.py | 86 ----- shared/prestoDownload.py | 169 --------- shared/prestoLog.py | 71 ---- shared/prestoRepo.py | 612 --------------------------------- shared/prestoTransaction.py | 97 ------ shared/prestomdparser.py | 167 --------- yum-presto/COPYING | 339 ++++++++++++++++++ yum-presto/ChangeLog | 36 ++ yum-presto/Makefile | 16 + yum-presto/README | 26 ++ yum-presto/presto.conf | 9 + yum-presto/presto.py | 149 ++++++++ yum-presto/shared/deltarpm.py | 86 +++++ yum-presto/shared/prestoDownload.py | 171 +++++++++ yum-presto/shared/prestoLog.py | 71 ++++ yum-presto/shared/prestoRepo.py | 612 +++++++++++++++++++++++++++++++++ yum-presto/shared/prestoTransaction.py | 97 ++++++ yum-presto/shared/prestomdparser.py | 166 +++++++++ 31 files changed, 3044 insertions(+), 3034 deletions(-) delete mode 100644 COPYING delete mode 100644 ChangeLog delete mode 100644 Makefile delete mode 100644 README create mode 100644 createprestorepo/Makefile create mode 100755 createprestorepo/createprestorepo.py create mode 100755 createprestorepo/dumpMetadata.py create mode 100644 createprestorepo/genprestometadata.py delete mode 100755 makerepo/createprestorepo.py delete mode 100755 makerepo/dumpMetadata.py delete mode 100644 makerepo/genprestometadata.py delete mode 100644 presto.conf delete mode 100644 presto.py delete mode 100644 shared/deltarpm.py delete mode 100644 shared/prestoDownload.py delete mode 100644 shared/prestoLog.py delete mode 100644 shared/prestoRepo.py delete mode 100644 shared/prestoTransaction.py delete mode 100644 shared/prestomdparser.py create mode 100644 yum-presto/COPYING create mode 100644 yum-presto/ChangeLog create mode 100644 yum-presto/Makefile create mode 100644 yum-presto/README create mode 100644 yum-presto/presto.conf create mode 100644 yum-presto/presto.py create mode 100644 yum-presto/shared/deltarpm.py create mode 100644 yum-presto/shared/prestoDownload.py create mode 100644 yum-presto/shared/prestoLog.py create mode 100644 yum-presto/shared/prestoRepo.py create mode 100644 yum-presto/shared/prestoTransaction.py create mode 100644 yum-presto/shared/prestomdparser.py diff --git a/COPYING b/COPYING deleted file mode 100644 index e77696a..0000000 --- a/COPYING +++ /dev/null @@ -1,339 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. 
This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. 
You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) 19yy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. 
Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 624c2d7..0000000 --- a/ChangeLog +++ /dev/null @@ -1,31 +0,0 @@ -* Wed Mar 28 2007 Jonathan Dieter - 0.3.0 - - Massive changes to downloading structure - - When unable to rebuild drpm, we now download full rpm - - Stop doing slow MD5 check and just check RPM header - while we have a prelink bug - -* Mon Mar 26 2007 Jonathan Dieter - 0.2.9 - - Fix another mirrorlist bug - - Minor optimization - - Added logging to /var/log/presto.log - - Fix another mirrorlist bug - - Fix bug where we sometimes die if delta repository doesn't exist - - Properly exit when unable to rebuild drpm - - Do full (slow) MD5 check when checking to see if we can - build RPM from disk - -* Sat Mar 24 2007 Jonathan Dieter - 0.2.3 - - Fixed bug that breaks yum install - -* Sat Mar 24 2007 Jonathan Dieter - 0.2.2 - - Fixed "not showing download error" bug - - Added --disablepresto yum command-line option - - Added code to trap the (hopefully) unlikely scenario where applydeltarpm - fails - - Show byte savings at end of yum update - -* Fri Mar 23 2007 Jonathan Dieter - 0.2.1 - - Fixed bug in handling mirrorlists in original repositories - -* Thu Mar 22 2007 Jonathan Dieter - 0.2.0 - - Initial release diff --git a/Makefile b/Makefile deleted file mode 100644 index 5e1b97b..0000000 --- a/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -clean: - rm -f *.pyc *.pyo *~ - cd makerepo; rm -f *.pyc *.pyo *~ - cd shared; rm -f *.pyc *.pyo *~ - -install: - mkdir -p $(DESTDIR)/usr/lib/yum-plugins - install -m 644 presto.py $(DESTDIR)/usr/lib/yum-plugins - mkdir -p $(DESTDIR)/etc/yum/pluginconf.d - install -m 644 presto.conf $(DESTDIR)/etc/yum/pluginconf.d - mkdir -p $(DESTDIR)/usr/share/presto - install -m 644 shared/prestoRepo.py $(DESTDIR)/usr/share/presto - install -m 644 shared/prestomdparser.py $(DESTDIR)/usr/share/presto - install -m 644 shared/prestoTransaction.py $(DESTDIR)/usr/share/presto - install -m 644 shared/prestoLog.py $(DESTDIR)/usr/share/presto - install -m 644 shared/prestoDownload.py $(DESTDIR)/usr/share/presto - install -m 644 shared/deltarpm.py $(DESTDIR)/usr/share/presto diff --git a/README b/README deleted file mode 100644 index e37f369..0000000 --- a/README +++ /dev/null @@ -1,27 +0,0 @@ -Presto: A project to add delta rpm support into yum for Fedora users -https://hosted.fedoraproject.org/projects/presto/wiki/WikiStart -Most of the code base has been written by Marcel Hild as -up2date/satellite-server delta rpm support. Code adaptation for yum done by -Ahmed Kamal . 
Support for storing repository -information added by Jonathan Dieter - -Installation: -============= -1- Make sure deltarpm is installed on your system (yum -y install deltarpm) -2- Place the files in the following locations: - share-presto => /usr/share/presto/* - presto.py => /usr/lib/yum-plugins/presto.py - presto.conf => /etc/yum/pluginconf.d/presto.conf) -3- Use makerepo/createdeltarepo.py to create a Presto repository on top of a - normal yum repository. - NOTE: createdeltarepo.py will NOT change any files created by createrepo -4- In your repository conf file, set deltaurl to point to your Presto - repository (in most cases, it will be the same as baseurl). -5- Now install an old rpm from your repo using rpm, then try updating it using - yum. The plugin should kick in, try to download the drpm, reconstruct the - full rpm, and yum should install that - -Notes: -===== -- The code is in an early stage. Minimal testing has been done. - diff --git a/createprestorepo/Makefile b/createprestorepo/Makefile new file mode 100644 index 0000000..38fbfc6 --- /dev/null +++ b/createprestorepo/Makefile @@ -0,0 +1,6 @@ +clean: + rm -f *.pyc *.pyo *~ + +install: +# mkdir -p $(DESTDIR)/usr/share/createprestorepo +# install -m 644 presto.py $(DESTDIR)/usr/lib/yum-plugins diff --git a/createprestorepo/createprestorepo.py b/createprestorepo/createprestorepo.py new file mode 100755 index 0000000..a217045 --- /dev/null +++ b/createprestorepo/createprestorepo.py @@ -0,0 +1,308 @@ +#!/usr/bin/python -t +# -*- mode: Python; indent-tabs-mode: nil; -*- +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import errno, os, sys, gzip +import fnmatch, re +import rpmUtils.transaction, rpmUtils.miscutils +import commands, libxml2 +import dumpMetadata +from dumpMetadata import _gzipOpen, getChecksum +#### import Utils + +DEBUG = True +#### Utils.setdebug(DEBUG) + +SUFFIX='drpm' +DRPMWORTHKEEPINGTHRESH=0.5 +DEBUG=0 +REPODATA="repodata" +REPOFILE="presto.xml" +REPOMDFILE="prestomd.xml" +SUM_TYPE="sha" + +def XML_start_newrpm(node, (f, n, e, v, r, a), srcdir_len): + newrpm_node = node.newChild(None, "package", None) + newrpm_node.newProp("type", "rpm") + newrpm_node.newChild(None, "name", n) + newrpm_node.newChild(None, "arch", str(a)) + version = newrpm_node.newChild(None, "version", None) + version.newProp("epoch", str(e)) + version.newProp("ver", str(v)) + version.newProp("rel", str(r)) + deltas = newrpm_node.newChild(None, "deltas", None) + return deltas + +def XML_oldrpm(newrpm_node, drpm_file, oldrpm, newrpm, sequence, size): + (f, n, e, v, r, a) = oldrpm + (nf, nn, ne, nv, nr, na) = newrpm + oldrpm_node = newrpm_node.newChild(None, "oldrpm", None) + checksum = getChecksum(SUM_TYPE, drpm_file) + if n != nn: + oldrpm_node.newChild(None, "name", n) + if a != na: + oldrpm_node.newChild(None, "arch", str(a)) + version = oldrpm_node.newChild(None, "version", None) + if e != ne: + version.newProp("epoch", str(e)) + if v != nv: + version.newProp("ver", str(v)) + version.newProp("rel", str(r)) + oldrpm_node.newChild(None, "drpm_filename", drpm_file) + oldrpm_node.newChild(None, "size", str(size)) + oldrpm_node.newChild(None, "sequence", str(sequence)) + cs_node = oldrpm_node.newChild(None, "checksum", str(checksum)) + cs_node.newProp("type", SUM_TYPE) + +def startXML(): + basedoc = libxml2.newDoc("1.0") + baseroot = basedoc.newChild(None, "metadata", None) + basens = baseroot.newNs('http://linux.duke.edu/metadata/common', None) + formatns = baseroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm') + baseroot.setNs(basens) + return (basedoc, baseroot) + +def endXML(xmldoc, filename, srcdir, compressed=True): + if compressed: + outfile = _gzipOpen("%s%s/%s.gz" % (srcdir, REPODATA, filename), "w") + output = xmldoc.serialize('UTF-8', 1) + outfile.write(output) + outfile.close() + else: + xmldoc.saveFormatFileEnc("%s%s/%s" % (srcdir, REPODATA, filename), 'UTF-8', 1) + xmldoc.freeDoc() + +def repoXML(srcdir): + """generate the repomd.xml file that stores the info on the other files""" + repodoc = libxml2.newDoc("1.0") + reporoot = repodoc.newChild(None, "repomd", None) + repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None) + reporoot.setNs(repons) + repofilepath = "%s%s/%s" % (srcdir, REPODATA, REPOMDFILE) + filename = "%s%s/%s.gz" % (srcdir, REPODATA, REPOFILE) + filetype = "deltas" + zfo = _gzipOpen(filename, "rb") + uncsum = getChecksum(SUM_TYPE, zfo) + zfo.close() + csum = getChecksum(SUM_TYPE, filename) + timestamp = os.stat(filename)[8] + data = reporoot.newChild(None, 'data', None) + data.newProp('type', filetype) + location = data.newChild(None, 'location', None) + location.newProp('href', "%s/%s.gz" % (REPODATA, REPOFILE)) + checksum = data.newChild(None, 'checksum', csum) + checksum.newProp('type', SUM_TYPE) + timestamp = data.newChild(None, 'timestamp', str(timestamp)) + unchecksum = data.newChild(None, 'open-checksum', uncsum) + unchecksum.newProp('type', SUM_TYPE) + 
endXML(repodoc, REPOMDFILE, srcdir, False) + +def genDeltaRPM(ts, newrpm, oldrpm, is_new_package, srcdir, dstdir, locroot): + (f1,n1,e1,v1,r1,a1) = newrpm + (f2,n2,e2,v2,r2,a2) = oldrpm + hdr = rpmUtils.miscutils.hdrFromPackage(ts,f1) + arch = hdr['arch'] + v12 = "_".join([v1,v2]) + r12 = "_".join([r1,r2]) + deltaRPMName= '%s/%s.%s.%s' % (dstdir, "-".join([n1,v12,r12]), a1, SUFFIX) + if DEBUG: + print "DEBUG " + deltaCommand + # If the drpm doesn't exists, make it, else skip it + if os.path.exists("%s%s" % (srcdir, deltaRPMName)): + dsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName)) + if e1 == e2: + print 'Using pre-generated delta rpm for %s.%s - %s.%s => %s.%s' % (n1, a1, v2, r2, v1, r1) + else: + print 'Using pre-generated delta rpm for %s.%s - %s:%s.%s => %s:%s.%s' % (n1, a1, e2, v2, r2, e1, v1, r1) + # Get checksum + seqfile = open("%s%s.seq" % (srcdir, deltaRPMName), "r") + sequence = seqfile.read()[:-1] + sequence = sequence[sequence.rfind("-")+1:] + seqfile.close() + if is_new_package: + locroot = XML_start_newrpm(locroot, newrpm, len(srcdir)) + is_new_package = False + XML_oldrpm(locroot, deltaRPMName, oldrpm, newrpm, sequence, dsize) + if DEBUG: + print "DEBUG skipping %s" % (deltaRPMName) + elif os.path.exists("%s%s.dontdelta" % (srcdir, deltaRPMName)): + pass + else: + deltaCommand = 'makedeltarpm -s %s%s.seq %s %s %s%s' % (srcdir, deltaRPMName, f2, f1, srcdir, deltaRPMName) + (code, out) = commands.getstatusoutput(deltaCommand) + if code: + #raise Exception("genDeltaRPM: exitcode was %s - Reported Error: %s" % (code, out)) + print "Error genDeltaRPM for %s: exitcode was %s - Reported Error: %s" % (n1, code, out) + + # Get size + dsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName)) + + # Get checksum + seqfile = open("%s%s.seq" % (srcdir, deltaRPMName), "r") + sequence = seqfile.read()[:-1] + sequence = sequence[sequence.rfind("-")+1:] + seqfile.close() + + # Check whether or not we should keep the drpm + if not drpmIsWorthKeeping(deltaRPMName, f1, srcdir): + if DEBUG: + print 'deleting %s' % (deltaRPMName) + try: + os.unlink("%s%s" % (srcdir, deltaRPMName)) + except Exception, e: + print "Error deleting deltarpm %s" % (deltaRPMName), str(e) + try: + os.unlink("%s%s.seq" % (srcdir, deltaRPMName)) + except Exception, e: + print "Error deleting checksum %s.seq" % (deltaRPMName), str(e) + f = open("%s%s.dontdelta" % (srcdir, deltaRPMName), "w") + f.close() + else: + if e1 == e2: + print 'Generated delta rpm for %s.%s - %s.%s => %s.%s' % (n1, a1, v2, r2, v1, r1) + else: + print 'Generated delta rpm for %s.%s - %s:%s.%s => %s:%s.%s' % (n1, a1, e2, v2, r2, e1, v1, r1) + + if is_new_package: + locroot = XML_start_newrpm(locroot, newrpm, len(srcdir)) + is_new_package = False + XML_oldrpm(locroot, deltaRPMName, oldrpm, newrpm, sequence, dsize) + return (is_new_package, locroot) + +def drpmIsWorthKeeping(deltaRPMName, newrpm, srcdir): + newsize = os.path.getsize(newrpm) + drpmsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName)) + # Delete the drpm if it's too large + if drpmsize > DRPMWORTHKEEPINGTHRESH * newsize: + return False + return True + +def createPrestoRepo(srcdir, dstdir): + ts = rpmUtils.transaction.initReadOnlyTransaction() + changed = False + + # Create list of .rpm files. + # We don't use "glob", so sub-directories are supported. 
+ print 'Using source dir: %s' % srcdir + print 'Using destination dir: %s' % dstdir + if dstdir[-1] == "/": + dstdir = dstdir[:-1] + srcfiles = [] + for root, dirs, files in os.walk(srcdir): + for f in fnmatch.filter(files,'*.rpm'): + srcfiles.append(os.path.join(root,f)) + if not len(srcfiles): + print ' Nothing found.' + return changed + assert srcfiles[0].startswith(srcdir) + + # Check whether dstdir exists, and if it doesn't, create it + if not os.access(dstdir, os.F_OK): + os.makedirs(dstdir, 0755) + elif not os.access(dstdir, os.W_OK): + print 'ERROR: Unable to write to %s' % dstdir + sys.exit(1) + + # Check whether REPODATA exists, and if it doesn't, create it + if not os.access("%s%s" % (srcdir, REPODATA), os.F_OK): + os.makedirs("%s%s" % (srcdir, REPODATA), 0755) + elif not os.access(dstdir, os.W_OK): + print 'ERROR: Unable to write to %s' % REPODATA + sys.exit(1) + + # Create XML document +# xmldoc = libxml2.newDoc("1.0") +# xmlroot = xmldoc + (xmldoc, xmlroot) = startXML() + + # Create map: rpm %name -> list of tuples (filename,name,e,v,r) + newestsrcrpms = {} + for f in srcfiles: + hdr = rpmUtils.miscutils.hdrFromPackage(ts, f) + nm = hdr['name'] + "." + hdr['arch'] + n = hdr['name'] + a = hdr['arch'] + v = hdr['version'] + r = hdr['release'] + e = hdr['epoch'] + if e is None: + e = 0 + newestsrcrpms.setdefault(nm,[]) + newestsrcrpms[nm].append((f,n,e,v,r,a)) + + # Now purge old src.rpm unless their %name matches a white-list pattern. + for l in newestsrcrpms.itervalues(): + x = len(l) + + if x > 1: + def sortByEVR(fnevr1, fnevr2): + (f1,n1,e1,v1,r1,a1) = fnevr1 + (f2,n2,e2,v2,r2,a2) = fnevr2 + rc = rpmUtils.miscutils.compareEVR((e1,v1,r1),(e2,v2,r2)) + if rc == 0: + return 0 + if rc > 0: + return -1 + if rc < 0: + return 1 + + l.sort(sortByEVR) # highest first in list + + # Generate delta rpm + is_new_package = True + locroot = xmlroot + for rpm in l[1:]: + (is_new_package, locroot) = genDeltaRPM(ts, l[0], rpm, is_new_package, srcdir, dstdir, locroot) + + if not len(srcfiles): + print 'WARNING: No .rpms left. Stopping here.' + return changed + + # Write out end of deltas.xml file + endXML(xmldoc, REPOFILE, srcdir, True) + repoXML(srcdir) + + # Examine binary repository directories and remove everything which + # is missing its corresponding src.rpm. 
+ return changed + + +def main(bin_rpm_path, delta_rpm_path): + assert rpmUtils.miscutils.compareEVR((1,2,3),(1,2,0)) > 0 + assert rpmUtils.miscutils.compareEVR((0,1,2),(0,1,2)) == 0 + assert rpmUtils.miscutils.compareEVR((1,2,3),(4,0,99)) < 0 + + return createPrestoRepo(bin_rpm_path, delta_rpm_path) + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print 'Usage: %s \n' % os.path.basename(sys.argv[0]) + sys.exit(errno.EINVAL) + bin_rpm_path = sys.argv[1] + delta_rpm_path = sys.argv[2] + + #### cfg = Utils.load_config_module(sys.argv[1]) + + #### Utils.signer_gid_check(cfg.signersgid) + #### os.umask(cfg.signersumask) + + #### for dist in sys.argv[2:]: + #### if not cfg.archdict.has_key(dist): + #### print "No distribution release named '%s' found" % dist + #### sys.exit(errno.EINVAL) + main(bin_rpm_path, delta_rpm_path) + sys.exit(0) diff --git a/createprestorepo/dumpMetadata.py b/createprestorepo/dumpMetadata.py new file mode 100755 index 0000000..2f074da --- /dev/null +++ b/createprestorepo/dumpMetadata.py @@ -0,0 +1,456 @@ +#!/usr/bin/python -t +# base classes and functions for dumping out package Metadata +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# Copyright 2004 Duke University + +# $Id: dumpMetadata.py,v 1.36 2006/02/21 20:10:08 pnasrat Exp $ + +import os +import rpm +import exceptions +import md5 +import sha +import types +import struct +import re +import stat + +# done to fix gzip randomly changing the checksum +import gzip +from zlib import error as zlibError +from gzip import write32u, FNAME + +__all__ = ["GzipFile","open"] + +class GzipFile(gzip.GzipFile): + def _write_gzip_header(self): + self.fileobj.write('\037\213') # magic header + self.fileobj.write('\010') # compression method + fname = self.filename[:-3] + flags = 0 + if fname: + flags = FNAME + self.fileobj.write(chr(flags)) + write32u(self.fileobj, long(0)) + self.fileobj.write('\002') + self.fileobj.write('\377') + if fname: + self.fileobj.write(fname + '\000') + + +def _gzipOpen(filename, mode="rb", compresslevel=9): + return GzipFile(filename, mode, compresslevel) + + + +def returnFD(filename): + try: + fdno = os.open(filename, os.O_RDONLY) + except OSError: + raise MDError, "Error opening file" + return fdno + +def returnHdr(ts, package): + """hand back the rpm header or raise an Error if the pkg is fubar""" + opened_here = 0 + try: + if type(package) is types.StringType: + opened_here = 1 + fdno = os.open(package, os.O_RDONLY) + else: + fdno = package # let's assume this is an fdno and go with it :) + except OSError: + raise MDError, "Error opening file" + ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD)) + try: + hdr = ts.hdrFromFdno(fdno) + except rpm.error: + raise MDError, "Error opening package" + if type(hdr) != rpm.hdr: + raise MDError, "Error opening package" + ts.setVSFlags(0) + + if opened_here: + os.close(fdno) + del fdno + + return hdr + +def getChecksum(sumtype, file, CHUNK=2**16): + """takes filename, hand back Checksum of it + sumtype = md5 or sha + filename = /path/to/file + CHUNK=65536 by default""" + + # chunking brazenly lifted from Ryan Tomayko + opened_here = 0 + try: + if type(file) is not types.StringType: + fo = file # assume it's a file-like-object + else: + opened_here = 1 + fo = open(file, 'rb', CHUNK) + + if sumtype == 'md5': + sum = md5.new() + elif sumtype == 'sha': + sum = sha.new() + else: + raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype + chunk = fo.read + while chunk: + chunk = fo.read(CHUNK) + sum.update(chunk) + + if opened_here: + fo.close() + del fo + + return sum.hexdigest() + except: + raise MDError, 'Error opening file for checksum: %s' % file + + +def utf8String(string): + """hands back a unicoded string""" + if string is None: + return '' + elif isinstance(string, unicode): + return string + try: + x = unicode(string, 'ascii') + return string + except UnicodeError: + encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'] + for enc in encodings: + try: + x = unicode(string, enc) + except UnicodeError: + pass + else: + if x.encode(enc) == string: + return x.encode('utf-8') + newstring = '' + for char in string: + if ord(char) > 127: + newstring = newstring + '?' 
+ else: + newstring = newstring + char + return newstring + + +def byteranges(file): + """takes an rpm file or fileobject and returns byteranges for location of the header""" + opened_here = 0 + if type(file) is not types.StringType: + fo = file + else: + opened_here = 1 + fo = open(file, 'r') + #read in past lead and first 8 bytes of sig header + fo.seek(104) + # 104 bytes in + binindex = fo.read(4) + # 108 bytes in + (sigindex, ) = struct.unpack('>I', binindex) + bindata = fo.read(4) + # 112 bytes in + (sigdata, ) = struct.unpack('>I', bindata) + # each index is 4 32bit segments - so each is 16 bytes + sigindexsize = sigindex * 16 + sigsize = sigdata + sigindexsize + # we have to round off to the next 8 byte boundary + disttoboundary = (sigsize % 8) + if disttoboundary != 0: + disttoboundary = 8 - disttoboundary + # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data + hdrstart = 112 + sigsize + disttoboundary + + fo.seek(hdrstart) # go to the start of the header + fo.seek(8,1) # read past the magic number and reserved bytes + + binindex = fo.read(4) + (hdrindex, ) = struct.unpack('>I', binindex) + bindata = fo.read(4) + (hdrdata, ) = struct.unpack('>I', bindata) + + # each index is 4 32bit segments - so each is 16 bytes + hdrindexsize = hdrindex * 16 + # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the + # end of the sig and the header. + hdrsize = hdrdata + hdrindexsize + 16 + + # header end is hdrstart + hdrsize + hdrend = hdrstart + hdrsize + if opened_here: + fo.close() + del fo + return (hdrstart, hdrend) + + +class MDError(exceptions.Exception): + def __init__(self, args=None): + exceptions.Exception.__init__(self) + self.args = args + + + +class RpmMetaData: + """each drpm is one object, you pass it an rpm file + it opens the file, and pulls the information out in bite-sized chunks :) + """ + + mode_cache = {} + + def __init__(self, ts, basedir, filename, options): + try: + stats = os.stat(os.path.join(basedir, filename)) + self.size = stats[6] + self.mtime = stats[8] + del stats + except OSError, e: + raise MDError, "Error Stat'ing file %s %s" % (basedir, filename) + self.options = options + self.localurl = options['baseurl'] + self.relativepath = filename + fd = returnFD(os.path.join(basedir, filename)) + self.hdr = returnHdr(ts, fd) + os.lseek(fd, 0, 0) + fo = os.fdopen(fd, 'rb') + self.pkgid = self.doChecksumCache(fo) + fo.seek(0) + (self.rangestart, self.rangeend) = byteranges(fo) + fo.close() + del fo + del fd + + def arch(self): + if self.tagByName('sourcepackage') == 1: + return 'src' + else: + return self.tagByName('arch') + + def _correctVersion(self, vers): + returnvers = [] + vertuple = (None, None, None) + if vers is None: + returnvers.append(vertuple) + return returnvers + + if type(vers) is not types.ListType: + if vers is not None: + vertuple = self._stringToVersion(vers) + else: + vertuple = (None, None, None) + returnvers.append(vertuple) + else: + for ver in vers: + if ver is not None: + vertuple = self._stringToVersion(ver) + else: + vertuple = (None, None, None) + returnvers.append(vertuple) + return returnvers + + + def _stringToVersion(self, strng): + i = strng.find(':') + if i != -1: + epoch = strng[:i] + else: + epoch = '0' + j = strng.find('-') + if j != -1: + if strng[i + 1:j] == '': + version = None + else: + version = strng[i + 1:j] + release = strng[j + 1:] + else: + if strng[i + 1:] == '': + version = None + else: + version = strng[i + 1:] + release = None + return (epoch, version, release) + + ########### + # 
Title: Remove duplicates from a sequence + # Submitter: Tim Peters + # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560 + + def _uniq(self,s): + """Return a list of the elements in s, but without duplicates. + + For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], + unique("abcabc") some permutation of ["a", "b", "c"], and + unique(([1, 2], [2, 3], [1, 2])) some permutation of + [[2, 3], [1, 2]]. + + For best speed, all sequence elements should be hashable. Then + unique() will usually work in linear time. + + If not possible, the sequence elements should enjoy a total + ordering, and if list(s).sort() doesn't raise TypeError it's + assumed that they do enjoy a total ordering. Then unique() will + usually work in O(N*log2(N)) time. + + If that's not possible either, the sequence elements must support + equality-testing. Then unique() will usually work in quadratic + time. + """ + + n = len(s) + if n == 0: + return [] + + # Try using a dict first, as that's the fastest and will usually + # work. If it doesn't work, it will usually fail quickly, so it + # usually doesn't cost much to *try* it. It requires that all the + # sequence elements be hashable, and support equality comparison. + u = {} + try: + for x in s: + u[x] = 1 + except TypeError: + del u # move on to the next method + else: + return u.keys() + + # We can't hash all the elements. Second fastest is to sort, + # which brings the equal elements together; then duplicates are + # easy to weed out in a single pass. + # NOTE: Python's list.sort() was designed to be efficient in the + # presence of many duplicate elements. This isn't true of all + # sort functions in all languages or libraries, so this approach + # is more effective in Python than it may be elsewhere. + try: + t = list(s) + t.sort() + except TypeError: + del t # move on to the next method + else: + assert n > 0 + last = t[0] + lasti = i = 1 + while i < n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + return t[:lasti] + + # Brute force is all that's left. 
+ u = [] + for x in s: + if x not in u: + u.append(x) + return u + + def tagByName(self, tag): + data = self.hdr[tag] + if type(data) is types.ListType: + if len(data) > 0: + return data[0] + else: + return '' + else: + return data + + def listTagByName(self, tag): + """take a tag that should be a list and make sure it is one""" + lst = [] + data = self.hdr[tag] + if data is None: + return lst + + if type(data) is types.ListType: + lst.extend(data) + else: + lst.append(data) + return lst + + def epoch(self): + if self.hdr['epoch'] is None: + return 0 + else: + return self.tagByName('epoch') + + def doChecksumCache(self, fo): + """return a checksum for a package: + - check if the checksum cache is enabled + if not - return the checksum + if so - check to see if it has a cache file + if so, open it and return the first line's contents + if not, grab the checksum and write it to a file for this pkg + """ + if not self.options['cache']: + return getChecksum(self.options['sumtype'], fo) + + csumtag = '%s-%s' % (self.hdr['name'] , self.hdr[rpm.RPMTAG_SHA1HEADER]) + csumfile = '%s/%s' % (self.options['cachedir'], csumtag) + if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]: + csumo = open(csumfile, 'r') + checksum = csumo.readline() + csumo.close() + + else: + checksum = getChecksum(self.options['sumtype'], fo) + csumo = open(csumfile, 'w') + csumo.write(checksum) + csumo.close() + + return checksum + + + +def generateXML(doc, node, formatns, drpmObj, sumtype): + """takes an xml doc object and a package metadata entry node, populates a + package node with the md information""" + ns = node.ns() + pkgNode = node.newChild(None, "package", None) + pkgNode.newProp('type', 'rpm') + pkgNode.newChild(None, 'name', drpmObj.tagByName('name')) + pkgNode.newChild(None, 'arch', drpmObj.arch()) + version = pkgNode.newChild(None, 'version', None) + version.newProp('epoch', str(drpmObj.epoch())) + version.newProp('ver', str(drpmObj.tagByName('version'))) + version.newProp('rel', str(drpmObj.tagByName('release'))) + return pkgNode + +def repoXML(node, cmds): + """generate the repomd.xml file that stores the info on the other files""" + sumtype = cmds['sumtype'] + workfiles = [(cmds['prestofile'], 'deltas')] + + + for (file, ftype) in workfiles: + zfo = _gzipOpen(os.path.join(cmds['outputdir'], cmds['tempdir'], file)) + uncsum = getChecksum(sumtype, zfo) + zfo.close() + csum = getChecksum(sumtype, os.path.join(cmds['outputdir'], cmds['tempdir'], file)) + timestamp = os.stat(os.path.join(cmds['outputdir'], cmds['tempdir'], file))[8] + data = node.newChild(None, 'data', None) + data.newProp('type', ftype) + location = data.newChild(None, 'location', None) + if cmds['baseurl'] is not None: + location.newProp('xml:base', cmds['baseurl']) + location.newProp('href', os.path.join(cmds['finaldir'], file)) + checksum = data.newChild(None, 'checksum', csum) + checksum.newProp('type', sumtype) + timestamp = data.newChild(None, 'timestamp', str(timestamp)) + unchecksum = data.newChild(None, 'open-checksum', uncsum) + unchecksum.newProp('type', sumtype) diff --git a/createprestorepo/genprestometadata.py b/createprestorepo/genprestometadata.py new file mode 100644 index 0000000..402af25 --- /dev/null +++ b/createprestorepo/genprestometadata.py @@ -0,0 +1,496 @@ +#!/usr/bin/python -t +# primary functions and glue for generating the repository metadata +# + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free 
Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2004 Duke University +# Copyright 2007 Jonathan Dieter + + +import os +import sys +import getopt +import rpm +import libxml2 +import string +import fnmatch +import urlgrabber + +import dumpMetadata +from dumpMetadata import _gzipOpen +__version__ = '0.4.3' + +def errorprint(stuff): + print >> sys.stderr, stuff + +def _(args): + """Stub function for translation""" + return args + +def usage(retval=1): + print _(""" + createrepo [options] directory-of-packages + + Options: + -u, --baseurl = optional base url location for all files + -o, --outputdir = optional directory to output to + -x, --exclude = files globs to exclude, can be specified multiple times + -q, --quiet = run quietly + -v, --verbose = run verbosely + -c, --cachedir = specify which dir to use for the checksum cache + -h, --help = show this help + -V, --version = output version + -p, --pretty = output xml files in pretty format. + """) + + sys.exit(retval) + +class MetaDataGenerator: + def __init__(self, cmds): + self.cmds = cmds + self.ts = rpm.TransactionSet() + self.pkgcount = 0 + self.files = [] + + def getFileList(self, basepath, path, ext, filelist): + """Return all files in path matching ext, store them in filelist, + recurse dirs. 
Returns a list object""" + + extlen = len(ext) + totalpath = os.path.normpath(os.path.join(basepath, path)) + try: + dir_list = os.listdir(totalpath) + except OSError, e: + errorprint(_('Error accessing directory %s, %s') % (totalpath, e)) + sys.exit(1) + + for d in dir_list: + if os.path.isdir(totalpath + '/' + d): + filelist = self.getFileList(basepath, os.path.join(path, d), ext, filelist) + else: + if string.lower(d[-extlen:]) == '%s' % (ext): + if totalpath.find(basepath) == 0: + relativepath = totalpath.replace(basepath, "", 1) + relativepath = relativepath.lstrip("/") + filelist.append(os.path.join(relativepath, d)) + else: + raise "basepath '%s' not found in path '%s'" % (basepath, totalpath) + + return filelist + + + def trimRpms(self, files): + badrpms = [] + for file in files: + for glob in self.cmds['excludes']: + if fnmatch.fnmatch(file, glob): + # print 'excluded: %s' % file + if file not in badrpms: + badrpms.append(file) + for file in badrpms: + if file in files: + files.remove(file) + return files + + def doPkgMetadata(self, directory): + """all the heavy lifting for the package metadata""" + + # rpms we're going to be dealing with + files = self.getFileList(self.cmds['basedir'], directory, '.dpm', []) + files = self.trimRpms(files) + self.pkgcount = len(files) + self.openMetadataDocs() + self.writeMetadataDocs(files) + self.closeMetadataDocs() + + + def openMetadataDocs(self): + self._setupPresto() + + def _setupPresto(self): + # setup the base metadata doc + self.prestodoc = libxml2.newDoc("1.0") + self.prestoroot = self.prestodoc.newChild(None, "metadata", None) + basens = self.prestoroot.newNs('http://linux.duke.edu/metadata/common', None) + self.formatns = self.prestoroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm') + self.prestoroot.setNs(basens) + prestofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestofile']) + self.prestofile = _gzipOpen(prestofilepath, 'w') + self.prestofile.write('\n') + self.prestofile.write('\n' % + self.pkgcount) + + + def writeMetadataDocs(self, files, current=0): + for file in files: + current+=1 + try: + mdobj = dumpMetadata.RpmMetaData(self.ts, self.cmds['basedir'], file, self.cmds) + if not self.cmds['quiet']: + if self.cmds['verbose']: + print '%d/%d - %s' % (current, len(files), file) + else: + sys.stdout.write('\r' + ' ' * 80) + sys.stdout.write("\r%d/%d - %s" % (current, self.pkgcount, file)) + sys.stdout.flush() + except dumpMetadata.MDError, e: + errorprint('\n%s - %s' % (e, file)) + continue + else: + try: + node = dumpMetadata.generateXML(self.prestodoc, self.prestoroot, self.formatns, mdobj, self.cmds['sumtype']) + except dumpMetadata.MDError, e: + errorprint(_('\nAn error occurred creating presto metadata: %s') % e) + continue + else: + output = node.serialize('UTF-8', self.cmds['pretty']) + self.prestofile.write(output) + self.prestofile.write('\n') + node.unlinkNode() + node.freeNode() + del node + + return current + + + def closeMetadataDocs(self): + if not self.cmds['quiet']: + print '' + + # save them up to the tmp locations: + if not self.cmds['quiet']: + print _('Saving Presto metadata') + self.prestofile.write('\n') + self.prestofile.close() + self.prestodoc.freeDoc() + + def doRepoMetadata(self): + """wrapper to generate the prestomd.xml file that stores the info on the other files""" + repodoc = libxml2.newDoc("1.0") + reporoot = repodoc.newChild(None, "repomd", None) + repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None) + reporoot.setNs(repons) + 
repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestomdfile']) + + try: + dumpMetadata.repoXML(reporoot, self.cmds) + except dumpMetadata.MDError, e: + errorprint(_('Error generating repo xml file: %s') % e) + sys.exit(1) + + try: + repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1) + except: + errorprint(_('Error saving temp file for rep xml: %s') % repofilepath) + sys.exit(1) + + del repodoc + +class SplitMetaDataGenerator(MetaDataGenerator): + + def __init__(self, cmds): + MetaDataGenerator.__init__(self, cmds) + self.initialdir = self.cmds['basedir'] + + def _getFragmentUrl(self, url, fragment): + import urlparse + urlparse.uses_fragment.append('media') + if not url: + return url + (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url) + return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment))) + + def doPkgMetadata(self, directories): + """all the heavy lifting for the package metadata""" + import types + if type(directories) == types.StringType: + MetaDataGenerator.doPkgMetadata(self, directories) + return + filematrix = {} + for mydir in directories: + filematrix[mydir] = self.getFileList(os.path.join(self.initialdir, mydir), '.', '.rpm', []) + self.trimRpms(filematrix[mydir]) + self.pkgcount += len(filematrix[mydir]) + + mediano = 1 + current = 0 + self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano) + self.cmds['basedir'] = os.path.join(self.initialdir, directories[0]) + self.openMetadataDocs() + for mydir in directories: + self.cmds['basedir'] = os.path.join(self.initialdir, mydir) + self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano) + current = self.writeMetadataDocs(filematrix[mydir], current) + mediano += 1 + self.cmds['basedir'] = os.path.join(self.initialdir, directories[0]) + self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1) + self.closeMetadataDocs() + + +def checkAndMakeDir(dir): + """ + check out the dir and make it, if possible, return 1 if done, else return 0 + """ + if os.path.exists(dir): + if not os.path.isdir(dir): + errorprint(_('%s is not a dir') % dir) + result = False + else: + if not os.access(dir, os.W_OK): + errorprint(_('%s is not writable') % dir) + result = False + else: + result = True + else: + try: + os.mkdir(dir) + except OSError, e: + errorprint(_('Error creating dir %s: %s') % (dir, e)) + result = False + else: + result = True + return result + +def parseArgs(args): + """ + Parse the command line args return a commands dict and directory. + Sanity check all the things being passed in. 
+ """ + cmds = {} + cmds['quiet'] = 0 + cmds['verbose'] = 0 + cmds['excludes'] = [] + cmds['baseurl'] = None + cmds['sumtype'] = 'sha' + cmds['pretty'] = 0 + cmds['cachedir'] = None + cmds['basedir'] = os.getcwd() + cmds['cache'] = False + cmds['split'] = False + cmds['outputdir'] = "" + cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] + cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*'] + + try: + gopts, argsleft = getopt.getopt(args, 'phqVvs:x:u:c:o:', ['help', 'exclude=', + 'quiet', 'verbose', 'cachedir=', 'basedir=', + 'baseurl=', 'checksum=', + 'version', 'pretty', 'split', 'outputdir=']) + except getopt.error, e: + errorprint(_('Options Error: %s.') % e) + usage() + + try: + for arg,a in gopts: + if arg in ['-h','--help']: + usage(retval=0) + elif arg in ['-V', '--version']: + print '%s' % __version__ + sys.exit(0) + elif arg == '--split': + cmds['split'] = True + except ValueError, e: + errorprint(_('Options Error: %s') % e) + usage() + + + # make sure our dir makes sense before we continue + if len(argsleft) > 1 and not cmds['split']: + errorprint(_('Error: Only one directory allowed per run.')) + usage() + elif len(argsleft) == 0: + errorprint(_('Error: Must specify a directory to index.')) + usage() + else: + directories = argsleft + + try: + for arg,a in gopts: + if arg in ['-v', '--verbose']: + cmds['verbose'] = 1 + elif arg in ["-q", '--quiet']: + cmds['quiet'] = 1 + elif arg in ['-u', '--baseurl']: + if cmds['baseurl'] is not None: + errorprint(_('Error: Only one baseurl allowed.')) + usage() + else: + cmds['baseurl'] = a + elif arg in ['-x', '--exclude']: + cmds['excludes'].append(a) + elif arg in ['-p', '--pretty']: + cmds['pretty'] = 1 + elif arg in ['-c', '--cachedir']: + cmds['cache'] = True + cmds['cachedir'] = a + elif arg == '--basedir': + cmds['basedir'] = a + elif arg in ['-o','--outputdir']: + cmds['outputdir'] = a + + except ValueError, e: + errorprint(_('Options Error: %s') % e) + usage() + + directory = directories[0] +# Fix paths + directory = os.path.normpath(directory) + if cmds['split']: + pass + elif os.path.isabs(directory): + cmds['basedir'] = directory + directory = '.' + else: + cmds['basedir'] = os.path.realpath(os.path.join(cmds['basedir'], directory)) + directory = '.' + if not cmds['outputdir']: + cmds['outputdir'] = cmds['basedir'] + if cmds['groupfile']: + a = cmds['groupfile'] + if cmds['split']: + a = os.path.join(cmds['basedir'], directory, cmds['groupfile']) + elif not os.path.isabs(a): + a = os.path.join(cmds['basedir'], cmds['groupfile']) + if not os.path.exists(a): + errorprint(_('Error: groupfile %s cannot be found.' 
% a)) + usage() + cmds['groupfile'] = a + if cmds['cachedir']: + a = cmds ['cachedir'] + if not os.path.isabs(a): + a = os.path.join(cmds['basedir'] ,a) + if not checkAndMakeDir(a): + errorprint(_('Error: cannot open/write to cache dir %s' % a)) + usage() + cmds['cachedir'] = a + + #setup some defaults + cmds['prestofile'] = 'presto.xml.gz' + cmds['prestomdfile'] = 'prestomd.xml' + cmds['tempdir'] = '.repodata' + cmds['finaldir'] = 'repodata' + cmds['olddir'] = '.olddata' + + # Fixup first directory + directories[0] = directory + return cmds, directories + +def main(args): + cmds, directories = parseArgs(args) + directory = directories[0] + # start the sanity/stupidity checks + if not os.path.exists(os.path.join(cmds['basedir'], directory)): + errorprint(_('Directory must exist')) + sys.exit(1) + + if not os.path.isdir(os.path.join(cmds['basedir'], directory)): + errorprint(_('Directory of packages must be a directory.')) + sys.exit(1) + + if not os.access(cmds['outputdir'], os.W_OK): + errorprint(_('Directory must be writable.')) + sys.exit(1) + + if cmds['split']: + oldbase = cmds['basedir'] + cmds['basedir'] = os.path.join(cmds['basedir'], directory) + if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['tempdir'])): + sys.exit(1) + + if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['finaldir'])): + sys.exit(1) + + if os.path.exists(os.path.join(cmds['outputdir'], cmds['olddir'])): + errorprint(_('Old data directory exists, please remove: %s') % cmds['olddir']) + sys.exit(1) + + # make sure we can write to where we want to write to: + for direc in ['tempdir', 'finaldir']: + for file in ['prestofile', 'prestomdfile']: + filepath = os.path.join(cmds['outputdir'], cmds[direc], cmds[file]) + if os.path.exists(filepath): + if not os.access(filepath, os.W_OK): + errorprint(_('error in must be able to write to metadata files:\n -> %s') % filepath) + usage() + + if cmds['split']: + cmds['basedir'] = oldbase + mdgen = SplitMetaDataGenerator(cmds) + mdgen.doPkgMetadata(directories) + else: + mdgen = MetaDataGenerator(cmds) + mdgen.doPkgMetadata(directory) + mdgen.doRepoMetadata() + + if os.path.exists(os.path.join(cmds['outputdir'], cmds['finaldir'])): + try: + os.rename(os.path.join(cmds['outputdir'], cmds['finaldir']), + os.path.join(cmds['outputdir'], cmds['olddir'])) + except: + errorprint(_('Error moving final %s to old dir %s' % (os.path.join(cmds['outputdir'], cmds['finaldir']), + os.path.join(cmds['outputdir'], cmds['olddir'])))) + sys.exit(1) + + try: + os.rename(os.path.join(cmds['outputdir'], cmds['tempdir']), + os.path.join(cmds['outputdir'], cmds['finaldir'])) + except: + errorprint(_('Error moving final metadata into place')) + # put the old stuff back + os.rename(os.path.join(cmds['outputdir'], cmds['olddir']), + os.path.join(cmds['outputdir'], cmds['finaldir'])) + sys.exit(1) + + for file in ['prestofile', 'prestomdfile']: + if cmds[file]: + fn = os.path.basename(cmds[file]) + else: + continue + oldfile = os.path.join(cmds['outputdir'], cmds['olddir'], fn) + if os.path.exists(oldfile): + try: + os.remove(oldfile) + except OSError, e: + errorprint(_('Could not remove old metadata file: %s') % oldfile) + errorprint(_('Error was %s') % e) + sys.exit(1) + + # Clean up any update metadata + mdpath = os.path.join(cmds['basedir'], cmds['olddir'], cmds['update-info-dir']) + if os.path.isdir(mdpath): + for file in os.listdir(mdpath): + os.remove(os.path.join(mdpath, file)) + os.rmdir(mdpath) + + +#XXX: fix to remove tree as we mung basedir + try: + 
os.rmdir(os.path.join(cmds['outputdir'], cmds['olddir'])) + except OSError, e: + errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir']) + errorprint(_('Error was %s') % e) + errorprint(_('Please clean up this directory manually.')) + +if __name__ == "__main__": + if len(sys.argv) > 1: + if sys.argv[1] == 'profile': + import hotshot + p = hotshot.Profile(os.path.expanduser("~/createprestorepo.prof")) + p.run('main(sys.argv[2:])') + p.close() + else: + main(sys.argv[1:]) + else: + main(sys.argv[1:]) diff --git a/makerepo/createprestorepo.py b/makerepo/createprestorepo.py deleted file mode 100755 index a217045..0000000 --- a/makerepo/createprestorepo.py +++ /dev/null @@ -1,308 +0,0 @@ -#!/usr/bin/python -t -# -*- mode: Python; indent-tabs-mode: nil; -*- -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -import errno, os, sys, gzip -import fnmatch, re -import rpmUtils.transaction, rpmUtils.miscutils -import commands, libxml2 -import dumpMetadata -from dumpMetadata import _gzipOpen, getChecksum -#### import Utils - -DEBUG = True -#### Utils.setdebug(DEBUG) - -SUFFIX='drpm' -DRPMWORTHKEEPINGTHRESH=0.5 -DEBUG=0 -REPODATA="repodata" -REPOFILE="presto.xml" -REPOMDFILE="prestomd.xml" -SUM_TYPE="sha" - -def XML_start_newrpm(node, (f, n, e, v, r, a), srcdir_len): - newrpm_node = node.newChild(None, "package", None) - newrpm_node.newProp("type", "rpm") - newrpm_node.newChild(None, "name", n) - newrpm_node.newChild(None, "arch", str(a)) - version = newrpm_node.newChild(None, "version", None) - version.newProp("epoch", str(e)) - version.newProp("ver", str(v)) - version.newProp("rel", str(r)) - deltas = newrpm_node.newChild(None, "deltas", None) - return deltas - -def XML_oldrpm(newrpm_node, drpm_file, oldrpm, newrpm, sequence, size): - (f, n, e, v, r, a) = oldrpm - (nf, nn, ne, nv, nr, na) = newrpm - oldrpm_node = newrpm_node.newChild(None, "oldrpm", None) - checksum = getChecksum(SUM_TYPE, drpm_file) - if n != nn: - oldrpm_node.newChild(None, "name", n) - if a != na: - oldrpm_node.newChild(None, "arch", str(a)) - version = oldrpm_node.newChild(None, "version", None) - if e != ne: - version.newProp("epoch", str(e)) - if v != nv: - version.newProp("ver", str(v)) - version.newProp("rel", str(r)) - oldrpm_node.newChild(None, "drpm_filename", drpm_file) - oldrpm_node.newChild(None, "size", str(size)) - oldrpm_node.newChild(None, "sequence", str(sequence)) - cs_node = oldrpm_node.newChild(None, "checksum", str(checksum)) - cs_node.newProp("type", SUM_TYPE) - -def startXML(): - basedoc = libxml2.newDoc("1.0") - baseroot = basedoc.newChild(None, "metadata", None) - basens = baseroot.newNs('http://linux.duke.edu/metadata/common', None) - formatns = baseroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm') - baseroot.setNs(basens) - return (basedoc, baseroot) - -def endXML(xmldoc, filename, srcdir, compressed=True): - if compressed: - outfile 
= _gzipOpen("%s%s/%s.gz" % (srcdir, REPODATA, filename), "w") - output = xmldoc.serialize('UTF-8', 1) - outfile.write(output) - outfile.close() - else: - xmldoc.saveFormatFileEnc("%s%s/%s" % (srcdir, REPODATA, filename), 'UTF-8', 1) - xmldoc.freeDoc() - -def repoXML(srcdir): - """generate the repomd.xml file that stores the info on the other files""" - repodoc = libxml2.newDoc("1.0") - reporoot = repodoc.newChild(None, "repomd", None) - repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None) - reporoot.setNs(repons) - repofilepath = "%s%s/%s" % (srcdir, REPODATA, REPOMDFILE) - filename = "%s%s/%s.gz" % (srcdir, REPODATA, REPOFILE) - filetype = "deltas" - zfo = _gzipOpen(filename, "rb") - uncsum = getChecksum(SUM_TYPE, zfo) - zfo.close() - csum = getChecksum(SUM_TYPE, filename) - timestamp = os.stat(filename)[8] - data = reporoot.newChild(None, 'data', None) - data.newProp('type', filetype) - location = data.newChild(None, 'location', None) - location.newProp('href', "%s/%s.gz" % (REPODATA, REPOFILE)) - checksum = data.newChild(None, 'checksum', csum) - checksum.newProp('type', SUM_TYPE) - timestamp = data.newChild(None, 'timestamp', str(timestamp)) - unchecksum = data.newChild(None, 'open-checksum', uncsum) - unchecksum.newProp('type', SUM_TYPE) - endXML(repodoc, REPOMDFILE, srcdir, False) - -def genDeltaRPM(ts, newrpm, oldrpm, is_new_package, srcdir, dstdir, locroot): - (f1,n1,e1,v1,r1,a1) = newrpm - (f2,n2,e2,v2,r2,a2) = oldrpm - hdr = rpmUtils.miscutils.hdrFromPackage(ts,f1) - arch = hdr['arch'] - v12 = "_".join([v1,v2]) - r12 = "_".join([r1,r2]) - deltaRPMName= '%s/%s.%s.%s' % (dstdir, "-".join([n1,v12,r12]), a1, SUFFIX) - if DEBUG: - print "DEBUG " + deltaCommand - # If the drpm doesn't exists, make it, else skip it - if os.path.exists("%s%s" % (srcdir, deltaRPMName)): - dsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName)) - if e1 == e2: - print 'Using pre-generated delta rpm for %s.%s - %s.%s => %s.%s' % (n1, a1, v2, r2, v1, r1) - else: - print 'Using pre-generated delta rpm for %s.%s - %s:%s.%s => %s:%s.%s' % (n1, a1, e2, v2, r2, e1, v1, r1) - # Get checksum - seqfile = open("%s%s.seq" % (srcdir, deltaRPMName), "r") - sequence = seqfile.read()[:-1] - sequence = sequence[sequence.rfind("-")+1:] - seqfile.close() - if is_new_package: - locroot = XML_start_newrpm(locroot, newrpm, len(srcdir)) - is_new_package = False - XML_oldrpm(locroot, deltaRPMName, oldrpm, newrpm, sequence, dsize) - if DEBUG: - print "DEBUG skipping %s" % (deltaRPMName) - elif os.path.exists("%s%s.dontdelta" % (srcdir, deltaRPMName)): - pass - else: - deltaCommand = 'makedeltarpm -s %s%s.seq %s %s %s%s' % (srcdir, deltaRPMName, f2, f1, srcdir, deltaRPMName) - (code, out) = commands.getstatusoutput(deltaCommand) - if code: - #raise Exception("genDeltaRPM: exitcode was %s - Reported Error: %s" % (code, out)) - print "Error genDeltaRPM for %s: exitcode was %s - Reported Error: %s" % (n1, code, out) - - # Get size - dsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName)) - - # Get checksum - seqfile = open("%s%s.seq" % (srcdir, deltaRPMName), "r") - sequence = seqfile.read()[:-1] - sequence = sequence[sequence.rfind("-")+1:] - seqfile.close() - - # Check whether or not we should keep the drpm - if not drpmIsWorthKeeping(deltaRPMName, f1, srcdir): - if DEBUG: - print 'deleting %s' % (deltaRPMName) - try: - os.unlink("%s%s" % (srcdir, deltaRPMName)) - except Exception, e: - print "Error deleting deltarpm %s" % (deltaRPMName), str(e) - try: - os.unlink("%s%s.seq" % (srcdir, deltaRPMName)) - except 
Exception, e: - print "Error deleting checksum %s.seq" % (deltaRPMName), str(e) - f = open("%s%s.dontdelta" % (srcdir, deltaRPMName), "w") - f.close() - else: - if e1 == e2: - print 'Generated delta rpm for %s.%s - %s.%s => %s.%s' % (n1, a1, v2, r2, v1, r1) - else: - print 'Generated delta rpm for %s.%s - %s:%s.%s => %s:%s.%s' % (n1, a1, e2, v2, r2, e1, v1, r1) - - if is_new_package: - locroot = XML_start_newrpm(locroot, newrpm, len(srcdir)) - is_new_package = False - XML_oldrpm(locroot, deltaRPMName, oldrpm, newrpm, sequence, dsize) - return (is_new_package, locroot) - -def drpmIsWorthKeeping(deltaRPMName, newrpm, srcdir): - newsize = os.path.getsize(newrpm) - drpmsize = os.path.getsize("%s%s" % (srcdir, deltaRPMName)) - # Delete the drpm if it's too large - if drpmsize > DRPMWORTHKEEPINGTHRESH * newsize: - return False - return True - -def createPrestoRepo(srcdir, dstdir): - ts = rpmUtils.transaction.initReadOnlyTransaction() - changed = False - - # Create list of .rpm files. - # We don't use "glob", so sub-directories are supported. - print 'Using source dir: %s' % srcdir - print 'Using destination dir: %s' % dstdir - if dstdir[-1] == "/": - dstdir = dstdir[:-1] - srcfiles = [] - for root, dirs, files in os.walk(srcdir): - for f in fnmatch.filter(files,'*.rpm'): - srcfiles.append(os.path.join(root,f)) - if not len(srcfiles): - print ' Nothing found.' - return changed - assert srcfiles[0].startswith(srcdir) - - # Check whether dstdir exists, and if it doesn't, create it - if not os.access(dstdir, os.F_OK): - os.makedirs(dstdir, 0755) - elif not os.access(dstdir, os.W_OK): - print 'ERROR: Unable to write to %s' % dstdir - sys.exit(1) - - # Check whether REPODATA exists, and if it doesn't, create it - if not os.access("%s%s" % (srcdir, REPODATA), os.F_OK): - os.makedirs("%s%s" % (srcdir, REPODATA), 0755) - elif not os.access(dstdir, os.W_OK): - print 'ERROR: Unable to write to %s' % REPODATA - sys.exit(1) - - # Create XML document -# xmldoc = libxml2.newDoc("1.0") -# xmlroot = xmldoc - (xmldoc, xmlroot) = startXML() - - # Create map: rpm %name -> list of tuples (filename,name,e,v,r) - newestsrcrpms = {} - for f in srcfiles: - hdr = rpmUtils.miscutils.hdrFromPackage(ts, f) - nm = hdr['name'] + "." + hdr['arch'] - n = hdr['name'] - a = hdr['arch'] - v = hdr['version'] - r = hdr['release'] - e = hdr['epoch'] - if e is None: - e = 0 - newestsrcrpms.setdefault(nm,[]) - newestsrcrpms[nm].append((f,n,e,v,r,a)) - - # Now purge old src.rpm unless their %name matches a white-list pattern. - for l in newestsrcrpms.itervalues(): - x = len(l) - - if x > 1: - def sortByEVR(fnevr1, fnevr2): - (f1,n1,e1,v1,r1,a1) = fnevr1 - (f2,n2,e2,v2,r2,a2) = fnevr2 - rc = rpmUtils.miscutils.compareEVR((e1,v1,r1),(e2,v2,r2)) - if rc == 0: - return 0 - if rc > 0: - return -1 - if rc < 0: - return 1 - - l.sort(sortByEVR) # highest first in list - - # Generate delta rpm - is_new_package = True - locroot = xmlroot - for rpm in l[1:]: - (is_new_package, locroot) = genDeltaRPM(ts, l[0], rpm, is_new_package, srcdir, dstdir, locroot) - - if not len(srcfiles): - print 'WARNING: No .rpms left. Stopping here.' - return changed - - # Write out end of deltas.xml file - endXML(xmldoc, REPOFILE, srcdir, True) - repoXML(srcdir) - - # Examine binary repository directories and remove everything which - # is missing its corresponding src.rpm. 
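To make the delta generation above concrete: for every older build paired with the newest one, genDeltaRPM shells out to makedeltarpm (writing a .seq sequence file alongside the delta), and drpmIsWorthKeeping then discards any delta larger than half of the full rpm (DRPMWORTHKEEPINGTHRESH = 0.5). A hand-run equivalent, with invented file names:

    import os, commands
    old, new = 'foo-1.0-1.i386.rpm', 'foo-1.1-1.i386.rpm'
    drpm = 'foo-1.1_1.0-1_1.i386.drpm'
    # same command string genDeltaRPM builds: makedeltarpm -s <seq> <old> <new> <delta>
    code, out = commands.getstatusoutput('makedeltarpm -s %s.seq %s %s %s' % (drpm, old, new, drpm))
    if not code and os.path.getsize(drpm) > 0.5 * os.path.getsize(new):
        os.unlink(drpm)     # too big to be worth shipping; clients fall back to the full rpm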
- return changed - - -def main(bin_rpm_path, delta_rpm_path): - assert rpmUtils.miscutils.compareEVR((1,2,3),(1,2,0)) > 0 - assert rpmUtils.miscutils.compareEVR((0,1,2),(0,1,2)) == 0 - assert rpmUtils.miscutils.compareEVR((1,2,3),(4,0,99)) < 0 - - return createPrestoRepo(bin_rpm_path, delta_rpm_path) - - -if __name__ == '__main__': - if len(sys.argv) < 2: - print 'Usage: %s \n' % os.path.basename(sys.argv[0]) - sys.exit(errno.EINVAL) - bin_rpm_path = sys.argv[1] - delta_rpm_path = sys.argv[2] - - #### cfg = Utils.load_config_module(sys.argv[1]) - - #### Utils.signer_gid_check(cfg.signersgid) - #### os.umask(cfg.signersumask) - - #### for dist in sys.argv[2:]: - #### if not cfg.archdict.has_key(dist): - #### print "No distribution release named '%s' found" % dist - #### sys.exit(errno.EINVAL) - main(bin_rpm_path, delta_rpm_path) - sys.exit(0) diff --git a/makerepo/dumpMetadata.py b/makerepo/dumpMetadata.py deleted file mode 100755 index 2f074da..0000000 --- a/makerepo/dumpMetadata.py +++ /dev/null @@ -1,456 +0,0 @@ -#!/usr/bin/python -t -# base classes and functions for dumping out package Metadata -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
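A worked example of the ordering used above: the asserts in main() spell out the compareEVR contract (positive when the first (epoch, version, release) tuple is newer, negative when the second is), and createPrestoRepo sorts each name.arch group newest-first so that deltas run from every older build to the head of the list. A sketch with invented versions, assuming the same rpmUtils module is importable:

    import rpmUtils.miscutils
    builds = [('foo-1.0-1.i386.rpm', 'foo', 0, '1.0', '1', 'i386'),
              ('foo-1.2-1.i386.rpm', 'foo', 0, '1.2', '1', 'i386'),
              ('foo-1.1-3.i386.rpm', 'foo', 0, '1.1', '3', 'i386')]
    # newest first, the same ordering sortByEVR produces
    builds.sort(lambda a, b: -rpmUtils.miscutils.compareEVR(a[2:5], b[2:5]))
    newest, older = builds[0], builds[1:]
    print newest[0]     # foo-1.2-1.i386.rpm; a delta is built from each entry in 'older'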
-# Copyright 2004 Duke University - -# $Id: dumpMetadata.py,v 1.36 2006/02/21 20:10:08 pnasrat Exp $ - -import os -import rpm -import exceptions -import md5 -import sha -import types -import struct -import re -import stat - -# done to fix gzip randomly changing the checksum -import gzip -from zlib import error as zlibError -from gzip import write32u, FNAME - -__all__ = ["GzipFile","open"] - -class GzipFile(gzip.GzipFile): - def _write_gzip_header(self): - self.fileobj.write('\037\213') # magic header - self.fileobj.write('\010') # compression method - fname = self.filename[:-3] - flags = 0 - if fname: - flags = FNAME - self.fileobj.write(chr(flags)) - write32u(self.fileobj, long(0)) - self.fileobj.write('\002') - self.fileobj.write('\377') - if fname: - self.fileobj.write(fname + '\000') - - -def _gzipOpen(filename, mode="rb", compresslevel=9): - return GzipFile(filename, mode, compresslevel) - - - -def returnFD(filename): - try: - fdno = os.open(filename, os.O_RDONLY) - except OSError: - raise MDError, "Error opening file" - return fdno - -def returnHdr(ts, package): - """hand back the rpm header or raise an Error if the pkg is fubar""" - opened_here = 0 - try: - if type(package) is types.StringType: - opened_here = 1 - fdno = os.open(package, os.O_RDONLY) - else: - fdno = package # let's assume this is an fdno and go with it :) - except OSError: - raise MDError, "Error opening file" - ts.setVSFlags((rpm._RPMVSF_NOSIGNATURES|rpm.RPMVSF_NOMD5|rpm.RPMVSF_NEEDPAYLOAD)) - try: - hdr = ts.hdrFromFdno(fdno) - except rpm.error: - raise MDError, "Error opening package" - if type(hdr) != rpm.hdr: - raise MDError, "Error opening package" - ts.setVSFlags(0) - - if opened_here: - os.close(fdno) - del fdno - - return hdr - -def getChecksum(sumtype, file, CHUNK=2**16): - """takes filename, hand back Checksum of it - sumtype = md5 or sha - filename = /path/to/file - CHUNK=65536 by default""" - - # chunking brazenly lifted from Ryan Tomayko - opened_here = 0 - try: - if type(file) is not types.StringType: - fo = file # assume it's a file-like-object - else: - opened_here = 1 - fo = open(file, 'rb', CHUNK) - - if sumtype == 'md5': - sum = md5.new() - elif sumtype == 'sha': - sum = sha.new() - else: - raise MDError, 'Error Checksumming file, wrong checksum type %s' % sumtype - chunk = fo.read - while chunk: - chunk = fo.read(CHUNK) - sum.update(chunk) - - if opened_here: - fo.close() - del fo - - return sum.hexdigest() - except: - raise MDError, 'Error opening file for checksum: %s' % file - - -def utf8String(string): - """hands back a unicoded string""" - if string is None: - return '' - elif isinstance(string, unicode): - return string - try: - x = unicode(string, 'ascii') - return string - except UnicodeError: - encodings = ['utf-8', 'iso-8859-1', 'iso-8859-15', 'iso-8859-2'] - for enc in encodings: - try: - x = unicode(string, enc) - except UnicodeError: - pass - else: - if x.encode(enc) == string: - return x.encode('utf-8') - newstring = '' - for char in string: - if ord(char) > 127: - newstring = newstring + '?' 
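The GzipFile override above exists for reproducibility: a stock gzip header embeds the current mtime, so regenerating identical metadata would still change the archive's checksum in the repo metadata (hence the "done to fix gzip randomly changing the checksum" comment). With the mtime pinned to zero, rewriting the same payload yields byte-identical output. A small sanity check, assuming this dumpMetadata module is on the path:

    import time
    from dumpMetadata import _gzipOpen, getChecksum

    def write_once(payload='example payload\n'):
        fo = _gzipOpen('presto.xml.gz', 'w')
        fo.write(payload)
        fo.close()
        return getChecksum('sha', 'presto.xml.gz')

    first = write_once()
    time.sleep(2)                 # a changing mtime would normally change the header bytes
    print first == write_once()   # True with the pinned header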
- else: - newstring = newstring + char - return newstring - - -def byteranges(file): - """takes an rpm file or fileobject and returns byteranges for location of the header""" - opened_here = 0 - if type(file) is not types.StringType: - fo = file - else: - opened_here = 1 - fo = open(file, 'r') - #read in past lead and first 8 bytes of sig header - fo.seek(104) - # 104 bytes in - binindex = fo.read(4) - # 108 bytes in - (sigindex, ) = struct.unpack('>I', binindex) - bindata = fo.read(4) - # 112 bytes in - (sigdata, ) = struct.unpack('>I', bindata) - # each index is 4 32bit segments - so each is 16 bytes - sigindexsize = sigindex * 16 - sigsize = sigdata + sigindexsize - # we have to round off to the next 8 byte boundary - disttoboundary = (sigsize % 8) - if disttoboundary != 0: - disttoboundary = 8 - disttoboundary - # 112 bytes - 96 == lead, 8 = magic and reserved, 8 == sig header data - hdrstart = 112 + sigsize + disttoboundary - - fo.seek(hdrstart) # go to the start of the header - fo.seek(8,1) # read past the magic number and reserved bytes - - binindex = fo.read(4) - (hdrindex, ) = struct.unpack('>I', binindex) - bindata = fo.read(4) - (hdrdata, ) = struct.unpack('>I', bindata) - - # each index is 4 32bit segments - so each is 16 bytes - hdrindexsize = hdrindex * 16 - # add 16 to the hdrsize to account for the 16 bytes of misc data b/t the - # end of the sig and the header. - hdrsize = hdrdata + hdrindexsize + 16 - - # header end is hdrstart + hdrsize - hdrend = hdrstart + hdrsize - if opened_here: - fo.close() - del fo - return (hdrstart, hdrend) - - -class MDError(exceptions.Exception): - def __init__(self, args=None): - exceptions.Exception.__init__(self) - self.args = args - - - -class RpmMetaData: - """each drpm is one object, you pass it an rpm file - it opens the file, and pulls the information out in bite-sized chunks :) - """ - - mode_cache = {} - - def __init__(self, ts, basedir, filename, options): - try: - stats = os.stat(os.path.join(basedir, filename)) - self.size = stats[6] - self.mtime = stats[8] - del stats - except OSError, e: - raise MDError, "Error Stat'ing file %s %s" % (basedir, filename) - self.options = options - self.localurl = options['baseurl'] - self.relativepath = filename - fd = returnFD(os.path.join(basedir, filename)) - self.hdr = returnHdr(ts, fd) - os.lseek(fd, 0, 0) - fo = os.fdopen(fd, 'rb') - self.pkgid = self.doChecksumCache(fo) - fo.seek(0) - (self.rangestart, self.rangeend) = byteranges(fo) - fo.close() - del fo - del fd - - def arch(self): - if self.tagByName('sourcepackage') == 1: - return 'src' - else: - return self.tagByName('arch') - - def _correctVersion(self, vers): - returnvers = [] - vertuple = (None, None, None) - if vers is None: - returnvers.append(vertuple) - return returnvers - - if type(vers) is not types.ListType: - if vers is not None: - vertuple = self._stringToVersion(vers) - else: - vertuple = (None, None, None) - returnvers.append(vertuple) - else: - for ver in vers: - if ver is not None: - vertuple = self._stringToVersion(ver) - else: - vertuple = (None, None, None) - returnvers.append(vertuple) - return returnvers - - - def _stringToVersion(self, strng): - i = strng.find(':') - if i != -1: - epoch = strng[:i] - else: - epoch = '0' - j = strng.find('-') - if j != -1: - if strng[i + 1:j] == '': - version = None - else: - version = strng[i + 1:j] - release = strng[j + 1:] - else: - if strng[i + 1:] == '': - version = None - else: - version = strng[i + 1:] - release = None - return (epoch, version, release) - - ########### - # 
Title: Remove duplicates from a sequence - # Submitter: Tim Peters - # From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560 - - def _uniq(self,s): - """Return a list of the elements in s, but without duplicates. - - For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], - unique("abcabc") some permutation of ["a", "b", "c"], and - unique(([1, 2], [2, 3], [1, 2])) some permutation of - [[2, 3], [1, 2]]. - - For best speed, all sequence elements should be hashable. Then - unique() will usually work in linear time. - - If not possible, the sequence elements should enjoy a total - ordering, and if list(s).sort() doesn't raise TypeError it's - assumed that they do enjoy a total ordering. Then unique() will - usually work in O(N*log2(N)) time. - - If that's not possible either, the sequence elements must support - equality-testing. Then unique() will usually work in quadratic - time. - """ - - n = len(s) - if n == 0: - return [] - - # Try using a dict first, as that's the fastest and will usually - # work. If it doesn't work, it will usually fail quickly, so it - # usually doesn't cost much to *try* it. It requires that all the - # sequence elements be hashable, and support equality comparison. - u = {} - try: - for x in s: - u[x] = 1 - except TypeError: - del u # move on to the next method - else: - return u.keys() - - # We can't hash all the elements. Second fastest is to sort, - # which brings the equal elements together; then duplicates are - # easy to weed out in a single pass. - # NOTE: Python's list.sort() was designed to be efficient in the - # presence of many duplicate elements. This isn't true of all - # sort functions in all languages or libraries, so this approach - # is more effective in Python than it may be elsewhere. - try: - t = list(s) - t.sort() - except TypeError: - del t # move on to the next method - else: - assert n > 0 - last = t[0] - lasti = i = 1 - while i < n: - if t[i] != last: - t[lasti] = last = t[i] - lasti += 1 - i += 1 - return t[:lasti] - - # Brute force is all that's left. 
- u = [] - for x in s: - if x not in u: - u.append(x) - return u - - def tagByName(self, tag): - data = self.hdr[tag] - if type(data) is types.ListType: - if len(data) > 0: - return data[0] - else: - return '' - else: - return data - - def listTagByName(self, tag): - """take a tag that should be a list and make sure it is one""" - lst = [] - data = self.hdr[tag] - if data is None: - return lst - - if type(data) is types.ListType: - lst.extend(data) - else: - lst.append(data) - return lst - - def epoch(self): - if self.hdr['epoch'] is None: - return 0 - else: - return self.tagByName('epoch') - - def doChecksumCache(self, fo): - """return a checksum for a package: - - check if the checksum cache is enabled - if not - return the checksum - if so - check to see if it has a cache file - if so, open it and return the first line's contents - if not, grab the checksum and write it to a file for this pkg - """ - if not self.options['cache']: - return getChecksum(self.options['sumtype'], fo) - - csumtag = '%s-%s' % (self.hdr['name'] , self.hdr[rpm.RPMTAG_SHA1HEADER]) - csumfile = '%s/%s' % (self.options['cachedir'], csumtag) - if os.path.exists(csumfile) and self.mtime <= os.stat(csumfile)[8]: - csumo = open(csumfile, 'r') - checksum = csumo.readline() - csumo.close() - - else: - checksum = getChecksum(self.options['sumtype'], fo) - csumo = open(csumfile, 'w') - csumo.write(checksum) - csumo.close() - - return checksum - - - -def generateXML(doc, node, formatns, drpmObj, sumtype): - """takes an xml doc object and a package metadata entry node, populates a - package node with the md information""" - ns = node.ns() - pkgNode = node.newChild(None, "package", None) - pkgNode.newProp('type', 'rpm') - pkgNode.newChild(None, 'name', drpmObj.tagByName('name')) - pkgNode.newChild(None, 'arch', drpmObj.arch()) - version = pkgNode.newChild(None, 'version', None) - version.newProp('epoch', str(drpmObj.epoch())) - version.newProp('ver', str(drpmObj.tagByName('version'))) - version.newProp('rel', str(drpmObj.tagByName('release'))) - return pkgNode - -def repoXML(node, cmds): - """generate the repomd.xml file that stores the info on the other files""" - sumtype = cmds['sumtype'] - workfiles = [(cmds['prestofile'], 'deltas')] - - - for (file, ftype) in workfiles: - zfo = _gzipOpen(os.path.join(cmds['outputdir'], cmds['tempdir'], file)) - uncsum = getChecksum(sumtype, zfo) - zfo.close() - csum = getChecksum(sumtype, os.path.join(cmds['outputdir'], cmds['tempdir'], file)) - timestamp = os.stat(os.path.join(cmds['outputdir'], cmds['tempdir'], file))[8] - data = node.newChild(None, 'data', None) - data.newProp('type', ftype) - location = data.newChild(None, 'location', None) - if cmds['baseurl'] is not None: - location.newProp('xml:base', cmds['baseurl']) - location.newProp('href', os.path.join(cmds['finaldir'], file)) - checksum = data.newChild(None, 'checksum', csum) - checksum.newProp('type', sumtype) - timestamp = data.newChild(None, 'timestamp', str(timestamp)) - unchecksum = data.newChild(None, 'open-checksum', uncsum) - unchecksum.newProp('type', sumtype) diff --git a/makerepo/genprestometadata.py b/makerepo/genprestometadata.py deleted file mode 100644 index 402af25..0000000 --- a/makerepo/genprestometadata.py +++ /dev/null @@ -1,496 +0,0 @@ -#!/usr/bin/python -t -# primary functions and glue for generating the repository metadata -# - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; 
either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# Copyright 2004 Duke University -# Copyright 2007 Jonathan Dieter - - -import os -import sys -import getopt -import rpm -import libxml2 -import string -import fnmatch -import urlgrabber - -import dumpMetadata -from dumpMetadata import _gzipOpen -__version__ = '0.4.3' - -def errorprint(stuff): - print >> sys.stderr, stuff - -def _(args): - """Stub function for translation""" - return args - -def usage(retval=1): - print _(""" - createrepo [options] directory-of-packages - - Options: - -u, --baseurl = optional base url location for all files - -o, --outputdir = optional directory to output to - -x, --exclude = files globs to exclude, can be specified multiple times - -q, --quiet = run quietly - -v, --verbose = run verbosely - -c, --cachedir = specify which dir to use for the checksum cache - -h, --help = show this help - -V, --version = output version - -p, --pretty = output xml files in pretty format. - """) - - sys.exit(retval) - -class MetaDataGenerator: - def __init__(self, cmds): - self.cmds = cmds - self.ts = rpm.TransactionSet() - self.pkgcount = 0 - self.files = [] - - def getFileList(self, basepath, path, ext, filelist): - """Return all files in path matching ext, store them in filelist, - recurse dirs. Returns a list object""" - - extlen = len(ext) - totalpath = os.path.normpath(os.path.join(basepath, path)) - try: - dir_list = os.listdir(totalpath) - except OSError, e: - errorprint(_('Error accessing directory %s, %s') % (totalpath, e)) - sys.exit(1) - - for d in dir_list: - if os.path.isdir(totalpath + '/' + d): - filelist = self.getFileList(basepath, os.path.join(path, d), ext, filelist) - else: - if string.lower(d[-extlen:]) == '%s' % (ext): - if totalpath.find(basepath) == 0: - relativepath = totalpath.replace(basepath, "", 1) - relativepath = relativepath.lstrip("/") - filelist.append(os.path.join(relativepath, d)) - else: - raise "basepath '%s' not found in path '%s'" % (basepath, totalpath) - - return filelist - - - def trimRpms(self, files): - badrpms = [] - for file in files: - for glob in self.cmds['excludes']: - if fnmatch.fnmatch(file, glob): - # print 'excluded: %s' % file - if file not in badrpms: - badrpms.append(file) - for file in badrpms: - if file in files: - files.remove(file) - return files - - def doPkgMetadata(self, directory): - """all the heavy lifting for the package metadata""" - - # rpms we're going to be dealing with - files = self.getFileList(self.cmds['basedir'], directory, '.dpm', []) - files = self.trimRpms(files) - self.pkgcount = len(files) - self.openMetadataDocs() - self.writeMetadataDocs(files) - self.closeMetadataDocs() - - - def openMetadataDocs(self): - self._setupPresto() - - def _setupPresto(self): - # setup the base metadata doc - self.prestodoc = libxml2.newDoc("1.0") - self.prestoroot = self.prestodoc.newChild(None, "metadata", None) - basens = self.prestoroot.newNs('http://linux.duke.edu/metadata/common', None) - self.formatns = self.prestoroot.newNs('http://linux.duke.edu/metadata/rpm', 'rpm') - 
self.prestoroot.setNs(basens) - prestofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestofile']) - self.prestofile = _gzipOpen(prestofilepath, 'w') - self.prestofile.write('\n') - self.prestofile.write('\n' % - self.pkgcount) - - - def writeMetadataDocs(self, files, current=0): - for file in files: - current+=1 - try: - mdobj = dumpMetadata.RpmMetaData(self.ts, self.cmds['basedir'], file, self.cmds) - if not self.cmds['quiet']: - if self.cmds['verbose']: - print '%d/%d - %s' % (current, len(files), file) - else: - sys.stdout.write('\r' + ' ' * 80) - sys.stdout.write("\r%d/%d - %s" % (current, self.pkgcount, file)) - sys.stdout.flush() - except dumpMetadata.MDError, e: - errorprint('\n%s - %s' % (e, file)) - continue - else: - try: - node = dumpMetadata.generateXML(self.prestodoc, self.prestoroot, self.formatns, mdobj, self.cmds['sumtype']) - except dumpMetadata.MDError, e: - errorprint(_('\nAn error occurred creating presto metadata: %s') % e) - continue - else: - output = node.serialize('UTF-8', self.cmds['pretty']) - self.prestofile.write(output) - self.prestofile.write('\n') - node.unlinkNode() - node.freeNode() - del node - - return current - - - def closeMetadataDocs(self): - if not self.cmds['quiet']: - print '' - - # save them up to the tmp locations: - if not self.cmds['quiet']: - print _('Saving Presto metadata') - self.prestofile.write('\n') - self.prestofile.close() - self.prestodoc.freeDoc() - - def doRepoMetadata(self): - """wrapper to generate the prestomd.xml file that stores the info on the other files""" - repodoc = libxml2.newDoc("1.0") - reporoot = repodoc.newChild(None, "repomd", None) - repons = reporoot.newNs('http://linux.duke.edu/metadata/repo', None) - reporoot.setNs(repons) - repofilepath = os.path.join(self.cmds['outputdir'], self.cmds['tempdir'], self.cmds['prestomdfile']) - - try: - dumpMetadata.repoXML(reporoot, self.cmds) - except dumpMetadata.MDError, e: - errorprint(_('Error generating repo xml file: %s') % e) - sys.exit(1) - - try: - repodoc.saveFormatFileEnc(repofilepath, 'UTF-8', 1) - except: - errorprint(_('Error saving temp file for rep xml: %s') % repofilepath) - sys.exit(1) - - del repodoc - -class SplitMetaDataGenerator(MetaDataGenerator): - - def __init__(self, cmds): - MetaDataGenerator.__init__(self, cmds) - self.initialdir = self.cmds['basedir'] - - def _getFragmentUrl(self, url, fragment): - import urlparse - urlparse.uses_fragment.append('media') - if not url: - return url - (scheme, netloc, path, query, fragid) = urlparse.urlsplit(url) - return urlparse.urlunsplit((scheme, netloc, path, query, str(fragment))) - - def doPkgMetadata(self, directories): - """all the heavy lifting for the package metadata""" - import types - if type(directories) == types.StringType: - MetaDataGenerator.doPkgMetadata(self, directories) - return - filematrix = {} - for mydir in directories: - filematrix[mydir] = self.getFileList(os.path.join(self.initialdir, mydir), '.', '.rpm', []) - self.trimRpms(filematrix[mydir]) - self.pkgcount += len(filematrix[mydir]) - - mediano = 1 - current = 0 - self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano) - self.cmds['basedir'] = os.path.join(self.initialdir, directories[0]) - self.openMetadataDocs() - for mydir in directories: - self.cmds['basedir'] = os.path.join(self.initialdir, mydir) - self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], mediano) - current = self.writeMetadataDocs(filematrix[mydir], current) - mediano += 1 - self.cmds['basedir'] = 
os.path.join(self.initialdir, directories[0]) - self.cmds['baseurl'] = self._getFragmentUrl(self.cmds['baseurl'], 1) - self.closeMetadataDocs() - - -def checkAndMakeDir(dir): - """ - check out the dir and make it, if possible, return 1 if done, else return 0 - """ - if os.path.exists(dir): - if not os.path.isdir(dir): - errorprint(_('%s is not a dir') % dir) - result = False - else: - if not os.access(dir, os.W_OK): - errorprint(_('%s is not writable') % dir) - result = False - else: - result = True - else: - try: - os.mkdir(dir) - except OSError, e: - errorprint(_('Error creating dir %s: %s') % (dir, e)) - result = False - else: - result = True - return result - -def parseArgs(args): - """ - Parse the command line args return a commands dict and directory. - Sanity check all the things being passed in. - """ - cmds = {} - cmds['quiet'] = 0 - cmds['verbose'] = 0 - cmds['excludes'] = [] - cmds['baseurl'] = None - cmds['sumtype'] = 'sha' - cmds['pretty'] = 0 - cmds['cachedir'] = None - cmds['basedir'] = os.getcwd() - cmds['cache'] = False - cmds['split'] = False - cmds['outputdir'] = "" - cmds['file-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*', '^\/usr\/lib\/sendmail$'] - cmds['dir-pattern-match'] = ['.*bin\/.*', '^\/etc\/.*'] - - try: - gopts, argsleft = getopt.getopt(args, 'phqVvs:x:u:c:o:', ['help', 'exclude=', - 'quiet', 'verbose', 'cachedir=', 'basedir=', - 'baseurl=', 'checksum=', - 'version', 'pretty', 'split', 'outputdir=']) - except getopt.error, e: - errorprint(_('Options Error: %s.') % e) - usage() - - try: - for arg,a in gopts: - if arg in ['-h','--help']: - usage(retval=0) - elif arg in ['-V', '--version']: - print '%s' % __version__ - sys.exit(0) - elif arg == '--split': - cmds['split'] = True - except ValueError, e: - errorprint(_('Options Error: %s') % e) - usage() - - - # make sure our dir makes sense before we continue - if len(argsleft) > 1 and not cmds['split']: - errorprint(_('Error: Only one directory allowed per run.')) - usage() - elif len(argsleft) == 0: - errorprint(_('Error: Must specify a directory to index.')) - usage() - else: - directories = argsleft - - try: - for arg,a in gopts: - if arg in ['-v', '--verbose']: - cmds['verbose'] = 1 - elif arg in ["-q", '--quiet']: - cmds['quiet'] = 1 - elif arg in ['-u', '--baseurl']: - if cmds['baseurl'] is not None: - errorprint(_('Error: Only one baseurl allowed.')) - usage() - else: - cmds['baseurl'] = a - elif arg in ['-x', '--exclude']: - cmds['excludes'].append(a) - elif arg in ['-p', '--pretty']: - cmds['pretty'] = 1 - elif arg in ['-c', '--cachedir']: - cmds['cache'] = True - cmds['cachedir'] = a - elif arg == '--basedir': - cmds['basedir'] = a - elif arg in ['-o','--outputdir']: - cmds['outputdir'] = a - - except ValueError, e: - errorprint(_('Options Error: %s') % e) - usage() - - directory = directories[0] -# Fix paths - directory = os.path.normpath(directory) - if cmds['split']: - pass - elif os.path.isabs(directory): - cmds['basedir'] = directory - directory = '.' - else: - cmds['basedir'] = os.path.realpath(os.path.join(cmds['basedir'], directory)) - directory = '.' - if not cmds['outputdir']: - cmds['outputdir'] = cmds['basedir'] - if cmds['groupfile']: - a = cmds['groupfile'] - if cmds['split']: - a = os.path.join(cmds['basedir'], directory, cmds['groupfile']) - elif not os.path.isabs(a): - a = os.path.join(cmds['basedir'], cmds['groupfile']) - if not os.path.exists(a): - errorprint(_('Error: groupfile %s cannot be found.' 
% a)) - usage() - cmds['groupfile'] = a - if cmds['cachedir']: - a = cmds ['cachedir'] - if not os.path.isabs(a): - a = os.path.join(cmds['basedir'] ,a) - if not checkAndMakeDir(a): - errorprint(_('Error: cannot open/write to cache dir %s' % a)) - usage() - cmds['cachedir'] = a - - #setup some defaults - cmds['prestofile'] = 'presto.xml.gz' - cmds['prestomdfile'] = 'prestomd.xml' - cmds['tempdir'] = '.repodata' - cmds['finaldir'] = 'repodata' - cmds['olddir'] = '.olddata' - - # Fixup first directory - directories[0] = directory - return cmds, directories - -def main(args): - cmds, directories = parseArgs(args) - directory = directories[0] - # start the sanity/stupidity checks - if not os.path.exists(os.path.join(cmds['basedir'], directory)): - errorprint(_('Directory must exist')) - sys.exit(1) - - if not os.path.isdir(os.path.join(cmds['basedir'], directory)): - errorprint(_('Directory of packages must be a directory.')) - sys.exit(1) - - if not os.access(cmds['outputdir'], os.W_OK): - errorprint(_('Directory must be writable.')) - sys.exit(1) - - if cmds['split']: - oldbase = cmds['basedir'] - cmds['basedir'] = os.path.join(cmds['basedir'], directory) - if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['tempdir'])): - sys.exit(1) - - if not checkAndMakeDir(os.path.join(cmds['outputdir'], cmds['finaldir'])): - sys.exit(1) - - if os.path.exists(os.path.join(cmds['outputdir'], cmds['olddir'])): - errorprint(_('Old data directory exists, please remove: %s') % cmds['olddir']) - sys.exit(1) - - # make sure we can write to where we want to write to: - for direc in ['tempdir', 'finaldir']: - for file in ['prestofile', 'prestomdfile']: - filepath = os.path.join(cmds['outputdir'], cmds[direc], cmds[file]) - if os.path.exists(filepath): - if not os.access(filepath, os.W_OK): - errorprint(_('error in must be able to write to metadata files:\n -> %s') % filepath) - usage() - - if cmds['split']: - cmds['basedir'] = oldbase - mdgen = SplitMetaDataGenerator(cmds) - mdgen.doPkgMetadata(directories) - else: - mdgen = MetaDataGenerator(cmds) - mdgen.doPkgMetadata(directory) - mdgen.doRepoMetadata() - - if os.path.exists(os.path.join(cmds['outputdir'], cmds['finaldir'])): - try: - os.rename(os.path.join(cmds['outputdir'], cmds['finaldir']), - os.path.join(cmds['outputdir'], cmds['olddir'])) - except: - errorprint(_('Error moving final %s to old dir %s' % (os.path.join(cmds['outputdir'], cmds['finaldir']), - os.path.join(cmds['outputdir'], cmds['olddir'])))) - sys.exit(1) - - try: - os.rename(os.path.join(cmds['outputdir'], cmds['tempdir']), - os.path.join(cmds['outputdir'], cmds['finaldir'])) - except: - errorprint(_('Error moving final metadata into place')) - # put the old stuff back - os.rename(os.path.join(cmds['outputdir'], cmds['olddir']), - os.path.join(cmds['outputdir'], cmds['finaldir'])) - sys.exit(1) - - for file in ['prestofile', 'prestomdfile']: - if cmds[file]: - fn = os.path.basename(cmds[file]) - else: - continue - oldfile = os.path.join(cmds['outputdir'], cmds['olddir'], fn) - if os.path.exists(oldfile): - try: - os.remove(oldfile) - except OSError, e: - errorprint(_('Could not remove old metadata file: %s') % oldfile) - errorprint(_('Error was %s') % e) - sys.exit(1) - - # Clean up any update metadata - mdpath = os.path.join(cmds['basedir'], cmds['olddir'], cmds['update-info-dir']) - if os.path.isdir(mdpath): - for file in os.listdir(mdpath): - os.remove(os.path.join(mdpath, file)) - os.rmdir(mdpath) - - -#XXX: fix to remove tree as we mung basedir - try: - 
os.rmdir(os.path.join(cmds['outputdir'], cmds['olddir'])) - except OSError, e: - errorprint(_('Could not remove old metadata dir: %s') % cmds['olddir']) - errorprint(_('Error was %s') % e) - errorprint(_('Please clean up this directory manually.')) - -if __name__ == "__main__": - if len(sys.argv) > 1: - if sys.argv[1] == 'profile': - import hotshot - p = hotshot.Profile(os.path.expanduser("~/createprestorepo.prof")) - p.run('main(sys.argv[2:])') - p.close() - else: - main(sys.argv[1:]) - else: - main(sys.argv[1:]) diff --git a/presto.conf b/presto.conf deleted file mode 100644 index 899a4e2..0000000 --- a/presto.conf +++ /dev/null @@ -1,9 +0,0 @@ -[main] -enabled=1 -neverkeepdeltas=1 - -[updates] -deltaurl=http://www.lesbg.com/jdieter/updates/fc6/i386/ - -[extras] -deltaurl=http://www.lesbg.com/jdieter/extras/fc6/i386/ diff --git a/presto.py b/presto.py deleted file mode 100644 index b270212..0000000 --- a/presto.py +++ /dev/null @@ -1,149 +0,0 @@ -# author: Jonathan Dieter -# -# heavily modified from yum-deltarpm.py created by -# Lars Herrmann -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-# Copyright 2005 Duke University - -from yum.plugins import TYPE_INTERACTIVE, PluginYumExit -from yum import config - -import os -import sys - -sys.path.append("/usr/share/presto") -import deltarpm -from prestoRepo import PrestoRepository -from prestomdparser import PrestoMDParser -import prestoTransaction -import prestoLog -import prestoDownload - -requires_api_version = '2.1' -LOG_FILE = "/var/log/presto.log" -plugin_type = (TYPE_INTERACTIVE,) - -rpm_size = 0 -drpm_size = 0 -drpm_count = 0 - -# Configuration stuff -def config_hook(conduit): - # Set up repository specific deltarpm url and mirrorlist - config.RepoConf.deltaurl = config.UrlListOption() - config.RepoConf.deltamirrorlist = config.UrlOption() - - # Add --disable-presto option - parser = conduit.getOptParser() - parser.add_option('', '--disablepresto', dest='disablepresto', - action='store_true', default=False, - help="disable Presto plugin and don't download any deltarpms") - -# Set up Presto repositories -def postreposetup_hook(conduit): - opts, commands = conduit.getCmdLine() - if not opts.disablepresto: - conduit.info(2, 'Setting up Presto') - for active_repo in conduit.getRepos().listEnabled(): - p_repo = PrestoRepository(active_repo, conduit) - p_repo.setup(conduit.getConf().cache) - - conduit.info(2, 'Reading Presto metadata in from local files') - for active_repo in conduit.getRepos().listEnabled(): - xml = active_repo.p_repo.getPrestoXML() - if active_repo.p_repo.enabled: - xmldata = active_repo.p_repo.repoXML.getData('deltas') - (ctype, csum) = xmldata.checksum - parser = PrestoMDParser(xml) - active_repo.p_repo.deltalist = parser.getDeltaList() - else: - conduit.info(5, '--disablepresto specified - Presto disabled') - - -def postresolve_hook(conduit): - global rpm_size - global drpm_size - global drpm_count - - opts, commands = conduit.getCmdLine() - if not opts.disablepresto: - # Cycle through packages to see if there's a deltarpm available - for newpkg in conduit.getTsInfo(): - if newpkg.ts_state != "e": - (chosen_drpm, installed, local, drpm_enabled) = prestoTransaction.find_available_drpms(conduit, newpkg) - - # If a drpm was found, change certain package information so it reflects - # the drpm, not the rpm. 
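Since config_hook above registers deltaurl and deltamirrorlist as per-repository options (and PrestoRepository, further down, also reads them from presto.conf), a Presto-enabled repository can point at its deltarpms straight from its .repo file; passing --disablepresto on the yum command line skips all of this and downloads full rpms. An illustrative .repo stanza with placeholder URLs:

    [updates]
    name=Fedora Core 6 updates
    baseurl=http://download.example.org/updates/6/i386/
    deltaurl=http://deltas.example.org/updates/6/i386/
    enabled=1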
- if chosen_drpm != None: - newpkg.po.has_drpm = True - conduit.info(2, "Found deltarpm update for %s.%s %s:%s-%s" % (newpkg.name, newpkg.arch, newpkg.epoch, newpkg.version, newpkg.release)) - # In yum 3.0.x, this doesn't get defined if you run "yum update x" rather than "yum update" - rpm_size += int(newpkg.po.size) - drpm_size += int(chosen_drpm['size']) - newpkg.po.simple['realpackagesize'] = newpkg.po.size - newpkg.po.simple['packagesize'] = chosen_drpm['size'] - newpkg.po.simple['deltasize'] = chosen_drpm['size'] - newpkg.po.simple['deltarelativepath'] = chosen_drpm['drpm_filename'] - newpkg.po.simple['deltachecksumtype'] = chosen_drpm['checksum_type'] - newpkg.po.simple['deltachecksum'] = chosen_drpm['checksum'] - newpkg.po.simple['deltalocalpath'] = newpkg.po.repo.deltasdir + "/" + os.path.basename(chosen_drpm['drpm_filename']) - newpkg.po.to = newpkg - newpkg.realpkgtup = newpkg.pkgtup - newpkg.pkgtup = (newpkg.name + " *", newpkg.arch, newpkg.epoch, newpkg.version, newpkg.release) - newpkg.po.hasdrpm = True - drpm_count += 1 - else: - if installed and drpm_enabled and not local: - try: - rpm_size += int(newpkg.po.simple['packagesize']) - drpm_size += int(newpkg.po.simple['packagesize']) - except: - pass - return - - -def predownload_hook(conduit): - global drpm_count - - pkglist = conduit.getDownloadPackages() - - opts, commands = conduit.getCmdLine() - if not opts.disablepresto and drpm_count > 0: - # Download deltarpms - problems = prestoDownload.downloadPkgs(conduit, pkglist) - - # If 'exitondownloaderror' is on, exit - if conduit.confBool('main', 'exitondownloaderror') and len(problems.keys()) > 0: - errstring = '' - errstring += 'Error Downloading Packages:\n' - for key in problems.keys(): - errors = misc.unique(problems[key]) - for error in errors: - errstring += ' %s: %s\n' % (key, error) - raise PluginYumExit(errstring) - - -def posttrans_hook(conduit): - global rpm_size - global drpm_size - global LOG_FILE - - if rpm_size > 0: - prestoLog.log(conduit, LOG_FILE, rpm_size, drpm_size) - - conduit.info(2, "Size of all updates downloaded from Presto-enabled repositories: %i bytes" % drpm_size) - conduit.info(2, "Size updates would have been downloaded if Presto wasn't enabled: %i bytes" % rpm_size) - conduit.info(2, "This is a savings of %i percent" % (100 - ((drpm_size * 100) / rpm_size))) diff --git a/shared/deltarpm.py b/shared/deltarpm.py deleted file mode 100644 index 710a8bb..0000000 --- a/shared/deltarpm.py +++ /dev/null @@ -1,86 +0,0 @@ -# author: Jonathan Dieter -# -# mostly taken from deltarpm.py created by -# Lars Herrmann -# and modified for Presto by -# Ahmed Kamal -# -# license: GPL (see COPYING file in distribution) -# -# this module provides a python wrapper around deltarpm tools written by suse -# -# TODO: catch exceptions wherever possible and raise useful ones ;) -# see TODO lines in methods - -APPLY='/usr/bin/applydeltarpm' - -import popen2 -import string -import os - -class Process: - """wrapper class to execute programs and return exitcode and output (stdout and stderr combined)""" - def __init__(self, conduit): - self.__stdout=None - self.__returncode=None - self.__command=None - self.__args=None - self.conduit = conduit - - def run(self, command, *args): - self.__command=command - self.__args=args - cmdline=command+" "+string.join(args, " ") - self.conduit.info(7, '%s.%s: executing %s' % (self.__class__, 'run', cmdline)) - pipe = popen2.Popen4(cmdline) - self.__stdout=pipe.fromchild.read() - retcode = pipe.wait() - if os.WIFEXITED(retcode): - 
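The savings figure printed by posttrans_hook above is straight integer arithmetic on the byte counters gathered in postresolve_hook. With invented sizes, 30 MB of deltarpms standing in for 100 MB of full rpms reports a 70 percent saving:

    rpm_size = 100 * 1024 * 1024      # what the full rpms would have cost
    drpm_size = 30 * 1024 * 1024      # what the deltarpms actually cost
    print 100 - ((drpm_size * 100) / rpm_size)   # -> 70 (integer division, as in the plugin)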
self.__returncode = os.WEXITSTATUS(retcode) - else: - self.__returncode = retcode - # fallback to old implementation - works better ? - #stdoutp = os.popen(cmdline,'r',1) - #self.__stdout = stdoutp.read() - #retcode = stdoutp.close() - #if retcode is None: - # self.__returncode = 0 - #else: - # self.__returncode = retcode - - def getOutput(self): - return self.__stdout - - def returnCode(self): - return self.__returncode - -class DeltaRpmWrapper: - """wrapper around deltarpm binaries - implement methods for applying and verifying delta rpms - - raises exceptions if exitcode of binaries was != 0""" - - def __init__(self, conduit): - self.conduit = conduit - self.conduit.info(7, '%s.%s: created' % (self.__class__, '__init__')) - - def apply(self, newrpmfile, deltarpmfile): - """wraps execution of applydeltarpm [-r oldrpm] deltarpm newrpm - - constructs file names and paths based on given RpmDescription and instance settings for directories""" - # TODO: test args for type == instance and __class__ == RpmDescription - self.conduit.info(7, '%s.apply(%s,%s)' % (self.__class__, newrpmfile, deltarpmfile)) - p=Process(self.conduit) - # targetrpm filename - p.run(APPLY, deltarpmfile, newrpmfile) - if p.returnCode(): - # in case of error, raise exception - raise Exception("Could not apply deltarpm: %d" % (p.returnCode())) - return newrpmfile - - def verifySequence(self, sequence): - """wraps execution of applydeltarpm [-r oldrpm] -s seqfilecontent - - constructs file names and paths based on given RpmDescription and instance settings for directories""" - self.conduit.info(7, '%s.verify(%s)' % (self.__class__, sequence)) - p = Process(self.conduit) - p.run(APPLY, '-s', sequence) - if p.returnCode(): - # in case of error, raise exception - raise Exception("Could not verify sequence of deltarpm: %d" % (p.returnCode())) diff --git a/shared/prestoDownload.py b/shared/prestoDownload.py deleted file mode 100644 index 45318ba..0000000 --- a/shared/prestoDownload.py +++ /dev/null @@ -1,169 +0,0 @@ -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# Copyright 2005 Duke University -# Copyright 2007 Jonathan Dieter - -import os -from yum import misc -from yum import Errors -from yum import types -from urlgrabber.grabber import URLGrabError -import deltarpm - -def verifyDelta(fo, po, conduit, raiseError): - """verifies the deltarpm is what we expect it to be - raiseError = defaults to 0 - if 1 then will raise - a URLGrabError if the file does not check out. 
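For reference, the two external commands the DeltaRpmWrapper above ends up running through Process.run (file names are placeholders): the first rebuilds a full rpm from a downloaded delta, the second asks applydeltarpm to verify a delta's sequence id against what is installed, presumably so unusable deltas can be skipped before download.

    applydeltarpm foo-1.1_1.0-1_1.i386.drpm foo-1.1-1.i386.rpm
    applydeltarpm -s SEQUENCE_ID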
- otherwise it returns false for a failure, true for success""" - - if type(fo) is types.InstanceType: - fo = fo.filename - - try: - verifyChecksum(fo, po.returnSimple('deltachecksumtype'), po.returnSimple('deltachecksum')) - except: - if raiseError: - raise URLGrabError(-1, 'Package does not match intended download') - else: - return False - - return True - - -def verifyChecksum(filename, checksumType, csum): - """Verify the checksum of the file versus the - provided checksum""" - - try: - - filesum = misc.checksum(checksumType, filename) - except Errors.MiscError, e: - raise URLGrabError(-3, 'Could not perform checksum') - - if filesum != csum: - raise URLGrabError(-1, 'Package does not match checksum') - - return 0 - - -def downloadPkgs(conduit, pkglist): - """download list of package objects handed to you, return errors""" - - opts, commands = conduit.getCmdLine() - - errors = {} - def adderror(po, msg): - errors.setdefault(po, []).append(msg) - - # Check whether drpm is already downloaded - repo_cached = False - remote_pkgs = [] - rebuild_pkgs = [] - for po in conduit.getDownloadPackages(): - if hasattr(po, 'has_drpm') and po.has_drpm: - po.to.pkgtup = po.to.realpkgtup - local = po.returnSimple('deltalocalpath') - if os.path.exists(local): - cursize = os.stat(local)[6] - totsize = long(po.returnSimple('deltasize')) - try: - verifyChecksum(local, po.returnSimple('deltachecksumtype'), po.returnSimple('deltachecksum')) - except: - if po.repo.p_repo.cache: - repo_cached = True - adderror(po, 'package fails checksum but caching is ' - 'enabled for %s' % po.repo.p_repo.id) - - if cursize >= totsize: # otherwise keep it around for regetting - os.unlink(local) - else: - # Deltarpm is local and good, let's put it in the rebuild list - conduit.info(5, "using local copy of deltarpm for %s" % po) - rebuild_pkgs.append(po) - continue - remote_pkgs.append(po) - - # Download deltarpms - i = 0 - for po in remote_pkgs: - i += 1 - checkfunc = (verifyDelta, (po, conduit, 1), {}) - cache = po.repo.p_repo.http_caching != 'none' - dirstat = os.statvfs(po.repo.deltasdir) - if (dirstat.f_bavail * dirstat.f_bsize) <= long(po.size): - adderror(po, 'Insufficient space in download directory %s ' - 'to download' % po.repo.deltasdir) - continue - po.simple['reallocalpath'] = po.localpath - po.localpath = po.returnSimple('deltalocalpath') - po.simple['realrelativepath'] = po.returnSimple('relativepath') - po.simple['relativepath'] = po.returnSimple('deltarelativepath') - try: - text = '(%s/%s): %s' % (i, len(remote_pkgs), os.path.basename(po.returnSimple('deltarelativepath'))) - deltalocal = po.repo.p_repo.getPackage(po, checkfunc=checkfunc, text=text, cache=cache) - except Errors.RepoError, e: - adderror(po, str(e)) - else: - rebuild_pkgs.append(po) - - if errors.has_key(po): - del errors[po] - - po.simple['relativepath'] = po.returnSimple('realrelativepath') - po.localpath = po.returnSimple('reallocalpath') - if po.simple.has_key('realpackagesize'): - po.simple['packagesize'] = po.returnSimple('realpackagesize') - del po.simple['realpackagesize'] - del po.simple['realrelativepath'] - del po.simple['reallocalpath'] - po.simple['deltalocalpath'] = deltalocal - - # Rebuild rpms from downloaded deltarpms - for po in rebuild_pkgs: - deltalocal = po.returnSimple('deltalocalpath') - drpm = deltarpm.DeltaRpmWrapper(conduit) - try: - conduit.info(2, "Building %s from %s" % (os.path.basename(po.localpath), os.path.basename(deltalocal))) - drpm.apply(po.localpath, deltalocal) - except: - conduit.info(2, "Error rebuilding 
rpm from %s! Will download full package." % os.path.basename(deltalocal)) - try: - os.unlink(po.localpath) - except: - pass - else: - # Set package type to local, so yum doesn't try to download it later - po.pkgtype = "local" - - # Check to see whether or not we should keep the drpms - # FIXME: Is there any way to see whether or not a Boolean option was not set? - if conduit.confBool('main', 'neverkeepdeltas'): - delete = True - elif conduit.confBool('main', 'keepdeltas'): - delete = False - elif conduit.getConf().keepcache != 0: - delete = False - else: - delete = True - - if delete: - try: - os.unlink(deltalocal) - except: - pass - - return errors - - diff --git a/shared/prestoLog.py b/shared/prestoLog.py deleted file mode 100644 index 3c0c1e6..0000000 --- a/shared/prestoLog.py +++ /dev/null @@ -1,71 +0,0 @@ -# author: Jonathan Dieter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# Copyright 2005 Duke University - -def log(conduit, LOG_FILE, rpm_size, drpm_size): - # Open log file for reading - try: - log_file = open(LOG_FILE, "r") - log_exists = True - except: - conduit.info(5, "Info: %s doesn't exist. Will create." % LOG_FILE) - log_exists = False - - # Log file doesn't exist, create - if not log_exists: - try: - log_file = open(LOG_FILE, "w") - log_file.write("Download Size (without DRPM),Download Size (with DRPM),Percentage Savings,Total Percentage Savings\n") - log_file.close() - log_exists = True - except: - conduit.info(2, "Warning: Unable to write to %s" % LOG_FILE) - if log_exists: - try: - log_file = open(LOG_FILE, "r") - except: - conduit.info(2, "Warning: Unable to open %s for reading." % LOG_FILE) - log_exists = False - - # Cycle through items already in log so we can come up with total savings - if log_exists: - total_rpm_size = 0 - total_drpm_size = 0 - - # Get rid of header line - log_file.readline() - - data = log_file.readline() - while data != "": - fc = data.find(",") - sc = data.find(",", fc + 1) - total_rpm_size += int(data[:fc]) - total_drpm_size += int(data[fc + 1:sc]) - data = log_file.readline() - log_file.close() - total_rpm_size += rpm_size - total_drpm_size += drpm_size - - try: - log_file = open(LOG_FILE, "a") - except: - conduit.info(2, "Warning: Unable to open %s for writing." 
% LOG_FILE) - log_exists = False - - # Write data to log - if log_exists: - log_file.write("%i,%i,%i,%i\n" % (rpm_size, drpm_size, 100 - ((drpm_size * 100) / rpm_size), 100 - ((total_drpm_size * 100) / total_rpm_size))) - log_file.close() diff --git a/shared/prestoRepo.py b/shared/prestoRepo.py deleted file mode 100644 index 582dc2f..0000000 --- a/shared/prestoRepo.py +++ /dev/null @@ -1,612 +0,0 @@ -# author: Jonathan Dieter -# -# mostly taken from yumRepo.py (part of yum) with a few minor modifications -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# Copyright 2005 Duke University - -import os -import re -import time -import types -import urlparse - -from yum import Errors -from urlgrabber.grabber import URLGrabber -import urlgrabber.mirror -from urlgrabber.grabber import URLGrabError -from yum.repos import Repository -from yum import repoMDObject -from yum import parser -from yum import config -from yum import misc - -class PrestoRepository(Repository): - """ - This is an actual repository object - - Configuration attributes are pulled in from config.RepoConf. - """ - - def __init__(self, repo, conduit): - Repository.__init__(self, repo.id) - - # If there's a specific deltarpm url, use that - is_different = False - if conduit.confString(repo.id, 'deltaurl'): - self.baseurl = [conduit.confString(repo.id, 'deltaurl')] - is_different = True - conduit.info(5, 'Manual url set from presto.conf: %s' % self.baseurl) - elif repo.deltaurl != []: - self.baseurl = repo.deltaurl - is_different = True - conduit.info(5, 'Manual url set from repository conf file: %s' % self.baseurl) - else: - self.baseurl = repo.baseurl - - # If there's a specific mirrorlist, use that - if conduit.confString(repo.id, 'deltamirrorlist'): - self.mirrorlist = conduit.confString(repo.id, 'deltamirrorlist') - self.baseurl = None - is_different = True - conduit.info(5, 'Manual mirrorlist set from presto.conf: %s' % self.mirrorlist) - elif repo.deltamirrorlist != None: - self.mirrorlist = repo.deltamirrorlist - self.baseurl = None - is_different = True - conduit.info(5, 'Manual mirrorlist set from repository conf file: %s' % self.mirrorlist) - else: - if self.baseurl == repo.baseurl: - self.mirrorlist = repo.mirrorlist - else: - self.mirrorlist = None - - self.conduit = conduit - self.urls = [] - self.is_different = is_different - if is_different: - self.repoMDFile = 'repodata/prestomd.xml' - self.metadata_cookie_fn = 'presto_cachecookie' - else: - self.repoMDFile = 'repodata/repomd.xml' - self.metadata_cookie_fn = 'cachecookie' - self.repoXML = None - self.cache = 0 - self.mirrorlistparsed = 0 - self.yumvar = {} # empty dict of yumvariables for $string replacement - self._proxy_dict = {} - self.http_headers = {} - - # throw in some stubs for things that will be set by the config class - self.basecachedir = "" - self.cachedir = "" - self.pkgdir = "" - 
self.hdrdir = "" - self.enabled = True - - # holder for stuff we've grabbed - self.retrieved = { 'deltas':0 } - - # callbacks - self.keepalive = repo.keepalive - self.bandwidth = repo.bandwidth - self.retries = repo.retries - self.throttle = repo.throttle - self.proxy = repo.proxy - self.proxy_username = repo.proxy_username - self.proxy_password = repo.proxy_password - self.timeout = repo.timeout - self.http_caching = repo.http_caching - self.failovermethod = repo.failovermethod - self.metadata_expire = repo.metadata_expire - self.basecachedir = repo.basecachedir - self.callback = repo.callback - self.failure_obj = repo.failure_obj - self.mirror_failure_obj = repo.mirror_failure_obj - self.interrupt_callback = repo.interrupt_callback - self.drpm_list = {} - self.parent = repo - repo.p_repo = self - - - def __getProxyDict(self): - self.doProxyDict() - if self._proxy_dict: - return self._proxy_dict - return None - - # consistent access to how proxy information should look (and ensuring - # that it's actually determined for the repo) - proxy_dict = property(__getProxyDict) - - def ready(self): - """Returns true if this repository is setup and ready for use.""" - return self.repoXML is not None - - def __cmp__(self, other): - if self.id > other.id: - return 1 - elif self.id < other.id: - return -1 - else: - return 0 - - def __str__(self): - return self.id - - def _checksum(self, sumtype, file, CHUNK=2**16): - """takes filename, hand back Checksum of it - sumtype = md5 or sha - filename = /path/to/file - CHUNK=65536 by default""" - try: - return misc.checksum(sumtype, file, CHUNK) - except (Errors.MiscError, EnvironmentError), e: - raise Errors.RepoError, 'Error opening file for checksum: %s' % e - - def dump(self): - output = '[%s]\n' % self.id - vars = ['id', 'bandwidth', 'enabled', - 'keepalive', 'proxy', - 'proxy_password', 'proxy_username', - 'retries', 'throttle', 'timeout', 'mirrorlist', - 'cachedir' ] - vars.sort() - for attr in vars: - output = output + '%s = %s\n' % (attr, getattr(self, attr)) - output = output + 'baseurl =' - for url in self.urls: - output = output + ' %s\n' % url - - return output - - def check(self): - """self-check the repo information - if we don't have enough to move - on then raise a repo error""" - if len(self.urls) < 1: - raise Errors.RepoError, \ - 'Cannot find a valid deltaurl for repo: %s' % self.id - - def doProxyDict(self): - if self._proxy_dict: - return - - self._proxy_dict = {} # zap it - proxy_string = None - if self.proxy not in [None, '_none_']: - proxy_string = '%s' % self.proxy - if self.proxy_username is not None: - proxy_parsed = urlparse.urlsplit(self.proxy, allow_fragments=0) - proxy_proto = proxy_parsed[0] - proxy_host = proxy_parsed[1] - proxy_rest = proxy_parsed[2] + '?' 
+ proxy_parsed[3] - proxy_string = '%s://%s@%s%s' % (proxy_proto, - self.proxy_username, proxy_host, proxy_rest) - - if self.proxy_password is not None: - proxy_string = '%s://%s:%s@%s%s' % (proxy_proto, - self.proxy_username, self.proxy_password, - proxy_host, proxy_rest) - - if proxy_string is not None: - self._proxy_dict['http'] = proxy_string - self._proxy_dict['https'] = proxy_string - self._proxy_dict['ftp'] = proxy_string - - def __headersListFromDict(self): - """Convert our dict of headers to a list of 2-tuples for urlgrabber.""" - headers = [] - - keys = self.http_headers.keys() - for key in keys: - headers.append((key, self.http_headers[key])) - - return headers - - def setupGrab(self): - """sets up the grabber functions with the already stocked in urls for - the mirror groups""" - - if self.failovermethod == 'roundrobin': - mgclass = urlgrabber.mirror.MGRandomOrder - else: - mgclass = urlgrabber.mirror.MirrorGroup - - headers = tuple(self.__headersListFromDict()) - - self.grabfunc = URLGrabber(keepalive=self.keepalive, - bandwidth=self.bandwidth, - retry=self.retries, - throttle=self.throttle, - progress_obj=self.callback, - proxies = self.proxy_dict, - failure_callback=self.failure_obj, - interrupt_callback=self.interrupt_callback, - timeout=self.timeout, - http_headers=headers, - reget='simple') - - self.grab = mgclass(self.grabfunc, self.urls, - failure_callback=self.mirror_failure_obj) - - def dirSetup(self): - """make the necessary dirs, if possible, raise on failure""" - - cachedir = os.path.join(self.parent.basecachedir, self.id) - deltasdir = os.path.join(cachedir, 'deltas') - self.parent.setAttribute('deltasdir', deltasdir) - self.setAttribute('cachedir', cachedir) - - cookie = cachedir + '/' + self.metadata_cookie_fn - self.setAttribute('metadata_cookie', cookie) - - for dir in [cachedir, self.parent.deltasdir]: - if self.cache == 0: - if os.path.exists(dir) and os.path.isdir(dir): - continue - else: - try: - os.makedirs(dir, mode=0755) - except OSError, e: - raise Errors.RepoError, \ - "Error making cache directory: %s error was: %s" % (dir, e) - else: - if not os.path.exists(dir): - raise Errors.RepoError, \ - "Cannot access repository dir %s" % dir - - def baseurlSetup(self): - """go through the baseurls and mirrorlists and populate self.urls - with valid ones, run self.check() at the end to make sure it worked""" - - goodurls = [] - if self.mirrorlist and not self.mirrorlistparsed: - mirrorurls = getMirrorList(self.mirrorlist, self.proxy_dict) - self.mirrorlistparsed = 1 - for url in mirrorurls: - url = parser.varReplace(url, self.yumvar) - self.baseurl.append(url) - - for url in self.baseurl: - url = parser.varReplace(url, self.yumvar) - (s,b,p,q,f,o) = urlparse.urlparse(url) - if s not in ['http', 'ftp', 'file', 'https']: - print 'not using ftp, http[s], or file for repos, skipping - %s' % (url) - continue - else: - goodurls.append(url) - - self.setAttribute('urls', goodurls) - self.check() - self.setupGrab() # update the grabber for the urls - - def __get(self, url=None, relative=None, local=None, start=None, end=None, - copy_local=0, checkfunc=None, text=None, reget='simple', cache=True): - """retrieve file from the mirrorgroup for the repo - relative to local, optionally get range from - start to end, also optionally retrieve from a specific baseurl""" - - # if local or relative is None: raise an exception b/c that shouldn't happen - # if url is not None - then do a grab from the complete url - not through - # the mirror, raise errors as need be - # if url is 
None do a grab via the mirror group/grab for the repo - # return the path to the local file - - # Turn our dict into a list of 2-tuples - headers = self.__headersListFromDict() - - # We will always prefer to send no-cache. - if not (cache or self.http_headers.has_key('Pragma')): - headers.append(('Pragma', 'no-cache')) - - headers = tuple(headers) - - if local is None or relative is None: - raise Errors.RepoError, \ - "get request for Repo %s, gave no source or dest" % self.id - - if self.cache == 1: - if os.path.exists(local): # FIXME - we should figure out a way - return local # to run the checkfunc from here - - else: # ain't there - raise - raise Errors.RepoError, \ - "Caching enabled but no local cache of %s from %s" % (local, - self) - if url is not None: - ug = URLGrabber(keepalive = self.keepalive, - bandwidth = self.bandwidth, - retry = self.retries, - throttle = self.throttle, - progress_obj = self.callback, - copy_local = copy_local, - reget = reget, - proxies = self.proxy_dict, - failure_callback = self.failure_obj, - interrupt_callback=self.interrupt_callback, - timeout=self.timeout, - checkfunc=checkfunc, - http_headers=headers, - ) - - remote = url + '/' + relative - - try: - result = ug.urlgrab(remote, local, - text=text, - range=(start, end), - ) - except URLGrabError, e: - raise Errors.RepoError, \ - "failed to retrieve %s from %s\nerror was %s" % (relative, self.id, e) - - else: - try: - result = self.grab.urlgrab(relative, local, - text = text, - range = (start, end), - copy_local=copy_local, - reget = reget, - checkfunc=checkfunc, - http_headers=headers, - ) - except URLGrabError, e: - raise Errors.RepoError, "failure: %s from %s: %s" % (relative, self.id, e) - - return result - - def getPackage(self, package, checkfunc = None, text = None, cache = True): - remote = package.returnSimple('relativepath') - local = package.localPkg() - basepath = package.returnSimple('basepath') - - return self.__get(url=basepath, - relative=remote, - local=local, - checkfunc=checkfunc, - text=text, - cache=cache - ) - - def metadataCurrent(self): - """Check if there is a metadata_cookie and check its age. If the - age of the cookie is less than metadata_expire time then return true - else return False""" - - val = False - if os.path.exists(self.metadata_cookie): - cookie_info = os.stat(self.metadata_cookie) - if cookie_info[8] + self.metadata_expire > time.time(): - val = True - # WE ARE FROM THE FUTURE!!!! 
- elif cookie_info[8] > time.time(): - val = False - return val - - def setMetadataCookie(self): - """if possible, set touch the metadata_cookie file""" - - check = self.metadata_cookie - if not os.path.exists(self.metadata_cookie): - check = self.cachedir - - if os.access(check, os.W_OK): - fo = open(self.metadata_cookie, 'w+') - fo.close() - del fo - - - def setup(self, cache): - try: - self.cache = cache - self.baseurlSetup() - self.dirSetup() - except Errors.RepoError, e: - raise - - try: - self._loadRepoXML(text=self) - except Errors.RepoError, e: - raise Errors.RepoError, ('Cannot open/read %s file for repository: %s' % (self.repoMDFile, self)) - - - def _loadRepoXML(self, text=None): - """retrieve/check/read in repomd.xml from the repository""" - - remote = self.repoMDFile - if self.is_different: - local = self.cachedir + '/prestomd.xml' - else: - local = self.cachedir + '/repomd.xml' - - if self.repoXML is not None: - return - - if self.cache or self.metadataCurrent(): - if not os.path.exists(local): - raise Errors.RepoError, 'Cannot find %s file for %s' % (self.repoMDFile, self) - else: - result = local - else: - checkfunc = (self._checkRepoXML, (), {}) - try: - result = self.__get(relative=remote, - local=local, - copy_local=1, - text=text, - reget=None, - checkfunc=checkfunc, - cache=self.http_caching == 'all') - - - except URLGrabError, e: - raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e) - # if we have a 'fresh' repomd.xml then update the cookie - self.setMetadataCookie() - - try: - self.repoXML = repoMDObject.RepoMD(self.id, result) - except Errors.RepoMDError, e: - raise Errors.RepoError, 'Error importing %s from %s: %s' % (self.repoMDFile, self, e) - - def _checkRepoXML(self, fo): - if type(fo) is types.InstanceType: - filepath = fo.filename - else: - filepath = fo - - try: - repoMDObject.RepoMD(self.id, filepath) - except Errors.RepoMDError, e: - raise URLGrabError(-1, 'Error importing %s for %s: %s' % (self.repoMDFile, self, e)) - - - def checkMD(self, fn, mdtype): - """check the metadata type against its checksum""" - - thisdata = self.repoXML.getData(mdtype) - - (r_ctype, r_csum) = thisdata.checksum # get the remote checksum - - if type(fn) == types.InstanceType: # this is an urlgrabber check - file = fn.filename - else: - file = fn - - try: - l_csum = self._checksum(r_ctype, file) # get the local checksum - except Errors.RepoError, e: - raise URLGrabError(-3, 'Error performing checksum') - - if l_csum == r_csum: - return 1 - else: - raise URLGrabError(-1, 'Metadata file does not match checksum') - - - - def retrieveMD(self, mdtype): - """base function to retrieve metadata files from the remote url - returns the path to the local metadata file of a 'mdtype' - mdtype must be 'deltas'.""" - try: - thisdata = self.repoXML.getData(mdtype) - except Errors.RepoMDError: - self.enabled = False - self.conduit.info(5, "No drpms available for %s" % self.id) - return - - (r_base, remote) = thisdata.location - fname = os.path.basename(remote) - local = self.cachedir + '/' + fname - - if self.retrieved.has_key(mdtype): - if self.retrieved[mdtype]: # got it, move along - return local - - if self.cache == 1: - if os.path.exists(local): - try: - self.checkMD(local, mdtype) - except URLGrabError, e: - raise Errors.RepoError, \ - "Caching enabled and local cache: %s does not match checksum" % local - else: - return local - - else: # ain't there - raise - raise Errors.RepoError, \ - "Caching enabled but no local cache of %s from %s" % (local, - self) - - if 
os.path.exists(local): - try: - self.checkMD(local, mdtype) - except URLGrabError, e: - pass - else: - self.retrieved[mdtype] = 1 - return local # it's the same return the local one - - try: - checkfunc = (self.checkMD, (mdtype,), {}) - local = self.__get(relative=remote, local=local, copy_local=1, - checkfunc=checkfunc, reget=None, - cache=self.http_caching == 'all') - except URLGrabError, e: - raise Errors.RepoError, \ - "Could not retrieve %s matching remote checksum from %s" % (local, self) - else: - self.retrieved[mdtype] = 1 - return local - - - def getPrestoXML(self): - """this gets you the path to the primary.xml file, retrieving it if we - need a new one""" - - return self.retrieveMD('deltas') - - def setCallback(self, callback): - self.callback = callback - self.setupGrab() - - def setFailureObj(self, failure_obj): - self.failure_obj = failure_obj - self.setupGrab() - - def setMirrorFailureObj(self, failure_obj): - self.mirror_failure_obj = failure_obj - self.setupGrab() - - def setInterruptCallback(self, callback): - self.interrupt_callback = callback - self.setupGrab() - -def getMirrorList(mirrorlist, pdict = None): - """retrieve an up2date-style mirrorlist file from a url, - we also s/$ARCH/$BASEARCH/ and move along - returns a list of the urls from that file""" - - returnlist = [] - if hasattr(urlgrabber.grabber, 'urlopen'): - urlresolver = urlgrabber.grabber - else: - import urllib - urlresolver = urllib - - scheme = urlparse.urlparse(mirrorlist)[0] - if scheme == '': - url = 'file://' + mirrorlist - else: - url = mirrorlist - - try: - fo = urlresolver.urlopen(url, proxies=pdict) - except urlgrabber.grabber.URLGrabError, e: - print "Could not retrieve mirrorlist %s error was\n%s" % (url, e) - fo = None - - if fo is not None: - content = fo.readlines() - for line in content: - if re.match('^\s*\#.*', line) or re.match('^\s*$', line): - continue - mirror = re.sub('\n$', '', line) # no more trailing \n's - (mirror, count) = re.subn('\$ARCH', '$BASEARCH', mirror) - returnlist.append(mirror) - - return returnlist - diff --git a/shared/prestoTransaction.py b/shared/prestoTransaction.py deleted file mode 100644 index 3d387a4..0000000 --- a/shared/prestoTransaction.py +++ /dev/null @@ -1,97 +0,0 @@ -# author: Jonathan Dieter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-# Copyright 2005 Duke University - -import os -import deltarpm - -def find_available_drpms(conduit, newpkg): - """Find any applicable drpms for newpkg - newpkg is a TransactionMember""" - - rpmdb = conduit.getRpmDB() - - is_local = False - - # Set p_repo to be packages delta repository or set to False if - # there is no delta repository - try: - p_repo = newpkg.po.repo.p_repo - drpm_enabled = p_repo.enabled - - po = newpkg.po - if hasattr(po, 'pkgtype') and po.pkgtype == 'local': - is_local = True - else: - local = po.localPkg() - if os.path.exists(local): - cursize = os.stat(local)[6] - totsize = long(po.size) - if not po.verifyLocalPkg(): - if cursize >= totsize: # otherwise keep it around for regetting - os.unlink(local) - else: - conduit.info(5, "using local copy of %s" % po) - is_local = True - - except: - conduit.info(5, "No Presto repository information for %s.%s %i:%s-%s" % (newpkg.name, newpkg.arch, int(newpkg.epoch), newpkg.version, newpkg.release)) - drpm_enabled = False - is_local = False - - chosen_drpm = None - - # First part of key when matching drpms - key1 = "%s*%s*%i*%s*%s" % (newpkg.name, newpkg.arch, int(newpkg.epoch), newpkg.version, newpkg.release) - - # Find any installed packages that match the ones we want to download - installed = rpmdb.searchNevra(newpkg.name, None, None, None, newpkg.arch) - - if installed == []: - is_installed = False - else: - is_installed = True - - - if is_installed and drpm_enabled and not is_local: - for oldpkg in installed: - # Generate second part of key for matching drpms, then full key - key2 = "%s*%s*%i*%s*%s" % (oldpkg.name, oldpkg.arch, int(oldpkg.epoch), oldpkg.version, oldpkg.release) - key = "%s!!%s" % (key1, key2) - - # Check whether we have a matching drpm - if p_repo.deltalist.has_key(key): - # Check whether or not we already have a matching drpm, then choose smallest of the two if we do - if chosen_drpm == None or p_repo.deltalist[key]['size'] < chosen_drpm['size']: - - # Get sequence code for drpm - sequence = p_repo.deltalist[key]['sequence'] - if int(oldpkg.epoch) == 0: - seq = "%s-%s-%s-%s" % (oldpkg.name, oldpkg.version, oldpkg.release, sequence) - else: - seq = "%s-%i:%s-%s-%s" % (oldpkg.name, int(oldpkg.epoch), oldpkg.version, oldpkg.release, sequence) - drpm = deltarpm.DeltaRpmWrapper(conduit) - - # Attempt to apply sequence code for drpm. If this fails, drpm will not apply cleanly, so - # don't even try to download it. - try: - drpm.verifySequence(seq) - chosen_drpm = p_repo.deltalist[key] - chosen_drpm['baseurl'] = p_repo.baseurl[0] - except: - conduit.info(5, "Verification of %s failed" % seq) - - return (chosen_drpm, installed, is_local, drpm_enabled) diff --git a/shared/prestomdparser.py b/shared/prestomdparser.py deleted file mode 100644 index 1713531..0000000 --- a/shared/prestomdparser.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/python -t -# -# author: Jonathan Dieter -# -# mostly taken from mdparser.py (part of yum) with a few minor modifications -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Library General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# Copyright 2005 Duke University - -import gzip -from cElementTree import iterparse - -from cStringIO import StringIO - -#TODO: document everything here - -class PrestoMDParser: - - def __init__(self, filename): - - # Set up mapping of meta types to handler classes - handlers = { - '{http://linux.duke.edu/metadata/common}metadata': DeltasEntry, - } - - self.total = None - self.count = 0 - self._handlercls = None - - # Read in type, set package node handler and get total number of - # packages - if filename[-3:] == '.gz': fh = gzip.open(filename, 'r') - else: fh = open(filename, 'r') - parser = iterparse(fh, events=('start', 'end')) - self.reader = parser.__iter__() - event, elem = self.reader.next() - self._handlercls = handlers.get(elem.tag, None) - if not self._handlercls: - raise ValueError('Unknown repodata type "%s" in %s' % ( - elem.tag, filename)) - - def getDeltaList(self): - for event, elem in self.reader: - if event == 'end' and elem.tag == '{http://linux.duke.edu/metadata/common}metadata': - return self._handlercls(elem) - - -class BaseEntry: - def __init__(self, elem): - self._p = {} - - def __getitem__(self, k): - return self._p[k] - - def keys(self): - return self._p.keys() - - def values(self): - return self._p.values() - - def has_key(self, k): - return self._p.has_key(k) - - def __str__(self): - out = StringIO() - keys = self.keys() - keys.sort() - for k in keys: - line = u'%s=%s\n' % (k, self[k]) - out.write(line.encode('utf8')) - return out.getvalue() - - def _bn(self, qn): - if qn.find('}') == -1: return qn - return qn.split('}')[1] - - def _prefixprops(self, elem, prefix): - ret = {} - for key in elem.attrib.keys(): - ret[prefix + '_' + self._bn(key)] = elem.attrib[key] - return ret - -class DeltasEntry(BaseEntry): - def __init__(self, deltas): - BaseEntry.__init__(self, deltas) - # Avoid excess typing :) - p = self._p - - for elem in deltas: - temp = {} - key1 = "" - key2 = "" - for child in elem: - name = self._bn(child.tag) - if name in ('name', 'arch'): - temp[name] = child.text - - elif name == 'version': - attrib = child.attrib - try: - attrib['epoch'] = int(attrib['epoch']) - except: - attrib['epoch'] = 0 - key1 = "%s*%s*%i*%s*%s" % (temp['name'], temp['arch'], attrib['epoch'], attrib['ver'], attrib['rel']) - - elif name == 'deltas': - for oldrpm in child: - temp2 = {} - value = {} - key = None - for oldrpm_child in oldrpm: - name = self._bn(oldrpm_child.tag) - if name in ('name', 'arch'): - temp2[name] = oldrpm_child.text - - elif name == 'version': - ch_attrib = oldrpm_child.attrib - try: - ch_attrib['epoch'] = int(ch_attrib['epoch']) - except: - ch_attrib['epoch'] = attrib['epoch'] - try: - ch_attrib['ver'] = ch_attrib['ver'] - except: - ch_attrib['ver'] = attrib['ver'] - if not temp2.has_key('name'): - temp2['name'] = temp['name'] - if not temp2.has_key('arch'): - temp2['arch'] = temp['arch'] - key2 = "%s*%s*%i*%s*%s" % (temp2['name'], temp2['arch'], ch_attrib['epoch'], ch_attrib['ver'], ch_attrib['rel']) - key = "%s!!%s" % (key1, key2) - p[key] = {} - - if name in ('sequence', 'drpm_filename', 'size'): - p[key][name] = oldrpm_child.text - - if name == "checksum": - p[key][name] = oldrpm_child.text - p[key]["%s_type" % name] = oldrpm_child.attrib['type'] - deltas.clear() - -def test(): - import sys - - parser = PrestoMDParser(sys.argv[1]) - - 
deltalist = parser.getDeltaList() - - print '-' * 40 - print deltalist - - print 'read: %s deltarpms ' % (len(deltalist.keys())) - -if __name__ == '__main__': - test() diff --git a/yum-presto/COPYING b/yum-presto/COPYING new file mode 100644 index 0000000..e77696a --- /dev/null +++ b/yum-presto/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. 
+ +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/yum-presto/ChangeLog b/yum-presto/ChangeLog new file mode 100644 index 0000000..7f57c2a --- /dev/null +++ b/yum-presto/ChangeLog @@ -0,0 +1,36 @@ +* Thu Mar 29 2007 Jonathan Dieter - 0.3.1 + - Minor fix so yum doesn't die when download fails + - Minor fix to allow public keys to be imported properly + - Update README + +* Wed Mar 28 2007 Jonathan Dieter - 0.3.0 + - Massive changes to downloading structure + - When unable to rebuild drpm, we now download full rpm + - Stop doing slow MD5 check and just check RPM header + while we have a prelink bug + +* Mon Mar 26 2007 Jonathan Dieter - 0.2.9 + - Fix another mirrorlist bug + - Minor optimization + - Added logging to /var/log/presto.log + - Fix another mirrorlist bug + - Fix bug where we sometimes die if delta repository doesn't exist + - Properly exit when unable to rebuild drpm + - Do full (slow) MD5 check when checking to see if we can + build RPM from disk + +* Sat Mar 24 2007 Jonathan Dieter - 0.2.3 + - Fixed bug that breaks yum install + +* Sat Mar 24 2007 Jonathan Dieter - 0.2.2 + - Fixed "not showing download error" bug + - Added --disablepresto yum command-line option + - Added code to trap the (hopefully) unlikely scenario where applydeltarpm + fails + - Show byte savings at end of yum update + +* Fri Mar 23 2007 Jonathan Dieter - 0.2.1 + - Fixed bug in handling mirrorlists in original repositories + +* Thu Mar 22 2007 Jonathan Dieter - 0.2.0 + - Initial release diff --git a/yum-presto/Makefile b/yum-presto/Makefile new file mode 100644 index 0000000..a335109 --- /dev/null +++ b/yum-presto/Makefile @@ -0,0 +1,16 @@ +clean: + rm -f *.pyc *.pyo *~ + cd shared; rm -f *.pyc *.pyo *~ + +install: + mkdir -p $(DESTDIR)/usr/lib/yum-plugins + install -m 644 presto.py $(DESTDIR)/usr/lib/yum-plugins + mkdir -p $(DESTDIR)/etc/yum/pluginconf.d + install -m 644 presto.conf $(DESTDIR)/etc/yum/pluginconf.d + mkdir -p $(DESTDIR)/usr/share/presto + install -m 644 shared/prestoRepo.py $(DESTDIR)/usr/share/presto + install -m 644 shared/prestomdparser.py $(DESTDIR)/usr/share/presto + install -m 644 shared/prestoTransaction.py $(DESTDIR)/usr/share/presto + install -m 644 shared/prestoLog.py $(DESTDIR)/usr/share/presto + install -m 644 shared/prestoDownload.py $(DESTDIR)/usr/share/presto + install -m 644 shared/deltarpm.py $(DESTDIR)/usr/share/presto diff --git a/yum-presto/README b/yum-presto/README new file mode 100644 index 0000000..7fdea95 --- /dev/null +++ b/yum-presto/README @@ -0,0 +1,26 @@ +Presto: A project to add delta rpm support into 
yum for Fedora users +https://hosted.fedoraproject.org/projects/presto/wiki/WikiStart + +Installation: +============= +1- Install yum-presto on your system (yum -y install yum-presto) +2- Now install an old rpm from updates or extras using rpm, then try updating + it using yum. The plugin should kick in, try to download the drpm, + reconstruct the full rpm, and yum should install that. + +Notes: +====== +Presto will read the deltaurl from two possible locations: +1. The repository's .repo file ("deltaurl = http://repository.com") +2. Appended to /etc/yum/pluginconf.d/presto.conf in the form: + [repository] + deltaurl = http://repository.com + +Presto.conf has the following options in [main]: +keepdeltas=1 Always keep deltas in cache no matter what keepcache + is set to. +neverkeepdeltas=1 Always remove deltas after creating full rpms. +exitondownloadfailure=0|1 If there is a problem downloading the deltarpm, exit + rather than trying to download the full rpm. +Note: If you specify neither keepdeltas nor neverkeepdeltas, presto will follow + the keepcache option in yum.conf. diff --git a/yum-presto/presto.conf b/yum-presto/presto.conf new file mode 100644 index 0000000..899a4e2 --- /dev/null +++ b/yum-presto/presto.conf @@ -0,0 +1,9 @@ +[main] +enabled=1 +neverkeepdeltas=1 + +[updates] +deltaurl=http://www.lesbg.com/jdieter/updates/fc6/i386/ + +[extras] +deltaurl=http://www.lesbg.com/jdieter/extras/fc6/i386/ diff --git a/yum-presto/presto.py b/yum-presto/presto.py new file mode 100644 index 0000000..e9908ee --- /dev/null +++ b/yum-presto/presto.py @@ -0,0 +1,149 @@ +# author: Jonathan Dieter +# +# heavily modified from yum-deltarpm.py created by +# Lars Herrmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+# Copyright 2005 Duke University + +from yum.plugins import TYPE_INTERACTIVE, PluginYumExit +from yum import config + +import os +import sys + +sys.path.append("/usr/share/presto") +import deltarpm +from prestoRepo import PrestoRepository +from prestomdparser import PrestoMDParser +import prestoTransaction +import prestoLog +import prestoDownload + +requires_api_version = '2.1' +LOG_FILE = "/var/log/presto.log" +plugin_type = (TYPE_INTERACTIVE,) + +rpm_size = 0 +drpm_size = 0 +drpm_count = 0 + +# Configuration stuff +def config_hook(conduit): + # Set up repository specific deltarpm url and mirrorlist + config.RepoConf.deltaurl = config.UrlListOption() + config.RepoConf.deltamirrorlist = config.UrlOption() + + # Add --disable-presto option + parser = conduit.getOptParser() + parser.add_option('', '--disablepresto', dest='disablepresto', + action='store_true', default=False, + help="disable Presto plugin and don't download any deltarpms") + +# Set up Presto repositories +def postreposetup_hook(conduit): + opts, commands = conduit.getCmdLine() + if not opts.disablepresto: + conduit.info(2, 'Setting up Presto') + for active_repo in conduit.getRepos().listEnabled(): + p_repo = PrestoRepository(active_repo, conduit) + p_repo.setup(conduit.getConf().cache) + + conduit.info(2, 'Reading Presto metadata in from local files') + for active_repo in conduit.getRepos().listEnabled(): + xml = active_repo.p_repo.getPrestoXML() + if active_repo.p_repo.enabled: + xmldata = active_repo.p_repo.repoXML.getData('deltas') + (ctype, csum) = xmldata.checksum + parser = PrestoMDParser(xml) + active_repo.p_repo.deltalist = parser.getDeltaList() + else: + conduit.info(5, '--disablepresto specified - Presto disabled') + + +def postresolve_hook(conduit): + global rpm_size + global drpm_size + global drpm_count + + opts, commands = conduit.getCmdLine() + if not opts.disablepresto: + # Cycle through packages to see if there's a deltarpm available + for newpkg in conduit.getTsInfo(): + if newpkg.ts_state != "e": + (chosen_drpm, installed, local, drpm_enabled) = prestoTransaction.find_available_drpms(conduit, newpkg) + + # If a drpm was found, change certain package information so it reflects + # the drpm, not the rpm. 
+ if chosen_drpm != None: + newpkg.po.has_drpm = True + conduit.info(2, "Found deltarpm update for %s.%s %s:%s-%s" % (newpkg.name, newpkg.arch, newpkg.epoch, newpkg.version, newpkg.release)) + # In yum 3.0.x, this doesn't get defined if you run "yum update x" rather than "yum update" + rpm_size += int(newpkg.po.size) + drpm_size += int(chosen_drpm['size']) + newpkg.po.simple['realpackagesize'] = newpkg.po.size + newpkg.po.simple['packagesize'] = chosen_drpm['size'] + newpkg.po.simple['deltasize'] = chosen_drpm['size'] + newpkg.po.simple['deltarelativepath'] = chosen_drpm['drpm_filename'] + newpkg.po.simple['deltachecksumtype'] = chosen_drpm['checksum_type'] + newpkg.po.simple['deltachecksum'] = chosen_drpm['checksum'] + newpkg.po.simple['deltalocalpath'] = newpkg.po.repo.deltasdir + "/" + os.path.basename(chosen_drpm['drpm_filename']) + newpkg.po.to = newpkg + newpkg.realpkgtup = newpkg.pkgtup + newpkg.pkgtup = (newpkg.name + " *", newpkg.arch, newpkg.epoch, newpkg.version, newpkg.release) + newpkg.po.hasdrpm = True + drpm_count += 1 + else: + if installed and drpm_enabled and not local: + try: + rpm_size += int(newpkg.po.simple['packagesize']) + drpm_size += int(newpkg.po.simple['packagesize']) + except: + pass + return + + +def predownload_hook(conduit): + global drpm_count + + pkglist = conduit.getDownloadPackages() + + opts, commands = conduit.getCmdLine() + if not opts.disablepresto and drpm_count > 0: + # Download deltarpms + problems = prestoDownload.downloadPkgs(conduit, pkglist) + + # If 'exitondownloaderror' is on, exit + if conduit.confBool('main', 'exitondownloaderror') and len(problems.keys()) > 0: + errstring = '' + errstring += 'Error Downloading Packages:\n' + for key in problems.keys(): + errors = misc.unique(problems[key]) + for error in errors: + errstring += ' %s: %s\n' % (key, error) + raise PluginYumExit(errstring) + + +def posttrans_hook(conduit): + global rpm_size + global drpm_size + global LOG_FILE + + if rpm_size > 0: + prestoLog.log(conduit, LOG_FILE, rpm_size, drpm_size) + + conduit.info(2, "Size of all updates downloaded from Presto-enabled repositories: %i bytes" % drpm_size) + conduit.info(2, "Size of updates that would have been downloaded if Presto wasn't enabled: %i bytes" % rpm_size) + conduit.info(2, "This is a savings of %i percent" % (100 - ((drpm_size * 100) / rpm_size))) diff --git a/yum-presto/shared/deltarpm.py b/yum-presto/shared/deltarpm.py new file mode 100644 index 0000000..710a8bb --- /dev/null +++ b/yum-presto/shared/deltarpm.py @@ -0,0 +1,86 @@ +# author: Jonathan Dieter +# +# mostly taken from deltarpm.py created by +# Lars Herrmann +# and modified for Presto by +# Ahmed Kamal +# +# license: GPL (see COPYING file in distribution) +# +# this module provides a python wrapper around deltarpm tools written by suse +# +# TODO: catch exceptions wherever possible and raise useful ones ;) +# see TODO lines in methods + +APPLY='/usr/bin/applydeltarpm' + +import popen2 +import string +import os + +class Process: + """wrapper class to execute programs and return exitcode and output (stdout and stderr combined)""" + def __init__(self, conduit): + self.__stdout=None + self.__returncode=None + self.__command=None + self.__args=None + self.conduit = conduit + + def run(self, command, *args): + self.__command=command + self.__args=args + cmdline=command+" "+string.join(args, " ") + self.conduit.info(7, '%s.%s: executing %s' % (self.__class__, 'run', cmdline)) + pipe = popen2.Popen4(cmdline) + self.__stdout=pipe.fromchild.read() + retcode = pipe.wait() + 
if os.WIFEXITED(retcode): + self.__returncode = os.WEXITSTATUS(retcode) + else: + self.__returncode = retcode + # fallback to old implementation - works better ? + #stdoutp = os.popen(cmdline,'r',1) + #self.__stdout = stdoutp.read() + #retcode = stdoutp.close() + #if retcode is None: + # self.__returncode = 0 + #else: + # self.__returncode = retcode + + def getOutput(self): + return self.__stdout + + def returnCode(self): + return self.__returncode + +class DeltaRpmWrapper: + """wrapper around deltarpm binaries - implement methods for applying and verifying delta rpms + - raises exceptions if exitcode of binaries was != 0""" + + def __init__(self, conduit): + self.conduit = conduit + self.conduit.info(7, '%s.%s: created' % (self.__class__, '__init__')) + + def apply(self, newrpmfile, deltarpmfile): + """wraps execution of applydeltarpm [-r oldrpm] deltarpm newrpm - + constructs file names and paths based on given RpmDescription and instance settings for directories""" + # TODO: test args for type == instance and __class__ == RpmDescription + self.conduit.info(7, '%s.apply(%s,%s)' % (self.__class__, newrpmfile, deltarpmfile)) + p=Process(self.conduit) + # targetrpm filename + p.run(APPLY, deltarpmfile, newrpmfile) + if p.returnCode(): + # in case of error, raise exception + raise Exception("Could not apply deltarpm: %d" % (p.returnCode())) + return newrpmfile + + def verifySequence(self, sequence): + """wraps execution of applydeltarpm [-r oldrpm] -s seqfilecontent - + constructs file names and paths based on given RpmDescription and instance settings for directories""" + self.conduit.info(7, '%s.verify(%s)' % (self.__class__, sequence)) + p = Process(self.conduit) + p.run(APPLY, '-s', sequence) + if p.returnCode(): + # in case of error, raise exception + raise Exception("Could not verify sequence of deltarpm: %d" % (p.returnCode())) diff --git a/yum-presto/shared/prestoDownload.py b/yum-presto/shared/prestoDownload.py new file mode 100644 index 0000000..340ad1c --- /dev/null +++ b/yum-presto/shared/prestoDownload.py @@ -0,0 +1,171 @@ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# Copyright 2005 Duke University +# Copyright 2007 Jonathan Dieter + +import os +from yum import misc +from yum import Errors +from yum import types +from urlgrabber.grabber import URLGrabError +import deltarpm + +def verifyDelta(fo, po, conduit, raiseError): + """verifies the deltarpm is what we expect it to be + raiseError = defaults to 0 - if 1 then will raise + a URLGrabError if the file does not check out. 
+ otherwise it returns false for a failure, true for success""" + + if type(fo) is types.InstanceType: + fo = fo.filename + + try: + verifyChecksum(fo, po.returnSimple('deltachecksumtype'), po.returnSimple('deltachecksum')) + except: + if raiseError: + raise URLGrabError(-1, 'Package does not match intended download') + else: + return False + + return True + + +def verifyChecksum(filename, checksumType, csum): + """Verify the checksum of the file versus the + provided checksum""" + + try: + + filesum = misc.checksum(checksumType, filename) + except Errors.MiscError, e: + raise URLGrabError(-3, 'Could not perform checksum') + + if filesum != csum: + raise URLGrabError(-1, 'Package does not match checksum') + + return 0 + + +def downloadPkgs(conduit, pkglist): + """download list of package objects handed to you, return errors""" + + opts, commands = conduit.getCmdLine() + + errors = {} + def adderror(po, msg): + errors.setdefault(po, []).append(msg) + + # Check whether drpm is already downloaded + repo_cached = False + remote_pkgs = [] + rebuild_pkgs = [] + for po in conduit.getDownloadPackages(): + if hasattr(po, 'has_drpm') and po.has_drpm: + po.to.pkgtup = po.to.realpkgtup + local = po.returnSimple('deltalocalpath') + if os.path.exists(local): + cursize = os.stat(local)[6] + totsize = long(po.returnSimple('deltasize')) + try: + verifyChecksum(local, po.returnSimple('deltachecksumtype'), po.returnSimple('deltachecksum')) + except: + if po.repo.p_repo.cache: + repo_cached = True + adderror(po, 'package fails checksum but caching is ' + 'enabled for %s' % po.repo.p_repo.id) + + if cursize >= totsize: # otherwise keep it around for regetting + os.unlink(local) + else: + # Deltarpm is local and good, let's put it in the rebuild list + conduit.info(5, "using local copy of deltarpm for %s" % po) + rebuild_pkgs.append(po) + continue + remote_pkgs.append(po) + + # Download deltarpms + i = 0 + for po in remote_pkgs: + i += 1 + checkfunc = (verifyDelta, (po, conduit, 1), {}) + cache = po.repo.p_repo.http_caching != 'none' + dirstat = os.statvfs(po.repo.deltasdir) + if (dirstat.f_bavail * dirstat.f_bsize) <= long(po.size): + adderror(po, 'Insufficient space in download directory %s ' + 'to download' % po.repo.deltasdir) + continue + po.simple['reallocalpath'] = po.localpath + po.localpath = po.returnSimple('deltalocalpath') + po.simple['realrelativepath'] = po.returnSimple('relativepath') + po.simple['relativepath'] = po.returnSimple('deltarelativepath') + try: + text = '(%s/%s): %s' % (i, len(remote_pkgs), os.path.basename(po.returnSimple('deltarelativepath'))) + deltalocal = po.repo.p_repo.getPackage(po, checkfunc=checkfunc, text=text, cache=cache) + except Errors.RepoError, e: + adderror(po, str(e)) + else: + rebuild_pkgs.append(po) + po.simple['deltalocalpath'] = deltalocal + + if errors.has_key(po): + del errors[po] + + po.simple['relativepath'] = po.returnSimple('realrelativepath') + po.localpath = po.returnSimple('reallocalpath') + if po.simple.has_key('realpackagesize'): + po.simple['packagesize'] = po.returnSimple('realpackagesize') + del po.simple['realpackagesize'] + del po.simple['realrelativepath'] + del po.simple['reallocalpath'] + + # Rebuild rpms from downloaded deltarpms + for po in rebuild_pkgs: + deltalocal = po.returnSimple('deltalocalpath') + drpm = deltarpm.DeltaRpmWrapper(conduit) + try: + conduit.info(2, "Building %s from %s" % (os.path.basename(po.localpath), os.path.basename(deltalocal))) + drpm.apply(po.localpath, deltalocal) + except: + conduit.info(2, "Error rebuilding 
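# A sketch, not part of the patch, of what verifyChecksum()/verifyDelta() above
# boil down to, written against hashlib directly (assumed available) instead of
# yum.misc.checksum(); "sha" is the old repodata spelling of sha1.
import hashlib

def file_checksum(filename, checksum_type='sha', chunk=65536):
    if checksum_type == 'sha':
        checksum_type = 'sha1'
    digest = hashlib.new(checksum_type)
    fo = open(filename, 'rb')
    try:
        block = fo.read(chunk)
        while block:
            digest.update(block)
            block = fo.read(chunk)
    finally:
        fo.close()
    return digest.hexdigest()

def delta_is_intact(local_path, expected_type, expected_sum):
    # A checksum mismatch simply means the deltarpm gets downloaded again.
    try:
        return file_checksum(local_path, expected_type) == expected_sum
    except (IOError, OSError):
        return False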
rpm from %s! Will download full package." % os.path.basename(deltalocal)) + try: + os.unlink(po.localpath) + except: + pass + else: + # Set package type to local, so yum doesn't try to download it later + # po.pkgtype = "local" # If we set this, we can't auto-install public keys + # and yum is smart enough to detect the full rpm and + # not redownload it. + + # Check to see whether or not we should keep the drpms + # FIXME: Is there any way to see whether or not a Boolean option was not set? + if conduit.confBool('main', 'neverkeepdeltas'): + delete = True + elif conduit.confBool('main', 'keepdeltas'): + delete = False + elif conduit.getConf().keepcache != 0: + delete = False + else: + delete = True + + if delete: + try: + os.unlink(deltalocal) + except: + pass + + return errors + + diff --git a/yum-presto/shared/prestoLog.py b/yum-presto/shared/prestoLog.py new file mode 100644 index 0000000..3c0c1e6 --- /dev/null +++ b/yum-presto/shared/prestoLog.py @@ -0,0 +1,71 @@ +# author: Jonathan Dieter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2005 Duke University + +def log(conduit, LOG_FILE, rpm_size, drpm_size): + # Open log file for reading + try: + log_file = open(LOG_FILE, "r") + log_exists = True + except: + conduit.info(5, "Info: %s doesn't exist. Will create." % LOG_FILE) + log_exists = False + + # Log file doesn't exist, create + if not log_exists: + try: + log_file = open(LOG_FILE, "w") + log_file.write("Download Size (without DRPM),Download Size (with DRPM),Percentage Savings,Total Percentage Savings\n") + log_file.close() + log_exists = True + except: + conduit.info(2, "Warning: Unable to write to %s" % LOG_FILE) + if log_exists: + try: + log_file = open(LOG_FILE, "r") + except: + conduit.info(2, "Warning: Unable to open %s for reading." % LOG_FILE) + log_exists = False + + # Cycle through items already in log so we can come up with total savings + if log_exists: + total_rpm_size = 0 + total_drpm_size = 0 + + # Get rid of header line + log_file.readline() + + data = log_file.readline() + while data != "": + fc = data.find(",") + sc = data.find(",", fc + 1) + total_rpm_size += int(data[:fc]) + total_drpm_size += int(data[fc + 1:sc]) + data = log_file.readline() + log_file.close() + total_rpm_size += rpm_size + total_drpm_size += drpm_size + + try: + log_file = open(LOG_FILE, "a") + except: + conduit.info(2, "Warning: Unable to open %s for writing." 
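# A condensed sketch, not part of the patch, of the keep-or-delete decision
# made above once a deltarpm has been applied.  The precedence matches the
# code: presto.conf's neverkeepdeltas wins, then keepdeltas, then yum's own
# keepcache setting.
def should_delete_delta(neverkeepdeltas, keepdeltas, keepcache):
    if neverkeepdeltas:
        return True
    if keepdeltas:
        return False
    if keepcache:        # yum is keeping packages anyway, keep the deltas too
        return False
    return True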
% LOG_FILE) + log_exists = False + + # Write data to log + if log_exists: + log_file.write("%i,%i,%i,%i\n" % (rpm_size, drpm_size, 100 - ((drpm_size * 100) / rpm_size), 100 - ((total_drpm_size * 100) / total_rpm_size))) + log_file.close() diff --git a/yum-presto/shared/prestoRepo.py b/yum-presto/shared/prestoRepo.py new file mode 100644 index 0000000..582dc2f --- /dev/null +++ b/yum-presto/shared/prestoRepo.py @@ -0,0 +1,612 @@ +# author: Jonathan Dieter +# +# mostly taken from yumRepo.py (part of yum) with a few minor modifications +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2005 Duke University + +import os +import re +import time +import types +import urlparse + +from yum import Errors +from urlgrabber.grabber import URLGrabber +import urlgrabber.mirror +from urlgrabber.grabber import URLGrabError +from yum.repos import Repository +from yum import repoMDObject +from yum import parser +from yum import config +from yum import misc + +class PrestoRepository(Repository): + """ + This is an actual repository object + + Configuration attributes are pulled in from config.RepoConf. 
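# A sketch, not part of the patch, of reading the CSV log written above.  Each
# line is "rpm_bytes,drpm_bytes,percent_saved,cumulative_percent_saved"; this
# version walks the fields with split(',') where the code above uses find(','),
# but the cumulative total comes out the same.  The path is whatever LOG_FILE
# the plugin was configured with.
def cumulative_savings(log_path):
    total_rpm = 0
    total_drpm = 0
    fo = open(log_path)
    try:
        fo.readline()                     # skip the header line
        for line in fo:
            fields = line.strip().split(',')
            if len(fields) < 2:
                continue
            total_rpm += int(fields[0])
            total_drpm += int(fields[1])
    finally:
        fo.close()
    if total_rpm == 0:
        return 0
    return 100 - (total_drpm * 100) // total_rpm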
+ """ + + def __init__(self, repo, conduit): + Repository.__init__(self, repo.id) + + # If there's a specific deltarpm url, use that + is_different = False + if conduit.confString(repo.id, 'deltaurl'): + self.baseurl = [conduit.confString(repo.id, 'deltaurl')] + is_different = True + conduit.info(5, 'Manual url set from presto.conf: %s' % self.baseurl) + elif repo.deltaurl != []: + self.baseurl = repo.deltaurl + is_different = True + conduit.info(5, 'Manual url set from repository conf file: %s' % self.baseurl) + else: + self.baseurl = repo.baseurl + + # If there's a specific mirrorlist, use that + if conduit.confString(repo.id, 'deltamirrorlist'): + self.mirrorlist = conduit.confString(repo.id, 'deltamirrorlist') + self.baseurl = None + is_different = True + conduit.info(5, 'Manual mirrorlist set from presto.conf: %s' % self.mirrorlist) + elif repo.deltamirrorlist != None: + self.mirrorlist = repo.deltamirrorlist + self.baseurl = None + is_different = True + conduit.info(5, 'Manual mirrorlist set from repository conf file: %s' % self.mirrorlist) + else: + if self.baseurl == repo.baseurl: + self.mirrorlist = repo.mirrorlist + else: + self.mirrorlist = None + + self.conduit = conduit + self.urls = [] + self.is_different = is_different + if is_different: + self.repoMDFile = 'repodata/prestomd.xml' + self.metadata_cookie_fn = 'presto_cachecookie' + else: + self.repoMDFile = 'repodata/repomd.xml' + self.metadata_cookie_fn = 'cachecookie' + self.repoXML = None + self.cache = 0 + self.mirrorlistparsed = 0 + self.yumvar = {} # empty dict of yumvariables for $string replacement + self._proxy_dict = {} + self.http_headers = {} + + # throw in some stubs for things that will be set by the config class + self.basecachedir = "" + self.cachedir = "" + self.pkgdir = "" + self.hdrdir = "" + self.enabled = True + + # holder for stuff we've grabbed + self.retrieved = { 'deltas':0 } + + # callbacks + self.keepalive = repo.keepalive + self.bandwidth = repo.bandwidth + self.retries = repo.retries + self.throttle = repo.throttle + self.proxy = repo.proxy + self.proxy_username = repo.proxy_username + self.proxy_password = repo.proxy_password + self.timeout = repo.timeout + self.http_caching = repo.http_caching + self.failovermethod = repo.failovermethod + self.metadata_expire = repo.metadata_expire + self.basecachedir = repo.basecachedir + self.callback = repo.callback + self.failure_obj = repo.failure_obj + self.mirror_failure_obj = repo.mirror_failure_obj + self.interrupt_callback = repo.interrupt_callback + self.drpm_list = {} + self.parent = repo + repo.p_repo = self + + + def __getProxyDict(self): + self.doProxyDict() + if self._proxy_dict: + return self._proxy_dict + return None + + # consistent access to how proxy information should look (and ensuring + # that it's actually determined for the repo) + proxy_dict = property(__getProxyDict) + + def ready(self): + """Returns true if this repository is setup and ready for use.""" + return self.repoXML is not None + + def __cmp__(self, other): + if self.id > other.id: + return 1 + elif self.id < other.id: + return -1 + else: + return 0 + + def __str__(self): + return self.id + + def _checksum(self, sumtype, file, CHUNK=2**16): + """takes filename, hand back Checksum of it + sumtype = md5 or sha + filename = /path/to/file + CHUNK=65536 by default""" + try: + return misc.checksum(sumtype, file, CHUNK) + except (Errors.MiscError, EnvironmentError), e: + raise Errors.RepoError, 'Error opening file for checksum: %s' % e + + def dump(self): + output = '[%s]\n' % 
self.id + vars = ['id', 'bandwidth', 'enabled', + 'keepalive', 'proxy', + 'proxy_password', 'proxy_username', + 'retries', 'throttle', 'timeout', 'mirrorlist', + 'cachedir' ] + vars.sort() + for attr in vars: + output = output + '%s = %s\n' % (attr, getattr(self, attr)) + output = output + 'baseurl =' + for url in self.urls: + output = output + ' %s\n' % url + + return output + + def check(self): + """self-check the repo information - if we don't have enough to move + on then raise a repo error""" + if len(self.urls) < 1: + raise Errors.RepoError, \ + 'Cannot find a valid deltaurl for repo: %s' % self.id + + def doProxyDict(self): + if self._proxy_dict: + return + + self._proxy_dict = {} # zap it + proxy_string = None + if self.proxy not in [None, '_none_']: + proxy_string = '%s' % self.proxy + if self.proxy_username is not None: + proxy_parsed = urlparse.urlsplit(self.proxy, allow_fragments=0) + proxy_proto = proxy_parsed[0] + proxy_host = proxy_parsed[1] + proxy_rest = proxy_parsed[2] + '?' + proxy_parsed[3] + proxy_string = '%s://%s@%s%s' % (proxy_proto, + self.proxy_username, proxy_host, proxy_rest) + + if self.proxy_password is not None: + proxy_string = '%s://%s:%s@%s%s' % (proxy_proto, + self.proxy_username, self.proxy_password, + proxy_host, proxy_rest) + + if proxy_string is not None: + self._proxy_dict['http'] = proxy_string + self._proxy_dict['https'] = proxy_string + self._proxy_dict['ftp'] = proxy_string + + def __headersListFromDict(self): + """Convert our dict of headers to a list of 2-tuples for urlgrabber.""" + headers = [] + + keys = self.http_headers.keys() + for key in keys: + headers.append((key, self.http_headers[key])) + + return headers + + def setupGrab(self): + """sets up the grabber functions with the already stocked in urls for + the mirror groups""" + + if self.failovermethod == 'roundrobin': + mgclass = urlgrabber.mirror.MGRandomOrder + else: + mgclass = urlgrabber.mirror.MirrorGroup + + headers = tuple(self.__headersListFromDict()) + + self.grabfunc = URLGrabber(keepalive=self.keepalive, + bandwidth=self.bandwidth, + retry=self.retries, + throttle=self.throttle, + progress_obj=self.callback, + proxies = self.proxy_dict, + failure_callback=self.failure_obj, + interrupt_callback=self.interrupt_callback, + timeout=self.timeout, + http_headers=headers, + reget='simple') + + self.grab = mgclass(self.grabfunc, self.urls, + failure_callback=self.mirror_failure_obj) + + def dirSetup(self): + """make the necessary dirs, if possible, raise on failure""" + + cachedir = os.path.join(self.parent.basecachedir, self.id) + deltasdir = os.path.join(cachedir, 'deltas') + self.parent.setAttribute('deltasdir', deltasdir) + self.setAttribute('cachedir', cachedir) + + cookie = cachedir + '/' + self.metadata_cookie_fn + self.setAttribute('metadata_cookie', cookie) + + for dir in [cachedir, self.parent.deltasdir]: + if self.cache == 0: + if os.path.exists(dir) and os.path.isdir(dir): + continue + else: + try: + os.makedirs(dir, mode=0755) + except OSError, e: + raise Errors.RepoError, \ + "Error making cache directory: %s error was: %s" % (dir, e) + else: + if not os.path.exists(dir): + raise Errors.RepoError, \ + "Cannot access repository dir %s" % dir + + def baseurlSetup(self): + """go through the baseurls and mirrorlists and populate self.urls + with valid ones, run self.check() at the end to make sure it worked""" + + goodurls = [] + if self.mirrorlist and not self.mirrorlistparsed: + mirrorurls = getMirrorList(self.mirrorlist, self.proxy_dict) + self.mirrorlistparsed = 1 + for 
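# A sketch, not part of the patch, of the proxy url assembly in doProxyDict()
# above: username and password, when configured, are spliced back into the
# proxy url before it is handed to urlgrabber.  The host and credentials below
# are illustrative only.
import urlparse

def build_proxy_url(proxy, username=None, password=None):
    if proxy in (None, '_none_'):
        return None
    if username is None:
        return '%s' % proxy
    scheme, host, path, query, frag = urlparse.urlsplit(proxy, allow_fragments=0)
    rest = path + '?' + query
    if password is None:
        return '%s://%s@%s%s' % (scheme, username, host, rest)
    return '%s://%s:%s@%s%s' % (scheme, username, password, host, rest)

print(build_proxy_url('http://proxy.example.com:3128/', 'user', 'secret'))
# -> http://user:secret@proxy.example.com:3128/?   (the trailing "?" mirrors
#    the original's path + '?' + query handling)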
url in mirrorurls: + url = parser.varReplace(url, self.yumvar) + self.baseurl.append(url) + + for url in self.baseurl: + url = parser.varReplace(url, self.yumvar) + (s,b,p,q,f,o) = urlparse.urlparse(url) + if s not in ['http', 'ftp', 'file', 'https']: + print 'not using ftp, http[s], or file for repos, skipping - %s' % (url) + continue + else: + goodurls.append(url) + + self.setAttribute('urls', goodurls) + self.check() + self.setupGrab() # update the grabber for the urls + + def __get(self, url=None, relative=None, local=None, start=None, end=None, + copy_local=0, checkfunc=None, text=None, reget='simple', cache=True): + """retrieve file from the mirrorgroup for the repo + relative to local, optionally get range from + start to end, also optionally retrieve from a specific baseurl""" + + # if local or relative is None: raise an exception b/c that shouldn't happen + # if url is not None - then do a grab from the complete url - not through + # the mirror, raise errors as need be + # if url is None do a grab via the mirror group/grab for the repo + # return the path to the local file + + # Turn our dict into a list of 2-tuples + headers = self.__headersListFromDict() + + # We will always prefer to send no-cache. + if not (cache or self.http_headers.has_key('Pragma')): + headers.append(('Pragma', 'no-cache')) + + headers = tuple(headers) + + if local is None or relative is None: + raise Errors.RepoError, \ + "get request for Repo %s, gave no source or dest" % self.id + + if self.cache == 1: + if os.path.exists(local): # FIXME - we should figure out a way + return local # to run the checkfunc from here + + else: # ain't there - raise + raise Errors.RepoError, \ + "Caching enabled but no local cache of %s from %s" % (local, + self) + if url is not None: + ug = URLGrabber(keepalive = self.keepalive, + bandwidth = self.bandwidth, + retry = self.retries, + throttle = self.throttle, + progress_obj = self.callback, + copy_local = copy_local, + reget = reget, + proxies = self.proxy_dict, + failure_callback = self.failure_obj, + interrupt_callback=self.interrupt_callback, + timeout=self.timeout, + checkfunc=checkfunc, + http_headers=headers, + ) + + remote = url + '/' + relative + + try: + result = ug.urlgrab(remote, local, + text=text, + range=(start, end), + ) + except URLGrabError, e: + raise Errors.RepoError, \ + "failed to retrieve %s from %s\nerror was %s" % (relative, self.id, e) + + else: + try: + result = self.grab.urlgrab(relative, local, + text = text, + range = (start, end), + copy_local=copy_local, + reget = reget, + checkfunc=checkfunc, + http_headers=headers, + ) + except URLGrabError, e: + raise Errors.RepoError, "failure: %s from %s: %s" % (relative, self.id, e) + + return result + + def getPackage(self, package, checkfunc = None, text = None, cache = True): + remote = package.returnSimple('relativepath') + local = package.localPkg() + basepath = package.returnSimple('basepath') + + return self.__get(url=basepath, + relative=remote, + local=local, + checkfunc=checkfunc, + text=text, + cache=cache + ) + + def metadataCurrent(self): + """Check if there is a metadata_cookie and check its age. If the + age of the cookie is less than metadata_expire time then return true + else return False""" + + val = False + if os.path.exists(self.metadata_cookie): + cookie_info = os.stat(self.metadata_cookie) + if cookie_info[8] + self.metadata_expire > time.time(): + val = True + # WE ARE FROM THE FUTURE!!!! 
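# A sketch, not part of the patch, of the metadataCurrent() freshness test that
# begins above: the cached metadata cookie is considered fresh while its mtime
# plus metadata_expire is still ahead of the clock.  (The "WE ARE FROM THE
# FUTURE" branch only changes anything for a negative expire value, so it is
# omitted here.)
import os
import time

def metadata_current(cookie_path, metadata_expire):
    if not os.path.exists(cookie_path):
        return False
    mtime = os.stat(cookie_path).st_mtime
    return mtime + metadata_expire > time.time()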
+ elif cookie_info[8] > time.time(): + val = False + return val + + def setMetadataCookie(self): + """if possible, set touch the metadata_cookie file""" + + check = self.metadata_cookie + if not os.path.exists(self.metadata_cookie): + check = self.cachedir + + if os.access(check, os.W_OK): + fo = open(self.metadata_cookie, 'w+') + fo.close() + del fo + + + def setup(self, cache): + try: + self.cache = cache + self.baseurlSetup() + self.dirSetup() + except Errors.RepoError, e: + raise + + try: + self._loadRepoXML(text=self) + except Errors.RepoError, e: + raise Errors.RepoError, ('Cannot open/read %s file for repository: %s' % (self.repoMDFile, self)) + + + def _loadRepoXML(self, text=None): + """retrieve/check/read in repomd.xml from the repository""" + + remote = self.repoMDFile + if self.is_different: + local = self.cachedir + '/prestomd.xml' + else: + local = self.cachedir + '/repomd.xml' + + if self.repoXML is not None: + return + + if self.cache or self.metadataCurrent(): + if not os.path.exists(local): + raise Errors.RepoError, 'Cannot find %s file for %s' % (self.repoMDFile, self) + else: + result = local + else: + checkfunc = (self._checkRepoXML, (), {}) + try: + result = self.__get(relative=remote, + local=local, + copy_local=1, + text=text, + reget=None, + checkfunc=checkfunc, + cache=self.http_caching == 'all') + + + except URLGrabError, e: + raise Errors.RepoError, 'Error downloading file %s: %s' % (local, e) + # if we have a 'fresh' repomd.xml then update the cookie + self.setMetadataCookie() + + try: + self.repoXML = repoMDObject.RepoMD(self.id, result) + except Errors.RepoMDError, e: + raise Errors.RepoError, 'Error importing %s from %s: %s' % (self.repoMDFile, self, e) + + def _checkRepoXML(self, fo): + if type(fo) is types.InstanceType: + filepath = fo.filename + else: + filepath = fo + + try: + repoMDObject.RepoMD(self.id, filepath) + except Errors.RepoMDError, e: + raise URLGrabError(-1, 'Error importing %s for %s: %s' % (self.repoMDFile, self, e)) + + + def checkMD(self, fn, mdtype): + """check the metadata type against its checksum""" + + thisdata = self.repoXML.getData(mdtype) + + (r_ctype, r_csum) = thisdata.checksum # get the remote checksum + + if type(fn) == types.InstanceType: # this is an urlgrabber check + file = fn.filename + else: + file = fn + + try: + l_csum = self._checksum(r_ctype, file) # get the local checksum + except Errors.RepoError, e: + raise URLGrabError(-3, 'Error performing checksum') + + if l_csum == r_csum: + return 1 + else: + raise URLGrabError(-1, 'Metadata file does not match checksum') + + + + def retrieveMD(self, mdtype): + """base function to retrieve metadata files from the remote url + returns the path to the local metadata file of a 'mdtype' + mdtype must be 'deltas'.""" + try: + thisdata = self.repoXML.getData(mdtype) + except Errors.RepoMDError: + self.enabled = False + self.conduit.info(5, "No drpms available for %s" % self.id) + return + + (r_base, remote) = thisdata.location + fname = os.path.basename(remote) + local = self.cachedir + '/' + fname + + if self.retrieved.has_key(mdtype): + if self.retrieved[mdtype]: # got it, move along + return local + + if self.cache == 1: + if os.path.exists(local): + try: + self.checkMD(local, mdtype) + except URLGrabError, e: + raise Errors.RepoError, \ + "Caching enabled and local cache: %s does not match checksum" % local + else: + return local + + else: # ain't there - raise + raise Errors.RepoError, \ + "Caching enabled but no local cache of %s from %s" % (local, + self) + + if 
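# A sketch, not part of the patch, of the checkMD() comparison above, reusing
# yum's misc.checksum() (the same call _checksum() wraps): a freshly fetched
# metadata file must hash to the (type, value) pair recorded for it in
# prestomd.xml/repomd.xml, otherwise it is rejected.
from yum import misc
from yum import Errors

def metadata_matches(local_file, remote_ctype, remote_csum):
    try:
        local_csum = misc.checksum(remote_ctype, local_file)
    except Errors.MiscError:
        return False
    return local_csum == remote_csum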
os.path.exists(local): + try: + self.checkMD(local, mdtype) + except URLGrabError, e: + pass + else: + self.retrieved[mdtype] = 1 + return local # it's the same return the local one + + try: + checkfunc = (self.checkMD, (mdtype,), {}) + local = self.__get(relative=remote, local=local, copy_local=1, + checkfunc=checkfunc, reget=None, + cache=self.http_caching == 'all') + except URLGrabError, e: + raise Errors.RepoError, \ + "Could not retrieve %s matching remote checksum from %s" % (local, self) + else: + self.retrieved[mdtype] = 1 + return local + + + def getPrestoXML(self): + """this gets you the path to the primary.xml file, retrieving it if we + need a new one""" + + return self.retrieveMD('deltas') + + def setCallback(self, callback): + self.callback = callback + self.setupGrab() + + def setFailureObj(self, failure_obj): + self.failure_obj = failure_obj + self.setupGrab() + + def setMirrorFailureObj(self, failure_obj): + self.mirror_failure_obj = failure_obj + self.setupGrab() + + def setInterruptCallback(self, callback): + self.interrupt_callback = callback + self.setupGrab() + +def getMirrorList(mirrorlist, pdict = None): + """retrieve an up2date-style mirrorlist file from a url, + we also s/$ARCH/$BASEARCH/ and move along + returns a list of the urls from that file""" + + returnlist = [] + if hasattr(urlgrabber.grabber, 'urlopen'): + urlresolver = urlgrabber.grabber + else: + import urllib + urlresolver = urllib + + scheme = urlparse.urlparse(mirrorlist)[0] + if scheme == '': + url = 'file://' + mirrorlist + else: + url = mirrorlist + + try: + fo = urlresolver.urlopen(url, proxies=pdict) + except urlgrabber.grabber.URLGrabError, e: + print "Could not retrieve mirrorlist %s error was\n%s" % (url, e) + fo = None + + if fo is not None: + content = fo.readlines() + for line in content: + if re.match('^\s*\#.*', line) or re.match('^\s*$', line): + continue + mirror = re.sub('\n$', '', line) # no more trailing \n's + (mirror, count) = re.subn('\$ARCH', '$BASEARCH', mirror) + returnlist.append(mirror) + + return returnlist + diff --git a/yum-presto/shared/prestoTransaction.py b/yum-presto/shared/prestoTransaction.py new file mode 100644 index 0000000..3d387a4 --- /dev/null +++ b/yum-presto/shared/prestoTransaction.py @@ -0,0 +1,97 @@ +# author: Jonathan Dieter +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
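# A sketch, not part of the patch, of the line handling in getMirrorList()
# above: comment and blank lines are skipped, trailing newlines dropped, and
# $ARCH rewritten to $BASEARCH so the later yumvar substitution picks the
# right value.  The mirror url is illustrative.
import re

def parse_mirrorlist(lines):
    mirrors = []
    for line in lines:
        if re.match(r'^\s*\#.*', line) or re.match(r'^\s*$', line):
            continue
        mirror = re.sub(r'\n$', '', line)
        mirror = re.subn(r'\$ARCH', '$BASEARCH', mirror)[0]
        mirrors.append(mirror)
    return mirrors

print(parse_mirrorlist(['# a comment\n', '\n',
                        'http://mirror.example.com/fedora/$ARCH/\n']))
# -> ['http://mirror.example.com/fedora/$BASEARCH/']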
+# Copyright 2005 Duke University + +import os +import deltarpm + +def find_available_drpms(conduit, newpkg): + """Find any applicable drpms for newpkg + newpkg is a TransactionMember""" + + rpmdb = conduit.getRpmDB() + + is_local = False + + # Set p_repo to be packages delta repository or set to False if + # there is no delta repository + try: + p_repo = newpkg.po.repo.p_repo + drpm_enabled = p_repo.enabled + + po = newpkg.po + if hasattr(po, 'pkgtype') and po.pkgtype == 'local': + is_local = True + else: + local = po.localPkg() + if os.path.exists(local): + cursize = os.stat(local)[6] + totsize = long(po.size) + if not po.verifyLocalPkg(): + if cursize >= totsize: # otherwise keep it around for regetting + os.unlink(local) + else: + conduit.info(5, "using local copy of %s" % po) + is_local = True + + except: + conduit.info(5, "No Presto repository information for %s.%s %i:%s-%s" % (newpkg.name, newpkg.arch, int(newpkg.epoch), newpkg.version, newpkg.release)) + drpm_enabled = False + is_local = False + + chosen_drpm = None + + # First part of key when matching drpms + key1 = "%s*%s*%i*%s*%s" % (newpkg.name, newpkg.arch, int(newpkg.epoch), newpkg.version, newpkg.release) + + # Find any installed packages that match the ones we want to download + installed = rpmdb.searchNevra(newpkg.name, None, None, None, newpkg.arch) + + if installed == []: + is_installed = False + else: + is_installed = True + + + if is_installed and drpm_enabled and not is_local: + for oldpkg in installed: + # Generate second part of key for matching drpms, then full key + key2 = "%s*%s*%i*%s*%s" % (oldpkg.name, oldpkg.arch, int(oldpkg.epoch), oldpkg.version, oldpkg.release) + key = "%s!!%s" % (key1, key2) + + # Check whether we have a matching drpm + if p_repo.deltalist.has_key(key): + # Check whether or not we already have a matching drpm, then choose smallest of the two if we do + if chosen_drpm == None or p_repo.deltalist[key]['size'] < chosen_drpm['size']: + + # Get sequence code for drpm + sequence = p_repo.deltalist[key]['sequence'] + if int(oldpkg.epoch) == 0: + seq = "%s-%s-%s-%s" % (oldpkg.name, oldpkg.version, oldpkg.release, sequence) + else: + seq = "%s-%i:%s-%s-%s" % (oldpkg.name, int(oldpkg.epoch), oldpkg.version, oldpkg.release, sequence) + drpm = deltarpm.DeltaRpmWrapper(conduit) + + # Attempt to apply sequence code for drpm. If this fails, drpm will not apply cleanly, so + # don't even try to download it. + try: + drpm.verifySequence(seq) + chosen_drpm = p_repo.deltalist[key] + chosen_drpm['baseurl'] = p_repo.baseurl[0] + except: + conduit.info(5, "Verification of %s failed" % seq) + + return (chosen_drpm, installed, is_local, drpm_enabled) diff --git a/yum-presto/shared/prestomdparser.py b/yum-presto/shared/prestomdparser.py new file mode 100644 index 0000000..9dbcc1d --- /dev/null +++ b/yum-presto/shared/prestomdparser.py @@ -0,0 +1,166 @@ +# author: Jonathan Dieter +# +# mostly taken from mdparser.py (part of yum) with a few minor modifications +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Library General Public License for more details. 
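# A sketch, not part of the patch, of the delta lookup find_available_drpms()
# above performs: candidate keys are "<new nevra>!!<installed nevra>" with the
# fields joined by '*', and when several installed versions could seed a delta
# the smallest one wins.  deltalist stands in for p_repo.deltalist and the
# package tuples below are illustrative.
def smallest_delta(deltalist, new_nevra, installed_nevras):
    key1 = "%s*%s*%i*%s*%s" % new_nevra
    best = None
    for old_nevra in installed_nevras:
        key = "%s!!%s" % (key1, "%s*%s*%i*%s*%s" % old_nevra)
        drpm = deltalist.get(key)
        if drpm is not None and (best is None or drpm['size'] < best['size']):
            best = drpm
    return best

deltas = {'foo*i386*0*1.1*1.fc7!!foo*i386*0*1.0*1.fc7': {'size': 120000}}
print(smallest_delta(deltas, ('foo', 'i386', 0, '1.1', '1.fc7'),
                     [('foo', 'i386', 0, '1.0', '1.fc7')]))
# -> {'size': 120000}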
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# Copyright 2005 Duke University +# Portions copyright 2007 Jonathan Dieter + +import gzip +from cElementTree import iterparse + +from cStringIO import StringIO + +#TODO: document everything here + +class PrestoMDParser: + + def __init__(self, filename): + + # Set up mapping of meta types to handler classes + handlers = { + '{http://linux.duke.edu/metadata/common}metadata': DeltasEntry, + } + + self.total = None + self.count = 0 + self._handlercls = None + + # Read in type, set package node handler and get total number of + # packages + if filename[-3:] == '.gz': fh = gzip.open(filename, 'r') + else: fh = open(filename, 'r') + parser = iterparse(fh, events=('start', 'end')) + self.reader = parser.__iter__() + event, elem = self.reader.next() + self._handlercls = handlers.get(elem.tag, None) + if not self._handlercls: + raise ValueError('Unknown repodata type "%s" in %s' % ( + elem.tag, filename)) + + def getDeltaList(self): + for event, elem in self.reader: + if event == 'end' and elem.tag == '{http://linux.duke.edu/metadata/common}metadata': + return self._handlercls(elem) + + +class BaseEntry: + def __init__(self, elem): + self._p = {} + + def __getitem__(self, k): + return self._p[k] + + def keys(self): + return self._p.keys() + + def values(self): + return self._p.values() + + def has_key(self, k): + return self._p.has_key(k) + + def __str__(self): + out = StringIO() + keys = self.keys() + keys.sort() + for k in keys: + line = u'%s=%s\n' % (k, self[k]) + out.write(line.encode('utf8')) + return out.getvalue() + + def _bn(self, qn): + if qn.find('}') == -1: return qn + return qn.split('}')[1] + + def _prefixprops(self, elem, prefix): + ret = {} + for key in elem.attrib.keys(): + ret[prefix + '_' + self._bn(key)] = elem.attrib[key] + return ret + +class DeltasEntry(BaseEntry): + def __init__(self, deltas): + BaseEntry.__init__(self, deltas) + # Avoid excess typing :) + p = self._p + + for elem in deltas: + temp = {} + key1 = "" + key2 = "" + for child in elem: + name = self._bn(child.tag) + if name in ('name', 'arch'): + temp[name] = child.text + + elif name == 'version': + attrib = child.attrib + try: + attrib['epoch'] = int(attrib['epoch']) + except: + attrib['epoch'] = 0 + key1 = "%s*%s*%i*%s*%s" % (temp['name'], temp['arch'], attrib['epoch'], attrib['ver'], attrib['rel']) + + elif name == 'deltas': + for oldrpm in child: + temp2 = {} + value = {} + key = None + for oldrpm_child in oldrpm: + name = self._bn(oldrpm_child.tag) + if name in ('name', 'arch'): + temp2[name] = oldrpm_child.text + + elif name == 'version': + ch_attrib = oldrpm_child.attrib + try: + ch_attrib['epoch'] = int(ch_attrib['epoch']) + except: + ch_attrib['epoch'] = attrib['epoch'] + try: + ch_attrib['ver'] = ch_attrib['ver'] + except: + ch_attrib['ver'] = attrib['ver'] + if not temp2.has_key('name'): + temp2['name'] = temp['name'] + if not temp2.has_key('arch'): + temp2['arch'] = temp['arch'] + key2 = "%s*%s*%i*%s*%s" % (temp2['name'], temp2['arch'], ch_attrib['epoch'], ch_attrib['ver'], ch_attrib['rel']) + key = "%s!!%s" % (key1, key2) + p[key] = {} + + if name in ('sequence', 'drpm_filename', 'size'): + p[key][name] = oldrpm_child.text + + if name == "checksum": + p[key][name] = oldrpm_child.text + p[key]["%s_type" % name] = oldrpm_child.attrib['type'] + deltas.clear() + +def test(): + import sys + + 
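# A sketch, not part of the patch, of the dictionary shape getDeltaList() above
# ends up producing: one entry per (new package, old package) pair, keyed the
# same way prestoTransaction builds its lookup keys, holding the fields
# DeltasEntry copies out of prestomd.xml.  All values are illustrative; note
# that 'size' is kept as the text taken from the XML.
deltalist = {
    'foo*i386*0*1.1*1.fc7!!foo*i386*0*1.0*1.fc7': {
        'drpm_filename': 'drpms/foo-1.0-1.fc7_1.1-1.fc7.i386.drpm',
        'size': '120000',
        'sequence': '<applydeltarpm sequence id>',
        'checksum': '<sha1 of the deltarpm>',
        'checksum_type': 'sha',
    },
}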
+    parser = PrestoMDParser(sys.argv[1])
+
+    deltalist = parser.getDeltaList()
+
+    print '-' * 40
+    print deltalist
+
+    print 'read: %s deltarpms ' % (len(deltalist.keys()))
+
+if __name__ == '__main__':
+    test()
-- 
cgit